
MXNet Tutorial

This follows the official tutorial, which is decent; I walk through it with my own examples.

1: Import the modules:

import mxnet as mx
import numpy as np
import cv2
import matplotlib.pyplot as plt
import logging

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

2: Create the network:

# Variables are place holders for input arrays. We give each variable a unique name.
data = mx.symbol.Variable('data')

# The input is fed to a fully connected layer that computes Y = WX + b.
# This is the main computation module in the network.
# Each layer also needs a unique name. We'll talk more about naming in the next section.
fc1  = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
# Activation layers apply a non-linear function on the previous layer's output.
# Here we use Rectified Linear Unit (ReLU) that computes Y = max(X, 0).
act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")

fc2  = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")

fc3  = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
# Finally we have a loss layer that compares the network's output with label and generates gradient signals.
mlp  = mx.symbol.SoftmaxOutput(data=fc3, name='softmax')
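
Layer naming matters because MXNet derives the parameter names from it. You can inspect which arguments (inputs and weights) the composed symbol expects:

# List the symbol's arguments. For the network above this should print
# something like ['data', 'fc1_weight', 'fc1_bias', ..., 'softmax_label'].
print mlp.list_arguments()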

3: Visualize the network:

mx.viz.plot_network(mlp)

This does not display inside Spyder, however, so I use the following instead, which renders the graph to an image file in the working directory:

mx.viz.plot_network(mlp).view()  
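
If you only want the file and do not want a viewer to open, plot_network returns a graphviz Digraph, so a call like the following should also work (the filename is just an example):

# Render the graph to disk instead of opening a viewer.
# graphviz writes the dot source plus a rendered file (pdf by default).
graph = mx.viz.plot_network(mlp)
graph.render('mlp_graph')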

4: Load the data:

Since the official MXNet tutorial only tests on the MNIST data:

And since the data is hard to download from within the script, create a data folder under the example directory, create an mldata folder inside it, and place the original_mnist.mat file downloaded from GitHub there.
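
For reference, here is a minimal sketch of what the example's data.get_mnist() presumably does, assuming it wraps sklearn's fetch_mldata, which checks <data_home>/mldata/ for a cached .mat file before trying to download (hence the folder layout above). The data_home path is illustrative:

# Hypothetical sketch of data.get_mnist(); the real helper lives in
# example/autoencoder/data.py.
from sklearn.datasets import fetch_mldata
import numpy as np

def get_mnist(data_home='/home/hu/mxnet-master/example/data'):
    mnist = fetch_mldata('MNIST original', data_home=data_home)
    p = np.random.permutation(mnist.data.shape[0])  # shuffle the samples
    return mnist.data[p].astype(np.float32), mnist.target[p]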

from sklearn.datasets import fetch_mldata
import os,sys
curr_path = sys.path[0]
sys.path = [os.path.join("/home/hu/mxnet-master/example/autoencoder")] + sys.path
import data
X,Y=data.get_mnist()

for i in range(10):
    plt.subplot(1,10,i+1)
    plt.imshow(X[i].reshape((28,28)), cmap='Greys_r')
    plt.axis('off')
plt.show()

X = X.astype(np.float32)/255
X_train = X[:60000]
X_test = X[60000:]
Y_train = Y[:60000]
Y_test = Y[60000:]

5: Set up the data iterators:

You can also write this data iterator yourself (examples are easy to find online); MXNet training really just iterates over the data batch by batch. A hand-rolled iterator sketch follows the code below.

batch_size = 100
train_iter = mx.io.NDArrayIter(X_train, Y_train, batch_size=batch_size)
test_iter = mx.io.NDArrayIter(X_test, Y_test, batch_size=batch_size)
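
As a sketch of what a hand-written iterator can look like, against the classic mx.io.DataIter interface (the 'softmax_label' name matches the SoftmaxOutput layer above; everything else here is an assumption, not library code):

# Minimal custom iterator sketch: yields fixed-size batches and drops the tail.
class SimpleIter(mx.io.DataIter):
    def __init__(self, X, Y, batch_size):
        super(SimpleIter, self).__init__()
        self.X, self.Y, self.batch_size = X, Y, batch_size
        self.cursor = 0

    @property
    def provide_data(self):
        return [('data', (self.batch_size,) + self.X.shape[1:])]

    @property
    def provide_label(self):
        return [('softmax_label', (self.batch_size,))]

    def reset(self):
        self.cursor = 0

    def next(self):
        if self.cursor + self.batch_size > self.X.shape[0]:
            raise StopIteration
        i, j = self.cursor, self.cursor + self.batch_size
        self.cursor = j
        return mx.io.DataBatch(data=[mx.nd.array(self.X[i:j])],
                               label=[mx.nd.array(self.Y[i:j])])

# Hypothetical drop-in usage: train_iter = SimpleIter(X_train, Y_train, batch_size)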

6: Training:

I have read online that you may not want to train this way, because it leaves you fewer things you can tweak and debug:

model = mx.model.FeedForward(
    ctx = mx.gpu(0),      # Run on GPU 0
    symbol = mlp,         # Use the network we just defined
    num_epoch = 10,       # Train for 10 epochs
    learning_rate = 0.1,  # Learning rate
    momentum = 0.9,       # Momentum for SGD with momentum
    wd = 0.00001)         # Weight decay for regularization
model.fit(
    X=train_iter,         # Training data set
    eval_data=test_iter,  # Testing data set. MXNet computes scores on the test set every epoch
    batch_end_callback = mx.callback.Speedometer(batch_size, 200))  # Logging module to print out progress

Method 2:

First put the data into GPU memory and initialize the parameters, then train. (This seems to give higher accuracy?)

# construct a simple MLP
data = mx.symbol.Variable('data')
fc1  = mx.symbol.FullyConnected(data, name='fc1', num_hidden=128)
act1 = mx.symbol.Activation(fc1, name='relu1', act_type="relu")
fc2  = mx.symbol.FullyConnected(act1, name='fc2', num_hidden=64)
act2 = mx.symbol.Activation(fc2, name='relu2', act_type="relu")
fc3  = mx.symbol.FullyConnected(act2, name='fc3', num_hidden=10)
out  = mx.symbol.SoftmaxOutput(fc3, name='softmax')
# construct the module
mod = mx.mod.Module(out)
mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)
mod.init_params()
mod.fit(train_iter, eval_data=test_iter,
        optimizer_params={'learning_rate': 0.01, 'momentum': 0.9}, num_epoch=10)
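
The Module API also has a score method for evaluation; a minimal sketch, assuming the Module.score signature of this MXNet version:

# Evaluate the trained module on the held-out iterator.
acc = mx.metric.Accuracy()
mod.score(test_iter, acc)  # updates the metric in place
print 'Test accuracy:', acc.get()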

7: Predict with the trained model:

plt.imshow((X_test[0].reshape((28,28))*255).astype(np.uint8), cmap='Greys_r')
plt.show()
print 'Result:', model.predict(X_test[0:1])[0].argmax()

8: There is also a model evaluation function:

print 'Accuracy:', model.score(test_iter)*100, '%'

9: Call the model from a web page:

# run hand drawing test
from IPython.display import HTML

def classify(img):
    img = img[len('data:image/png;base64,'):].decode('base64')
    img = cv2.imdecode(np.fromstring(img, np.uint8), -1)
    img = cv2.resize(img[:,:,3], (28,28))
    img = img.astype(np.float32).reshape((1, 784))/255.0
    return model.predict(img)[0].argmax()

html = """<style type="text/css">canvas { border: 1px solid black; }</style><div id="board"><canvas id="myCanvas" width="100px" height="100px">Sorry, your browser doesn‘t support canvas technology.</canvas><p><button id="classify" onclick="classify()">Classify</button><button id="clear" onclick="myClear()">Clear</button>Result: <input type="text" id="result_output" size="5" value=""></p></div>"""
script = """<script type="text/JavaScript" src="https://ajax.googleapis.com/ajax/libs/jquery/1.4.2/jquery.min.js?ver=1.4.2"></script><script type="text/javascript">function init() {var myCanvas = document.getElementById("myCanvas");var curColor = $(‘#selectColor option:selected‘).val();if(myCanvas){var isDown = false;var ctx = myCanvas.getContext("2d");var canvasX, canvasY;ctx.lineWidth = 5;$(myCanvas).mousedown(function(e){isDown = true;ctx.beginPath();var parentOffset = $(this).parent().offset(); canvasX = e.pageX - parentOffset.left;canvasY = e.pageY - parentOffset.top;ctx.moveTo(canvasX, canvasY);}).mousemove(function(e){if(isDown != false) {var parentOffset = $(this).parent().offset(); canvasX = e.pageX - parentOffset.left;canvasY = e.pageY - parentOffset.top;ctx.lineTo(canvasX, canvasY);ctx.strokeStyle = curColor;ctx.stroke();}}).mouseup(function(e){isDown = false;ctx.closePath();});}$(‘#selectColor‘).change(function () {curColor = $(‘#selectColor option:selected‘).val();});}init();function handle_output(out) {document.getElementById("result_output").value = http://www.mamicode.com/out.content.data["text/plain"];}function classify() {var kernel = IPython.notebook.kernel;var myCanvas = document.getElementById("myCanvas");data = http://www.mamicode.com/myCanvas.toDataURL(‘image/png‘);document.getElementById("result_output").valuehttp://www.mamicode.com/= "";kernel.execute("classify(‘" + data +"‘)",  { ‘iopub‘ : {‘output‘ : handle_output}}, {silent:false});}function myClear() {var myCanvas = document.getElementById("myCanvas");myCanvas.getContext("2d").clearRect(0, 0, myCanvas.width, myCanvas.height);}</script>"""
HTML(html+script)

10: Monitor the weights:

def norm_stat(d):
    """The statistics you want to see.
    We compute the L2 norm here but you can change it to anything you like."""
    return mx.nd.norm(d)/np.sqrt(d.size)
mon = mx.mon.Monitor(
    100,                 # Print every 100 batches
    norm_stat,           # The statistics function defined above
    pattern='.*weight',  # A regular expression. Only arrays with a name matching this pattern will be included.
    sort=True)           # Sort output by name
model = mx.model.FeedForward(ctx = mx.gpu(0), symbol = mlp, num_epoch = 1,
                             learning_rate = 0.1, momentum = 0.9, wd = 0.00001)
model.fit(X=train_iter, eval_data=test_iter, monitor=mon,  # Set the monitor here
          batch_end_callback = mx.callback.Speedometer(100, 100))

11: As mentioned earlier, you can drive the data iterator with your own hand-written training loop (used later for clustering).

Honestly, though, I wondered how a hand-written loop uses the GPU, since the author's own example never seemed to call it explicitly. In fact it does: binding the symbol with ctx=mx.gpu(0) below is what places the executor, and hence all of its computation, on the GPU.

epoch is the number of passes over the whole data set; iter counts the batches within a pass.

# ==================Binding=====================
# The symbol we created is only a graph description.
# To run it, we first need to allocate memory and create an executor by 'binding' it.
# In order to bind a symbol, we need at least two pieces of information: context and input shapes.
# Context specifies which device the executor runs on, e.g. cpu, GPU0, GPU1, etc.
# Input shapes define the executor's input array dimensions.
# MXNet then runs automatic shape inference to determine the dimensions of intermediate and output arrays.

# Data iterators define the shapes of their output with the provide_data and provide_label properties.
input_shapes = dict(train_iter.provide_data+train_iter.provide_label)
print 'input_shapes:', input_shapes
# We use simple_bind to let MXNet allocate memory for us.
# You can also allocate memory yourself and use bind to pass it to MXNet.
exe = mlp.simple_bind(ctx=mx.gpu(0), **input_shapes)

# ===============Initialization=================
# First we get handle to input arrays
arg_arrays = dict(zip(mlp.list_arguments(), exe.arg_arrays))
data = arg_arrays[train_iter.provide_data[0][0]]
label = arg_arrays[train_iter.provide_label[0][0]]

# We initialize the weights with uniform distribution on (-0.01, 0.01).
init = mx.init.Uniform(scale=0.01)
for name, arr in arg_arrays.items():
    if name not in input_shapes:
        init(name, arr)
    
# We also need to create an optimizer for updating weights
opt = mx.optimizer.SGD(
    learning_rate=0.1,
    momentum=0.9,
    wd=0.00001,
    rescale_grad=1.0/train_iter.batch_size)
updater = mx.optimizer.get_updater(opt)

# Finally we need a metric to print out training progress
metric = mx.metric.Accuracy()

# Training loop begins
for epoch in range(10):
    train_iter.reset()
    metric.reset()
    t = 0
    for batch in train_iter:
        # Copy data to executor input. Note the [:].
        data[:] = batch.data[0]
        label[:] = batch.label[0]
        
        # Forward
        exe.forward(is_train=True)
        
        # You perform operations on exe.outputs here if you need to.
        # For example, you can stack a CRF on top of a neural network.
        
        # Backward
        exe.backward()
        
        # Update
        for i, pair in enumerate(zip(exe.arg_arrays, exe.grad_arrays)):
            weight, grad = pair
            updater(i, grad, weight)
        metric.update(batch.label, exe.outputs)
        t += 1
        if t % 100 == 0:
            print 'epoch:', epoch, 'iter:', t, 'metric:', metric.get()
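
For reference, each updater(i, grad, weight) call above applies SGD with momentum and weight decay; roughly, in LaTeX,

v \leftarrow \mu v - \eta\,(g + \lambda w), \qquad w \leftarrow w + v

where \mu is the momentum (0.9), \eta the learning rate (0.1), \lambda the weight decay (0.00001), and g the gradient after rescale_grad.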

12: A new layer

Both the layer's inputs and its number of outputs have to be declared carefully.

# Define custom softmax operator
class NumpySoftmax(mx.operator.NumpyOp):
    def __init__(self):
        # Call the parent class constructor. 
        # Because NumpySoftmax is a loss layer, it doesn't need gradient input from layers above.
        super(NumpySoftmax, self).__init__(need_top_grad=False)
    
    def list_arguments(self):
        # Define the input to NumpySoftmax.
        return ['data', 'label']

    def list_outputs(self):
        # Define the output.
        return ['output']

    def infer_shape(self, in_shape):
        # Calculate the dimensions of the output (and missing inputs) from (some) input shapes.
        data_shape = in_shape[0]  # shape of first argument 'data'
        label_shape = (in_shape[0][0],)  # 'label' should be one dimensional and have batch_size instances.
        output_shape = in_shape[0] # 'output' dimension is the same as the input.
        return [data_shape, label_shape], [output_shape]

    def forward(self, in_data, out_data):
        x = in_data[0]  # 'data'
        y = out_data[0]  # 'output'
        
        # Compute softmax
        y[:] = np.exp(x - x.max(axis=1).reshape((x.shape[0], 1)))
        y /= y.sum(axis=1).reshape((x.shape[0], 1))

    def backward(self, out_grad, in_data, out_data, in_grad):
        l = in_data[1]  # 'label'
        l = l.reshape((l.size,)).astype(np.int)  # cast to int
        y = out_data[0]  # 'output'
        dx = in_grad[0]  # gradient for 'data'
        
        # Compute gradient
        dx[:] = y
        dx[np.arange(l.shape[0]), l] -= 1.0

numpy_softmax = NumpySoftmax()

data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
# Use the new operator we just defined instead of the standard softmax operator.
mlp = numpy_softmax(data=fc3, name='softmax')

model = mx.model.FeedForward(ctx = mx.gpu(0), symbol = mlp, num_epoch = 2,
                             learning_rate = 0.1, momentum = 0.9, wd = 0.00001)
model.fit(X=train_iter, eval_data=test_iter,
          batch_end_callback = mx.callback.Speedometer(100, 100))
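
For reference, the forward/backward pair above implements softmax with cross-entropy loss. With y = softmax(x) and true class l, in LaTeX,

y_j = \frac{e^{x_j}}{\sum_k e^{x_k}}, \qquad \frac{\partial L}{\partial x_j} = y_j - \mathbf{1}[j = l]

which is exactly what dx[:] = y followed by dx[np.arange(l.shape[0]), l] -= 1.0 computes.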

13: New layer plus a custom training loop:

I created this under the example/mytest folder.

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 30 15:35:02 2017

@author: root
"""
from __future__ import print_function
import sys
import os
# code to automatically download dataset
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
sys.path = [os.path.join(curr_path, "../autoencoder")] + sys.path
import mxnet as mx
import numpy as np
import data
from scipy.spatial.distance import cdist
from sklearn.cluster import KMeans
import model
from autoencoder import AutoEncoderModel
from solver import Solver, Monitor
import logging
import time
global YT
import scipy.io as sio  
import matplotlib.pyplot as plt 
# ==================start setting My-layer=====================
class NumpySoftmax(mx.operator.NumpyOp):
    def __init__(self):
        # Call the parent class constructor. 
        # Because NumpySoftmax is a loss layer, it doesn't need gradient input from layers above.
        super(NumpySoftmax, self).__init__(need_top_grad=False)
    
    def list_arguments(self):
        # Define the input to NumpySoftmax.
        return ['data', 'label']

    def list_outputs(self):
        # Define the output.
        return ['output']

    def infer_shape(self, in_shape):
        # Calculate the dimensions of the output (and missing inputs) from (some) input shapes.
        data_shape = in_shape[0]  # shape of first argument 'data'
        label_shape = (in_shape[0][0],)  # 'label' should be one dimensional and have batch_size instances.
        output_shape = in_shape[0] # 'output' dimension is the same as the input.
        return [data_shape, label_shape], [output_shape]

    def forward(self, in_data, out_data):
        alpha=1.0
        z = in_data[0]   # 'data': the embedded points
        q = out_data[0]  # 'output'
        kmeans = KMeans(n_clusters=10, random_state=170).fit(z)
        mu = kmeans.cluster_centers_
        # Compute the soft assignment q (a Student's t kernel, not a softmax)
        mask = 1.0/(1.0+cdist(z, mu)**2/alpha)
        q[:] = mask**((alpha+1.0)/2.0)
        q[:] = (q.T/q.sum(axis=1)).T

    def backward(self, out_grad, in_data, out_data, in_grad):
        alpha=1.0
        x = in_data[0]   # 'data'
        y = out_data[0]  # 'output'
        dx = in_grad[0]  # gradient for 'data'
        kmeans = KMeans(n_clusters=10, random_state=170).fit(x)
        mu = kmeans.cluster_centers_
        mask = 1.0/(1.0+cdist(x, mu)**2/alpha)
        p = mask**((alpha+1.0)/2.0)
        mask*= (alpha+1.0)/alpha*(p-y)
        dx[:] = (x.T*mask.sum(axis=1)).T - mask.dot(mu)
#======================end setting==========================
# ==================start of data processing=====================
X, Y = data.get_mnist()
X_train = X[:60000]
X_test = X[60000:]
Y_train = Y[:60000]
Y_test = Y[60000:]
numpy_softmax = NumpySoftmax()
batch_size = 100
# the official way to create the iterators
train_iter = mx.io.NDArrayIter(X_train, Y_train, batch_size=batch_size)
test_iter = mx.io.NDArrayIter(X_test, Y_test, batch_size=batch_size)
input_shapes = dict(train_iter.provide_data+train_iter.provide_label)
# ==================end of the process=====================
# ==================start of setting the net=====================
data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data=data, name='fc1', num_hidden=128)
act1 = mx.symbol.Activation(data=fc1, name='relu1', act_type="relu")
fc2 = mx.symbol.FullyConnected(data=act1, name='fc2', num_hidden=64)
act2 = mx.symbol.Activation(data=fc2, name='relu2', act_type="relu")
fc3 = mx.symbol.FullyConnected(data=act2, name='fc3', num_hidden=10)
mlp = numpy_softmax(data=fc3, name='softmax')
mx.viz.plot_network(mlp).view()  
# ==================end of setting the net=====================
exe = mlp.simple_bind(ctx=mx.gpu(0), **input_shapes)
# ===============Initialization=================
# First we get handle to input arrays
arg_arrays = dict(zip(mlp.list_arguments(), exe.arg_arrays))
data = arg_arrays[train_iter.provide_data[0][0]]
label = arg_arrays[train_iter.provide_label[0][0]]

# We initialize the weights with uniform distribution on (-0.01, 0.01).
init = mx.init.Uniform(scale=0.01)
for name, arr in arg_arrays.items():
    if name not in input_shapes:
        init(name, arr)
    
# We also need to create an optimizer for updating weights
opt = mx.optimizer.SGD(
    learning_rate=0.1,
    momentum=0.9,
    wd=0.00001,
    rescale_grad=1.0/train_iter.batch_size)
updater = mx.optimizer.get_updater(opt)

# Finally we need a metric to print out training progress
metric = mx.metric.Accuracy()

# Training loop begins
for epoch in range(10):
    train_iter.reset()
    metric.reset()
    t = 0
    for batch in train_iter:
        # Copy data to executor input. Note the [:].
        data[:] = batch.data[0]
        label[:] = batch.label[0]
        
        # Forward
        exe.forward(is_train=True)
        
        # You perform operations on exe.outputs here if you need to.
        # For example, you can stack a CRF on top of a neural network.
        
        # Backward
        exe.backward()
        
        # Update
        for i, pair in enumerate(zip(exe.arg_arrays, exe.grad_arrays)):
            weight, grad = pair
            updater(i, grad, weight)
        metric.update(batch.label, exe.outputs)
        t += 1
        if t % 100 == 0:
            print('epoch:', epoch, 'iter:', t, 'metric:', metric.get())
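
Note that despite the class name, this layer's forward pass no longer computes a softmax: with embedded points z_i and the k-means centers \mu_j (recomputed on every call), it computes the Student's t soft assignment familiar from Deep Embedded Clustering, here with \alpha = 1:

q_{ij} = \frac{\left(1 + \lVert z_i - \mu_j \rVert^2 / \alpha\right)^{-\frac{\alpha+1}{2}}}{\sum_{j'} \left(1 + \lVert z_i - \mu_{j'} \rVert^2 / \alpha\right)^{-\frac{\alpha+1}{2}}}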

 
