
CS224d assignment 1【Neural Network Basics】

Refer to:

Machine Learning Open Course Notes (5): Neural Networks

CS224d Notes 3: Neural Networks

Deep Learning and Natural Language Processing (4): Stanford CS224d Problem Set 1 and Solutions

CS224d Problem Set 1 assignment

softmax:

import numpy as np

def softmax(x):
    # Row-wise softmax for a 2-D array of scores.
    assert len(x.shape) > 1
    x -= np.max(x, axis=1, keepdims=True)   # shift by the row max for numerical stability
    x = np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
    return x
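Subtracting the row-wise maximum before exponentiating leaves the output unchanged (softmax(x + c) = softmax(x) for any constant c) while keeping np.exp from overflowing. A quick check of that property, with illustrative values of my own (assuming the definition above is saved as q1_softmax.py, as the later imports suggest):

import numpy as np
from q1_softmax import softmax   # assumed module name, matching the imports in neural.py below

a = softmax(np.array([[1001.0, 1002.0], [3.0, 4.0]]))   # would overflow without the shift
b = softmax(np.array([[1.0, 2.0], [3.0, 4.0]]))         # same scores minus a constant
assert np.allclose(a, b)                     # shift invariance
assert np.allclose(np.sum(a, axis=1), 1.0)   # each row is a probability distribution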

sigmoid & sigmoid_grad:

import numpy as np

def sigmoid(x):
    result = 1.0 / (1.0 + np.exp(-x))
    return result

def sigmoid_grad(f):
    # f is the already-computed activation sigmoid(x); the derivative is f * (1 - f).
    f = f * (1.0 - f)
    return f
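Note that sigmoid_grad takes the activation f = sigmoid(x) rather than x itself, since sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)); reusing the forward-pass activations avoids recomputing the exponential during backprop. A small numerical check of that identity, with illustrative values of my own (assuming the definitions above live in q2_sigmoid.py):

import numpy as np
from q2_sigmoid import sigmoid, sigmoid_grad   # assumed module name, matching the imports in neural.py below

x = np.array([[1.0, 2.0], [-1.0, -2.0]])
f = sigmoid(x)                                             # forward activation
numeric = (sigmoid(x + 1e-5) - sigmoid(x - 1e-5)) / 2e-5   # central-difference derivative
assert np.allclose(sigmoid_grad(f), numeric, atol=1e-8)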

gradcheck_naive:

import numpy as np
import random

def gradcheck_naive(f, x):
    """
    Gradient check for a function f
    - f should be a function that takes a single argument and outputs the
      cost and its gradients
    - x is the point (numpy array) to check the gradient at
    """
    rndstate = random.getstate()
    random.setstate(rndstate)
    fx, grad = f(x)  # Evaluate function value at original point
    h = 1e-4

    # Iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        ### try modifying x[ix] with h defined above to compute numerical gradients
        ### make sure you call random.setstate(rndstate) before calling f(x) each
        ### time, this will make it possible to test cost functions with built in
        ### randomness later
        ### YOUR CODE HERE:
        old_val = x[ix]
        x[ix] = old_val - h
        random.setstate(rndstate)
        (fxh1, _) = f(x)
        x[ix] = old_val + h
        random.setstate(rndstate)
        (fxh2, _) = f(x)
        numgrad = (fxh2 - fxh1) / (2 * h)   # central difference
        x[ix] = old_val
        ### END YOUR CODE

        # Compare gradients
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > 1e-5:
            print "Gradient check failed."
            print "First gradient error found at index %s" % str(ix)
            print "Your gradient: %f \t Numerical gradient: %f" % (grad[ix], numgrad)
            return

        it.iternext()  # Step to next dimension

    print "Gradient check passed!"
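gradcheck_naive works for any f that returns a (cost, gradient) pair. A simple way to exercise it is f(x) = sum(x**2), whose analytic gradient is 2x; the sketch below is illustrative (similar in spirit to the sanity check that ships with the assignment) and assumes the checker is saved as q2_gradcheck.py:

import numpy as np
from q2_gradcheck import gradcheck_naive   # assumed module name, matching the imports in neural.py below

quad = lambda x: (np.sum(x ** 2), x * 2)   # cost and its analytic gradient

gradcheck_naive(quad, np.array(123.456))     # scalar (0-d array)
gradcheck_naive(quad, np.random.randn(3,))   # 1-D vector
gradcheck_naive(quad, np.random.randn(4, 5)) # 2-D matrix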

neural.py

import numpy as np
import random

from q1_softmax import softmax
from q2_sigmoid import sigmoid, sigmoid_grad
from q2_gradcheck import gradcheck_naive

def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and the cross entropy cost,
    and the backward propagation for the gradients of all parameters.
    """
    ### Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    N, D = data.shape
    # data   --> N x D
    # W1     --> D x H
    # b1     --> 1 x H
    # W2     --> H x V
    # b2     --> 1 x V
    # labels --> N x V

    ### YOUR CODE HERE: forward propagation
    Z1 = np.dot(data, W1) + b1   # N x H
    A1 = sigmoid(Z1)             # N x H
    Z2 = np.dot(A1, W2) + b2     # N x V
    A2 = softmax(Z2)             # N x V

    # cross entropy cost
    # first method:
    # B = np.exp(Z2)                 # N x V
    # b = np.sum(B, axis=1) + 1e-8   # N x 1
    # z = np.log(b)                  # N x 1
    # cost = np.sum(z) - np.sum(Z2 * labels)
    # cost /= N
    # second method:
    cost = -np.sum(np.log(A2[labels == 1])) / N
    ### END YOUR CODE

    ### YOUR CODE HERE: backward propagation
    delta2 = A2 - labels                              # N x V   delta2 = A2 - y
    gradb2 = np.sum(delta2, axis=0)                   # 1 x V   gradb2 <-- delta2
    gradb2 /= N                                       # 1 x V
    gradW2 = np.dot(A1.T, delta2)                     # H x V   gradW2 = A1.T * delta2
    gradW2 /= N                                       # H x V
    delta1 = sigmoid_grad(A1) * np.dot(delta2, W2.T)  # N x H   delta1 = f'(A1) * (delta2 * W2.T)
    gradb1 = np.sum(delta1, axis=0)                   # 1 x H   gradb1 <-- delta1
    gradb1 /= N                                       # 1 x H
    gradW1 = np.dot(data.T, delta1)                   # D x H   gradW1 = X.T * delta1
    gradW1 /= N                                       # D x H
    ### END YOUR CODE

    ### Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
        gradW2.flatten(), gradb2.flatten()))

    return cost, grad

def sanity_check():
    """
    Set up fake data and parameters for the neural network, and test using
    gradcheck.
    """
    print "Running sanity check..."

    N = 20
    dimensions = [10, 5, 10]
    data = np.random.randn(N, dimensions[0])   # each row will be a datum, 20 x 10
    labels = np.zeros((N, dimensions[2]))
    for i in xrange(N):
        labels[i, random.randint(0, dimensions[2] - 1)] = 1
    params = np.random.randn((dimensions[0] + 1) * dimensions[1] + (
        dimensions[1] + 1) * dimensions[2], )

    gradcheck_naive(lambda params: forward_backward_prop(data, labels, params,
        dimensions), params)

if __name__ == "__main__":
    sanity_check()
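Because labels is one-hot, the indexing A2[labels == 1] picks out, for each row, the predicted probability of that row's true class, so the "second method" cost above is just the average cross-entropy -(1/N) * sum_i log(yhat_i). A toy illustration with made-up numbers:

import numpy as np

A2 = np.array([[0.7, 0.2, 0.1],
               [0.1, 0.1, 0.8]])      # softmax outputs, N x V (made-up values)
labels = np.array([[1, 0, 0],
                   [0, 0, 1]])        # one-hot targets
picked = A2[labels == 1]              # array([0.7, 0.8]): true-class probabilities
cost = -np.sum(np.log(picked)) / A2.shape[0]   # average cross-entropy, roughly 0.29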
