
A Quick Shot at Neural Networks

Everyone says neural networks are awesome, so let's build one from scratch.

First, a refresher on NNs: http://deeplearning.stanford.edu/wiki/index.php/Neural_Networks

Then a refresher on the backpropagation algorithm: http://deeplearning.stanford.edu/wiki/index.php/Backpropagation_Algorithm
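
As a quick reference, these are the equations the code below implements (per-example gradient descent on a squared-error cost; f is the activation, α the learning rate, and W^{(l)} holds the weights going into layer l, a slightly different indexing than the UFLDL pages use):

z^{(l)} = W^{(l)} a^{(l-1)} + b^{(l)}, \qquad a^{(l)} = f(z^{(l)})

\delta^{(L)} = -(y - a^{(L)}) \odot f'(z^{(L)})

\delta^{(l)} = \big( (W^{(l+1)})^{T} \delta^{(l+1)} \big) \odot f'(z^{(l)})

W^{(l)} \leftarrow W^{(l)} - \alpha \, \delta^{(l)} (a^{(l-1)})^{T}, \qquad b^{(l)} \leftarrow b^{(l)} - \alpha \, \delta^{(l)}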

Then implement it in code.

 

#include <iostream>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <cassert>
#include <memory>
using namespace std;

// http://www.cnblogs.com/yeahgis/archive/2012/07/13/2590485.html
// Gaussian random number, mean 0, variance 1 (Marsaglia polar method)
double gaussrand()
{
    static double V1, V2, S;
    static int phase = 0;
    double X;

    if ( phase == 0 ) {
        do {
            double U1 = (double)rand() / RAND_MAX;
            double U2 = (double)rand() / RAND_MAX;

            V1 = 2 * U1 - 1;
            V2 = 2 * U2 - 1;
            S = V1 * V1 + V2 * V2;
        } while(S >= 1 || S == 0);

        X = V1 * sqrt(-2 * log(S) / S);
    } else
        X = V2 * sqrt(-2 * log(S) / S);

    phase = 1 - phase;

    return X;
}

// Keep memory management simple: shared_ptr owns the array and frees it with an array deleter
typedef shared_ptr<double> DoublePtr;
inline DoublePtr newDoubleArray(int size)
{
    double *p = new double[size];
    return DoublePtr(p, default_delete<double[]>());
}

// Simple matrix (copying copies only the metadata; the underlying data is shared)
struct Matrix
{
    int row, col, size;
    DoublePtr data;

    Matrix(int _row=1, int _col=1) : row(_row), col(_col)
    {
        size = row * col;
        data = newDoubleArray(size);
        memset(data.get(), 0, sizeof(double) * size);
    }

    inline double* operator[](int i) {
        assert(i < row);
        return data.get() + i * col;
    }
};

// print matrix contents
ostream& operator<<(ostream& out, Matrix w)
{
    out << "[ (" << w.row << " x " << w.col << ")" << endl;
    for(int i = 0;i < w.row;i++) {
        out << "\t[";
        for(int j = 0;j < w.col;j++) {
            if(j > 0) out << ",";
            out << w[i][j];
        }
        out << "]" << endl;
    }
    out << "]";
    return out;
}

// Simple vector (copying copies only the metadata; the underlying data is shared)
struct Vector
{
    int size;
    DoublePtr data;

    Vector(int _size=1) : size(_size)
    {
        data = newDoubleArray(size);
        memset(data.get(), 0, sizeof(double) * size);
    }

    inline double &operator[](int x)
    {
        assert(x < size);
        return data.get()[x];
    }
};

// print vector contents
ostream& operator<<(ostream& out, Vector v)
{
    out << "[ (" << v.size << ") ";
    for(int i = 0;i < v.size;i++) {
        if(i > 0) out << ",";
        out << v[i];
    }
    out << "]";
    return out;
}

// matrix-vector product
Vector operator*(Matrix w, Vector v)
{
    Vector ret(w.row);
    for(int i = 0;i < w.row;i++) {
        for(int j = 0;j < w.col;j++) {
            ret[i] += w[i][j] * v[j];
        }
    }
    return ret;
}

// element-wise (Hadamard) product
Vector operator*(Vector x, Vector y)
{
    Vector ret(x.size);
    for(int i = 0;i < x.size;i++) {
        ret[i] = x[i] * y[i];
    }
    return ret;
}

// multiply the transpose of w by v
Vector TandMul(Matrix w, Vector v)
{
    Vector ret(w.col);
    for(int i = 0;i < w.col;i++) {
        for(int j = 0;j < w.row;j++) {
            ret[i] += w[j][i] * v[j];
        }
    }
    return ret;
}

Vector operator+(Vector x, Vector y)
{
    Vector ret(x.size);
    for(int i = 0;i < x.size;i++) {
        ret[i] = x[i] + y[i];
    }
    return ret;
}

Vector operator*(double x, Vector y)
{
    Vector ret(y.size);
    for(int i = 0;i < y.size;i++) {
        ret[i] = x * y[i];
    }
    return ret;
}

Vector operator*(Vector x, double y)
{
    return y * x;
}

// Cost function (base class)
struct CostFun
{
    virtual double calc(Vector x, Vector y)
    {
        return 0;
    }

    virtual double operator()(Vector x, Vector y)
    {
        return calc(x,y);
    }

    virtual Vector propagateDelta(Vector output, Vector y)
    {
        return Vector(output.size);
    }
};

// Squared-error cost function
struct SqrCostFun: CostFun
{
    virtual double calc(Vector x, Vector y)
    {
        double ret = 0;
        for(int i = 0;i < x.size;i++) {
            double t = x[i] - y[i];
            ret += t * t;
        }
        return ret / 2;
    }

    virtual Vector propagateDelta(Vector output, Vector y)
    {
        // -(y - output)
        return -1 * y + output;
    }
};

// singleton
SqrCostFun SqrCostFunSingleton;

// Activation function (base class: identity)
struct Activator
{
    // forward
    virtual double forward(double v)
    {
        return v;
    }

    virtual double operator()(double v)
    {
        return forward(v);
    }

    virtual Vector operator()(Vector v)
    {
        Vector ret(v.size);
        for(int i = 0;i < v.size;i++) {
            ret[i] = forward(v[i]);
        }
        return ret;
    }

    // derivative
    virtual double derive(double v)
    {
        return 1;
    }

    virtual Vector derive(Vector v)
    {
        Vector ret(v.size);
        for(int i = 0;i < ret.size;i++) {
            ret[i] = derive(v[i]);
        }
        return ret;
    }
};

// Sigmoid activation function
struct SigmoidActivator : Activator
{
    virtual double forward(double v)
    {
        return 1 / (1 + exp(-v));
    }

    virtual double derive(double v)
    {
        double t = exp(-v);
        return t / ( (1 + t) * (1 + t) );
    }
};

// singleton
SigmoidActivator SigmoidActivatorSingleton;

// One layer of the NN
// 1. The input does not count as a layer
// 2. The layer's w matrix holds the weights from the previous layer to this layer,
//    which differs slightly from Ng's definition
// 3. The layer's b is likewise the bias from the previous layer to this layer,
//    also slightly different from Ng's definition
struct Layer
{
    // number of outputs of the previous layer, not counting the bias
    int inSize;
    // number of outputs of this layer
    int outSize;

    Activator &activator;
    Matrix w;
    Vector b;

    void initWeights(double *p, int size)
    {
        // initialize from a normal distribution with mean 0 and standard deviation 0.01
        for(int i = 0;i < size;i++) {
            p[i] = gaussrand() * 0.01;
        }
    }

    Layer(int _inSize=1, int _outSize=1, Activator &_activator= SigmoidActivatorSingleton):
        inSize(_inSize),
        outSize(_outSize),
        activator(_activator),
        w(_outSize, _inSize),
        b(_outSize)
    {
        initWeights(w.data.get(), w.size);
        initWeights(b.data.get(), b.size);
    }

    // activations saved from the most recent forward pass
    Vector a;
    Vector z;
    // in is the previous layer's output
    Vector operator()(Vector in)
    {
        z = w * in + b;
        return a = activator(z);
    }

    // delta saved from the most recent backward pass
    Vector delta;
    Vector propagateDelta()
    {
        return TandMul(w, delta);
    }

    // alpha is the learning rate
    // prevA is the previous layer's output
    void updateParameters(double alpha, Vector prevA)
    {
        b = b + (-alpha) * delta;
        Matrix nw(w.row, w.col);
        for(int i = 0;i < w.row;i++) {
            for(int j = 0;j < w.col;j++) {
                nw[i][j] = w[i][j] - alpha * prevA[j] * delta[i];
            }
        }
        w = nw;
    }
};

ostream& operator<<(ostream& out, Layer& layer)
{
    out << "Layer {" << endl;
    out << "w = " << layer.w << endl;
    out << "b = " << layer.b << endl;
    out << "z = " << layer.z << endl;
    out << "a = " << layer.a << endl;
    out << "delta = " << layer.delta << endl;
    out << "}" << endl;
    return out;
}

Vector forward(Layer layerList[], int nLayer, Vector input)
{
    Vector tmp = input;
    for(int i = 0;i < nLayer;i++) {
        tmp = layerList[i](tmp);
    }
    return tmp;
}

void backward(Layer layerList[], int nLayer, Vector input, Vector y, CostFun& costFun, double alpha)
{
    // propagate delta backwards
    Layer &lastLayer = layerList[nLayer - 1];
    // for the squared-error cost this is: -(y - a) * f'(z)
    lastLayer.delta = costFun.propagateDelta(lastLayer.a, y) * lastLayer.activator.derive(lastLayer.z);

    for(int i = nLayer - 2;i >= 0;i--) {
        Layer &layer = layerList[i];
        Layer &nextLayer = layerList[i + 1];
        layer.delta = nextLayer.propagateDelta() * layer.activator.derive(layer.z);
    }

    // update all w and b
    for(int i = 0;i < nLayer;i++) {
        layerList[i].updateParameters(alpha, i == 0 ? input : layerList[i - 1].a);
    }
}

int main()
{
    srand(100);

    // network architecture
    Layer layerList[] = {
        Layer(2, 2), // hidden layer: 2 inputs, 2 outputs
        Layer(2, 1), // output layer: 2 inputs, 1 output
    };

    // cost function
    CostFun &costFun = SqrCostFunSingleton;

    // number of layers, not counting the input layer
    int nLayer = sizeof(layerList) / sizeof(layerList[0]);
    int nInput = layerList[0].inSize;
    int nOutput = layerList[nLayer - 1].outSize;

    // test on XOR
    int xs[4][2] = {
        {0,0},
        {0,1},
        {1,0},
        {1,1}
    };
    int ys[4] = {
        0,
        1,
        1,
        0
    };

    for(int step = 0;step < 100000;step++) {
        double avgError = 0;
        for(int i = 0;i < 4;i++) {
            Vector x(2);
            for(int j = 0;j < 2;j++) {
                x[j] = xs[i][j];
            }

            Vector y(1);
            y[0] = ys[i];

            Vector output = forward(layerList, nLayer, x);
            double error = SqrCostFunSingleton(output, y);
            avgError += error;

            backward(layerList, nLayer, x, y, SqrCostFunSingleton, 0.1);
        }
        avgError /= 4;

        cout << "after " << step << " steps, error = " << avgError << endl;
    }

    return 0;
}

 

The implementation took about four hours. The most annoying part is that it is hard to debug: I ran a few test cases, found several typos, and it can at least classify XOR correctly now. Ng mentions a technique for verifying that the algorithm is correct, and it does seem necessary, especially if I later want to build other things on top of this code. Next time: http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization
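
Here is a rough sketch of what that gradient check could look like on top of the listing above. It is not part of the original code: checkOneWeight and eps are names made up for illustration, and it assumes the Layer / Vector / forward / backward / CostFun definitions compile in the same translation unit. The idea is to compare, for a single weight, the analytic gradient delta[i] * prevA[j] produced by backprop against the central-difference estimate (J(w+eps) - J(w-eps)) / (2*eps).

double checkOneWeight(Layer layerList[], int nLayer, Vector x, Vector y,
                      CostFun &costFun, int l, int i, int j)
{
    const double eps = 1e-4;

    // Forward + backward with alpha = 0: fills in a, z and delta for every
    // layer without actually changing any parameters.
    forward(layerList, nLayer, x);
    backward(layerList, nLayer, x, y, costFun, 0.0);

    // Analytic gradient of the cost w.r.t. w[i][j], as used in updateParameters.
    Vector prevA = (l == 0) ? x : layerList[l - 1].a;
    double analytic = layerList[l].delta[i] * prevA[j];

    // Numerical gradient via central difference.
    double saved = layerList[l].w[i][j];
    layerList[l].w[i][j] = saved + eps;
    double cPlus = costFun(forward(layerList, nLayer, x), y);
    layerList[l].w[i][j] = saved - eps;
    double cMinus = costFun(forward(layerList, nLayer, x), y);
    layerList[l].w[i][j] = saved; // restore the original weight

    double numerical = (cPlus - cMinus) / (2 * eps);
    return fabs(analytic - numerical);
}

Running this for every (l, i, j) on a few training examples before starting training should catch most backprop typos; the returned difference should be close to zero (roughly 1e-8 with eps = 1e-4) whenever the analytic gradient is right.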

 
