A Quick-and-Dirty Neural Network from Scratch
Everyone agrees neural networks are awesome, so let's build one from scratch.
First, brush up on NNs: http://deeplearning.stanford.edu/wiki/index.php/Neural_Networks
Then brush up on the backpropagation algorithm: http://deeplearning.stanford.edu/wiki/index.php/Backpropagation_Algorithm
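For reference, here is my one-screen summary of the backprop equations the code below implements, written in this post's convention (layer $l$'s $W$ and $b$ map layer $l-1$'s output into layer $l$; $a^{(0)} = x$ is the input; $\odot$ is the element-wise product):

$$\delta^{(L)} = \left(a^{(L)} - y\right) \odot f'(z^{(L)})$$
$$\delta^{(l)} = \left((W^{(l+1)})^T \delta^{(l+1)}\right) \odot f'(z^{(l)})$$
$$W^{(l)} \leftarrow W^{(l)} - \alpha\, \delta^{(l)} \left(a^{(l-1)}\right)^T, \qquad b^{(l)} \leftarrow b^{(l)} - \alpha\, \delta^{(l)}$$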
Now let's implement it in code:
```cpp
#include <iostream>
#include <cmath>
#include <cstdlib>
#include <cstring>   // memset
#include <cassert>
#include <memory>    // shared_ptr, default_delete
using namespace std;

// http://www.cnblogs.com/yeahgis/archive/2012/07/13/2590485.html
// Gaussian random number with mean 0 and variance 1
double gaussrand()
{
    static double V1, V2, S;
    static int phase = 0;
    double X;

    if (phase == 0) {
        do {
            double U1 = (double)rand() / RAND_MAX;
            double U2 = (double)rand() / RAND_MAX;

            V1 = 2 * U1 - 1;
            V2 = 2 * U2 - 1;
            S = V1 * V1 + V2 * V2;
        } while (S >= 1 || S == 0);

        X = V1 * sqrt(-2 * log(S) / S);
    } else {
        X = V2 * sqrt(-2 * log(S) / S);
    }

    phase = 1 - phase;

    return X;
}

// Shared ownership, so we never have to release memory by hand
typedef shared_ptr<double> DoublePtr;
inline DoublePtr newDoubleArray(int size)
{
    double *p = new double[size];
    return DoublePtr(p, default_delete<double[]>());
}

// Simple matrix (copies share the underlying data; only the metadata is copied)
struct Matrix
{
    int row, col, size;
    DoublePtr data;

    Matrix(int _row = 1, int _col = 1) : row(_row), col(_col)
    {
        size = row * col;
        data = newDoubleArray(size);
        memset(data.get(), 0, sizeof(double) * size);
    }

    inline double* operator[](int i)
    {
        assert(i < row);
        return data.get() + i * col;
    }
};

// Print a matrix
ostream& operator<<(ostream& out, Matrix w)
{
    out << "[ (" << w.row << " x " << w.col << ")" << endl;
    for (int i = 0; i < w.row; i++) {
        out << "\t[";
        for (int j = 0; j < w.col; j++) {
            if (j > 0) out << ",";
            out << w[i][j];
        }
        out << "]" << endl;
    }
    out << "]";
    return out;
}

// Simple vector (copies share the underlying data; only the metadata is copied)
struct Vector
{
    int size;
    DoublePtr data;

    Vector(int _size = 1) : size(_size)
    {
        data = newDoubleArray(size);
        memset(data.get(), 0, sizeof(double) * size);
    }

    inline double& operator[](int x)
    {
        assert(x < size);
        return data.get()[x];
    }
};

// Print a vector
ostream& operator<<(ostream& out, Vector v)
{
    out << "[ (" << v.size << ") ";
    for (int i = 0; i < v.size; i++) {
        if (i > 0) out << ",";
        out << v[i];
    }
    out << "]";
    return out;
}

// Matrix-vector product
Vector operator*(Matrix w, Vector v)
{
    Vector ret(w.row);
    for (int i = 0; i < w.row; i++) {
        for (int j = 0; j < w.col; j++) {
            ret[i] += w[i][j] * v[j];
        }
    }
    return ret;
}

// Element-wise product
Vector operator*(Vector x, Vector y)
{
    Vector ret(x.size);
    for (int i = 0; i < x.size; i++) {
        ret[i] = x[i] * y[i];
    }
    return ret;
}

// Transpose w, then multiply by v
Vector TandMul(Matrix w, Vector v)
{
    Vector ret(w.col);
    for (int i = 0; i < w.col; i++) {
        for (int j = 0; j < w.row; j++) {
            ret[i] += w[j][i] * v[j];
        }
    }
    return ret;
}

Vector operator+(Vector x, Vector y)
{
    Vector ret(x.size);
    for (int i = 0; i < x.size; i++) {
        ret[i] = x[i] + y[i];
    }
    return ret;
}

Vector operator*(double x, Vector y)
{
    Vector ret(y.size);
    for (int i = 0; i < y.size; i++) {
        ret[i] = x * y[i];
    }
    return ret;
}

Vector operator*(Vector x, double y)
{
    return y * x;
}

// Cost function interface
struct CostFun
{
    virtual double calc(Vector x, Vector y)
    {
        return 0;
    }

    virtual double operator()(Vector x, Vector y)
    {
        return calc(x, y);
    }

    // Gradient of the cost w.r.t. the network's output
    virtual Vector propagateDelta(Vector output, Vector y)
    {
        return Vector(output.size);
    }
};

// Squared-error cost function
struct SqrCostFun : CostFun
{
    virtual double calc(Vector x, Vector y)
    {
        double ret = 0;
        for (int i = 0; i < x.size; i++) {
            double t = x[i] - y[i];
            ret += t * t;
        }
        return ret / 2;
    }

    virtual Vector propagateDelta(Vector output, Vector y)
    {
        // -(y - output)
        return -1 * y + output;
    }
};

// Singleton
SqrCostFun SqrCostFunSingleton;

// Activation function interface (identity by default)
struct Activator
{
    // Forward pass
    virtual double forward(double v)
    {
        return v;
    }

    virtual double operator()(double v)
    {
        return forward(v);
    }

    virtual Vector operator()(Vector v)
    {
        Vector ret(v.size);
        for (int i = 0; i < v.size; i++) {
            ret[i] = forward(v[i]);
        }
        return ret;
    }

    // Derivative
    virtual double derive(double v)
    {
        return 1;
    }

    virtual Vector derive(Vector v)
    {
        Vector ret(v.size);
        for (int i = 0; i < ret.size; i++) {
            ret[i] = derive(v[i]);
        }
        return ret;
    }
};

// Sigmoid activation
struct SigmoidActivator : Activator
{
    virtual double forward(double v)
    {
        return 1 / (1 + exp(-v));
    }

    virtual double derive(double v)
    {
        double t = exp(-v);
        return t / ((1 + t) * (1 + t));
    }
};

// Singleton
SigmoidActivator SigmoidActivatorSingleton;

// One layer of the NN
// 1. The input does not count as a layer.
// 2. A layer's w matrix maps the previous layer's output to this layer,
//    which differs slightly from NG's definition.
// 3. Likewise, a layer's b maps from the previous layer to this one.
struct Layer
{
    // Size of the previous layer's output, excluding bias
    int inSize;
    // Size of this layer's output
    int outSize;

    Activator &activator;
    Matrix w;
    Vector b;

    void initWeights(double *p, int size)
    {
        // Initialize from a Gaussian with mean 0 and std 0.01
        for (int i = 0; i < size; i++) {
            p[i] = gaussrand() * 0.01;
        }
    }

    Layer(int _inSize = 1, int _outSize = 1, Activator &_activator = SigmoidActivatorSingleton) :
        inSize(_inSize),
        outSize(_outSize),
        activator(_activator),
        w(_outSize, _inSize),
        b(_outSize)
    {
        initWeights(w.data.get(), w.size);
        initWeights(b.data.get(), b.size);
    }

    // Activations saved by the most recent forward pass
    Vector a;
    Vector z;
    // `in` is the previous layer's output
    Vector operator()(Vector in)
    {
        z = w * in + b;
        return a = activator(z);
    }

    // Delta saved by the most recent backward pass
    Vector delta;
    Vector propagateDelta()
    {
        return TandMul(w, delta);
    }

    // alpha is the learning rate, prevA is the previous layer's output
    void updateParameters(double alpha, Vector prevA)
    {
        b = b + (-alpha) * delta;
        Matrix nw(w.row, w.col);
        for (int i = 0; i < w.row; i++) {
            for (int j = 0; j < w.col; j++) {
                nw[i][j] = w[i][j] - alpha * prevA[j] * delta[i];
            }
        }
        w = nw;
    }
};

ostream& operator<<(ostream& out, Layer& layer)
{
    out << "Layer {" << endl;
    out << "w = " << layer.w << endl;
    out << "b = " << layer.b << endl;
    out << "z = " << layer.z << endl;
    out << "a = " << layer.a << endl;
    out << "delta = " << layer.delta << endl;
    out << "}" << endl;
    return out;
}

Vector forward(Layer layerList[], int nLayer, Vector input)
{
    Vector tmp = input;
    for (int i = 0; i < nLayer; i++) {
        tmp = layerList[i](tmp);
    }
    return tmp;
}

void backward(Layer layerList[], int nLayer, Vector input, Vector y, CostFun& costFun, double alpha)
{
    // Backpropagate the deltas.
    Layer &lastLayer = layerList[nLayer - 1];
    // For the squared-error cost this is: -(y - a) * f'(z)
    lastLayer.delta = costFun.propagateDelta(lastLayer.a, y) * lastLayer.activator.derive(lastLayer.z);

    for (int i = nLayer - 2; i >= 0; i--) {
        Layer &layer = layerList[i];
        Layer &nextLayer = layerList[i + 1];
        layer.delta = nextLayer.propagateDelta() * layer.activator.derive(layer.z);
    }

    // Update all w and b
    for (int i = 0; i < nLayer; i++) {
        layerList[i].updateParameters(alpha, i == 0 ? input : layerList[i - 1].a);
    }
}

int main()
{
    srand(100);

    // Network structure
    Layer layerList[] = {
        Layer(2, 2), // hidden layer (inSize=2, outSize=2)
        Layer(2, 1), // output layer (inSize=2, outSize=1)
    };

    // Cost function
    CostFun &costFun = SqrCostFunSingleton;

    // Number of layers, not counting the input layer
    int nLayer = sizeof(layerList) / sizeof(layerList[0]);
    int nInput = layerList[0].inSize;
    int nOutput = layerList[nLayer - 1].outSize;

    // Train on XOR
    int xs[4][2] = {
        {0, 0},
        {0, 1},
        {1, 0},
        {1, 1}
    };
    int ys[4] = {
        0,
        1,
        1,
        0
    };

    for (int step = 0; step < 100000; step++) {
        double avgError = 0;
        for (int i = 0; i < 4; i++) {
            Vector x(2);
            for (int j = 0; j < 2; j++) {
                x[j] = xs[i][j];
            }

            Vector y(1);
            y[0] = ys[i];

            Vector output = forward(layerList, nLayer, x);
            double error = costFun(output, y);
            avgError += error;

            backward(layerList, nLayer, x, y, costFun, 0.1);
        }
        avgError /= 4;

        cout << "after " << step << " steps, error = " << avgError << endl;
    }

    return 0;
}
```
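If you want to try it: the code only uses the standard library and should build with any C++11 compiler, e.g. `g++ -std=c++11 nn.cpp -o nn` (pick whatever file name you like).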
The implementation took four-plus hours, and the most painful part was debugging. After running a few test cases I found several typos, and now it at least classifies XOR correctly. NG mentions a technique for verifying the algorithm's correctness (gradient checking; see the sketch below), which clearly is needed, especially if I later want to build other things on top of this code. Next time: http://deeplearning.stanford.edu/wiki/index.php/Gradient_checking_and_advanced_optimization
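For a concrete picture of what that check involves, here is a minimal sketch built on the code above. To be clear about what is mine and what is not: the `gradCheck` helper, the `eps = 1e-4` step size, and the trick of calling `backward` with `alpha = 0` (so the deltas get filled in while the weights stay numerically unchanged) are my own choices for illustration, not part of this post's code.

```cpp
// Hypothetical helper (paste below backward): numerically check the backprop
// gradients of the w matrices on a single training example (x, y).
void gradCheck(Layer layerList[], int nLayer, Vector x, Vector y, CostFun& costFun)
{
    const double eps = 1e-4;

    // One forward/backward pass to fill in a, z and delta.
    // alpha = 0: deltas are computed, weights are left unchanged.
    forward(layerList, nLayer, x);
    backward(layerList, nLayer, x, y, costFun, 0);

    for (int l = 0; l < nLayer; l++) {
        Layer &layer = layerList[l];
        // Refresh activations with the unperturbed weights, then grab the
        // ones feeding this layer. Vector copies share data, and the
        // shared_ptr keeps them alive even after later forward passes
        // overwrite layerList[l-1].a.
        forward(layerList, nLayer, x);
        Vector prevA = (l == 0) ? x : layerList[l - 1].a;

        for (int i = 0; i < layer.w.row; i++) {
            for (int j = 0; j < layer.w.col; j++) {
                // Analytic gradient of J w.r.t. w[i][j], exactly as used
                // inside updateParameters.
                double analytic = layer.delta[i] * prevA[j];

                // Numerical estimate: (J(w+eps) - J(w-eps)) / (2*eps)
                double saved = layer.w[i][j];
                layer.w[i][j] = saved + eps;
                double jPlus = costFun(forward(layerList, nLayer, x), y);
                layer.w[i][j] = saved - eps;
                double jMinus = costFun(forward(layerList, nLayer, x), y);
                layer.w[i][j] = saved;
                double numeric = (jPlus - jMinus) / (2 * eps);

                // The two should agree to several decimal places.
                cout << "layer " << l << " w[" << i << "][" << j << "]: "
                     << "analytic = " << analytic
                     << ", numeric = " << numeric << endl;
            }
        }
    }
}
```

Calling `gradCheck(layerList, nLayer, x, y, costFun)` once before the training loop, with one of the XOR examples as `x` and `y`, is enough to spot the kind of typos mentioned above: if any delta or index is wrong, the analytic and numeric columns diverge immediately.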