Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Forward pass of the three-layer perceptron: feeds I through the layers
- // (W[0],B[0]), (W[1],B[1]) and (W[2],B[2]), applying vec_sigmoid after each
- // one, and returns maxat() of the last activation vector.
- //   I : input vector
- //   W : the three weight matrices, first layer at index 0
- //   B : the three bias vectors, first layer at index 0
- // The function returns a value, so its return type is int rather than void.
- // NOTE(review): assumes imatVecAddMul(W, x, 1, B, 1) yields W*x + B and that
- // maxat() returns an int index -- confirm against the matrix library.
- // NOTE(review): H1, H2 and output are never released here; check who owns them.
- int MLP(Vector* I, Matrix* W[], Vector* B[]){
-     Vector* H1 = vec_sigmoid(imatVecAddMul(W[0], I, 1, B[0], 1));
-     Vector* H2 = vec_sigmoid(imatVecAddMul(W[1], H1, 1, B[1], 1));
-     Vector* output = vec_sigmoid(imatVecAddMul(W[2], H2, 1, B[2], 1));
-     return maxat(output); //returns the index of the biggest value in output
- }
- mnum cost(int rightLabel, vector* neuralNetOut){
- mnum output = 0;
- for(int i = 0; i < 62; i++){
- output += neuralNetOut[i];
- }
- return output;
- }
- // Element-wise sigmoid: returns a new vector of the same dimension as v
- // whose i-th entry is sigmoid(v[i]). v itself is not written to.
- // Uses the vector accessors vecat/vecsat, like vec_sigmoid_prime, instead
- // of the matrix accessors.
- // NOTE(review): the caller presumably owns the returned vector -- confirm.
- Vector* vec_sigmoid(Vector* v){
-     int dim = vecd(v);
-     Vector* output = emptyVec(dim);
-     for(int i = 0; i < dim; i++){
-         vecsat(output, i, sigmoid(vecat(v, i)));
-     }
-     return output;
- }
- // Logistic function: 1 / (1 + e^(-x)).
- // The parameter type was misspelled "mnun"; it is mnum like the return type.
- mnum sigmoid(mnum x){
-     return 1/(1+exp(-x));
- }
- // Derivative of the logistic function at x: s*(1-s) with s = sigmoid(x).
- // The "mnun" spellings of the type are corrected to mnum.
- mnum sigmoid_prime(mnum x){
-     mnum s = sigmoid(x); // evaluated once, reused for both factors
-     return s*(1-s);
- }
- // Element-wise sigmoid derivative: returns a new vector of the same
- // dimension as v whose i-th entry is sigmoid_prime(v[i]).
- // The return type is Vector* (the function returns a vector, not an mnum),
- // and the unused second parameter is gone: every call site passes only v.
- // NOTE(review): the caller presumably owns the returned vector -- confirm.
- Vector* vec_sigmoid_prime(Vector* v){
-     int dim = vecd(v);
-     Vector* output = emptyVec(dim);
-     for (int i = 0; i < dim; ++i)
-     {
-         vecsat(output, i, sigmoid_prime(vecat(v, i)));
-     }
-     return output;
- }
- mnum mse(int[] rightLabels, vector*[] neuralNetOuts, int sizeBatch){ //mean square error
- mnum output = 0;
- for(int i = 0; i < sizeBatch; i++){
- output += cost(rightLabel[i], neuralNetOuts[i]);
- }
- return output/sizeBatch;
- }
- //don't mind this, we'll update it little by little
- // One batch gradient-descent step for the three-layer perceptron.
- // For every training sample it runs the forward pass, back-propagates the
- // squared-error gradient through the three sigmoid layers, and adds 1/N of
- // the per-sample gradients to batch accumulators. W and B are then moved
- // against the averaged gradient, scaled by learning_rate.
- //   I                 : training inputs, one vector per sample
- //   nbTrainingSamples : number of entries in I and rightLabels
- //   rightLabels       : expected output vector for each sample
- //   W, B              : the three weight matrices / bias vectors; their
- //                       entries are reassigned to the updated values
- //   learning_rate     : step size applied to the averaged gradient
- // NOTE(review): assumes emptyMat/emptyVec return zero-filled storage (the
- // accumulators rely on it) and that the i*-prefixed helpers return new
- // objects rather than modifying their arguments -- confirm in the library.
- // NOTE(review): nothing allocated here is released, and the previous W[k] /
- // B[k] objects are dropped on reassignment; check the ownership rules.
- void MLP_L(Vector* I[], int nbTrainingSamples, Vector* rightLabels[], Matrix* W[], Vector* B[], mnum learning_rate){
-     // an empty batch has nothing to learn from, and 1/N below would divide by zero
-     if(nbTrainingSamples <= 0) return;
-     //init
-     //batch gradients for weights
-     Matrix* dc_dw2 = emptyMat(matm(W[2]), matn(W[2]));
-     Matrix* dc_dw1 = emptyMat(matm(W[1]), matn(W[1]));
-     Matrix* dc_dw0 = emptyMat(matm(W[0]), matn(W[0]));
-     //batch gradients for biases (no _t suffix: these are the accumulators)
-     Vector* dc_db2 = emptyVec(vecd(B[2]));
-     Vector* dc_db1 = emptyVec(vecd(B[1]));
-     Vector* dc_db0 = emptyVec(vecd(B[0]));
-     //precomputation: weight of one sample in the batch average.
-     //The cast forces a non-integer division; 1/nbTrainingSamples on ints is 0 for N > 1.
-     mnum lambda = (mnum)1 / nbTrainingSamples;
-     //computations
-     for(int t = 0; t < nbTrainingSamples; t++){
-         //forward (on sample t, not on the whole input array)
-         Vector* z[3];
-         z[0] = imatVecAddMul(W[0], I[t], 1, B[0], 1); //dim vec(B[0])
-         Vector* H1 = vec_sigmoid(z[0]); //dim vec(B[0])
-         z[1] = imatVecAddMul(W[1], H1, 1, B[1], 1); //dim vec(B[1])
-         Vector* H2 = vec_sigmoid(z[1]); //dim vec(B[1])
-         z[2] = imatVecAddMul(W[2], H2, 1, B[2], 1); //dim vec(B[2])
-         Vector* output = vec_sigmoid(z[2]); //dim vec(B[2])
-         //backprop
-         //backprop Output layer
-         // NOTE(review): this must evaluate to 2*(output - rightLabels[t]); that holds if
-         // ivmatVMatAdd(A, s, B) computes s*A + B -- confirm the argument convention.
-         Vector* dc_do = imatMul((ivmatVMatAdd(rightLabels[t], -1, output)), 2); //dim vec(output)
-         Vector* do_dz2 = vec_sigmoid_prime(z[2]);
-         Vector* dc_dz2 = ivecVecHadamul(dc_do, do_dz2, 1); //62.1
-         Vector* dz2_dw2 = H2; //189x1
-         mnum dz2_db2 = 1; //mnum
-         Matrix* dz2_dh2 = W[2]; //62x189
-         Matrix* dc_dw2_t = imatMatMul(dc_dz2, imatTranspose(dz2_dw2), 1); //62x189 = 62x1 * 1x189
-         Vector* dc_db2_t = vecMul(dc_dz2, dz2_db2); // 62x1 = 62x1 * 1
-         //backprop H2 layer
-         Vector* dh2_dz1 = vec_sigmoid_prime(z[1]); // 189x1
-         // the scale argument 1 belongs to imatMatMul, as in the H1 layer below
-         Vector* dc_dz1 = ivecVecHadamul(imatTranspose(imatMatMul(imatTranspose(dc_dz2), dz2_dh2, 1)), dh2_dz1, 1); // 189x1 = imatTranspose(imatTranspose(62x1) * 62x189) * 189x1 = 189x1*189x1
-         Vector* dz1_dw1 = H1; //378x1
-         mnum dz1_db1 = 1; //mnum
-         Matrix* dz1_dh1 = W[1]; // 189x378
-         Matrix* dc_dw1_t = imatMatMul(dc_dz1, imatTranspose(dz1_dw1), 1); // 189x378 = 189x1 * imatTranspose(378x1)
-         Vector* dc_db1_t = vecMul(dc_dz1, dz1_db1); //189x1 = 189x1 * 1
-         //backprop H1 layer
-         Vector* dh1_dz0 = vec_sigmoid_prime(z[0]); //378x1
-         Vector* dc_dz0 = ivecVecHadamul(imatTranspose(imatMatMul(imatTranspose(dc_dz1), dz1_dh1, 1)), dh1_dz0, 1); // 378x1 = imatTranspose(imatTranspose(189x1) * 189x378) * 378x1 = 378x1 * 378x1
-         Vector* dz0_dw0 = I[t]; //784x1
-         mnum dz0_db0 = 1; //mnum
-         Matrix* dc_dw0_t = imatMatMul(dc_dz0, imatTranspose(dz0_dw0), 1); //378x784 = 378x1 * imatTranspose(784x1)
-         Vector* dc_db0_t = vecMul(dc_dz0, dz0_db0); // 378x1 = 378x1 * 1
-         //updating grads: accumulate lambda * (per-sample gradient)
-         //weights
-         dc_dw2 = ivmatVMatAdd(matMul(dc_dw2_t, lambda), 1, dc_dw2);
-         dc_dw1 = ivmatVMatAdd(matMul(dc_dw1_t, lambda), 1, dc_dw1);
-         dc_dw0 = ivmatVMatAdd(matMul(dc_dw0_t, lambda), 1, dc_dw0);
-         //biases (vectors are scaled with vecMul, as in the backprop steps above)
-         dc_db2 = ivmatVMatAdd(vecMul(dc_db2_t, lambda), 1, dc_db2);
-         dc_db1 = ivmatVMatAdd(vecMul(dc_db1_t, lambda), 1, dc_db1);
-         dc_db0 = ivmatVMatAdd(vecMul(dc_db0_t, lambda), 1, dc_db0);
-     }
-     //learning: parameter = parameter - learning_rate * averaged gradient
-     //updating weights
-     W[2] = ivmatVMatAdd(matMul(dc_dw2, -learning_rate), 1, W[2]);
-     W[1] = ivmatVMatAdd(matMul(dc_dw1, -learning_rate), 1, W[1]);
-     W[0] = ivmatVMatAdd(matMul(dc_dw0, -learning_rate), 1, W[0]);
-     //updating biases
-     B[2] = ivmatVMatAdd(vecMul(dc_db2, -learning_rate), 1, B[2]);
-     B[1] = ivmatVMatAdd(vecMul(dc_db1, -learning_rate), 1, B[1]);
-     B[0] = ivmatVMatAdd(vecMul(dc_db0, -learning_rate), 1, B[0]);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement