Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <cmath>
#include <iostream>
#include <string>
#include <vector>

using namespace std;
/// Writes every element of a 2-D matrix to std::cout in row-major order, each
/// followed by a single space, then terminates the line with std::endl.
///
/// @param Z  Matrix to print. It is taken by const reference so the call does
///           not deep-copy it. Each row is printed with its own length, so a
///           row shorter than the first one is never read past its end.
void print2DMatrix(const vector<vector<double>> &Z) {
    for (const vector<double> &row : Z) {
        for (double value : row) {
            cout << value << ' ';
        }
    }
    cout << endl;
}
/// Writes a 3-D tensor indexed as Z[h][w][c] to std::cout on a single line.
///
/// Elements are emitted channel by channel; inside one channel they go row by
/// row, left to right. Every value is followed by a space and the line is
/// terminated with std::endl.
///
/// @param Z  Tensor to print, taken by const reference to avoid a deep copy.
///           Width and channel count are read from the first row / first
///           cell, so the tensor is expected to be rectangular. An empty
///           tensor prints only the line terminator.
void print3DMatrix(const vector<vector<vector<double>>> &Z) {
    // The extents live in Z[0] and Z[0][0]; touching them on an empty tensor
    // would be undefined behaviour, so check before reading them.
    if (!Z.empty() && !Z[0].empty()) {
        const size_t width = Z[0].size();
        const size_t channels = Z[0][0].size();
        for (size_t c = 0; c < channels; ++c) {
            for (size_t h = 0; h < Z.size(); ++h) {
                for (size_t w = 0; w < width; ++w) {
                    cout << Z[h][w][c] << ' ';
                }
            }
        }
    }
    cout << endl;
}
/// Writes a 4-D tensor indexed as W[h][w][c_prev][c_new] to std::cout on a
/// single line.
///
/// The outermost iteration is over c_new, then c_prev, then rows, then
/// columns. Every value is followed by a space and the line is terminated
/// with std::endl.
///
/// @param W  Tensor to print, taken by const reference to avoid a deep copy.
///           All extents are read from the first element along each axis, so
///           the tensor is expected to be rectangular. An empty tensor prints
///           only the line terminator.
void print4DMatrix(const vector<vector<vector<vector<double>>>> &W) {
    // Guard every level that is dereferenced to obtain an extent.
    if (!W.empty() && !W[0].empty() && !W[0][0].empty()) {
        const size_t width = W[0].size();
        const size_t depthIn = W[0][0].size();
        const size_t depthOut = W[0][0][0].size();
        for (size_t c_new = 0; c_new < depthOut; ++c_new) {
            for (size_t c_prev = 0; c_prev < depthIn; ++c_prev) {
                for (size_t h = 0; h < W.size(); ++h) {
                    for (size_t w = 0; w < width; ++w) {
                        cout << W[h][w][c_prev][c_new] << ' ';
                    }
                }
            }
        }
    }
    cout << endl;
}
/// State of one network layer: its hyper-parameters, learnable parameters,
/// forward output and the gradients produced during the backward pass.
///
/// Scalar members carry default initialisers so that a default-initialised
/// `layerDesc` never exposes indeterminate values.
struct layerDesc {
    /// Layer kind tag: "relu", "pool", "bias", "cnvm", "cnve", "cnvc", or
    /// "start" for the input pseudo-layer.
    string type;
    /// Multiplier applied to negative inputs by relu layers (stored as the
    /// reciprocal of the integer read from input).
    double alpha = 0.;
    /// Stride of a convolution layer; for a pool layer, the window side.
    int S = 0;
    /// Number of output channels of a convolution layer.
    int H = 0;
    /// Kernel side length of a convolution layer.
    int K = 0;
    /// Padding width of a convolution layer.
    int P = 0;
    /// Per-channel offsets of a bias layer.
    vector<double> biases;
    /// Gradient with respect to `biases`, filled by the bias backward step.
    vector<double> db;
    /// Convolution kernel indexed [kernel row][kernel col][in channel][out channel].
    vector<vector<vector<vector<double>>>> W;
    /// Gradient with respect to `W`, filled by the convolution backward step.
    vector<vector<vector<vector<double>>>> dW;
    /// Output of this layer, indexed [row][col][channel].
    vector<vector<vector<double>>> A;
    /// Gradient with respect to `A`; written by the backward step of the
    /// following layer, or read from input for the last layer.
    vector<vector<vector<double>>> dZ;
};
- int N, D, L;
- vector<vector<vector<double>>> input;
- vector<layerDesc *> network;
/// Forward pass of a bias layer: returns a tensor of the same extents as `A`
/// in which `biases[c]` has been added to every element of channel `c`.
///
/// @param A       Input tensor indexed [row][col][channel]; not modified.
/// @param biases  One offset per channel.
/// @return        The shifted tensor.
vector<vector<vector<double>>> bias(vector<vector<vector<double>>> &A, vector<double> &biases) {
    // Start from a copy of the input and shift it in place.
    vector<vector<vector<double>>> shifted(A);
    for (vector<vector<double>> &row : shifted) {
        for (vector<double> &cell : row) {
            for (size_t channel = 0; channel < cell.size(); ++channel) {
                cell[channel] = cell[channel] + biases[channel];
            }
        }
    }
    return shifted;
}
- void biasBack(int layerNum) {
- auto Aprev = network[layerNum - 1]->A;
- auto dZ = network[layerNum]->dZ;
- auto biases = network[layerNum]->biases;
- int h_new = dZ.size();
- int w_new = dZ[0].size();
- int c_new = dZ[0][0].size();
- vector<double> db(biases.size(), 0.);
- for (int h = 0; h < h_new; ++h) {
- for (int w = 0; w < w_new; ++w) {
- for (int c = 0; c < c_new; ++c) {
- db[c] += dZ[h][w][c];
- }
- }
- }
- network[layerNum - 1]->dZ = dZ; /// link
- network[layerNum]->db = db;
- }
/// Returns the largest value of channel `c` inside the half-open window
/// rows [vert_start, vert_end) x columns [horiz_start, horiz_end) of `A`.
///
/// The element at (vert_start, horiz_start) is always read and is the result
/// when the window is empty.
double matrixMax(vector<vector<vector<double>>> &A,
                 int vert_start, int vert_end, int horiz_start, int horiz_end, int c) {
    double best = A[vert_start][horiz_start][c];
    for (int row = vert_start; row < vert_end; ++row) {
        for (int col = horiz_start; col < horiz_end; ++col) {
            const double candidate = A[row][col][c];
            if (candidate > best) {
                best = candidate;
            }
        }
    }
    return best;
}
/// Forward pass of a max-pooling layer with an f x f window and stride f.
///
/// The output has 1 + (rows - f) / f rows and 1 + (cols - f) / f columns
/// (integer division) and the same channel count as the input. Each output
/// element is the maximum of the corresponding window in its channel.
///
/// @param A  Input tensor indexed [row][col][channel]; not modified.
/// @param f  Window side length, also used as the stride.
/// @return   The pooled tensor.
vector<vector<vector<double>>> pool(vector<vector<vector<double>>> &A, int f) {
    const int inRows = static_cast<int>(A.size());
    const int inCols = static_cast<int>(A[0].size());
    const int channels = static_cast<int>(A[0][0].size());
    const int outRows = 1 + (inRows - f) / f;
    const int outCols = 1 + (inCols - f) / f;

    vector<vector<vector<double>>> pooled(outRows,
            vector<vector<double>>(outCols, vector<double>(channels, 0.)));

    for (int outRow = 0; outRow < outRows; ++outRow) {
        const int top = outRow * f;
        for (int outCol = 0; outCol < outCols; ++outCol) {
            const int left = outCol * f;
            for (int ch = 0; ch < channels; ++ch) {
                // Maximum over the window whose top-left corner is (top, left).
                double best = A[top][left][ch];
                for (int row = top; row < top + f; ++row) {
                    for (int col = left; col < left + f; ++col) {
                        if (A[row][col][ch] > best) {
                            best = A[row][col][ch];
                        }
                    }
                }
                pooled[outRow][outCol][ch] = best;
            }
        }
    }
    return pooled;
}
/// Builds the f x f selection mask of one pooling window.
///
/// The window starts at (vert_start, horiz_start) in channel `cFix` of `X`.
/// The mask holds 1 at every position whose value equals the window maximum
/// (all tied positions are marked) and 0 elsewhere.
vector<vector<double>> getPoolMaxMask(vector<vector<vector<double>>> &X,
                                      int vert_start, int horiz_start, int f, int cFix) {
    // First pass: find the maximum of the window.
    double peak = X[vert_start][horiz_start][cFix];
    for (int dh = 0; dh < f; ++dh) {
        for (int dw = 0; dw < f; ++dw) {
            const double value = X[vert_start + dh][horiz_start + dw][cFix];
            if (value > peak) {
                peak = value;
            }
        }
    }

    // Second pass: mark every position that attains it.
    vector<vector<double>> mask(f, vector<double>(f, 0.));
    for (int dh = 0; dh < f; ++dh) {
        for (int dw = 0; dw < f; ++dw) {
            if (X[vert_start + dh][horiz_start + dw][cFix] == peak) {
                mask[dh][dw] = 1.;
            } else {
                mask[dh][dw] = 0.;
            }
        }
    }
    return mask;
}
/// Adds `dZhwc * mask` to the f x f window of `dAprev` that starts at
/// (vert_start, horiz_start) in channel `cFix`.
///
/// The update accumulates, so repeated calls on the same window add up.
void mulMaskMatrix(vector<vector<vector<double>>> &dAprev,
                   double dZhwc, vector<vector<double>> &mask,
                   int vert_start, int horiz_start, int f, int cFix) {
    for (int dh = 0; dh < f; ++dh) {
        vector<vector<double>> &gradRow = dAprev[vert_start + dh];
        const vector<double> &maskRow = mask[dh];
        for (int dw = 0; dw < f; ++dw) {
            gradRow[horiz_start + dw][cFix] += dZhwc * maskRow[dw];
        }
    }
}
- void poolBack(int layerNum) {
- auto Aprev = network[layerNum - 1]->A;
- auto dZ = network[layerNum]->dZ;
- int f = network[layerNum]->S;
- int h_prev = Aprev.size();
- int w_prev = Aprev[0].size();
- int c_prev = Aprev[0][0].size();
- int h_new = dZ.size();
- int w_new = dZ[0].size();
- int c_new = dZ[0][0].size();
- vector<vector<vector<double>>> dAprev(h_prev,
- vector<vector<double>>(w_prev, vector<double>(c_prev, 0.)));
- for (int h = 0; h < h_new; ++h) {
- for (int w = 0; w < w_new; ++w) {
- for (size_t c = 0; c < c_new; ++c) {
- int vert_start = h * f;
- int horiz_start = w * f;
- auto mask = getPoolMaxMask(Aprev, vert_start, horiz_start, f, c);
- mulMaskMatrix(dAprev, dZ[h][w][c], mask, vert_start, horiz_start, f, c);
- }
- }
- }
- network[layerNum - 1]->dZ = dAprev;
- }
/// Forward pass of a leaky ReLU: values that are >= 0 pass through unchanged,
/// all other values are multiplied by `alpha`.
///
/// @param A      Input tensor indexed [row][col][channel]; not modified.
/// @param alpha  Multiplier applied to negative values.
/// @return       Tensor with the same extents as `A`.
vector<vector<vector<double>>> relu(vector<vector<vector<double>>> &A, double alpha) {
    const auto activate = [alpha](double x) -> double {
        if (x >= 0) {
            return x;
        }
        return alpha * x;
    };

    // Copy the input and rewrite each element in place.
    vector<vector<vector<double>>> activated(A);
    for (vector<vector<double>> &row : activated) {
        for (vector<double> &cell : row) {
            for (double &value : cell) {
                value = activate(value);
            }
        }
    }
    return activated;
}
- void reluBack(int layerNum) {
- auto A = network[layerNum - 1]->A;
- auto dZ = network[layerNum]->dZ;
- double alpha = network[layerNum]->alpha;
- vector<vector<vector<double>>> dA(dZ.size(),
- vector<vector<double>>(dZ[0].size(), vector<double>(dZ[0][0].size())));
- for (size_t h = 0; h < dZ.size(); ++h) {
- for (size_t w = 0; w < dZ[0].size(); ++w) {
- for (size_t c = 0; c < dZ[0][0].size(); ++c) {
- dA[h][w][c] = A[h][w][c] < 0 ? alpha * dZ[h][w][c] : dZ[h][w][c];
- }
- }
- }
- network[layerNum - 1]->dZ = dA;
- }
/// Returns `A` surrounded by `pad` extra rows and columns on every side.
///
/// The padding content depends on `type`:
///   - "cnvm": mirror; the border is reflected around the edge element
///             without repeating it (needs pad < rows and pad < cols);
///   - "cnve": edge; the nearest border element is replicated;
///   - "cnvc": cyclic; the tensor wraps around (needs pad <= rows and
///             pad <= cols);
///   - anything else: the padding stays zero.
///
/// Rows and columns are mapped independently with their own extents, so
/// non-square inputs are handled correctly.
///
/// @param A     Input tensor indexed [row][col][channel]; must be non-empty.
/// @param pad   Padding width, non-negative.
/// @param type  Padding mode tag as listed above.
/// @return      Tensor of extents (rows + 2*pad) x (cols + 2*pad) x channels.
vector<vector<vector<double>>> fillPad(vector<vector<vector<double>>> &A, int pad, const string& type) {
    const long long rows = static_cast<long long>(A.size());
    const long long cols = static_cast<long long>(A[0].size());
    const long long paddedRows = rows + 2LL * pad;
    const long long paddedCols = cols + 2LL * pad;

    vector<vector<vector<double>>> res(static_cast<size_t>(paddedRows),
            vector<vector<double>>(static_cast<size_t>(paddedCols),
                    vector<double>(A[0][0].size())));

    const bool mirror = type == "cnvm";
    const bool edge = type == "cnve";
    const bool cyclic = type == "cnvc";
    const bool knownMode = mirror || edge || cyclic;

    // Maps a coordinate expressed relative to A (negative or >= extent when it
    // lies in the padding) to the coordinate of A that supplies its value.
    const auto source = [mirror, edge](long long p, long long extent) -> long long {
        if (p >= 0 && p < extent) {
            return p;
        }
        if (mirror) {
            return p < 0 ? -p : 2 * (extent - 1) - p;
        }
        if (edge) {
            return p < 0 ? 0 : extent - 1;
        }
        // cyclic
        return p < 0 ? p + extent : p - extent;
    };

    for (long long i = 0; i < paddedRows; ++i) {
        const long long pi = i - pad;
        const bool rowInside = pi >= 0 && pi < rows;
        for (long long j = 0; j < paddedCols; ++j) {
            const long long pj = j - pad;
            const bool inside = rowInside && pj >= 0 && pj < cols;
            if (!inside && !knownMode) {
                continue;  // unknown mode: padding cells keep their zeros
            }
            // Copies every channel of the source cell at once.
            res[i][j] = A[source(pi, rows)][source(pj, cols)];
        }
    }
    return res;
}
/// Folds a padded tensor back onto its unpadded extents.
///
/// Every element of `A` (the padded tensor) is added to the element of the
/// result that the padding mode associates it with: interior elements go to
/// their own position, padding elements go to the position they would have
/// been copied from ("cnvm" mirror, "cnve" edge, "cnvc" cyclic). For any
/// other `type` the result is left all zero.
///
/// @param A     Padded tensor indexed [row][col][channel].
/// @param pad   Padding width that was applied on every side.
/// @param type  Padding mode tag.
/// @return      Tensor of extents (rows - 2*pad) x (cols - 2*pad) x channels.
vector<vector<vector<double>>> clearPad(vector<vector<vector<double>>> &A, int pad, const string& type) {
    vector<vector<vector<double>>> res(A.size() - 2 * pad,
            vector<vector<double>>(A[0].size() - 2 * pad,
                    vector<double>(A[0][0].size())));

    const bool mirror = type == "cnvm";
    const bool edge = type == "cnve";
    const bool cyclic = type == "cnvc";
    if (!mirror && !edge && !cyclic) {
        return res;
    }

    // Maps a coordinate relative to the unpadded tensor to the coordinate
    // that receives its contribution.
    const auto target = [mirror, edge](long long p, long long extent) -> long long {
        if (p >= 0 && p < extent) {
            return p;
        }
        if (mirror) {
            return p < 0 ? -p : 2 * (extent - 1) - p;
        }
        if (edge) {
            return p < 0 ? 0 : extent - 1;
        }
        return p < 0 ? p + extent : p - extent;
    };

    const long long outRows = static_cast<long long>(res.size());
    const long long outCols = static_cast<long long>(res[0].size());
    const size_t channels = A[0][0].size();

    for (size_t i = 0; i < A.size(); ++i) {
        const long long ii = target(static_cast<long long>(i) - pad, outRows);
        for (size_t j = 0; j < A[0].size(); ++j) {
            const long long jj = target(static_cast<long long>(j) - pad, outCols);
            vector<double> &sink = res[ii][jj];
            const vector<double> &contribution = A[i][j];
            for (size_t c = 0; c < channels; ++c) {
                sink[c] += contribution[c];
            }
        }
    }
    return res;
}
/// Computes one output value of a convolution: the sum over the kernel window
/// and all input channels of A[vert_start + h][horiz_start + w][c] times
/// W[h][w][c][fixC].
///
/// @param A            Input tensor (already padded by the caller).
/// @param W            Kernel indexed [row][col][in channel][out channel].
/// @param vert_start   Top row of the window in `A`.
/// @param horiz_start  Left column of the window in `A`.
/// @param fixC         Output channel being computed.
double convSingleStep(vector<vector<vector<double>>> &A,
                      vector<vector<vector<vector<double>>>> &W,
                      int vert_start, int horiz_start, int fixC) {
    double acc = 0.;
    for (size_t kh = 0; kh < W.size(); ++kh) {
        const vector<vector<double>> &inputRow = A[vert_start + kh];
        for (size_t kw = 0; kw < W[0].size(); ++kw) {
            const vector<double> &pixel = inputRow[horiz_start + kw];
            const vector<vector<double>> &taps = W[kh][kw];
            for (size_t ch = 0; ch < A[0][0].size(); ++ch) {
                acc += pixel[ch] * taps[ch][fixC];
            }
        }
    }
    return acc;
}
- vector<vector<vector<double>>> conv(vector<vector<vector<double>>> &A,
- vector<vector<vector<vector<double>>>> &W, int S, int P, const string& type) {
- int f = W.size();
- int h_prev = A.size();
- int w_prev = A[0].size();
- int c_prev = A[0][0].size();
- int h_new = int((h_prev - f + 2 * P) / S) + 1;
- int w_new = int((w_prev - f + 2 * P) / S) + 1;
- int c_new = W[0][0][0].size();
- vector<vector<vector<double>>> res(h_new,
- vector<vector<double>>(w_new, vector<double>(c_new, 0.)));
- auto Apad = fillPad(A, P, type);
- // for (size_t i = 0; i < A.size(); ++i) {
- // for (size_t j = 0; j < A.size(); ++j) {
- // cout << A[i][j][0] << ' ';
- // }
- // cout << '\n';
- // }
- // cout << '\n' << '\n';
- //
- // for (size_t i = 0; i < Apad.size(); ++i) {
- // for (size_t j = 0; j < Apad.size(); ++j) {
- // cout << Apad[i][j][0] << ' ';
- // }
- // cout << '\n';
- // }
- for (int h = 0; h < h_new; ++h) {
- int vert_start = h * S;
- for (int w = 0; w < w_new; ++w) {
- int horiz_start = w * S;
- for (int c = 0; c < c_new; ++c) {
- res[h][w][c] = convSingleStep(Apad, W,
- vert_start, horiz_start, c);
- }
- }
- }
- return res;
- }
/// Accumulates the contribution of one output gradient value `dZhwc`
/// (output channel `fixC`, window at (vert_start, horiz_start)) into both the
/// padded input gradient and the kernel gradient:
///   dAprevPad[window] += W[..][fixC] * dZhwc
///   dW[..][fixC]      += AprevPad[window] * dZhwc
void convBackStep(vector<vector<vector<double>>> &AprevPad,
                  vector<vector<vector<double>>> &dAprevPad,
                  vector<vector<vector<vector<double>>>> &W,
                  vector<vector<vector<vector<double>>>> &dW,
                  double dZhwc, int vert_start, int horiz_start, int fixC) {
    for (size_t kh = 0; kh < W.size(); ++kh) {
        const size_t row = vert_start + kh;
        for (size_t kw = 0; kw < W[0].size(); ++kw) {
            const size_t col = horiz_start + kw;
            vector<double> &inputGrad = dAprevPad[row][col];
            const vector<double> &inputValue = AprevPad[row][col];
            for (size_t ch = 0; ch < W[0][0].size(); ++ch) {
                inputGrad[ch] += W[kh][kw][ch][fixC] * dZhwc;
                dW[kh][kw][ch][fixC] += inputValue[ch] * dZhwc;
            }
        }
    }
}
- void convBack(int layerNum, const string& type) {
- auto Aprev = network[layerNum - 1]->A;
- auto dZ = network[layerNum]->dZ;
- auto W = network[layerNum]->W;
- int S = network[layerNum]->S;
- int P = network[layerNum]->P;
- int h_prev = Aprev.size();
- int w_prev = Aprev[0].size();
- int c_prev = Aprev[0][0].size();
- int f = W.size();
- int h_new = dZ.size();
- int w_new = dZ[0].size();
- int c_new = dZ[0][0].size();
- vector<vector<vector<double>>> dAprev(h_prev,
- vector<vector<double>>(w_prev, vector<double>(c_prev, 0.)));
- vector<vector<vector<vector<double>>>> dW(f, vector<vector<vector<double>>>(f,
- vector<vector<double>>(c_prev, vector<double>(c_new, 0.))));
- auto AprevPad = fillPad(Aprev, P, type);
- auto dAprevPad = fillPad(dAprev, P, type);
- for (int h = 0; h < h_new; ++h) {
- for (int w = 0; w < w_new; ++w) {
- for (int c = 0; c < c_new; ++c) {
- int vert_start = h * S;
- int horiz_start = w * S;
- convBackStep(AprevPad, dAprevPad, W, dW, dZ[h][w][c], vert_start, horiz_start, c);
- }
- }
- }
- network[layerNum - 1]->dZ = clearPad(dAprevPad, P, type);
- network[layerNum]->dW = dW;
- // for (size_t i = 0; i < AprevPad.size(); ++i) {
- // for (size_t j = 0; j < AprevPad.size(); ++j) {
- // cout << dAprevPad[i][j][0] << ' ';
- // }
- // cout << '\n';
- // }
- // cout << '\n' << '\n';
- }
- layerDesc *forward(string &s, int layerNum) {
- auto layer = new layerDesc();
- layer->type = s;
- if (s == "relu") {
- int alpha;
- cin >> alpha;
- layer->alpha = 1. / (double) alpha;
- layer->A = relu(network[layerNum - 1]->A, layer->alpha);
- } else if (s == "pool") {
- cin >> layer->S;
- layer->A = pool(network[layerNum - 1]->A, layer->S);
- } else if (s == "bias") {
- int deep = network[layerNum - 1]->A[0][0].size();
- // int deep = D;
- layer->biases.resize(deep);
- int x;
- for (size_t i = 0; i < deep; ++i) {
- cin >> x;
- layer->biases[i] = (double) x;
- }
- layer->A = bias(network[layerNum - 1]->A, layer->biases);
- } else if (s == "cnvm" || s == "cnve" || s == "cnvc") {
- int deep = network[layerNum - 1]->A[0][0].size();
- // int deep = D;
- cin >> layer->H >> layer->K >> layer->S >> layer->P;
- layer->W.assign(layer->K, vector<vector<vector<double>>>(layer->K,
- vector<vector<double>>(deep, vector<double>(layer->H))));
- int x;
- for (size_t h = 0; h < layer->H; ++h) {
- for (size_t d = 0; d < deep; ++d) {
- for (size_t kh = 0; kh < layer->K; ++kh) {
- for (size_t kw = 0; kw < layer->K; ++kw) {
- cin >> x;
- layer->W[kh][kw][d][h] = (double) x;
- }
- }
- }
- }
- layer->A = conv(network[layerNum - 1]->A, layer->W, layer->S, layer->P, s);
- }
- return layer;
- }
- void backward(int layerNum) {
- auto layer = network[layerNum];
- if (layer->type == "relu") {
- reluBack(layerNum);
- } else if (layer->type == "pool") {
- poolBack(layerNum);
- } else if (layer->type == "bias") {
- biasBack(layerNum);
- } else if (layer->type == "cnvm" || layer->type == "cnve" || layer->type == "cnvc") {
- convBack(layerNum, layer->type);
- }
- }
- void readOutDerivative() {
- auto Z = network[L]->A;
- network[L]->dZ.assign(Z.size(),
- vector<vector<double>>(Z[0].size(), vector<double>(Z[0][0].size())));
- int x;
- for (size_t c = 0; c < Z[0][0].size(); ++c) {
- for (size_t h = 0; h < Z.size(); ++h) {
- for (size_t w = 0; w < Z[0].size(); ++w) {
- cin >> x;
- network[L]->dZ[h][w][c] = (double) x;
- }
- }
- }
- }
- void printDerivatives(int layerNum) {
- auto layer = network[layerNum];
- if (layer->type == "cnvm" || layer->type == "cnve" || layer->type == "cnvc") {
- // cout << "cnvm" << '\n';
- print4DMatrix(layer->dW);
- } else if (layer->type == "bias") {
- // cout << "bias" << '\n';
- for (double i : layer->db) {
- cout << i << ' ';
- }
- cout << endl;
- }
- }
/// Debug hook called once per layer at the end of main().
///
/// The dump of the layer's output and gradient that used to live here is
/// disabled, so the function intentionally does nothing: it produces no
/// output and no longer copies the layer's tensor on every call.
///
/// @param layerNum  Index of the layer; currently unused.
void printing(int layerNum) {
    static_cast<void>(layerNum);
}
- int main() {
- //ios_base::sync_with_stdio(false);
- cin >> N >> D;
- input.assign(N, vector<vector<double>>(N, vector<double>(D)));
- int x;
- for (size_t c = 0; c < D; ++c) {
- for (size_t h = 0; h < N; ++h) {
- for (size_t w = 0; w < N; ++w) {
- cin >> x;
- input[h][w][c] = (double) x;
- }
- }
- }
- cin >> L;
- network.resize(L + 1);
- network[0] = new layerDesc();
- network[0]->type = "start";
- network[0]->A = input;
- for (size_t i = 1; i <= L; ++i) {
- string s;
- cin >> s;
- network[i] = forward(s, i);
- }
- readOutDerivative();
- print3DMatrix(network[L]->A);
- for (size_t i = L; i > 0; --i) {
- backward(i);
- }
- print3DMatrix(network[0]->dZ);
- for (size_t i = 1; i <= L; ++i) {
- printDerivatives(i);
- }
- cout << '\n' << '\n';
- for (size_t i = 0; i <= L; ++i) {
- printing(i);
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement