Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t *result_final) {
- //Serial.println("Enter karatsuba");
- uint16_t d01[KARATSUBA_N / 2 - 1];
- uint16_t d0123[KARATSUBA_N / 2 - 1];
- uint16_t d23[KARATSUBA_N / 2 - 1];
- uint16_t result_d01[KARATSUBA_N - 1];
- // Inizializzazione delle variabili
- memset(result_d01, 0, sizeof(result_d01));
- memset(d01, 0, sizeof(d01));
- memset(d0123, 0, sizeof(d0123));
- memset(d23, 0, sizeof(d23));
- memset(result_final, 0, (2 * KARATSUBA_N - 1) * sizeof(uint16_t));
- for (int i = 0; i < KARATSUBA_N / 4; i++) {
- uint16_t acc1 = a_1[i]; // a0
- uint16_t acc2 = a_1[i + KARATSUBA_N / 4]; // a1
- uint16_t acc3 = a_1[i + 2 * KARATSUBA_N / 4];// a2
- uint16_t acc4 = a_1[i + 3 * KARATSUBA_N / 4];// a3
- for (int j = 0; j < KARATSUBA_N / 4; j++) {
- //Nutro il wtd
- if (j == 16 ) {
- ESP.wdtFeed();
- }
- uint16_t acc5 = b_1[j]; // b0
- uint16_t acc6 = b_1[j + KARATSUBA_N / 4];// b1
- // Utilizzare registri per accumulare i risultati
- uint16_t mul1 = OVERFLOWING_MUL(acc1, acc5);
- uint16_t mul2 = OVERFLOWING_MUL(acc2, acc6);
- result_final[i + j] += mul1;
- result_final[i + j + 2 * KARATSUBA_N / 4] += mul2;
- uint16_t acc7 = acc5 + acc6; // b01
- uint16_t acc8 = acc1 + acc2; // a01
- d01[i + j] += acc7 * acc8;
- acc7 = b_1[j + 2 * KARATSUBA_N / 4]; // b2
- acc8 = b_1[j + 3 * KARATSUBA_N / 4]; // b3
- uint16_t mul3 = OVERFLOWING_MUL(acc7, acc3);
- uint16_t mul4 = OVERFLOWING_MUL(acc8, acc4);
- result_final[i + j + 4 * KARATSUBA_N / 4] += mul3;
- result_final[i + j + 6 * KARATSUBA_N / 4] += mul4;
- uint16_t acc9 = acc3 + acc4;
- uint16_t acc10 = acc7 + acc8;
- d23[i + j] += OVERFLOWING_MUL(acc9, acc10);
- acc5 += acc7; // b02
- acc7 = acc1 + acc3; // a02
- result_d01[i + j] += OVERFLOWING_MUL(acc5, acc7);
- acc6 += acc8; // b13
- acc8 = acc2 + acc4;
- result_d01[i + j + 2 * KARATSUBA_N / 4] += OVERFLOWING_MUL(acc6, acc8);
- acc5 += acc6;
- acc7 += acc8;
- d0123[i + j] += OVERFLOWING_MUL(acc5, acc7);
- }
- }
- // 2nd last stage
- for (int i = 0; i < KARATSUBA_N / 2 - 1; i++) {
- d0123[i] -= result_d01[i] + result_d01[i + 2 * KARATSUBA_N / 4];
- d01[i] -= result_final[i] + result_final[i + 2 * KARATSUBA_N / 4];
- d23[i] -= result_final[i + 4 * KARATSUBA_N / 4] + result_final[i + 6 * KARATSUBA_N / 4];
- }
- for (int i = 0; i < KARATSUBA_N / 2 - 1; i++) {
- result_d01[i + KARATSUBA_N / 4] += d0123[i];
- result_final[i + KARATSUBA_N / 4] += d01[i];
- result_final[i + 5 * KARATSUBA_N / 4] += d23[i];
- }
- //2
- // Last stage
- for (int i = 0; i < KARATSUBA_N - 1; i++) {
- result_d01[i] -= result_final[i] + result_final[i + KARATSUBA_N];
- }
- //2
- for (int i = 0; i < KARATSUBA_N - 1; i++) {
- result_final[i + KARATSUBA_N / 2] += result_d01[i];
- }
- //Serial.println("Exit karatsuba");
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement