Advertisement
Guest User

karatsuba

a guest
Aug 5th, 2024
94
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.41 KB | None | 0 0
  1. static void karatsuba_simple(const uint16_t *a_1, const uint16_t *b_1, uint16_t *result_final) {
  2. //Serial.println("Enter karatsuba");
  3. uint16_t d01[KARATSUBA_N / 2 - 1];
  4. uint16_t d0123[KARATSUBA_N / 2 - 1];
  5. uint16_t d23[KARATSUBA_N / 2 - 1];
  6. uint16_t result_d01[KARATSUBA_N - 1];
  7.  
  8. // Inizializzazione delle variabili
  9. memset(result_d01, 0, sizeof(result_d01));
  10. memset(d01, 0, sizeof(d01));
  11. memset(d0123, 0, sizeof(d0123));
  12. memset(d23, 0, sizeof(d23));
  13. memset(result_final, 0, (2 * KARATSUBA_N - 1) * sizeof(uint16_t));
  14.  
  15.  
  16.  
  17. for (int i = 0; i < KARATSUBA_N / 4; i++) {
  18.  
  19. uint16_t acc1 = a_1[i]; // a0
  20. uint16_t acc2 = a_1[i + KARATSUBA_N / 4]; // a1
  21. uint16_t acc3 = a_1[i + 2 * KARATSUBA_N / 4];// a2
  22. uint16_t acc4 = a_1[i + 3 * KARATSUBA_N / 4];// a3
  23. for (int j = 0; j < KARATSUBA_N / 4; j++) {
  24.  
  25. //Nutro il wtd
  26. if (j == 16 ) {
  27. ESP.wdtFeed();
  28. }
  29.  
  30. uint16_t acc5 = b_1[j]; // b0
  31. uint16_t acc6 = b_1[j + KARATSUBA_N / 4];// b1
  32.  
  33. // Utilizzare registri per accumulare i risultati
  34. uint16_t mul1 = OVERFLOWING_MUL(acc1, acc5);
  35. uint16_t mul2 = OVERFLOWING_MUL(acc2, acc6);
  36.  
  37. result_final[i + j] += mul1;
  38. result_final[i + j + 2 * KARATSUBA_N / 4] += mul2;
  39. uint16_t acc7 = acc5 + acc6; // b01
  40. uint16_t acc8 = acc1 + acc2; // a01
  41. d01[i + j] += acc7 * acc8;
  42.  
  43. acc7 = b_1[j + 2 * KARATSUBA_N / 4]; // b2
  44. acc8 = b_1[j + 3 * KARATSUBA_N / 4]; // b3
  45. uint16_t mul3 = OVERFLOWING_MUL(acc7, acc3);
  46. uint16_t mul4 = OVERFLOWING_MUL(acc8, acc4);
  47.  
  48. result_final[i + j + 4 * KARATSUBA_N / 4] += mul3;
  49. result_final[i + j + 6 * KARATSUBA_N / 4] += mul4;
  50.  
  51. uint16_t acc9 = acc3 + acc4;
  52. uint16_t acc10 = acc7 + acc8;
  53. d23[i + j] += OVERFLOWING_MUL(acc9, acc10);
  54.  
  55. acc5 += acc7; // b02
  56. acc7 = acc1 + acc3; // a02
  57. result_d01[i + j] += OVERFLOWING_MUL(acc5, acc7);
  58.  
  59. acc6 += acc8; // b13
  60. acc8 = acc2 + acc4;
  61. result_d01[i + j + 2 * KARATSUBA_N / 4] += OVERFLOWING_MUL(acc6, acc8);
  62. acc5 += acc6;
  63. acc7 += acc8;
  64. d0123[i + j] += OVERFLOWING_MUL(acc5, acc7);
  65.  
  66. }
  67. }
  68.  
  69.  
  70.  
  71. // 2nd last stage
  72. for (int i = 0; i < KARATSUBA_N / 2 - 1; i++) {
  73. d0123[i] -= result_d01[i] + result_d01[i + 2 * KARATSUBA_N / 4];
  74. d01[i] -= result_final[i] + result_final[i + 2 * KARATSUBA_N / 4];
  75. d23[i] -= result_final[i + 4 * KARATSUBA_N / 4] + result_final[i + 6 * KARATSUBA_N / 4];
  76. }
  77.  
  78.  
  79.  
  80.  
  81. for (int i = 0; i < KARATSUBA_N / 2 - 1; i++) {
  82. result_d01[i + KARATSUBA_N / 4] += d0123[i];
  83. result_final[i + KARATSUBA_N / 4] += d01[i];
  84. result_final[i + 5 * KARATSUBA_N / 4] += d23[i];
  85. }
  86.  
  87. //2
  88.  
  89.  
  90. // Last stage
  91. for (int i = 0; i < KARATSUBA_N - 1; i++) {
  92. result_d01[i] -= result_final[i] + result_final[i + KARATSUBA_N];
  93. }
  94.  
  95. //2
  96.  
  97. for (int i = 0; i < KARATSUBA_N - 1; i++) {
  98. result_final[i + KARATSUBA_N / 2] += result_d01[i];
  99. }
  100. //Serial.println("Exit karatsuba");
  101.  
  102. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement