Advertisement
Guest User

Untitled

a guest
Mar 18th, 2019
63
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.78 KB | None | 0 0
  1. #include <stdlib.h>
  2. #include <printf.h>
  3.  
  4. // macros to generate the lookup table (at compile-time)
  5. #define P2(n) n, n^1, n^1, n
  6. #define P4(n) P2(n), P2(n^1), P2(n^1), P2(n)
  7. #define P6(n) P4(n), P4(n^1), P4(n^1), P4(n)
  8. #define FIND_PARITY P6(0), P6(1), P6(1), P6(0)
  9.  
  10. // lookup-table to store the parity of each index of the table
  11. // The macro FIND_PARITY generates the table
  12. unsigned int lookup[256] = { FIND_PARITY };
  13.  
  14.  
  15. int findParity(long long v)
  16. {
  17.     v ^= v >> 32;
  18.     v ^= v >> 16;
  19.     v ^= v >> 8;
  20.     return lookup[v & 0xff];
  21. }
  22.  
  23. void printMatrix(int n, int* matrix){
  24.     int i,j;
  25.     for(i=0; i < n; i++){
  26.         for(j=0; j< n; j++){
  27.             printf("%d ",matrix[i + j*n]);
  28.         }
  29.         printf("\n");
  30.     }
  31. }
  32.  
  33. /* Optimized implementation of matrix multiplication
  34.  * This routine performs a matrix multiplication operation
  35.  * C := C + A * B
  36.  * where A, B, and C are n-by-n matrices stored in column-major format.
  37.  * On exit, A and B maintain their input values.
  38.  * DO NOT change the API of the function matmul_optimized()
  39. */
  40.  
  41.  
  42. void matmul_optimized(int n, int* A, int* B, int* C)
  43. {
  44. //    this specifies the amount of longs needed per row / column when the matrix is compressed
  45.     int longNs = n / 64;
  46.  
  47.     if((n % 64) != 0){
  48.         longNs++;
  49.     }
  50.  
  51.  
  52.     long long *rowsA;
  53.     long long *columnsB;
  54.  
  55.     rowsA = calloc((n * longNs), 64);
  56.     columnsB = calloc((n * longNs), 64);
  57.  
  58.     int i, j, chunk, in, chunkSize, jPlusChunkSize;
  59.     int iTimeslongNs;
  60.     //int nTimes64 = n * 64;
  61.     long long currentRowsA;
  62.     long long currentColumnsB;
  63.  
  64. //    compressing the matrices
  65.     for (i = 0; i < n; ++i) {
  66.         in = i * n;
  67.         chunkSize = 0;  
  68.         iTimeslongNs = i * longNs;
  69.         for(chunk=0; chunk < longNs; chunk++) {
  70.             currentRowsA = rowsA[iTimeslongNs + chunk];
  71.             currentColumnsB = columnsB[iTimeslongNs + chunk];
  72.             for (j = 0; j < 64 && j < n; ++j) {
  73.                 jPlusChunkSize = j + chunkSize;
  74.                 currentRowsA <<= 1;
  75.                 currentRowsA |= A[i  + jPlusChunkSize*n];
  76.  
  77.                 currentColumnsB <<= 1;
  78.                 currentColumnsB |= B[jPlusChunkSize  + in];
  79.             }
  80.         rowsA[iTimeslongNs + chunk] = currentRowsA;
  81.         columnsB[iTimeslongNs + chunk] = currentColumnsB;
  82.             chunkSize += 64;
  83.         }
  84.     }
  85.  
  86.  
  87. //    1 0 0 1 1
  88.  
  89.  
  90.     int jTimesLongNs;
  91.     int* cij;
  92.  
  93.     for(i=0; i<n; i++){
  94.     iTimeslongNs = i * longNs;
  95.         for(j=0;j<n;j++){
  96.         jTimesLongNs = j * longNs;
  97.             cij = &C[i + j*n];
  98.             for(chunk=0;chunk<longNs;chunk++){
  99.                 *cij ^= findParity((rowsA[iTimeslongNs + chunk]) & (columnsB[jTimesLongNs + chunk]));
  100.             }
  101.         }
  102.     }
  103.  
  104.     free(rowsA);
  105.     free(columnsB);
  106. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement