Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdlib.h>
- #include <printf.h>
- // macros to generate the lookup table (at compile-time)
- #define P2(n) n, n^1, n^1, n
- #define P4(n) P2(n), P2(n^1), P2(n^1), P2(n)
- #define P6(n) P4(n), P4(n^1), P4(n^1), P4(n)
- #define FIND_PARITY P6(0), P6(1), P6(1), P6(0)
- // lookup-table to store the parity of each index of the table
- // The macro FIND_PARITY generates the table
- unsigned int lookup[256] = { FIND_PARITY };
- int findParity(long long v)
- {
- v ^= v >> 32;
- v ^= v >> 16;
- v ^= v >> 8;
- return lookup[v & 0xff];
- }
- void printMatrix(int n, int* matrix){
- int i,j;
- for(i=0; i < n; i++){
- for(j=0; j< n; j++){
- printf("%d ",matrix[i + j*n]);
- }
- printf("\n");
- }
- }
- /* Optimized implementation of matrix multiplication
- * This routine performs a matrix multiplication operation
- * C := C + A * B
- * where A, B, and C are n-by-n matrices stored in column-major format.
- * On exit, A and B maintain their input values.
- * DO NOT change the API of the function matmul_optimized()
- */
- void matmul_optimized(int n, int* A, int* B, int* C)
- {
- // this specifies the amount of longs needed per row / column when the matrix is compressed
- int longNs = n / 64;
- if((n % 64) != 0){
- longNs++;
- }
- long long *rowsA;
- long long *columnsB;
- rowsA = calloc((n * longNs), 64);
- columnsB = calloc((n * longNs), 64);
- int i, j, chunk, in, chunkSize, jPlusChunkSize;
- int iTimeslongNs;
- //int nTimes64 = n * 64;
- long long currentRowsA;
- long long currentColumnsB;
- // compressing the matrices
- for (i = 0; i < n; ++i) {
- in = i * n;
- chunkSize = 0;
- iTimeslongNs = i * longNs;
- for(chunk=0; chunk < longNs; chunk++) {
- currentRowsA = rowsA[iTimeslongNs + chunk];
- currentColumnsB = columnsB[iTimeslongNs + chunk];
- for (j = 0; j < 64 && j < n; ++j) {
- jPlusChunkSize = j + chunkSize;
- currentRowsA <<= 1;
- currentRowsA |= A[i + jPlusChunkSize*n];
- currentColumnsB <<= 1;
- currentColumnsB |= B[jPlusChunkSize + in];
- }
- rowsA[iTimeslongNs + chunk] = currentRowsA;
- columnsB[iTimeslongNs + chunk] = currentColumnsB;
- chunkSize += 64;
- }
- }
- // 1 0 0 1 1
- int jTimesLongNs;
- int* cij;
- for(i=0; i<n; i++){
- iTimeslongNs = i * longNs;
- for(j=0;j<n;j++){
- jTimesLongNs = j * longNs;
- cij = &C[i + j*n];
- for(chunk=0;chunk<longNs;chunk++){
- *cij ^= findParity((rowsA[iTimeslongNs + chunk]) & (columnsB[jTimesLongNs + chunk]));
- }
- }
- }
- free(rowsA);
- free(columnsB);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement