Advertisement
Eubie

SSE vector+=

Feb 28th, 2013
120
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.46 KB | None | 0 0
#include <ctype.h>

#include <algorithm>
#include <array>
#include <bitset>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <iterator>
#include <list>
#include <map>
#include <sstream>
#include <unordered_map>
#include <vector>

#include <emmintrin.h>
  19.  
  20. using namespace std;
  21.  
  22. #define FAST_SSE
  23.  
  24. inline void addToDoubleVectorSSE(const double * what, const double * toWhat, volatile double * dest, const unsigned int len)
  25. {
  26.     __m128d * _what         = (__m128d*)what;
  27.     __m128d * _toWhat       = (__m128d*)toWhat;
  28.     __m128d * _toWhatBase   = (__m128d*)toWhat;
  29.  
  30.     __m128d _dest1;
  31.     __m128d _dest2;
  32.  
  33. #ifdef FAST_SSE
  34.     for ( register unsigned int i = 0; i < len; i+= 4, _what += 2, _toWhat += 2, _toWhatBase+=2 )
  35.     {
  36.         _toWhatBase = _toWhat;
  37.         _dest1      = _mm_add_pd( *_what, *_toWhat );
  38.         _dest2      = _mm_add_pd( *(_what+1), *(_toWhat+1));
  39.  
  40.         *_toWhatBase = _dest1;
  41.         *(_toWhatBase+1) = _dest2;
  42.     }
  43. #else
  44.     for ( register unsigned int i = 0; i < len; i+= 4 )
  45.     {
  46.         _toWhatBase = _toWhat;
  47.         _dest1      = _mm_add_pd( *_what++, *_toWhat++ );
  48.         _dest2      = _mm_add_pd( *_what++, *_toWhat++ );
  49.  
  50.         *_toWhatBase++ = _dest1;
  51.         *_toWhatBase++ = _dest2;
  52.     }
  53. #endif
  54. }
  55.  
  56. #define ARR_LEN   1000
  57. #define ARR_COUNT 1000
  58. #define REP_COUNT 10000
  59.  
  60. int main(int argc, const char* argv[]) {
  61.     double ** a = ( double **) _mm_malloc( sizeof(double*) * ARR_COUNT, 16 );
  62.     double ** b = ( double **) _mm_malloc( sizeof(double*) * ARR_COUNT, 16 );
  63.     double ** c = ( double **) _mm_malloc( sizeof(double*) * ARR_COUNT, 16 );
  64.  
  65.     for (int i =0 ; i != ARR_COUNT ; i++)
  66.     {
  67.         a[i] = ( double *) _mm_malloc( sizeof(double) * ARR_LEN, 16 );
  68.         b[i] = ( double *) _mm_malloc( sizeof(double) * ARR_LEN, 16 );
  69.         c[i] = ( double *) _mm_malloc( sizeof(double) * ARR_LEN, 16 );
  70.     }
  71.  
  72.     for (int i =0 ; i != 1000 ; i++)
  73.         for (int j =0 ; j != 1000 ; j++)
  74.         {
  75.             a[i][j] = i*i;
  76.             b[i][j] = sqrt((double)i);
  77.             c[i][j] = 0.0;
  78.         }
  79.    
  80.  
  81.     clock_t start = clock();
  82.     for (int i = 0 ; i != REP_COUNT ; i++)
  83.         for ( int j = 0; j < ARR_COUNT; j++ )
  84.             addToDoubleVectorSSE(a[j], b[j], c[j], ARR_LEN);
  85.  
  86.     cout << clock() - start << endl;
  87.     getchar();
  88.  
  89.     for (int i =0 ; i != ARR_COUNT; i++)
  90.     {
  91.         _mm_free((void*) a[i]);
  92.         _mm_free((void*) b[i]);
  93.         _mm_free((void*) c[i]);
  94.     }
  95.     _mm_free((void*) a);
  96.     _mm_free((void*) b);
  97.     _mm_free((void*) c);
  98.  
  99.     return 0;
  100.     }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement