Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <ctype.h>
#include <algorithm>
#include <array>
#include <bitset>
#include <fstream>
#include <iostream>
#include <iterator>
#include <list>
#include <map>
#include <sstream>
#include <unordered_map>
#include <vector>
#include <emmintrin.h>
- using namespace std;
- #define FAST_SSE
/// Element-wise sum of two double arrays using SSE2:
/// dest[i] = what[i] + toWhat[i] for every i in [0, len).
///
/// Four doubles (two 128-bit vectors) are processed per iteration of the main
/// loop; the 0-3 elements left over when len is not a multiple of 4 are added
/// by a scalar tail loop, so no element outside [0, len) is read or written.
///
/// @param what    first addend; must be 16-byte aligned (aligned loads are used)
/// @param toWhat  second addend; must be 16-byte aligned; it is only read
/// @param dest    receives the sums; must be 16-byte aligned. It may be exactly
///                the same buffer as `what` or `toWhat` (each group of four is
///                fully loaded before it is stored) but must not partially
///                overlap either of them.
/// @param len     number of doubles to add; may be 0
///
/// If FAST_SSE is defined when this function is compiled, the loop variant that
/// advances the pointers in the loop header is used; otherwise the variant that
/// advances them after every load/store is used. Both produce the same result.
inline void addToDoubleVectorSSE(const double * what, const double * toWhat, volatile double * dest, const unsigned int len)
{
    // The SSE store intrinsics take a plain double*, so the volatile qualifier
    // from the signature is dropped here. The buffer behind `dest` must
    // therefore not be an object that was itself defined volatile.
    double * out = const_cast<double *>(dest);
    const double * lhs = what;
    const double * rhs = toWhat;

    // Largest multiple of 4 that fits in len; computed by subtraction so it
    // cannot overflow for any unsigned len.
    const unsigned int vecLen = len - (len % 4u);

#ifdef FAST_SSE
    // Variant selected by FAST_SSE: all three pointers advance once per
    // iteration and the second vector is addressed at a fixed offset.
    for (unsigned int i = 0; i < vecLen; i += 4, lhs += 4, rhs += 4, out += 4)
    {
        const __m128d lo = _mm_add_pd(_mm_load_pd(lhs), _mm_load_pd(rhs));
        const __m128d hi = _mm_add_pd(_mm_load_pd(lhs + 2), _mm_load_pd(rhs + 2));
        _mm_store_pd(out, lo);
        _mm_store_pd(out + 2, hi);
    }
#else
    // Default variant: pointers are bumped after every individual load/store.
    for (unsigned int i = 0; i < vecLen; i += 4)
    {
        const __m128d lo = _mm_add_pd(_mm_load_pd(lhs), _mm_load_pd(rhs));
        lhs += 2;
        rhs += 2;
        const __m128d hi = _mm_add_pd(_mm_load_pd(lhs), _mm_load_pd(rhs));
        lhs += 2;
        rhs += 2;
        _mm_store_pd(out, lo);
        out += 2;
        _mm_store_pd(out, hi);
        out += 2;
    }
#endif

    // Scalar tail for the elements that do not fill a whole group of four.
    for (unsigned int i = vecLen; i < len; ++i, ++lhs, ++rhs, ++out)
    {
        *out = *lhs + *rhs;
    }
}
- #define ARR_LEN 1000
- #define ARR_COUNT 1000
- #define REP_COUNT 10000
- int main(int argc, const char* argv[]) {
- double ** a = ( double **) _mm_malloc( sizeof(double*) * ARR_COUNT, 16 );
- double ** b = ( double **) _mm_malloc( sizeof(double*) * ARR_COUNT, 16 );
- double ** c = ( double **) _mm_malloc( sizeof(double*) * ARR_COUNT, 16 );
- for (int i =0 ; i != ARR_COUNT ; i++)
- {
- a[i] = ( double *) _mm_malloc( sizeof(double) * ARR_LEN, 16 );
- b[i] = ( double *) _mm_malloc( sizeof(double) * ARR_LEN, 16 );
- c[i] = ( double *) _mm_malloc( sizeof(double) * ARR_LEN, 16 );
- }
- for (int i =0 ; i != 1000 ; i++)
- for (int j =0 ; j != 1000 ; j++)
- {
- a[i][j] = i*i;
- b[i][j] = sqrt((double)i);
- c[i][j] = 0.0;
- }
- clock_t start = clock();
- for (int i = 0 ; i != REP_COUNT ; i++)
- for ( int j = 0; j < ARR_COUNT; j++ )
- addToDoubleVectorSSE(a[j], b[j], c[j], ARR_LEN);
- cout << clock() - start << endl;
- getchar();
- for (int i =0 ; i != ARR_COUNT; i++)
- {
- _mm_free((void*) a[i]);
- _mm_free((void*) b[i]);
- _mm_free((void*) c[i]);
- }
- _mm_free((void*) a);
- _mm_free((void*) b);
- _mm_free((void*) c);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement