Advertisement
Guest User

Untitled

a guest
Jun 25th, 2017
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 1.82 KB | None | 0 0
  1. double sumDBIndicies(double* input1, IndiciesInput indicies)
  2. {
  3.     double accumulate = 0;
  4.     for(unsigned int i = 0; i < indicies.nSingleIndicies; i++)
  5.     {
  6.         accumulate += input1[indicies.singleIndicies[i]];
  7.     }
  8.     for(unsigned int i = 0; i < indicies.nRanges; i++)
  9.     {
  10.         unsigned int masked = indicies.rangeLengths[i] & ~0x15;
  11.         double* address = indicies.rangeStartPositions[i] + input1;
  12.         for(; address < indicies.rangeStartPositions[i] + input1 + masked; address+=16)
  13.         {
  14.             __m128d result1 = _mm_add_pd(_mm_load_pd(address), _mm_loadu_pd(address+2));
  15.             __m128d result2 = _mm_add_pd(_mm_load_pd(address+4), _mm_loadu_pd(address+6));
  16.             __m128d result3 = _mm_add_pd(_mm_load_pd(address+8), _mm_loadu_pd(address+10));
  17.             __m128d result4 = _mm_add_pd(_mm_load_pd(address+12), _mm_loadu_pd(address+14));
  18.            
  19.             //now aggregate those
  20.             __m128d result5 = _mm_add_pd(result1, result2);
  21.             __m128d result6 = _mm_add_pd(result3, result4);
  22.            
  23.             __m128d result7 = _mm_add_pd(result5, result6);
  24.            
  25.             double output[8];
  26.             aligned = (output + 4) & ~0x16;
  27.             _mm_store_pd(aligned, result7);
  28.             accumulate += aligned[0] + aligned[1];
  29.  
  30.         }
  31.         switch(indicies.rangeLengths[i] % 16)
  32.         {
  33.             case 15: accumulate += address + 14;
  34.             case 14: accumulate += address + 13;
  35.             case 13: accumulate += address + 12;
  36.             case 12: accumulate += address + 11;
  37.             case 11: accumulate += address + 10;
  38.             case 10: accumulate += address + 9;
  39.             case 9:  accumulate += address + 8;
  40.             case 8:  accumulate += address + 7;
  41.             case 7:  accumulate += address + 6;
  42.             case 6:  accumulate += address + 5;
  43.             case 5:  accumulate += address + 4;
  44.             case 4:  accumulate += address + 3;
  45.             case 3:  accumulate += address + 2;
  46.             case 2:  accumulate += address + 1;
  47.             case 1:  accumulate += address + 0;
  48.             case 0: break;
  49.            
  50.         }
  51.     }
  52.     return accumulate;
  53. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement