Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- double sumDBIndicies(double* input1, IndiciesInput indicies)
- {
- double accumulate = 0;
- for(unsigned int i = 0; i < indicies.nSingleIndicies; i++)
- {
- accumulate += input1[indicies.singleIndicies[i]];
- }
- for(unsigned int i = 0; i < indicies.nRanges; i++)
- {
- unsigned int masked = indicies.rangeLengths[i] & ~0x15;
- double* address = indicies.rangeStartPositions[i] + input1;
- for(; address < indicies.rangeStartPositions[i] + input1 + masked; address+=16)
- {
- __m128d result1 = _mm_add_pd(_mm_load_pd(address), _mm_loadu_pd(address+2));
- __m128d result2 = _mm_add_pd(_mm_load_pd(address+4), _mm_loadu_pd(address+6));
- __m128d result3 = _mm_add_pd(_mm_load_pd(address+8), _mm_loadu_pd(address+10));
- __m128d result4 = _mm_add_pd(_mm_load_pd(address+12), _mm_loadu_pd(address+14));
- //now aggregate those
- __m128d result5 = _mm_add_pd(result1, result2);
- __m128d result6 = _mm_add_pd(result3, result4);
- __m128d result7 = _mm_add_pd(result5, result6);
- double output[8];
- aligned = (output + 4) & ~0x16;
- _mm_store_pd(aligned, result7);
- accumulate += aligned[0] + aligned[1];
- }
- switch(indicies.rangeLengths[i] % 16)
- {
- case 15: accumulate += address + 14;
- case 14: accumulate += address + 13;
- case 13: accumulate += address + 12;
- case 12: accumulate += address + 11;
- case 11: accumulate += address + 10;
- case 10: accumulate += address + 9;
- case 9: accumulate += address + 8;
- case 8: accumulate += address + 7;
- case 7: accumulate += address + 6;
- case 6: accumulate += address + 5;
- case 5: accumulate += address + 4;
- case 4: accumulate += address + 3;
- case 3: accumulate += address + 2;
- case 2: accumulate += address + 1;
- case 1: accumulate += address + 0;
- case 0: break;
- }
- }
- return accumulate;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement