Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <smmintrin.h>
- #include <stdio.h>
- #include <stdint.h>
/*
 * Dot product of two int32_t vectors.
 *
 * pA, pB : input vectors, each holding at least SIZE elements; no alignment
 *          is required (unaligned loads are used).
 * SIZE   : number of elements to process; values <= 0 yield 0.
 *
 * Returns the sum of pA[i] * pB[i]. Every product and the running sum are
 * kept in 64 bits, so a product that does not fit in 32 bits no longer wraps;
 * the total is converted to long only on return.
 *
 * Requires a CPU with SSE4.1. The target attribute lets the function build
 * even when the translation unit is not compiled with -msse4.1.
 */
#if defined(__GNUC__)
__attribute__((target("sse4.1")))
#endif
long dp32(const int32_t * const pA, const int32_t * const pB, long SIZE){
    long i = 0;
    __m128i acc = _mm_setzero_si128();   /* two signed 64-bit partial sums */

    /* Main loop: four elements per iteration. */
    for (; i < SIZE - 3; i += 4) {
        const __m128i a = _mm_loadu_si128((const __m128i *)&pA[i]);
        const __m128i b = _mm_loadu_si128((const __m128i *)&pB[i]);

        /*
         * _mm_mul_epi32 multiplies only 32-bit lanes 0 and 2, producing two
         * signed 64-bit products. Shifting each 64-bit half right by 32 moves
         * lanes 1 and 3 into those positions for the second multiply.
         */
        const __m128i even = _mm_mul_epi32(a, b);
        const __m128i odd = _mm_mul_epi32(_mm_srli_epi64(a, 32),
                                          _mm_srli_epi64(b, 32));

        acc = _mm_add_epi64(acc, _mm_add_epi64(even, odd));
    }

    /* Horizontal reduction of the two 64-bit lanes. */
    int64_t lanes[2];
    _mm_storeu_si128((__m128i *)lanes, acc);
    int64_t total = lanes[0] + lanes[1];

    /* Scalar tail: up to three leftover elements, widened before multiplying. */
    for (; i < SIZE; i++) {
        total += (int64_t)pA[i] * pB[i];
    }

    return (long)total;
}
/*
 * Dot product of two int16_t vectors.
 *
 * pA, pB : input vectors, each holding at least SIZE elements; no alignment
 *          is required (unaligned loads are used).
 * SIZE   : number of elements to process; values <= 0 yield 0.
 *
 * Returns the sum of pA[i] * pB[i]. Partial sums are widened to 64 bits on
 * every iteration, so the result does not wrap at 32 bits; the total is
 * converted to long only on return.
 *
 * Requires a CPU with SSE4.1. The target attribute lets the function build
 * even when the translation unit is not compiled with -msse4.1.
 */
#if defined(__GNUC__)
__attribute__((target("sse4.1")))
#endif
long dp16(const int16_t * const pA, const int16_t * const pB, long SIZE){
    long i = 0;
    const __m128i int32_min = _mm_set1_epi32(INT32_MIN);
    __m128i acc = _mm_setzero_si128();     /* two signed 64-bit partial sums */
    __m128i wraps = _mm_setzero_si128();   /* per-lane count of wrapped madd results */

    /* Main loop: eight elements per iteration. */
    for (; i < SIZE - 7; i += 8) {
        const __m128i a = _mm_loadu_si128((const __m128i *)&pA[i]);
        const __m128i b = _mm_loadu_si128((const __m128i *)&pB[i]);

        /* Each 32-bit lane holds a[2k]*b[2k] + a[2k+1]*b[2k+1]. */
        const __m128i pairs = _mm_madd_epi16(a, b);

        /*
         * The pair sum lies in [-2147418112, 2147483648]; only the top value
         * (all four inputs equal to -32768) exceeds INT32_MAX and wraps to
         * INT32_MIN, a value no other input can produce. The compare yields
         * -1 in such lanes, so subtracting it counts them.
         */
        wraps = _mm_sub_epi32(wraps, _mm_cmpeq_epi32(pairs, int32_min));

        /* Sign-extend the four 32-bit sums to 64 bits before accumulating. */
        const __m128i lo = _mm_cvtepi32_epi64(pairs);
        const __m128i hi = _mm_cvtepi32_epi64(_mm_srli_si128(pairs, 8));
        acc = _mm_add_epi64(acc, _mm_add_epi64(lo, hi));
    }

    /* Horizontal reduction of the two 64-bit lanes. */
    int64_t lanes[2];
    _mm_storeu_si128((__m128i *)lanes, acc);
    int64_t total = lanes[0] + lanes[1];

    /* Every wrapped lane was accumulated as -2^31 instead of +2^31. */
    uint32_t wrap_lanes[4];
    _mm_storeu_si128((__m128i *)wrap_lanes, wraps);
    const int64_t wrap_count = (int64_t)wrap_lanes[0] + wrap_lanes[1]
                             + wrap_lanes[2] + wrap_lanes[3];
    total += wrap_count * ((int64_t)1 << 32);

    /* Scalar tail: up to seven leftover elements. */
    for (; i < SIZE; i++) {
        total += (int64_t)pA[i] * pB[i];
    }

    return (long)total;
}
- //#define SIZE 9
- //#define VECA 0, -1, 2, -3, 4, -5, 6, -7, 8
- //#define VECB -9, 10, 11, -12, 13, 14, -15, 16, 17
- #define SIZE 40
- #define VECA 169, -341, 487, -50, 116, -66, 170, -217, -236, 72, -296, -469, -178, -1, 61, -26, -107, 593, -558, 125, 111, 111, 184, 141, -228, -490, -357, 535, 206, -85, -29, -216, -490, -265, 12, 250, 306, -291, -46, 180
- #define VECB 398, 305, 268, -125, -96, -214, -284, 108, -37, 2, -402, -228, 243, 33, 76, 265, 3, -558, -323, -552, 419, 408, -217, -2, -440, -375, 153, -108, -79, -80, 299, 81, -385, 80, 53, 294, 170, -380, -164, -172
- /* Hand check for the disabled 9-element set above. Elementwise products: 0 -10 +22 +36 +52 -70 -90 -112 +136 */
- /* Adjacent pairs summed (last value carried over): -10 +58 -18 -202 +136 */
- /* Adjacent pairs summed again: +48 -220 +136 */
- /* Adjacent pairs summed again: -172 +136 */
- /* Expected dot product of the 9-element set: -36 */
- int main(){
- int32_t arrA32[] = {VECA};
- int16_t arrA16[] = {VECA};
- int32_t arrB32[] = {VECB};
- int16_t arrB16[] = {VECB};
- long v32 = dp32(arrA32, arrB32, SIZE);
- long v16 = dp16(arrA16, arrB16, SIZE);
- if(v32 == v16){
- printf("Success! (32: %ld == %ld : 16)\n", v32, v16);
- }else{
- printf("Failure! (32: %ld != %ld : 16)\n", v32, v16);
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement