Advertisement
Guest User

Untitled

a guest
Nov 24th, 2014
149
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.64 KB | None | 0 0
  /* Count the set bits of one word by clearing its lowest set bit per pass. */
  static int popcount5_word(unsigned w)
  {
      int bits = 0;

      while (w != 0) {
          w &= w - 1;
          bits++;
      }
      return bits;
  }

  /**
   * popcount5 - total number of set bits in array[0..len-1].
   *
   * On x86 with a GNU-compatible compiler, whole groups of four words
   * (128 bits) are counted with an SSSE3 nibble-lookup sequence: every byte
   * is split into two nibbles, pshufb maps each nibble to its bit count via
   * a 16-entry table, and psadbw sums the per-byte counts.  Words left over
   * when len is not a multiple of 4 (and all words on other targets) are
   * counted one at a time, so the function never reads past array[len-1].
   *
   * The vector path assumes a 32-bit unsigned and a CPU with SSSE3.
   *
   * @param array  words to examine; only read, never written
   * @param len    number of words in array; values <= 0 yield 0
   * @return       the accumulated bit count
   */
  int popcount5(unsigned* array, int len)
  {
      int i = 0;
      int result = 0;

  #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
      /* 0x0f in every byte: keeps one nibble per byte lane. */
      static const unsigned nibble_mask[4] = {
          0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f
      };
      /* Byte k of this 16-byte table holds the bit count of k (k = 0..15). */
      static const unsigned nibble_bits[4] = {
          0x02010100, 0x03020201, 0x03020201, 0x04030302
      };
      /* Largest multiple of 4 not above len: the part safe to read 128b at a time. */
      const int vec_len = len - len % 4;

      for (; i < vec_len; i += 4) {
          int val;

          /*
           * __asm__ (not asm) so the block also builds in strict ISO modes.
           * Memory operands are typed as 4-word arrays so the compiler knows
           * all 16 bytes are read; every xmm register touched is declared
           * as clobbered.
           */
          __asm__(
              "movdqu %[x], %%xmm0      \n\t"
              "movdqa %%xmm0, %%xmm1    \n\t" /* two copies of x */
              "movdqu %[m], %%xmm6      \n\t" /* nibble mask */
              "psrlw  $4, %%xmm1        \n\t"
              "pand   %%xmm6, %%xmm0    \n\t" /* xmm0 = low nibbles */
              "pand   %%xmm6, %%xmm1    \n\t" /* xmm1 = high nibbles */

              "movdqu %[l], %%xmm2      \n\t" /* pshufb overwrites the table... */
              "movdqa %%xmm2, %%xmm3    \n\t" /* ...so keep two copies of it */
              "pshufb %%xmm0, %%xmm2    \n\t" /* xmm2 = counts of low nibbles */
              "pshufb %%xmm1, %%xmm3    \n\t" /* xmm3 = counts of high nibbles */

              "paddb  %%xmm2, %%xmm3    \n\t" /* xmm3 = per-byte bit counts */
              "pxor   %%xmm0, %%xmm0    \n\t" /* xmm0 = 0 */
              "psadbw %%xmm0, %%xmm3    \n\t" /* xmm3 = [sum bytes 8..15 | sum bytes 0..7] */
              "movhlps %%xmm3, %%xmm0   \n\t" /* xmm0 = [0 | sum bytes 8..15] */
              "paddd  %%xmm3, %%xmm0    \n\t" /* xmm0 low dword = sum bytes 0..15 */
              "movd   %%xmm0, %[val]    \n\t"
              : [val] "=r" (val)
              : [x] "m" (*(const unsigned (*)[4]) (array + i)),
                [m] "m" (*(const unsigned (*)[4]) nibble_mask),
                [l] "m" (*(const unsigned (*)[4]) nibble_bits)
              : "xmm0", "xmm1", "xmm2", "xmm3", "xmm6");
          result += val;
      }
  #endif

      /* Scalar tail: leftover words, or the whole array without the SSE path. */
      for (; i < len; i++) {
          result += popcount5_word(array[i]);
      }
      return result;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement