Advertisement
Guest User

Untitled

a guest
Apr 18th, 2014
35
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.74 KB | None | 0 0
  /* Sample input: sixteen entries, each 0 or 1, laid out as four consecutive groups of four. */
  1. char a[16]={1,0,0,1 ,0,0,1,0, 0,1,0,0, 0,0,0,1};
  /*
   * sum1..sum4 add up the four entries of each consecutive group
   * (a[0..3], a[4..7], a[8..11], a[12..15]); for the data above they
   * evaluate to 2, 1, 1 and 1.
   *
   * NOTE(review): if these declarations sit at file scope, C requires
   * constant initializers and `a[0] + ...` is not one (C++ accepts it) -
   * confirm the intended scope.
   */
  2. char sum1 = a[0] + a[1] + a[2] + a[3];
  3. char sum2 = a[4] + a[5] + a[6] + a[7];
  4. char sum3 = a[8] + a[9] + a[10] + a[11];
  5. char sum4 = a[12] + a[13] + a[14] + a[15];
  6.  
  /*
   * Combines two byte vectors group by group, where a group is four
   * consecutive bytes.
   *
   * For every group g, vec3 receives vec1 & vec2. When that intersection is
   * entirely zero, the group in vec3 is overwritten with vec1 | vec2 and
   * counts[g] is incremented.
   *
   * vec1, vec2  inputs, read 16 bytes at a time
   * vec3        output, written 16 bytes at a time
   * counts      one counter per 4-byte group; incremented, never reset here
   * n           length in bytes; whole 16-byte blocks are processed while
   *             the block offset is below n, so a length that is not a
   *             multiple of 16 is effectively rounded up and every buffer
   *             must be padded accordingly (counts needs four entries per
   *             block). n <= 0 does nothing.
   *
   * Unaligned loads and stores are used, so none of the buffers has to be
   * 16-byte aligned.
   */
  void myfunc( const char *vec1, const char *vec2, char *vec3, int *counts, int n){
      const __m128i *r1 = (const __m128i*)vec1;
      const __m128i *r2 = (const __m128i*)vec2;
      char *a = vec3;
      char temp[16];
      int k = 0; /* index of the current 4-byte group across all blocks */

      for ( int i = 0; i < n; i+=16, r1++, r2++, a+=16 ) {
          const __m128i x = _mm_loadu_si128(r1);
          const __m128i y = _mm_loadu_si128(r2);

          _mm_storeu_si128((__m128i*)a, _mm_and_si128(x, y));
          _mm_storeu_si128((__m128i*)temp, _mm_or_si128(x, y));

          for ( int g = 0; g < 16; g+=4, k++ ) {
              /*
               * A group is empty only when all four bytes are zero. OR-ing
               * the bytes tests exactly that; adding them in a char can
               * wrap to 0 for non-zero groups (e.g. four 0x40 bytes, or
               * 1 and -1).
               */
              if( (a[g] | a[g+1] | a[g+2] | a[g+3]) == 0 ){
                  memcpy(a+g, temp+g, 4*sizeof(char));
                  counts[k]++;
              }
          }
      }
  }
  47.  
  48. for ( int i = 0; i < n; i+=16, r1++, r2++, a+=16, k+=4 ) {
  49. _mm_store_si128((__m128i*)a, _mm_and_si128(*r1, *r2));
  50. _mm_store_si128((__m128i*)temp, _mm_or_si128(*r1, *r2));
  51.  
  52. __m128i a4 = _mm_load_si128((__m128i*)a);
  53. __m128i tmp4 = _mm_load_si128((__m128i*)tmp);
  54. __m128i counts4 = _mm_load_si128((__m128i*)&counts[k]);
  55.  
  56. __m128i test = _mm_cmpeq_epi32(_mm_set1_epi32(0), a4);
  57. a4 = _mm_add_epi32(a4, _mm_and_si128(tmp4,test));
  58. counts4 = _mm_sub_epi32(counts4, test);
  59.  
  60. _mm_store_si128((__m128i*)a, a4);
  61. _mm_store_si128((__m128i*)counts, counts4);
  62. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement