Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h> // para printf()
- #include <stdlib.h> // para exit()
- #include <sys/time.h> // para gettimeofday(), struct timeval
/*
 * Build-time configuration.
 *
 * TEST selects the input data set:
 *   0 - benchmark: 2^NBITS words (main() fills them with 0..SIZE-1)
 *   1 - four words with exactly one bit set each
 *   2 - eight words, mostly-ones patterns
 *   other non-zero - eight words, mixed patterns
 *
 * RESULT is the expected total number of set bits for the selected set.
 * It is a plain expression (no trailing semicolon) so it can be used
 * anywhere a value is expected.
 */
#define TEST 0
#define COPY_PASTE_CALC 1

#if !TEST
#define NBITS 20
#define SIZE (1 << NBITS) /* large enough for a measurable run time */
unsigned lista[SIZE];
/* Over 0..2^NBITS-1 every one of the NBITS bit positions is set in
 * exactly half of the values: NBITS * 2^(NBITS-1) set bits in total. */
#define RESULT (NBITS * (1 << (NBITS - 1)))
#elif TEST == 1
#define SIZE 4
unsigned lista[SIZE] = {0x80000000, 0x00100000, 0x00000800, 0x00000001};
#define RESULT 4
#elif TEST == 2
#define SIZE 8
unsigned lista[SIZE] = {0x7FFFFFFF, 0xFFEFFFFF, 0xFFFFF7FF, 0xFFFFFFFE,
                        0x01000024, 0x00356700, 0x8900AC00, 0x00BD00EF};
#define RESULT 156
#else
#define SIZE 8
unsigned lista[SIZE] = {0x00000000, 0x10204080, 0x3590AC06, 0x70B0D0E0,
                        0xFFFFFFFF, 0x12345678, 0x9ABCDEF0, 0xCAFEBEEF};
#define RESULT 116
#endif

/* Value returned by the most recently timed popcount routine. */
int resultado = 0;
/*
 * popcount1 - count the set bits of an array, one bit position at a time.
 *
 * Every word is shifted right through all of its bit positions, whatever
 * its value, and the low bit is added to the running total each time.
 *
 * array: words to examine
 * len:   number of words in array
 *
 * Returns the total number of 1 bits found in array[0..len-1].
 */
int popcount1(unsigned * array, int len)
{
    int total = 0;

    for (int idx = 0; idx < len; idx++) {
        unsigned word = array[idx];

        /* Fixed trip count: one iteration per bit of an unsigned. */
        for (unsigned pos = 0; pos < 8 * sizeof(unsigned); pos++) {
            total += (int)(word & 1u);
            word >>= 1;
        }
    }
    return total;
}
/*
 * popcount2 - count the set bits of an array, stopping early per word.
 *
 * Each word is shifted right only until no set bits remain, so words
 * with few high-order bits need fewer iterations.
 *
 * array: words to examine
 * len:   number of words in array
 *
 * Returns the total number of 1 bits found in array[0..len-1].
 */
int popcount2(unsigned * array, int len)
{
    int total = 0;

    for (int idx = 0; idx < len; idx++) {
        unsigned word = array[idx];

        /* A zero word contributes nothing, so testing before the first
         * step gives the same count as testing after it. */
        while (word != 0) {
            total += (int)(word & 1u);
            word >>= 1;
        }
    }
    return total;
}
/*
 * popcount3 - count the set bits of an array with an x86 shr/adc loop.
 *
 * For every word the inline assembly shifts the low bit into the carry
 * flag and adds the carry to the running total, repeating until the
 * word is zero.
 *
 * array: words to examine
 * len:   number of words in array
 *
 * Returns the total number of 1 bits found in array[0..len-1].
 *
 * x86/x86-64 only (GNU extended asm).
 */
int popcount3(unsigned * array, int len)
{
    int result = 0;

    for (int i = 0; i < len; i++) {
        unsigned x = array[i];

        /*
         * - x is shifted in place, so it must be a read-write operand;
         *   declaring it input-only would let the compiler assume the
         *   register still holds array[i] afterwards.
         * - A numeric local label is used so the block can be emitted
         *   more than once (inlining, unrolling) without symbol clashes.
         * - __asm__ rather than asm so the code also builds in strict
         *   ISO modes such as -std=c11.
         */
        __asm__(
            "1:                 \n\t"
            "shr  $1, %[x]      \n\t"   /* low bit -> CF               */
            "adc  $0, %[r]      \n\t"   /* result += CF                */
            "test %[x], %[x]    \n\t"   /* adc changed ZF; re-test x   */
            "jnz  1b            \n\t"   /* keep going while x != 0     */
            : [r] "+r" (result),
              [x] "+r" (x)
            :
            : "cc");
    }
    return result;
}
/*
 * popcount4 - count the set bits of an array, four bytes in parallel.
 *
 * The mask 0x01010101 picks the low bit of each byte, so eight
 * shift-and-add steps leave a per-byte bit count in each of the four
 * byte lanes. The lanes are then folded together and the low byte holds
 * the count for the whole word.
 *
 * array: words to examine
 * len:   number of words in array
 *
 * Returns the total number of 1 bits found in array[0..len-1].
 */
int popcount4(unsigned* array, int len) {
    int total = 0;

    for (int idx = 0; idx < len; idx++) {
        unsigned word = array[idx];
        unsigned lanes = 0;

        /* One pass per bit position inside a byte. */
        for (int shift = 0; shift < 8; shift++)
            lanes += (word >> shift) & 0x01010101;

        /* Fold the four lane counts into the lowest byte. */
        lanes += lanes >> 16;
        lanes += lanes >> 8;

        total += (int)(lanes & 0xff);
    }
    return total;
}
/*
 * popcount5 - count the set bits of an array, 128 bits at a time.
 *
 * Each group of four words is split into low and high nibbles, every
 * nibble is replaced by its bit count through a 16-entry byte table
 * (pshufb), and the resulting byte counts are summed (psadbw).
 * Words left over when len is not a multiple of four are counted with
 * a scalar loop, so no memory past array[len-1] is ever read.
 *
 * array: words to examine
 * len:   number of words in array
 *
 * Returns the total number of 1 bits found in array[0..len-1].
 *
 * x86/x86-64 only (GNU extended asm); pshufb needs a CPU with SSSE3.
 */
int popcount5(unsigned* array, int len) {
    /* 0x0f in every byte: isolates one nibble per byte. */
    static const unsigned nibble_mask[4] = {
        0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f
    };
    /* Byte k (little-endian) holds the number of set bits in nibble k. */
    static const unsigned nibble_bits[4] = {
        0x02010100, 0x03020201, 0x03020201, 0x04030302
    };
    int result = 0;
    int i = 0;

    for (; len - i >= 4; i += 4) {
        int val;

        /*
         * The memory operands are typed as whole 16-byte arrays so the
         * compiler knows the full extent that is read, and every xmm
         * register the sequence overwrites is listed as clobbered.
         */
        __asm__(
            "movdqu  %[x], %%xmm0     \n\t"   /* four input words        */
            "movdqa  %%xmm0, %%xmm1   \n\t"
            "movdqu  %[m], %%xmm6     \n\t"   /* nibble mask             */
            "psrlw   $4, %%xmm1       \n\t"   /* high nibbles -> low     */
            "pand    %%xmm6, %%xmm0   \n\t"   /* xmm0 = low nibbles      */
            "pand    %%xmm6, %%xmm1   \n\t"   /* xmm1 = high nibbles     */
            "movdqu  %[l], %%xmm2     \n\t"   /* lookup table            */
            "movdqa  %%xmm2, %%xmm3   \n\t"
            "pshufb  %%xmm0, %%xmm2   \n\t"   /* counts of low nibbles   */
            "pshufb  %%xmm1, %%xmm3   \n\t"   /* counts of high nibbles  */
            "paddb   %%xmm2, %%xmm3   \n\t"   /* per-byte bit counts     */
            "pxor    %%xmm0, %%xmm0   \n\t"
            "psadbw  %%xmm0, %%xmm3   \n\t"   /* sum bytes per 64b half  */
            "movhlps %%xmm3, %%xmm0   \n\t"   /* high half sum -> low    */
            "paddd   %%xmm3, %%xmm0   \n\t"   /* add both half sums      */
            "movd    %%xmm0, %[val]   \n\t"
            : [val] "=r" (val)
            : [x] "m" (*(const unsigned (*)[4]) &array[i]),
              [m] "m" (nibble_mask),
              [l] "m" (nibble_bits)
            : "xmm0", "xmm1", "xmm2", "xmm3", "xmm6");

        result += val;
    }

    /* Scalar tail for the last len % 4 words. */
    for (; i < len; i++) {
        unsigned x = array[i];

        while (x != 0) {
            result += (int)(x & 1u);
            x >>= 1;
        }
    }
    return result;
}
- void crono(int (*func)(), char* msg){
- struct timeval tv1,tv2; // gettimeofday() secs-usecs
- long tv_usecs; // y sus cuentas
- gettimeofday(&tv1,NULL);
- resultado = func(lista, SIZE);
- gettimeofday(&tv2,NULL);
- tv_usecs=(tv2.tv_sec -tv1.tv_sec )*1E6+
- (tv2.tv_usec-tv1.tv_usec);
- printf("resultado = %d\t", resultado);
- printf("%s:%9ld us\n", msg, tv_usecs);
- }
- int main()
- {
- #if ! TEST
- int i; // inicializar array
- for (i=0; i<SIZE; i++) // se queda en cache
- lista[i]=i;
- #endif
- crono(popcount1, "popcount1 (en lenguaje C - for)");
- crono(popcount2, "popcount2 (en lenguaje C - while)");
- crono(popcount3, "popcount3 (leng. ASM - cuerpo while)");
- crono(popcount4, "popcount4 (L.CS:APP 3.49 - group8b)");
- /* crono(popcount5, "popcount5 (asm SSE3 - pshufb 128b)"); */
- crono(popcount5, "popcount5 (internet searched)");
- #if ! COPY_PASTE_CALC
- printf("N*(N+1)/2 = %d\n", (SIZE-1)*(SIZE/2)); /*OF*/
- printf("calculado = %d\n", RESULT); /*OF*/
- #endif
- exit(0);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement