Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include "asm.h"
/*
 * Sum the first n elements of an int array.
 *
 * ar: pointer to at least n readable ints (not accessed when n <= 0).
 * n:  number of elements to add; values <= 0 yield 0.
 *
 * Returns the sum with 32-bit wrap-around on overflow.
 *
 * On x86-64 with a GNU-compatible compiler the loop is hand-written
 * assembly; elsewhere an equivalent C loop is used.
 */
int getSum(int ar[], int n) {
#if defined(__x86_64__) && defined(__GNUC__)
    int sum = 0;
    /* 64-bit, sign-extended copy of n so it can serve as an index register. */
    long long idx = n;

    /*
     * The result is handed back through an output operand instead of being
     * left in a clobbered eax, and numeric local labels keep the asm safe
     * when the compiler inlines or duplicates it.
     */
    __asm__ volatile(
        "testq %[idx], %[idx]\n\t"
        "jle 2f\n"
        "1:\n\t"
        "addl -4(%[ar], %[idx], 4), %[sum]\n\t" /* sum += ar[idx - 1] */
        "decq %[idx]\n\t"
        "jnz 1b\n"
        "2:"
        : [sum] "+&r"(sum), [idx] "+&r"(idx)
        : [ar] "r"(ar)
        : "cc", "memory"); /* "memory": the asm reads the array */
    return sum;
#else
    /* Unsigned accumulation gives the same wrap-around without signed-overflow UB. */
    unsigned int sum = 0;
    for (int i = n; i > 0; i--) {
        sum += (unsigned int)ar[i - 1];
    }
    return (int)sum;
#endif
}
/*
 * Sum the first n elements of an int array, four lanes at a time.
 *
 * ar: pointer to at least n readable ints (no alignment requirement).
 * n:  number of elements to add; values <= 0 yield 0.
 *
 * Returns the sum with 32-bit wrap-around on overflow.
 *
 * On x86-64 the bulk of the array is consumed from the top in blocks of
 * four using SSE2 only (no SSSE3 phaddd needed); the 0..3 leftover elements
 * at the front are added by the scalar loop. On other targets the scalar
 * loop handles everything.
 */
int getSumFastSSE(int *ar, int n) {
    unsigned int sum = 0; /* unsigned: wrap-around without signed-overflow UB */
    long long idx = n;    /* count of elements still to be added */

#if defined(__x86_64__) && defined(__GNUC__)
    if (idx >= 4) {
        __asm__ volatile(
            "pxor %%xmm0, %%xmm0\n\t"
            "subq $4, %[idx]\n"
            "1:\n\t"
            "movdqu (%[ar], %[idx], 4), %%xmm1\n\t" /* ar[idx .. idx+3] */
            "paddd %%xmm1, %%xmm0\n\t"
            "subq $4, %[idx]\n\t"
            "jge 1b\n\t"
            "addq $4, %[idx]\n\t" /* idx = number of leftover elements */
            /* Horizontal add: fold the upper half onto the lower, then odd onto even. */
            "pshufd $0x4e, %%xmm0, %%xmm1\n\t"
            "paddd %%xmm1, %%xmm0\n\t"
            "pshufd $0xb1, %%xmm0, %%xmm1\n\t"
            "paddd %%xmm1, %%xmm0\n\t"
            "movd %%xmm0, %[sum]"
            : [sum] "=&r"(sum), [idx] "+&r"(idx)
            : [ar] "r"(ar)
            : "xmm0", "xmm1", "cc", "memory"); /* "memory": the asm reads the array */
    }
#endif

    /* Scalar tail: ar[idx - 1] down to ar[0]. */
    for (; idx > 0; idx--) {
        sum += (unsigned int)ar[idx - 1];
    }
    return (int)sum;
}
/*
 * Sum the first n elements of an int array, eight lanes at a time.
 *
 * ar: pointer to at least n readable ints (no alignment requirement).
 * n:  number of elements to add; values <= 0 yield 0.
 *
 * Returns the sum with 32-bit wrap-around on overflow.
 *
 * On x86-64, when the running CPU reports AVX2, the bulk of the array is
 * consumed from the top in blocks of eight; the leftover elements at the
 * front are added by the scalar loop. Without AVX2 (or on another target)
 * the scalar loop handles everything, so the function never executes an
 * unsupported instruction.
 *
 * The asm does not touch the stack pointer and does not return from inside
 * the asm statement: the vector accumulator is reduced in registers and
 * passed back through an output operand.
 */
int getSumFastAVX(int *ar, int n) {
    unsigned int sum = 0; /* unsigned: wrap-around without signed-overflow UB */
    long long idx = n;    /* count of elements still to be added */

#if defined(__x86_64__) && defined(__GNUC__)
    if (idx >= 8 && __builtin_cpu_supports("avx2")) {
        __asm__ volatile(
            "vpxor %%ymm0, %%ymm0, %%ymm0\n\t"
            "subq $8, %[idx]\n"
            "1:\n\t"
            "vpaddd (%[ar], %[idx], 4), %%ymm0, %%ymm0\n\t" /* += ar[idx .. idx+7] */
            "subq $8, %[idx]\n\t"
            "jge 1b\n\t"
            "addq $8, %[idx]\n\t" /* idx = number of leftover elements */
            /* Horizontal add: 256 -> 128 bits, then the same folds as for SSE. */
            "vextracti128 $1, %%ymm0, %%xmm1\n\t"
            "vpaddd %%xmm1, %%xmm0, %%xmm0\n\t"
            "vpshufd $0x4e, %%xmm0, %%xmm1\n\t"
            "vpaddd %%xmm1, %%xmm0, %%xmm0\n\t"
            "vpshufd $0xb1, %%xmm0, %%xmm1\n\t"
            "vpaddd %%xmm1, %%xmm0, %%xmm0\n\t"
            "vmovd %%xmm0, %[sum]"
            : [sum] "=&r"(sum), [idx] "+&r"(idx)
            : [ar] "r"(ar)
            : "xmm0", "xmm1", "cc", "memory"); /* "memory": the asm reads the array */
    }
#endif

    /* Scalar tail: ar[idx - 1] down to ar[0]. */
    for (; idx > 0; idx--) {
        sum += (unsigned int)ar[idx - 1];
    }
    return (int)sum;
}
/*
 * Sum the first n elements of a double array.
 *
 * ar: pointer to at least n readable doubles (not accessed when n <= 0).
 * n:  number of elements to add; values <= 0 yield 0.0.
 *
 * Returns the sum, accumulated from ar[n - 1] down to ar[0].
 *
 * On x86-64 with a GNU-compatible compiler the loop is hand-written
 * assembly; elsewhere an equivalent C loop is used.
 */
double getSum(double ar[], int n) {
    double sum = 0.0;

#if defined(__x86_64__) && defined(__GNUC__)
    /* 64-bit, sign-extended copy of n so it can serve as an index register. */
    long long idx = n;

    /*
     * The accumulator is an SSE register operand chosen by the compiler and
     * returned through normal C code, rather than being copied into rax.
     * dec/jnz replaces the `loop` instruction, and numeric local labels keep
     * the asm safe when it is inlined or duplicated.
     */
    __asm__ volatile(
        "testq %[idx], %[idx]\n\t"
        "jle 2f\n"
        "1:\n\t"
        "addsd -8(%[ar], %[idx], 8), %[sum]\n\t" /* sum += ar[idx - 1] */
        "decq %[idx]\n\t"
        "jnz 1b\n"
        "2:"
        : [sum] "+&x"(sum), [idx] "+&r"(idx)
        : [ar] "r"(ar)
        : "cc", "memory"); /* "memory": the asm reads the array */
#else
    for (int i = n; i > 0; i--) {
        sum += ar[i - 1];
    }
#endif

    return sum;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement