Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- .intel_syntax noprefix
- .text
- .global dot_product
- dot_product:
- push ebp
- mov ebp, esp
- push ebx
- mov ebx, [esp + 8]
- mov edx, [esp + 12]
- mov eax, [esp + 8]
- cycle:
- cmp eax, 4
- jb add_last
- sub eax, 4
- movups xmm1, [ebx + 4 * eax]
- movups xmm2, [edx + 4 * eax]
- mulps xmm1, xmm2
- haddps xmm1, xmm1
- haddps xmm1, xmm1
- addss xmm0, xmm1
- jmp cycle
- add_last:
- cmp eax, 0
- je exit
- sub eax, 1
- movss xmm1, [ebx + 4 * eax]
- movss xmm2, [edx + 4 * eax]
- mulss xmm1, xmm2
- addss xmm0, xmm1
- jmp add_last
- exit:
- sub esp, 4
- movss [esp], xmm0
- fld dword ptr [esp]
- add esp, 4
- pop ebx
- mov esp, ebp
- pop ebp
- ret
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement