Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <xmmintrin.h>

/*
 * vector_dot - multiply two packed-float vectors and return lane 0 of the
 * product.
 *
 * v1, v2:  four packed single-precision floats each.
 * Returns: the lowest element of the element-wise product v1 * v2.
 *
 * NOTE(review): despite the name, no horizontal sum is performed, so this is
 * not a full four-element dot product -- only v1[0] * v2[0] reaches the
 * caller. Confirm whether that is intentional before relying on the result.
 */
float vector_dot(__m128 v1, __m128 v2)
{
    __m128 product = _mm_mul_ps(v1, v2);

    /* Read the low lane directly instead of going through a stack temporary. */
    return _mm_cvtss_f32(product);
}
- gcc -O -S test.c -msse4.1
- .file "test.c"
# 32-bit MinGW output for vector_dot(): multiplies the packed floats in %xmm0
# and %xmm1 and hands the low lane of the product back on the x87 stack.
# AT&T syntax throughout: source operand first, destination last.
- .text
- .globl _vector_dot # exported; note the leading underscore on the symbol
- .def _vector_dot; .scl 2; .type 32; .endef # COFF symbol attributes for the function
- _vector_dot:
- LFB503:
- .cfi_startproc
- pushl %ebp # save the caller's frame pointer
- .cfi_def_cfa_offset 8
- .cfi_offset 5, -8
- movl %esp, %ebp # establish this function's frame pointer
- .cfi_def_cfa_register 5
- andl $-16, %esp # round %esp down to a 16-byte boundary
- subl $16, %esp # reserve 16 bytes of scratch space
- mulps %xmm1, %xmm0 # xmm0 = xmm0 * xmm1, all four lanes
- movss %xmm0, 12(%esp) # spill only the low lane to the scratch slot
- flds 12(%esp) # reload it onto the x87 stack as the float result
- leave # undo the frame: restore %esp and %ebp
- .cfi_restore 5
- .cfi_def_cfa 4, 4
- ret
- .cfi_endproc
- LFE503:
- .ident "GCC: (MinGW.org GCC-6.3.0-1) 6.3.0"
- .file "test.c"
# 64-bit MinGW-W64 output for vector_dot(): both vector operands are read
# through the pointers in %rcx and %rdx rather than arriving in XMM registers,
# and the product is left in %xmm0. AT&T syntax: source first, destination last.
- .text
- .globl vector_dot # exported; no leading underscore in this build
- .def vector_dot; .scl 2; .type 32; .endef # COFF symbol attributes for the function
- .seh_proc vector_dot
- vector_dot:
- .seh_endprologue # prologue is empty: no stack frame is set up
- movaps (%rdx), %xmm0 # aligned 16-byte load of the vector %rdx points to
- mulps (%rcx), %xmm0 # xmm0 *= vector %rcx points to, all four lanes
- ret # result stays in %xmm0
- .seh_endproc
- .ident "GCC: (x86_64-posix-seh-rev0, Built by MinGW-W64 project) 7.3.0"
- float values1[] __attribute__((aligned(16))) = { 1.3f, 5.4f, -4.f, 5. } ;
- __m128 vec1 = _mm_load_ps(values1);
- float values2[] __attribute__((aligned(16))) = {0.5f, -43.5f, 0, 0 };
- __m128 vec2 = _mm_load_ps(values2);
- float dot = vector_dot(vec1, vec2);
# Disassembly of the call site: each vector is copied to a fresh stack slot and
# vector_dot receives the addresses of those copies in %rcx and %rdx.
- 0x401614 <+ 180> 0f 28 45 e0 movaps -0x20(%rbp),%xmm0
- 0x401618 <+ 184> 0f 29 85 40 ff ff ff movaps %xmm0,-0xc0(%rbp) # copy of the 16 bytes at -0x20(%rbp); presumably vec1
- 0x40161f <+ 191> 0f 28 45 d0 movaps -0x30(%rbp),%xmm0
- 0x401623 <+ 195> 0f 29 85 30 ff ff ff movaps %xmm0,-0xd0(%rbp) # copy of the 16 bytes at -0x30(%rbp); presumably vec2
- 0x40162a <+ 202> 48 8d 95 30 ff ff ff lea -0xd0(%rbp),%rdx # rdx = address of the second copy
- 0x401631 <+ 209> 48 8d 85 40 ff ff ff lea -0xc0(%rbp),%rax
- 0x401638 <+ 216> 48 89 c1 mov %rax,%rcx # rcx = address of the first copy
- 0x40163b <+ 219> e8 d4 29 00 00 callq 0x404014 <vector_dot>
- 0x401640 <+ 224> 66 0f 7e c0 movd %xmm0,%eax # take the low 32 bits of the result in xmm0
- 0x401644 <+ 228> 89 45 cc mov %eax,-0x34(%rbp) # store to a stack slot; presumably `dot`
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement