Advertisement
Guest User

Untitled

a guest
Jun 20th, 2019
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.08 KB | None | 0 0
  1. #include <xmmintrin.h>
  2.  
  /*
   * vector_dot: multiplies v1 and v2 lane by lane and returns the lowest
   * lane of that product as a scalar float.
   *
   * NOTE(review): despite the name, no horizontal sum is performed here.
   * Only the lane-0 product is returned; the other three products are
   * computed and then dropped. Confirm whether a full dot product was
   * intended before relying on the result.
   */
  3. float vector_dot(__m128 v1, __m128 v2)
  4. {
  5. __m128 resp = _mm_mul_ps(v1, v2); /* four packed single-precision products */
  6. float res;
  7. _mm_store_ss(&res, resp); /* writes only the lowest float of resp */
  8. return res;
  9. };
  10.  
  11. gcc -O -S test.c -msse4.1
  12.  
  # _vector_dot as emitted by the 32-bit MinGW.org GCC 6.3.0 build named in
  # the .ident line at the end of this listing (AT&T operand order: src, dst).
  # The routine multiplies %xmm1 into %xmm0, spills the low float of the
  # product to the stack and reloads it with flds, so the value is on the
  # x87 stack when ret executes.
  # NOTE(review): presumably v1/v2 arrive in %xmm0/%xmm1 and st(0) is the
  # float return slot for this target -- confirm against the ABI in use.
  13. .file "test.c"
  14. .text
  15. .globl _vector_dot
  16. .def _vector_dot; .scl 2; .type 32; .endef
  17. _vector_dot:
  18. LFB503:
  19. .cfi_startproc
  20. pushl %ebp
  21. .cfi_def_cfa_offset 8
  22. .cfi_offset 5, -8
  23. movl %esp, %ebp
  24. .cfi_def_cfa_register 5
  25. andl $-16, %esp # round %esp down to a 16-byte boundary
  26. subl $16, %esp # reserve 16 bytes of scratch space
  27. mulps %xmm1, %xmm0 # xmm0 = xmm0 * xmm1, four floats at once
  28. movss %xmm0, 12(%esp) # store only the low float of the product
  29. flds 12(%esp) # reload that float onto the x87 stack
  30. leave # undo the frame: restores %esp and %ebp
  31. .cfi_restore 5
  32. .cfi_def_cfa 4, 4
  33. ret
  34. .cfi_endproc
  35. LFE503:
  36. .ident "GCC: (MinGW.org GCC-6.3.0-1) 6.3.0"
  37.  
  # vector_dot as emitted by the x86_64 MinGW-W64 GCC 7.3.0 build named in
  # the .ident line at the end of this listing (AT&T operand order: src, dst).
  # Both operands are read from memory through the pointers held in %rcx and
  # %rdx, not from vector registers. The packed product stays in %xmm0 and
  # nothing is written to memory before ret.
  # NOTE(review): presumably %rcx -> v1 and %rdx -> v2 (first and second
  # argument slots on Windows x64) -- confirm against the calling convention.
  38. .file "test.c"
  39. .text
  40. .globl vector_dot
  41. .def vector_dot; .scl 2; .type 32; .endef
  42. .seh_proc vector_dot
  43. vector_dot:
  44. .seh_endprologue
  45. movaps (%rdx), %xmm0 # xmm0 = the 16 bytes at the address in %rdx (aligned load)
  46. mulps (%rcx), %xmm0 # xmm0 *= the 16 bytes at the address in %rcx
  47. ret
  48. .seh_endproc
  49. .ident "GCC: (x86_64-posix-seh-rev0, Built by MinGW-W64 project) 7.3.0"
  50.  
  /*
   * Caller-side snippet: builds two __m128 values from 16-byte-aligned
   * float arrays and passes them to vector_dot.
   * The aligned(16) attribute matters because _mm_load_ps is the aligned
   * load and requires a 16-byte-aligned address.
   */
  51. float values1[] __attribute__((aligned(16))) = { 1.3f, 5.4f, -4.f, 5. } ;
  52. __m128 vec1 = _mm_load_ps(values1);
  53.  
  54. float values2[] __attribute__((aligned(16))) = {0.5f, -43.5f, 0, 0 };
  55. __m128 vec2 = _mm_load_ps(values2);
  56.  
  57. float dot = vector_dot(vec1, vec2); /* NOTE(review): vector_dot as written returns only the lane-0 product */
  58.  
  # Debugger disassembly around the call to vector_dot (AT&T operand order).
  # Two 16-byte values are copied into stack temporaries, the addresses of
  # those temporaries are placed in %rcx and %rdx, and after the call the
  # low 32 bits of %xmm0 are stored to a stack slot.
  # NOTE(review): appears to be the code for `float dot = vector_dot(vec1, vec2);`
  # with vec1 at -0x20(%rbp) and vec2 at -0x30(%rbp) -- confirm in the debugger.
  59. 0x401614 <+ 180> 0f 28 45 e0 movaps -0x20(%rbp),%xmm0
  60. 0x401618 <+ 184> 0f 29 85 40 ff ff ff movaps %xmm0,-0xc0(%rbp) # first temporary copy
  61. 0x40161f <+ 191> 0f 28 45 d0 movaps -0x30(%rbp),%xmm0
  62. 0x401623 <+ 195> 0f 29 85 30 ff ff ff movaps %xmm0,-0xd0(%rbp) # second temporary copy
  63. 0x40162a <+ 202> 48 8d 95 30 ff ff ff lea -0xd0(%rbp),%rdx # rdx = address of second temporary
  64. 0x401631 <+ 209> 48 8d 85 40 ff ff ff lea -0xc0(%rbp),%rax
  65. 0x401638 <+ 216> 48 89 c1 mov %rax,%rcx # rcx = address of first temporary
  66. 0x40163b <+ 219> e8 d4 29 00 00 callq 0x404014 <vector_dot>
  67. 0x401640 <+ 224> 66 0f 7e c0 movd %xmm0,%eax # take the low 32 bits of xmm0
  68. 0x401644 <+ 228> 89 45 cc mov %eax,-0x34(%rbp) # store them to the stack slot at -0x34(%rbp)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement