Guest User

Untitled

a guest
Jun 18th, 2018
65
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.95 KB | None | 0 0
  /*
   * Clang extended vector of 8 floats. The name says "float4", but the
   * ext_vector_type argument is 8; the name is kept so existing users of
   * the type keep compiling.
   */
  typedef float float4 __attribute__((ext_vector_type(8)));

  /*
   * Return the lane-wise sum of the first n vectors in arr.
   *
   * arr is only read, hence the const qualifier. When n <= 0 the loop body
   * never runs and the zero-initialized accumulator is returned.
   */
  float4 sum(const float4 *arr, int n) {
    float4 t = 0.f; /* scalar initializer: every lane starts at zero */
    for (int i = 0; i < n; ++i) {
      t += arr[i];
    }

    return t;
  }
  10.  
  11. // clang -O3
  12.  
  13. LBB0_9: ## =>This Inner Loop Header: Depth=1
  14. addps -80(%rsi), %xmm1
  15. addps -96(%rsi), %xmm0
  16. addps -64(%rsi), %xmm0
  17. addps -48(%rsi), %xmm1
  18. addps -16(%rsi), %xmm1
  19. addps -32(%rsi), %xmm0
  20. addps (%rsi), %xmm0
  21. addps 16(%rsi), %xmm1
  22. addq $4, %rcx
  23. subq $-128, %rsi
  24. cmpq %rcx, %rdx
  25. jne LBB0_9
  26.  
  27. // clang -O3 -march=skylake
  28.  
  29. LBB0_9: ## =>This Inner Loop Header: Depth=1
  30. vaddps -224(%rsi), %ymm0, %ymm0
  31. vaddps -192(%rsi), %ymm0, %ymm0
  32. vaddps -160(%rsi), %ymm0, %ymm0
  33. vaddps -128(%rsi), %ymm0, %ymm0
  34. vaddps -96(%rsi), %ymm0, %ymm0
  35. vaddps -64(%rsi), %ymm0, %ymm0
  36. vaddps -32(%rsi), %ymm0, %ymm0
  37. vaddps (%rsi), %ymm0, %ymm0
  38. addq $8, %rcx
  39. addq $256, %rsi ## imm = 0x100
  40. cmpq %rcx, %rdx
  41. jne LBB0_9
Add Comment
Please, Sign In to add comment