Advertisement
Guest User

GCC assembly of SOR (with unrolling set to 2)

a guest
Feb 1st, 2015
287
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # ---------------------------------------------------------------------------
  # SOR_execute -- objdump-style disassembly, AT&T operand order (src..., dst).
  #
  # Inputs as read by the code before being written:
  #   %edi, %esi  two 32-bit integers; each bounds one of the two inner loops
  #   %rdx        base of an array of 8-byte pointers; each pointer is
  #               dereferenced as an array of doubles (a "row")
  #   %ecx        outer repetition count
  #   %xmm0       a double scalar
  # NOTE(review): this matches a SysV AMD64 call of something like
  #   SOR_execute(int M, int N, double omega, double **G, int num_iterations)
  # -- the C-level names are an assumption, confirm against the C source.
  #
  # Per interior element the code computes
  #   row[j] = xmm1 * (prev[j] + next[j] + row[j-1] + row[j+1]) + xmm12 * row[j]
  # where xmm1 = xmm0 * C1 and xmm12 = C0 - xmm0. C0 and C1 are loaded
  # RIP-relative with a zero displacement in this dump (presumably relocations
  # not yet applied), so their values are not visible here.
  # NOTE(review): for a standard SOR kernel they would be 1.0 and 0.25 -- confirm.
  #
  # Loop nest: repetition loop (0x78) -> row loop (0x88) -> column loop, which
  # is one always-executed element, an optional second peeled element, then a
  # 2x-unrolled loop (0x120). All three loops terminate on equality (jne).
  # Saves and restores %r13, %r12, %rbp, %rbx; makes no calls.
  # ---------------------------------------------------------------------------
  1. 0000000000000030 <SOR_execute>:
  2. 30: 41 55 push %r13  # save callee-saved register
  # xmm2 = constant C0 (zero displacement in this dump; value not visible)
  3. 32: c5 fb 10 15 00 00 00 vmovsd 0x0(%rip),%xmm2 # 3a <SOR_execute+0xa>
  4. 39: 00
  5. 3a: 85 c9 test %ecx,%ecx  # flags for count vs 0; consumed by the jle at 0x4f (nothing in between writes flags)
  6. 3c: 41 54 push %r12  # save callee-saved register
  # xmm1 = xmm0 * constant C1 (zero displacement in this dump; value not visible)
  7. 3e: c5 fb 59 0d 00 00 00 vmulsd 0x0(%rip),%xmm0,%xmm1 # 46 <SOR_execute+0x16>
  8. 45: 00
  9. 46: 41 89 cd mov %ecx,%r13d  # r13d = repetition count (loop bound tested at 0x197)
  10. 49: 55 push %rbp  # save callee-saved register
  11. 4a: c5 6b 5c e0 vsubsd %xmm0,%xmm2,%xmm12  # xmm12 = xmm2 - xmm0 = C0 - scalar
  12. 4e: 53 push %rbx  # save callee-saved register
  13. 4f: 0f 8e 4b 01 00 00 jle 1a0 <SOR_execute+0x170>  # count <= 0: nothing to do, go to epilogue
  14. 55: 89 fb mov %edi,%ebx  # ebx = first integer input (row-loop bound)
  15. 57: 89 f7 mov %esi,%edi  # edi = second integer input (column-loop bound)
  16. 59: 48 89 d6 mov %rdx,%rsi  # rsi = base of the row-pointer array
  17. 5c: 8d 43 fd lea -0x3(%rbx),%eax  # eax = ebx - 3 (32-bit result, upper half of rax zeroed)
  18. 5f: 45 31 e4 xor %r12d,%r12d  # r12d = 0: repetition counter
  19. 62: 8d 57 fd lea -0x3(%rdi),%edx  # edx = edi - 3 (32-bit result, upper half of rdx zeroed)
  20. 65: 48 8d 2c c5 08 00 00 lea 0x8(,%rax,8),%rbp  # rbp = 8*rax + 8: end value for the row byte offset in rcx
  21. 6c: 00
  22. 6d: 48 8d 14 d5 08 00 00 lea 0x8(,%rdx,8),%rdx  # rdx = 8*rdx + 8: end value for the column byte offset in rax
  23. 74: 00
  24. 75: 0f 1f 00 nopl (%rax)  # padding before the loop head at 0x78
  # ---- repetition loop head ----
  25. 78: 83 fb 02 cmp $0x2,%ebx
  26. 7b: 0f 8e 12 01 00 00 jle 193 <SOR_execute+0x163>  # ebx <= 2: no interior rows, skip the row loop
  27. 81: 31 c9 xor %ecx,%ecx  # rcx = 0: byte offset into the row-pointer array
  28. 83: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)  # padding before the loop head at 0x88
  # ---- row loop head: load three consecutive row pointers ----
  29. 88: 83 ff 02 cmp $0x2,%edi  # flags consumed by the jle at 0x99 (the loads below leave flags alone)
  30. 8b: 4c 8b 44 0e 08 mov 0x8(%rsi,%rcx,1),%r8  # r8  = row being updated (middle of the three)
  31. 90: 4c 8b 1c 0e mov (%rsi,%rcx,1),%r11  # r11 = row before it
  32. 94: 4c 8b 54 0e 10 mov 0x10(%rsi,%rcx,1),%r10  # r10 = row after it
  33. 99: 0f 8e e7 00 00 00 jle 186 <SOR_execute+0x156>  # edi <= 2: no interior columns in this row
  # ---- first interior element (byte offset 8 in the row), always executed ----
  34. 9f: c4 c1 7b 10 43 08 vmovsd 0x8(%r11),%xmm0  # xmm0 = prev[1] (the input scalar is no longer needed)
  35. a5: 4c 8d 4a f8 lea -0x8(%rdx),%r9  # r9 = bytes of elements left after this one
  36. a9: b8 08 00 00 00 mov $0x8,%eax  # rax = 8: byte offset of the element just handled
  37. ae: c4 c1 1b 59 70 08 vmulsd 0x8(%r8),%xmm12,%xmm6  # xmm6 = xmm12 * row[1] (old value)
  38. b4: 49 c1 e9 03 shr $0x3,%r9  # r9 = number of elements left
  39. b8: c4 c1 7b 58 5a 08 vaddsd 0x8(%r10),%xmm0,%xmm3  # xmm3 = prev[1] + next[1]
  40. be: 41 83 e1 01 and $0x1,%r9d  # r9 = parity of the elements left: odd means peel one more
  41. c2: 48 39 d0 cmp %rdx,%rax  # flags consumed by the je at 0xdb (only AVX ops in between)
  42. c5: c4 c1 63 58 20 vaddsd (%r8),%xmm3,%xmm4  # ... + row[0]
  43. ca: c4 c1 5b 58 68 10 vaddsd 0x10(%r8),%xmm4,%xmm5  # ... + row[2]
  44. d0: c4 e2 c9 99 e9 vfmadd132sd %xmm1,%xmm6,%xmm5  # xmm5 = xmm5 * xmm1 + xmm6
  45. d5: c4 c1 7b 11 68 08 vmovsd %xmm5,0x8(%r8)  # row[1] = result
  46. db: 0f 84 a5 00 00 00 je 186 <SOR_execute+0x156>  # that was the only interior element
  47. e1: 4d 85 c9 test %r9,%r9
  48. e4: 74 3a je 120 <SOR_execute+0xf0>  # even number left: go straight to the 2x-unrolled loop
  # ---- peeled second element (byte offset 0x10), only when an odd number is left ----
  49. e6: c4 c1 7b 10 7b 10 vmovsd 0x10(%r11),%xmm7  # xmm7 = prev[2]
  50. ec: b8 10 00 00 00 mov $0x10,%eax  # rax = 0x10: byte offset of the element handled here
  51. f1: c4 41 1b 59 58 10 vmulsd 0x10(%r8),%xmm12,%xmm11  # xmm11 = xmm12 * row[2] (old value)
  52. f7: 48 39 d0 cmp %rdx,%rax  # flags consumed by the je at 0x117
  53. fa: c4 41 43 58 42 10 vaddsd 0x10(%r10),%xmm7,%xmm8  # prev[2] + next[2]
  54. 100: c4 41 3b 58 48 08 vaddsd 0x8(%r8),%xmm8,%xmm9  # ... + row[1], the value stored at 0xd5
  55. 106: c4 41 33 58 50 18 vaddsd 0x18(%r8),%xmm9,%xmm10  # ... + row[3]
  56. 10c: c4 62 a1 99 d1 vfmadd132sd %xmm1,%xmm11,%xmm10  # xmm10 = xmm10 * xmm1 + xmm11
  57. 111: c4 41 7b 11 50 10 vmovsd %xmm10,0x10(%r8)  # row[2] = result
  58. 117: 74 6d je 186 <SOR_execute+0x156>  # row finished
  59. 119: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)  # padding before the loop head at 0x120
  # ---- 2x-unrolled column loop: rax = byte offset of the last finished element;
  #      each pass handles the elements at rax+8 and rax+0x10 ----
  60. 120: c4 41 7b 10 6c 03 08 vmovsd 0x8(%r11,%rax,1),%xmm13  # first element: prev[j]
  61. 127: c4 c1 1b 59 44 00 08 vmulsd 0x8(%r8,%rax,1),%xmm12,%xmm0  # xmm0 = xmm12 * row[j] (old value)
  62. 12e: c4 41 13 58 74 02 08 vaddsd 0x8(%r10,%rax,1),%xmm13,%xmm14  # prev[j] + next[j]
  63. 135: c4 c1 1b 59 7c 00 10 vmulsd 0x10(%r8,%rax,1),%xmm12,%xmm7  # hoisted for the second element: xmm7 = xmm12 * row[j+1] (old value)
  64. 13c: c4 41 0b 58 3c 00 vaddsd (%r8,%rax,1),%xmm14,%xmm15  # ... + row[j-1]
  65. 142: c4 c1 03 58 54 00 10 vaddsd 0x10(%r8,%rax,1),%xmm15,%xmm2  # ... + row[j+1] (xmm2 reused; C0 is no longer needed)
  66. 149: c4 e2 f9 99 d1 vfmadd132sd %xmm1,%xmm0,%xmm2  # xmm2 = xmm2 * xmm1 + xmm0
  67. 14e: c4 c1 7b 11 54 00 08 vmovsd %xmm2,0x8(%r8,%rax,1)  # row[j] = result
  68. 155: c4 c1 7b 10 5c 03 10 vmovsd 0x10(%r11,%rax,1),%xmm3  # second element: prev[j+1]
  69. 15c: c4 c1 63 58 64 02 10 vaddsd 0x10(%r10,%rax,1),%xmm3,%xmm4  # prev[j+1] + next[j+1]
  70. 163: c4 c1 5b 58 6c 00 08 vaddsd 0x8(%r8,%rax,1),%xmm4,%xmm5  # ... + row[j], the value stored at 0x14e
  71. 16a: c4 c1 53 58 74 00 18 vaddsd 0x18(%r8,%rax,1),%xmm5,%xmm6  # ... + row[j+2]
  72. 171: c4 e2 c1 99 f1 vfmadd132sd %xmm1,%xmm7,%xmm6  # xmm6 = xmm6 * xmm1 + xmm7
  73. 176: c4 c1 7b 11 74 00 10 vmovsd %xmm6,0x10(%r8,%rax,1)  # row[j+1] = result
  74. 17d: 48 83 c0 10 add $0x10,%rax  # advance by two doubles
  75. 181: 48 39 d0 cmp %rdx,%rax
  76. 184: 75 9a jne 120 <SOR_execute+0xf0>  # repeat until the column end offset is reached
  # ---- row loop tail ----
  77. 186: 48 83 c1 08 add $0x8,%rcx  # next row: advance by one pointer
  78. 18a: 48 39 e9 cmp %rbp,%rcx
  79. 18d: 0f 85 f5 fe ff ff jne 88 <SOR_execute+0x58>  # repeat until the row end offset is reached
  # ---- repetition loop tail ----
  80. 193: 41 83 c4 01 add $0x1,%r12d  # one more repetition done
  81. 197: 45 39 ec cmp %r13d,%r12d
  82. 19a: 0f 85 d8 fe ff ff jne 78 <SOR_execute+0x48>  # repeat until r12d == repetition count
  # ---- epilogue: restore callee-saved registers in reverse push order ----
  83. 1a0: 5b pop %rbx
  84. 1a1: 5d pop %rbp
  85. 1a2: 41 5c pop %r12
  86. 1a4: 41 5d pop %r13
  87. 1a6: c3 retq
Advertisement
RAW Paste Data Copied
Advertisement