Advertisement
Guest User

Untitled

a guest
Mar 4th, 2017
145
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.46 KB | None | 0 0
  1. 0000000000400550 <main>:
  // main (AArch64 disassembly listing). What the instructions below do, in order:
  //   1. malloc three 4000-byte buffers, used as 1000 32-bit ints each: arr1 (x20), arr2 (x21), sum (x22)
  //   2. fill arr1 and arr2 with rand() results
  //   3. store sum[i] = arr1[i] + arr2[i] as 32-bit adds: up to 3 scalar "peel" elements, a 4-lane vector loop, up to 3 scalar tail elements
  //   4. add every sum[i], sign-extended to 64 bits, into one total using the same peel / vector / tail shape
  //   5. call printf with the total in x1, free the three buffers, restore the saved registers and return
  // Registers kept across calls: x19 = byte offset in the fill loops, x20 = arr1, x21 = arr2, x22 = sum.
  // NOTE(review): none of the three malloc results is checked for NULL before it is written through.
  // NOTE(review): w0 is not written between the last free call and ret, so main returns whatever free left in x0.
  2. // prologue and allocation of the three arrays
  3. 400550: a9bd7bfd stp x29, x30, [sp,#-48]! // sp -= 48, then save x29 (frame pointer) and x30 (link register) at the new sp
  4. 400554: 910003fd mov x29, sp // x29 = sp (base of this frame)
  5. 400558: d281f400 mov x0, #0xfa0 // #4000 (allocation size in bytes: 1000 ints of 4 bytes)
  6. 40055c: a90153f3 stp x19, x20, [sp,#16] // save x19/x20 at frame offset 16 (both are overwritten below and restored before ret)
  7. 400560: a9025bf5 stp x21, x22, [sp,#32] // save x21/x22 at frame offset 32 (stp stores one register pair per instruction, so six registers take three stores)
  8. 400564: 97ffffdf bl 4004e0 <malloc@plt> // allocate arr1
  9. 400568: aa0003f4 mov x20, x0 // x20 = arr1 pointer returned by malloc
  10. 40056c: d281f400 mov x0, #0xfa0 // #4000
  11. 400570: 97ffffdc bl 4004e0 <malloc@plt> // allocate arr2
  12. 400574: aa0003f5 mov x21, x0 // x21 = arr2 pointer
  13. 400578: d281f400 mov x0, #0xfa0 // #4000
  14. 40057c: 97ffffd9 bl 4004e0 <malloc@plt> // allocate the sum array
  15. 400580: aa0003f6 mov x22, x0 // x22 = sum pointer
  16. // arr1 fill loop: x19 is a byte offset that steps by 4 from 0 up to 4000 (1000 iterations)
  17. 400584: d2800013 mov x19, #0x0 // #0 (byte offset of the current int)
  18. 400588: 97ffffde bl 400500 <rand@plt> // loop top: w0 = rand()
  19. 40058c: b8336a80 str w0, [x20,x19] // store the 32-bit value at arr1 + x19
  20. 400590: 91001273 add x19, x19, #0x4 // advance the offset by 4 bytes (one int)
  21. 400594: f13e827f cmp x19, #0xfa0 // has the offset reached 4000 bytes?
  22. 400598: 54ffff81 b.ne 400588 <main+0x38> // not yet: back to the rand call
  23. // arr2 fill loop: same shape as the arr1 loop, storing through x21
  24. 40059c: d2800013 mov x19, #0x0 // #0 (reset the byte offset)
  25. 4005a0: 97ffffd8 bl 400500 <rand@plt> // loop top: w0 = rand()
  26. 4005a4: b8336aa0 str w0, [x21,x19] // store the 32-bit value at arr2 + x19
  27. 4005a8: 91001273 add x19, x19, #0x4 // advance the offset by 4 bytes (one int)
  28. 4005ac: f13e827f cmp x19, #0xfa0 // has the offset reached 4000 bytes?
  29. 4005b0: 54ffff81 b.ne 4005a0 <main+0x50> // not yet: loop. The offset goes 0, 4, 8, ... and lands exactly on 4000, so an inequality test is enough to stop (presumably not a cost difference versus a less-than branch)
  30.  
  31. // peel count for the add phase: x2 = (-(bits [3:2] of arr1)) mod 4, i.e. how many leading ints to handle one by one so that arr1 + 4*x2 is a multiple of 16 (given arr1 is a multiple of 4)
  32. 4005b4: d3420e82 ubfx x2, x20, #2, #2 // x2 = bits [3:2] of the arr1 address (x20)
  33. 4005b8: cb0203e2 neg x2, x2 // x2 = -x2
  34. 4005bc: 92400442 and x2, x2, #0x3 // keep the low two bits: x2 is now 0..3, the peel count
  35. 4005c0: b4000f62 cbz x2, 4007ac <main+0x25c> // nothing to peel: 4007ac sets x7 = 1000 and x8 = x2 (0), then branches to 40060c
  36.  
  37. 4005c4: b9400281 ldr w1, [x20] // w1 = arr1[0]
  38. 4005c8: b94002a0 ldr w0, [x21] // w0 = arr2[0]
  39. 4005cc: f100045f cmp x2, #0x1 // compare the peel count with 1 (flags are consumed by the b.ls three instructions down)
  40. 4005d0: 0b000020 add w0, w1, w0 // w0 = arr1[0] + arr2[0]
  41. 4005d4: b90002c0 str w0, [x22] // sum[0] = w0
  42. 4005d8: 54000fe9 b.ls 4007d4 <main+0x284> // peel count <= 1 (it is nonzero here, so exactly 1): 4007d4 sets x7 = 999 and x8 = 1, then branches to 40060c
  43. 4005dc: b9400681 ldr w1, [x20,#4] // w1 = arr1[1]
  44. 4005e0: b94006a0 ldr w0, [x21,#4] // w0 = arr2[1]
  45. 4005e4: f100085f cmp x2, #0x2 // compare the peel count with 2
  46. 4005e8: 0b000020 add w0, w1, w0 // w0 = arr1[1] + arr2[1]
  47. 4005ec: b90006c0 str w0, [x22,#4] // sum[1] = w0
  48. 4005f0: 54000f89 b.ls 4007e0 <main+0x290> // peel count <= 2 (so exactly 2 here): 4007e0 sets x7 = 998 and x8 = 2, then branches to 40060c
  49. 4005f4: b9400a81 ldr w1, [x20,#8] // w1 = arr1[2]
  50. 4005f8: b9400aa0 ldr w0, [x21,#8] // w0 = arr2[2]
  51. 4005fc: d2807ca7 mov x7, #0x3e5 // #997 = ints left after peeling 3
  52. 400600: 0b000020 add w0, w1, w0 // w0 = arr1[2] + arr2[2]
  53. 400604: b9000ac0 str w0, [x22,#8] // sum[2] = w0
  54. 400608: d2800068 mov x8, #0x3 // #3 = ints peeled
  55.  
  56. // vector add phase (all peel paths rejoin at 40060c with x2 = peel count, x8 = peel count, x7 = 1000 - peel count)
  57. 40060c: d2807d06 mov x6, #0x3e8 // #1000 total number of ints
  58. 400610: cb0200c6 sub x6, x6, x2 // x6 = ints remaining after the peel
  59. 400614: d342fcc0 lsr x0, x6, #2 // x0 = x6 / 4 = number of 4-int vector iterations
  60. 400618: d37ef442 lsl x2, x2, #2 // x2 = peel count * 4 = byte offset of the first unpeeled int
  61. 40061c: 8b020284 add x4, x20, x2 // x4 = arr1 + byte offset (read cursor for arr1)
  62. 400620: 8b0202a3 add x3, x21, x2 // x3 = arr2 + byte offset (read cursor for arr2)
  63. 400624: d37ef405 lsl x5, x0, #2 // x5 = 4 * iterations = ints the vector loop covers
  64. 400628: 8b0202c2 add x2, x22, x2 // x2 = sum + byte offset (write cursor)
  65. 40062c: d2800001 mov x1, #0x0 // #0 vector iteration counter
  66. 400630: 4cdf7861 ld1 {v1.4s}, [x3], #16 // loop top: v1 = next 4 ints of arr2, then x3 += 16
  67. 400634: 4cdf7880 ld1 {v0.4s}, [x4], #16 // v0 = next 4 ints of arr1, then x4 += 16
  68. 400638: 4ea08420 add v0.4s, v1.4s, v0.4s // v0 = v1 + v0 in four 32-bit lanes
  69. 40063c: 91000421 add x1, x1, #0x1 // count this iteration
  70. 400640: 4c9f7840 st1 {v0.4s}, [x2], #16 // store the 4 results to sum, then x2 += 16
  71. 400644: eb00003f cmp x1, x0 // compare iterations done with iterations wanted
  72. 400648: 54ffff43 b.cc 400630 <main+0xe0> // unsigned x1 < x0: loop back to 400630
  73. 40064c: eb0500df cmp x6, x5 // did the vector loop cover every remaining int? (flags are consumed by the b.eq at 400658)
  74. 400650: 8b050101 add x1, x8, x5 // x1 = peeled + vectorized = index of the first int not yet summed
  75. 400654: cb0500e5 sub x5, x7, x5 // x5 = (1000 - peeled) - vectorized = ints left for the scalar tail
  76. 400658: 540002c0 b.eq 4006b0 <main+0x160> // x6 == x5 at the cmp above: nothing left, skip the tail
  77. 40065c: d37ef420 lsl x0, x1, #2 // x0 = byte offset of the first tail element
  78. 400660: b8606a83 ldr w3, [x20,x0] // w3 = arr1[x1]
  79. 400664: b8606aa2 ldr w2, [x21,x0] // w2 = arr2[x1]
  80. 400668: f10004bf cmp x5, #0x1 // is there exactly one tail element?
  81. 40066c: 0b020062 add w2, w3, w2 // w2 = arr1[x1] + arr2[x1]
  82. 400670: b8206ac2 str w2, [x22,x0] // sum[x1] = w2
  83. 400674: 91000420 add x0, x1, #0x1 // x0 = index of the second tail element
  84. 400678: 540001c0 b.eq 4006b0 <main+0x160> // tail had one element: add phase done
  85. 40067c: d37ef400 lsl x0, x0, #2 // x0 = byte offset of the second tail element
  86. 400680: b8606a83 ldr w3, [x20,x0] // w3 = arr1[x1 + 1]
  87. 400684: b8606aa2 ldr w2, [x21,x0] // w2 = arr2[x1 + 1]
  88. 400688: f10008bf cmp x5, #0x2 // were there exactly two tail elements?
  89. 40068c: 0b020062 add w2, w3, w2 // w2 = arr1[x1 + 1] + arr2[x1 + 1]
  90. 400690: b8206ac2 str w2, [x22,x0] // sum[x1 + 1] = w2
  91. 400694: 91000820 add x0, x1, #0x2 // x0 = index of the third tail element
  92. 400698: 540000c0 b.eq 4006b0 <main+0x160> // tail had two elements: add phase done
  93. 40069c: d37ef400 lsl x0, x0, #2 // x0 = byte offset of the third tail element
  94. 4006a0: b8606a82 ldr w2, [x20,x0] // w2 = arr1[x1 + 2]
  95. 4006a4: b8606aa1 ldr w1, [x21,x0] // w1 = arr2[x1 + 2]
  96. 4006a8: 0b010041 add w1, w2, w1 // w1 = arr1[x1 + 2] + arr2[x1 + 2]
  97. 4006ac: b8206ac1 str w1, [x22,x0] // sum[x1 + 2] = w1
  98. 4006b0: d3420ec2 ubfx x2, x22, #2, #2 // total phase starts here: x2 = bits [3:2] of the sum address (x22)
  99. 4006b4: cb0203e2 neg x2, x2 // x2 = -x2
  100. 4006b8: 92400442 and x2, x2, #0x3 // keep the low two bits: x2 = 0..3 leading ints to total one by one
  101. 4006bc: b40007e2 cbz x2, 4007b8 <main+0x268> // nothing to peel: 4007b8 sets x6 = 1000, x7 = 0 and x3 = 0, then branches to 4006ec
  102. 4006c0: f100045f cmp x2, #0x1 // compare the peel count with 1 (flags are consumed by the b.ls at 4006c8)
  103. 4006c4: b98002c3 ldrsw x3, [x22] // x3 = sum[0] sign-extended to 64 bits (scalar running total)
  104. 4006c8: 54000929 b.ls 4007ec <main+0x29c> // peel count is 1: 4007ec sets x6 = 999 and x7 = 1, then branches to 4006ec
  105. 4006cc: b98006c0 ldrsw x0, [x22,#4] // x0 = sum[1] sign-extended to 64 bits
  106. 4006d0: f100085f cmp x2, #0x2 // compare the peel count with 2
  107. 4006d4: 8b000063 add x3, x3, x0 // x3 += sum[1]
  108. 4006d8: 54000789 b.ls 4007c8 <main+0x278> // peel count is 2: 4007c8 sets x6 = 998 and x7 = 2, then branches to 4006ec
  109. 4006dc: b9800ac0 ldrsw x0, [x22,#8] // x0 = sum[2] sign-extended to 64 bits
  110. 4006e0: d2807ca6 mov x6, #0x3e5 // #997 = ints left after peeling 3
  111. 4006e4: 8b000063 add x3, x3, x0 // x3 += sum[2]
  112. 4006e8: d2800067 mov x7, #0x3 // #3 = ints peeled
  113. 4006ec: d2807d05 mov x5, #0x3e8 // #1000 total number of ints (all peel paths rejoin here with x3 = peeled total, x7 = peel count, x6 = 1000 - peel count)
  114. 4006f0: cb0200a5 sub x5, x5, x2 // x5 = ints remaining after the peel
  115. 4006f4: d342fca0 lsr x0, x5, #2 // x0 = x5 / 4 = number of 4-int vector iterations
  116. 4006f8: d37ef404 lsl x4, x0, #2 // x4 = 4 * iterations = ints the vector loop covers
  117. 4006fc: 8b020ac2 add x2, x22, x2, lsl #2 // x2 = sum + 4 * peel count (read cursor)
  118. 400700: d2800001 mov x1, #0x0 // #0 vector iteration counter
  119. 400704: 4f000401 movi v1.4s, #0x0 // v1 = 0, used below as two 64-bit partial totals
  120. 400708: 4cdf7840 ld1 {v0.4s}, [x2], #16 // loop top: v0 = next 4 ints of sum, then x2 += 16
  121. 40070c: 0f20a402 sxtl v2.2d, v0.2s // v2 = lanes 0 and 1 of v0, sign-extended to 64 bits
  122. 400710: 91000421 add x1, x1, #0x1 // count this iteration
  123. 400714: 4ee18441 add v1.2d, v2.2d, v1.2d // v1 += v2 (lanes 0 and 1 accumulated)
  124. 400718: 4f20a400 sxtl2 v0.2d, v0.4s // v0 = lanes 2 and 3 of v0, sign-extended to 64 bits
  125. 40071c: eb01001f cmp x0, x1 // compare iterations wanted with iterations done
  126. 400720: 4ee18401 add v1.2d, v0.2d, v1.2d // v1 += v0 (lanes 2 and 3 accumulated)
  127. 400724: 54ffff28 b.hi 400708 <main+0x1b8> // unsigned x0 > x1: loop back to 400708
  128. 400728: 5ef1b821 addp d1, v1.2d // d1 = v1.d[0] + v1.d[1]: fold the two partial totals into one
  129. 40072c: 4e083c21 mov x1, v1.d[0] // x1 = vector total
  130. 400730: 4e183c22 mov x2, v1.d[1] // x2 = upper 64 bits of v1. NOTE(review): no instruction in this listing reads this x2 value before the printf call or the overwrite at 400748
  131. 400734: eb0400bf cmp x5, x4 // did the vector loop cover every remaining int? (flags are consumed by the b.eq at 400744)
  132. 400738: 8b0400e0 add x0, x7, x4 // x0 = peeled + vectorized = index of the first int not yet totaled
  133. 40073c: 8b010061 add x1, x3, x1 // x1 = peeled total + vector total
  134. 400740: cb0400c4 sub x4, x6, x4 // x4 = (1000 - peeled) - vectorized = ints left for the scalar tail
  135. 400744: 540001a0 b.eq 400778 <main+0x228> // x5 == x4 at the cmp above: nothing left, go print
  136. 400748: b8a07ac2 ldrsw x2, [x22,x0,lsl #2] // x2 = sum[x0] sign-extended to 64 bits
  137. 40074c: f100049f cmp x4, #0x1 // is there exactly one tail element?
  138. 400750: 8b020021 add x1, x1, x2 // total += sum[x0]
  139. 400754: 91000402 add x2, x0, #0x1 // x2 = index of the second tail element
  140. 400758: 54000100 b.eq 400778 <main+0x228> // tail had one element: go print
  141. 40075c: b8a27ac2 ldrsw x2, [x22,x2,lsl #2] // x2 = sum[x0 + 1] sign-extended to 64 bits
  142. 400760: f100089f cmp x4, #0x2 // were there exactly two tail elements?
  143. 400764: 8b020021 add x1, x1, x2 // total += sum[x0 + 1]
  144. 400768: 91000800 add x0, x0, #0x2 // x0 = index of the third tail element
  145. 40076c: 54000060 b.eq 400778 <main+0x228> // tail had two elements: go print
  146. 400770: b8a07ac0 ldrsw x0, [x22,x0,lsl #2] // x0 = sum[x0] (third tail element) sign-extended to 64 bits
  147. 400774: 8b000021 add x1, x1, x0 // total += third tail element
  148. 400778: 90000000 adrp x0, 400000 <_init-0x4a8> // x0 = page address 0x400000
  149. 40077c: 9127e000 add x0, x0, #0x9f8 // x0 = 0x4009f8, first printf argument (presumably the format string, its contents are not shown in this listing)
  150. 400780: 97ffff70 bl 400540 <printf@plt> // printf(x0, x1) with x1 = the 64-bit total
  151. 400784: aa1403e0 mov x0, x20 // x0 = arr1
  152. 400788: 97ffff6a bl 400530 <free@plt> // free(arr1)
  153. 40078c: aa1503e0 mov x0, x21 // x0 = arr2
  154. 400790: 97ffff68 bl 400530 <free@plt> // free(arr2)
  155. 400794: aa1603e0 mov x0, x22 // x0 = sum
  156. 400798: 97ffff66 bl 400530 <free@plt> // free(sum)
  157. 40079c: a94153f3 ldp x19, x20, [sp,#16] // restore x19/x20 saved in the prologue
  158. 4007a0: a9425bf5 ldp x21, x22, [sp,#32] // restore x21/x22 saved in the prologue
  159. 4007a4: a8c37bfd ldp x29, x30, [sp],#48 // restore x29/x30, then sp += 48 to release the frame
  160. 4007a8: d65f03c0 ret // return to the address in x30 (w0 was not set by this function, see the note at the top)
  161.  
  162. 4007ac: d2807d07 mov x7, #0x3e8 // #1000 (add phase, peel count 0: all 1000 ints remain)
  163. 4007b0: aa0203e8 mov x8, x2 // x8 = x2, which is 0 on this path (reached only from the cbz at 4005c0)
  164. 4007b4: 17ffff96 b 40060c <main+0xbc> // rejoin the add phase
  165.  
  166. 4007b8: d2807d06 mov x6, #0x3e8 // #1000 (total phase, peel count 0: all 1000 ints remain)
  167. 4007bc: aa0203e7 mov x7, x2 // x7 = x2 = 0 ints peeled (reached only from the cbz at 4006bc)
  168. 4007c0: aa0203e3 mov x3, x2 // x3 = 0: scalar running total starts at zero
  169. 4007c4: 17ffffca b 4006ec <main+0x19c> // rejoin the total phase
  170.  
  171. 4007c8: d2807cc6 mov x6, #0x3e6 // #998 (total phase, peel count 2)
  172. 4007cc: d2800047 mov x7, #0x2 // #2 ints peeled
  173. 4007d0: 17ffffc7 b 4006ec <main+0x19c> // rejoin the total phase
  174.  
  175. 4007d4: d2807ce7 mov x7, #0x3e7 // #999 (add phase, peel count 1)
  176. 4007d8: d2800028 mov x8, #0x1 // #1 int peeled
  177. 4007dc: 17ffff8c b 40060c <main+0xbc> // rejoin the add phase
  178.  
  179. 4007e0: d2807cc7 mov x7, #0x3e6 // #998 (add phase, peel count 2)
  180. 4007e4: d2800048 mov x8, #0x2 // #2 ints peeled
  181. 4007e8: 17ffff89 b 40060c <main+0xbc> // rejoin the add phase
  182.  
  183. 4007ec: d2807ce6 mov x6, #0x3e7 // #999 (total phase, peel count 1)
  184. 4007f0: d2800027 mov x7, #0x1 // #1 int peeled
  185. 4007f4: 17ffffbe b 4006ec <main+0x19c> // rejoin the total phase
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement