Advertisement
Willem142

20190709-CircleAssemblyTweaked

Jul 9th, 2019
355
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 8.13 KB | None | 0 0
  1. .NET Framework 4.7.2 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3062.0
  2. Raytracer2.Benchmarking.CircleTest.Intersection()
  3. for (int j = 0, l = n; j < l; j++)
  4. ^^^^^^^^^
  5. xor edi,edi
  6. for (int j = 0, l = n; j < l; j++)
  7. ^^^^^
  8. mov ebx,dword ptr [rsi+20h]
  9. for (int j = 0, l = n; j < l; j++)
  10. ^^^^^
  11. test ebx,ebx
  12. jle M00_L01
  13. rs[j].Intersect(c);
  14. ^^^^^^^^^^^^^^^^^^^
  15. M00_L00
  16.  
  17. mov rcx,qword ptr [rsi+8]
  18. cmp edi,dword ptr [rcx+8]
  19. jae 00007ffe`3aa65f8f
  20. movsxd rdx,edi
  21. lea rdx,[rdx+rdx*4]
  22. lea rcx,[rcx+rdx*4+10h]
  23. mov rdx,qword ptr [rsi+10h]
  24. call Raytracer2.Primitives.Infinite.Ray.Intersect(Raytracer2.Primitives.Circle)
  25. for (int j = 0, l = n; j < l; j++)
  26. ^^^
  27. inc edi
  28. cmp edi,ebx
  29. jl M00_L00
  30. }
  31. ^
  32. M00_L01
  33.  
  34. add rsp,20h
  35.  
  36. Raytracer2.Primitives.Infinite.Ray.Intersect(Raytracer2.Primitives.Circle)
  37. float dx = this.direction.X;
  38. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  39. lea rax,[rsi+8]
  40. mov rcx,rax
  41. vmovss xmm1,dword ptr [rcx]
  42. vmovss xmm2,dword ptr [rsi+4]
  43. cmp dword ptr [rsi],esi
  44. vmovss xmm3,dword ptr [rax+4]
  45. lea rax,[rdx+10h]
  46. mov rcx,rax
  47. vmovss xmm4,dword ptr [rcx]
  48. vmovss xmm5,dword ptr [rax+4]
  49. vmovss xmm6,dword ptr [rdx+8]
  50. vsubss xmm0,xmm0,xmm4
  51. vsubss xmm2,xmm2,xmm5
  52. float r = 2 * p * dx;
  53. ^^^^^^^^^^^^^^^^^^^^^
  54. vmovaps xmm4,xmm2
  55. vmulss xmm4,xmm4,dword ptr [00007ffe`3aa66110]
  56. vmulss xmm4,xmm4,xmm3
  57. vmovaps xmm5,xmm1
  58. vmulss xmm5,xmm5,xmm1
  59. vmulss xmm3,xmm3,xmm3
  60. vaddss xmm3,xmm3,xmm5
  61. vmovaps xmm5,xmm0
  62. vmulss xmm5,xmm5,xmm0
  63. vmulss xmm2,xmm2,xmm2
  64. vaddss xmm5,xmm5,xmm2
  65. vmulss xmm6,xmm6,xmm6
  66. vmovaps xmm2,xmm5
  67. vsubss xmm2,xmm2,xmm6
  68. vmulss xmm0,xmm0,dword ptr [00007ffe`3aa66114]
  69. vmulss xmm0,xmm0,xmm1
  70. vaddss xmm0,xmm0,xmm4
  71. vmovaps xmm1,xmm0
  72. vmulss xmm1,xmm1,xmm0
  73. vmovaps xmm4,xmm3
  74. vmulss xmm4,xmm4,dword ptr [00007ffe`3aa66118]
  75. vmulss xmm4,xmm4,xmm2
  76. vsubss xmm1,xmm1,xmm4
  77. vcvtss2sd xmm1,xmm1,xmm1
  78. vsqrtsd xmm1,xmm0,xmm1
  79. vcvtsd2ss xmm1,xmm1,xmm1
  80. vmovss xmm2,dword ptr [00007ffe`3aa6611c]
  81. vxorps xmm0,xmm0,xmm2
  82. vmovaps xmm2,xmm0
  83. vaddss xmm2,xmm2,xmm1
  84. vmulss xmm3,xmm3,dword ptr [00007ffe`3aa66120]
  85. vdivss xmm2,xmm2,xmm3
  86. vsubss xmm0,xmm0,xmm1
  87. vmovaps xmm1,xmm0
  88. vdivss xmm1,xmm1,xmm3
  89. vmovaps xmm0,xmm2
  90. call System.Math.Min(Single, Single)
  91. if(ti > 0 && ti < t)
  92. ^^^^^^^^^^^^^^^^^^^^
  93. vxorps xmm1,xmm1,xmm1
  94. vucomiss xmm0,xmm1
  95. jbe M01_L00
  96. vmovss xmm1,dword ptr [rsi+10h]
  97. vucomiss xmm1,xmm0
  98. jbe M01_L00
  99. t = ti;
  100. ^^^^^^^
  101. vmovss dword ptr [rsi+10h],xmm0
  102. return true;
  103. ^^^^^^^^^^^^
  104. mov eax,1
  105. vmovaps xmm6,xmmword ptr [rsp+20h]
  106. add rsp,30h
  107. pop rsi
  108. ret
  109. return false;
  110. ^^^^^^^^^^^^^
  111. M01_L00
  112.  
  113. xor eax,eax
  114.  
  115. .NET Framework 4.7.2 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3062.0
  116. Raytracer2.Benchmarking.CircleTest.IntersectionPacked()
  117. for (int j = 0, l = n / 8; j < l; j++)
  118. ^^^^^^^^^
  119. xor edi,edi
  120. for (int j = 0, l = n / 8; j < l; j++)
  121. ^^^^^^^^^
  122. mov edx,dword ptr [rsi+20h]
  123. mov ecx,edx
  124. sar ecx,1Fh
  125. and ecx,7
  126. add ecx,edx
  127. mov ebx,ecx
  128. sar ebx,3
  129. for (int j = 0, l = n / 8; j < l; j++)
  130. ^^^^^
  131. test ebx,ebx
  132. jle M00_L01
  133. lea rbp,[rsi+28h]
  134. rps[j].Intersect(cp);
  135. ^^^^^^^^^^^^^^^^^^^^^
  136. M00_L00
  137.  
  138. mov rdx,qword ptr [rsi+18h]
  139. cmp edi,dword ptr [rdx+8]
  140. jae 00007ffe`3aa75fd7
  141. movsxd rcx,edi
  142. imul rcx,rcx,0A0h
  143. lea r14,[rdx+rcx+10h]
  144. lea r15,[rsp+80h]
  145. lea rcx,[rsp+20h]
  146. mov rdx,rbp
  147. mov r8d,60h
  148. call clr+0x1350
  149. mov rcx,r14
  150. mov rdx,r15
  151. lea r8,[rsp+20h]
  152. call Raytracer2.Primitives.Infinite.RayPack.Intersect(Raytracer2.Primitives.CirclePack)
  153. for (int j = 0, l = n / 8; j < l; j++)
  154. ^^^
  155. inc edi
  156. cmp edi,ebx
  157. jl M00_L00
  158. }
  159. ^
  160. M00_L01
  161.  
  162. add rsp,0A8h
  163.  
  164. Raytracer2.Primitives.Infinite.RayPack.Intersect(Raytracer2.Primitives.CirclePack)
  165. Vector<float> zeros = Constants.zeros;
  166. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  167. mov rax,qword ptr [12CC5D10h]
  168. vmovupd ymm0,ymmword ptr [rax+8]
  169. Vector<float> twos = Constants.twos;
  170. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  171. mov rax,qword ptr [12CC5D40h]
  172. vmovupd ymm1,ymmword ptr [rax+8]
  173. Vector<float> fours = Constants.fours;
  174. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  175. mov rax,qword ptr [12CC5D48h]
  176. vmovupd ymm2,ymmword ptr [rax+8]
  177. Vector<float> p = this.origin.x - other.origin.x; ;
  178. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  179. vmovupd ymm3,ymmword ptr [rcx]
  180. vmovupd ymm4,ymmword ptr [r8]
  181. vsubps ymm3,ymm3,ymm4
  182. Vector<float> q = this.origin.y - other.origin.y;
  183. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  184. vmovupd ymm4,ymmword ptr [rcx+20h]
  185. vmovupd ymm5,ymmword ptr [r8+20h]
  186. vsubps ymm4,ymm4,ymm5
  187. Vector<float> r = twos * p * this.direction.x;
  188. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  189. vmulps ymm5,ymm1,ymm3
  190. vmovupd ymm6,ymmword ptr [rcx+40h]
  191. vmulps ymm5,ymm5,ymm6
  192. Vector<float> s = twos * q * this.direction.y; ;
  193. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  194. vmulps ymm6,ymm1,ymm4
  195. vmovupd ymm7,ymmword ptr [rcx+60h]
  196. vmulps ymm6,ymm6,ymm7
  197. Vector<float> a = this.direction.x * this.direction.x + this.direction.y * this.direction.y;
  198. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  199. vmovupd ymm7,ymmword ptr [rcx+40h]
  200. vmovupd ymm8,ymmword ptr [rcx+40h]
  201. vmulps ymm7,ymm7,ymm8
  202. vmovupd ymm8,ymmword ptr [rcx+60h]
  203. vmovupd ymm9,ymmword ptr [rcx+60h]
  204. vmulps ymm8,ymm8,ymm9
  205. vaddps ymm7,ymm7,ymm8
  206. Vector<float> b = r + s;
  207. ^^^^^^^^^^^^^^^^^^^^^^^^
  208. vmulps ymm3,ymm3,ymm3
  209. vmulps ymm4,ymm4,ymm4
  210. vaddps ymm3,ymm3,ymm4
  211. vmovupd ymm4,ymmword ptr [r8+40h]
  212. vmovupd ymm8,ymmword ptr [r8+40h]
  213. vmulps ymm4,ymm4,ymm8
  214. vsubps ymm3,ymm3,ymm4
  215. vaddps ymm4,ymm5,ymm6
  216. vmulps ymm5,ymm4,ymm4
  217. vmulps ymm2,ymm2,ymm7
  218. vmulps ymm2,ymm2,ymm3
  219. vsubps ymm3,ymm5,ymm2
  220. vmovaps ymm2,ymm0
  221. vcmpltps ymm2,ymm2,ymm3
  222. vsqrtps ymm3,ymm3
  223. vmulps ymm1,ymm1,ymm7
  224. vxorps ymm5,ymm5,ymm5
  225. vsubps ymm5,ymm5,ymm4
  226. vaddps ymm4,ymm5,ymm3
  227. vdivps ymm4,ymm4,ymm1
  228. vsubps ymm5,ymm5,ymm3
  229. vdivps ymm3,ymm5,ymm1
  230. Vector<float> tm = Vector.ConditionalSelect(Vector.LessThan(t1, t0), t1, t0);
  231. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  232. vmovaps ymm1,ymm3
  233. vcmpltps ymm1,ymm1,ymm4
  234. vpand ymm1,ymm3,ymm1
  235. vcmpltps ymm3,ymm3,ymm4
  236. vpandn ymm3,ymm3,ymm4
  237. vpor ymm1,ymm1,ymm3
  238. Vector<int> maskBiggerThanZero = Vector.GreaterThan(tm, zeros);
  239. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  240. vcmpltps ymm0,ymm0,ymm1
  241. Vector<int> maskSmallerThanT = Vector.LessThan(tm, this.t);
  242. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  243. vmovupd ymm3,ymmword ptr [rcx+80h]
  244. vmovaps ymm4,ymm1
  245. vcmpltps ymm4,ymm4,ymm3
  246. vpand ymm0,ymm0,ymm4
  247. vpand ymm0,ymm2,ymm0
  248. this.t = Vector.ConditionalSelect(
  249. ^^^^^^^
  250. mask, // the bit mask that allows us to choose.
  251. ^^^^^^^
  252. tm, // the smallest of the t's.
  253. ^^^^^^^
  254. t); // if the bit mask is false (0), then we get our original t.
  255. ^^^^^^^
  256. vpand ymm1,ymm1,ymm0
  257. vmovupd ymm2,ymmword ptr [rcx+80h]
  258. vpandn ymm3,ymm0,ymm2
  259. vpor ymm1,ymm1,ymm3
  260. vmovupd ymmword ptr [rcx+80h],ymm1
  261. return mask;
  262. ^^^^^^^^^^^^
  263. vmovupd ymmword ptr [rdx],ymm0
  264. mov rax,rdx
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement