Advertisement
Willem142

20190709-CircleAssemblyOriginal

Jul 9th, 2019
293
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.30 KB | None | 0 0
  1. .NET Framework 4.7.2 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3062.0
  2. Raytracer2.Benchmarking.CircleTest.Intersection()
  3. for (int j = 0, l = n; j < l; j++)
  4. ^^^^^^^^^
  5. xor edi,edi
  6. for (int j = 0, l = n; j < l; j++)
  7. ^^^^^
  8. mov ebx,dword ptr [rsi+20h]
  9. for (int j = 0, l = n; j < l; j++)
  10. ^^^^^
  11. test ebx,ebx
  12. jle M00_L01
  13. rs[j].Intersect(c);
  14. ^^^^^^^^^^^^^^^^^^^
  15. M00_L00
  16.  
  17. mov rcx,qword ptr [rsi+8]
  18. cmp edi,dword ptr [rcx+8]
  19. jae 00007ffe`3aa65f8f
  20. movsxd rdx,edi
  21. lea rdx,[rdx+rdx*4]
  22. lea rcx,[rcx+rdx*4+10h]
  23. mov rdx,qword ptr [rsi+10h]
  24. call Raytracer2.Primitives.Infinite.Ray.Intersect(Raytracer2.Primitives.Circle)
  25. for (int j = 0, l = n; j < l; j++)
  26. ^^^
  27. inc edi
  28. cmp edi,ebx
  29. jl M00_L00
  30. }
  31. ^
  32. M00_L01
  33.  
  34. add rsp,20h
  35.  
  36. Raytracer2.Primitives.Infinite.Ray.Intersect(Raytracer2.Primitives.Circle)
  37. float dx = this.direction.X;
  38. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  39. lea rax,[rsi+8]
  40. mov rcx,rax
  41. vmovss xmm1,dword ptr [rcx]
  42. vmovss xmm2,dword ptr [rsi+4]
  43. cmp dword ptr [rsi],esi
  44. vmovss xmm3,dword ptr [rax+4]
  45. lea rax,[rdx+10h]
  46. mov rcx,rax
  47. vmovss xmm4,dword ptr [rcx]
  48. vmovss xmm5,dword ptr [rax+4]
  49. vmovss xmm6,dword ptr [rdx+8]
  50. vsubss xmm0,xmm0,xmm4
  51. vsubss xmm2,xmm2,xmm5
  52. float r = 2 * p * dx;
  53. ^^^^^^^^^^^^^^^^^^^^^
  54. vmovaps xmm4,xmm2
  55. vmulss xmm4,xmm4,dword ptr [00007ffe`3aa66138]
  56. vmulss xmm4,xmm4,xmm3
  57. vmovaps xmm5,xmm1
  58. vmulss xmm5,xmm5,xmm1
  59. vmulss xmm3,xmm3,xmm3
  60. vaddss xmm3,xmm3,xmm5
  61. vmovaps xmm5,xmm0
  62. vmulss xmm5,xmm5,dword ptr [00007ffe`3aa6613c]
  63. vmulss xmm5,xmm5,xmm1
  64. vmovaps xmm1,xmm5
  65. vaddss xmm1,xmm1,xmm4
  66. float c = p * p + q * q - cr * cr;
  67. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  68. vmulss xmm0,xmm0,xmm0
  69. vmulss xmm2,xmm2,xmm2
  70. vaddss xmm0,xmm0,xmm2
  71. vmulss xmm6,xmm6,xmm6
  72. vsubss xmm0,xmm0,xmm6
  73. float DSqrt = b * b - 4 * a * c;
  74. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  75. vmovaps xmm2,xmm1
  76. vmulss xmm2,xmm2,xmm1
  77. vmovaps xmm4,xmm3
  78. vmulss xmm4,xmm4,dword ptr [00007ffe`3aa66140]
  79. vmulss xmm4,xmm4,xmm0
  80. vmovaps xmm0,xmm2
  81. vsubss xmm0,xmm0,xmm4
  82. if (DSqrt < 0)
  83. ^^^^^^^^^^^^^^
  84. vxorps xmm2,xmm2,xmm2
  85. vucomiss xmm2,xmm0
  86. jbe M01_L00
  87. { return false; }
  88. ^^^^^^^^^^^^^
  89. xor eax,eax
  90. vmovaps xmm6,xmmword ptr [rsp+20h]
  91. add rsp,30h
  92. pop rsi
  93. ret
  94. float D = (float)Math.Sqrt(DSqrt);
  95. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  96. M01_L00
  97.  
  98. vcvtss2sd xmm0,xmm0,xmm0
  99. vsqrtsd xmm0,xmm0,xmm0
  100. vcvtsd2ss xmm0,xmm0,xmm0
  101. float t0 = (-b + D) / (2 * a);
  102. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  103. vmovss xmm2,dword ptr [00007ffe`3aa66144]
  104. vxorps xmm2,xmm2,xmm1
  105. vaddss xmm2,xmm2,xmm0
  106. vmulss xmm3,xmm3,dword ptr [00007ffe`3aa66148]
  107. vdivss xmm2,xmm2,xmm3
  108. float t1 = (-b - D) / (2 * a);
  109. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  110. vmovss xmm4,dword ptr [00007ffe`3aa66144]
  111. vxorps xmm1,xmm1,xmm4
  112. vsubss xmm1,xmm1,xmm0
  113. vdivss xmm1,xmm1,xmm3
  114. vmovaps xmm0,xmm2
  115. call System.Math.Min(Single, Single)
  116. if(ti > 0 && ti < t)
  117. ^^^^^^^^^^^^^^^^^^^^
  118. vxorps xmm1,xmm1,xmm1
  119. vucomiss xmm0,xmm1
  120. jbe M01_L01
  121. vmovss xmm1,dword ptr [rsi+10h]
  122. vucomiss xmm1,xmm0
  123. jbe M01_L01
  124. t = ti;
  125. ^^^^^^^
  126. vmovss dword ptr [rsi+10h],xmm0
  127. return true;
  128. ^^^^^^^^^^^^
  129. mov eax,1
  130. vmovaps xmm6,xmmword ptr [rsp+20h]
  131. add rsp,30h
  132. pop rsi
  133. ret
  134. return false;
  135. ^^^^^^^^^^^^^
  136. M01_L01
  137.  
  138. xor eax,eax
  139.  
  140. .NET Framework 4.7.2 (CLR 4.0.30319.42000), 64bit RyuJIT-v4.7.3062.0
  141. Raytracer2.Benchmarking.CircleTest.IntersectionPacked()
  142. for (int j = 0, l = n / 8; j < l; j++)
  143. ^^^^^^^^^
  144. xor edi,edi
  145. for (int j = 0, l = n / 8; j < l; j++)
  146. ^^^^^^^^^
  147. mov edx,dword ptr [rsi+20h]
  148. mov ecx,edx
  149. sar ecx,1Fh
  150. and ecx,7
  151. add ecx,edx
  152. mov ebx,ecx
  153. sar ebx,3
  154. for (int j = 0, l = n / 8; j < l; j++)
  155. ^^^^^
  156. test ebx,ebx
  157. jle M00_L01
  158. lea rbp,[rsi+28h]
  159. rps[j].Intersect(cp);
  160. ^^^^^^^^^^^^^^^^^^^^^
  161. M00_L00
  162.  
  163. mov rdx,qword ptr [rsi+18h]
  164. cmp edi,dword ptr [rdx+8]
  165. jae 00007ffe`3aa75fd7
  166. movsxd rcx,edi
  167. imul rcx,rcx,0A0h
  168. lea r14,[rdx+rcx+10h]
  169. lea r15,[rsp+80h]
  170. lea rcx,[rsp+20h]
  171. mov rdx,rbp
  172. mov r8d,60h
  173. call clr+0x1350
  174. mov rcx,r14
  175. mov rdx,r15
  176. lea r8,[rsp+20h]
  177. call Raytracer2.Primitives.Infinite.RayPack.Intersect(Raytracer2.Primitives.CirclePack)
  178. for (int j = 0, l = n / 8; j < l; j++)
  179. ^^^
  180. inc edi
  181. cmp edi,ebx
  182. jl M00_L00
  183. }
  184. ^
  185. M00_L01
  186.  
  187. add rsp,0A8h
  188.  
  189. Raytracer2.Primitives.Infinite.RayPack.Intersect(Raytracer2.Primitives.CirclePack)
  190. Vector<float> ox = this.origin.x;
  191. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  192. vmovupd ymm1,ymmword ptr [rsi]
  193. Vector<float> dx = this.direction.x;
  194. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  195. vmovupd ymm6,ymmword ptr [rsi+40h]
  196. Vector<float> oy = this.origin.y;
  197. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  198. vmovupd ymm0,ymmword ptr [rsi+20h]
  199. vmovupd ymm7,ymmword ptr [rsi+60h]
  200. vmovupd ymm2,ymmword ptr [r8]
  201. vmovupd ymm3,ymmword ptr [r8+20h]
  202. vmovupd ymm8,ymmword ptr [r8+40h]
  203. vsubps ymm9,ymm1,ymm2
  204. vsubps ymm10,ymm0,ymm3
  205. Vector<float> r = 2 * p * dx;
  206. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  207. lea rcx,[rsp+0E0h]
  208. vmovupd ymmword ptr [rsp+40h],ymm9
  209. vmovss xmm1,dword ptr [00007ffe`3aa76ab8]
  210. lea r8,[rsp+40h]
  211. vextractf128 xmm11,ymm9,1
  212. vextractf128 xmm12,ymm10,1
  213. vextractf128 xmm13,ymm6,1
  214. vextractf128 xmm14,ymm7,1
  215. vextractf128 xmm15,ymm8,1
  216. call System.Numerics.Vector`1[[System.Single, mscorlib]].op_Multiply(Single, System.Numerics.Vector`1)
  217. vinsertf128 ymm8,ymm8,xmm15,1
  218. vinsertf128 ymm7,ymm7,xmm14,1
  219. vinsertf128 ymm6,ymm6,xmm13,1
  220. vinsertf128 ymm10,ymm10,xmm12,1
  221. vinsertf128 ymm9,ymm9,xmm11,1
  222. lea rcx,[rsp+0C0h]
  223. vmovupd ymmword ptr [rsp+40h],ymm10
  224. vmovss xmm1,dword ptr [00007ffe`3aa76abc]
  225. lea r8,[rsp+40h]
  226. vextractf128 xmm11,ymm9,1
  227. vextractf128 xmm12,ymm10,1
  228. vextractf128 xmm13,ymm6,1
  229. vextractf128 xmm14,ymm7,1
  230. vextractf128 xmm15,ymm8,1
  231. call System.Numerics.Vector`1[[System.Single, mscorlib]].op_Multiply(Single, System.Numerics.Vector`1)
  232. vinsertf128 ymm8,ymm8,xmm15,1
  233. vinsertf128 ymm7,ymm7,xmm14,1
  234. vinsertf128 ymm6,ymm6,xmm13,1
  235. vinsertf128 ymm10,ymm10,xmm12,1
  236. vinsertf128 ymm9,ymm9,xmm11,1
  237. vmovupd ymm1,ymmword ptr [rsp+0C0h]
  238. vmulps ymm1,ymm1,ymm7
  239. vmulps ymm0,ymm6,ymm6
  240. vmulps ymm7,ymm7,ymm7
  241. vaddps ymm7,ymm0,ymm7
  242. vmulps ymm9,ymm9,ymm9
  243. vmulps ymm10,ymm10,ymm10
  244. vaddps ymm9,ymm9,ymm10
  245. vmulps ymm8,ymm8,ymm8
  246. vsubps ymm9,ymm9,ymm8
  247. vmovupd ymm0,ymmword ptr [rsp+0E0h]
  248. vmulps ymm0,ymm0,ymm6
  249. vaddps ymm6,ymm0,ymm1
  250. lea rcx,[rsp+0A0h]
  251. vmovupd ymmword ptr [rsp+40h],ymm7
  252. vmovss xmm1,dword ptr [00007ffe`3aa76ac0]
  253. lea r8,[rsp+40h]
  254. vextractf128 xmm8,ymm6,1
  255. vextractf128 xmm10,ymm7,1
  256. vextractf128 xmm11,ymm9,1
  257. call System.Numerics.Vector`1[[System.Single, mscorlib]].op_Multiply(Single, System.Numerics.Vector`1)
  258. vinsertf128 ymm9,ymm9,xmm11,1
  259. vinsertf128 ymm7,ymm7,xmm10,1
  260. vinsertf128 ymm6,ymm6,xmm8,1
  261. vmulps ymm1,ymm6,ymm6
  262. vmovupd ymm0,ymmword ptr [rsp+0A0h]
  263. vmulps ymm0,ymm0,ymm9
  264. vsubps ymm1,ymm1,ymm0
  265. mov rcx,qword ptr [13465D10h]
  266. vmovupd ymm0,ymmword ptr [rcx+8]
  267. vmovaps ymm8,ymm0
  268. vcmpltps ymm8,ymm8,ymm1
  269. vsqrtps ymm9,ymm1
  270. vxorps ymm1,ymm1,ymm1
  271. vsubps ymm10,ymm1,ymm6
  272. lea rcx,[rsp+80h]
  273. vmovupd ymmword ptr [rsp+40h],ymm7
  274. vmovss xmm1,dword ptr [00007ffe`3aa76ac4]
  275. lea r8,[rsp+40h]
  276. vextractf128 xmm11,ymm6,1
  277. vextractf128 xmm12,ymm7,1
  278. vextractf128 xmm13,ymm10,1
  279. vextractf128 xmm14,ymm9,1
  280. vextractf128 xmm15,ymm8,1
  281. call System.Numerics.Vector`1[[System.Single, mscorlib]].op_Multiply(Single, System.Numerics.Vector`1)
  282. vinsertf128 ymm8,ymm8,xmm15,1
  283. vinsertf128 ymm9,ymm9,xmm14,1
  284. vinsertf128 ymm10,ymm10,xmm13,1
  285. vinsertf128 ymm7,ymm7,xmm12,1
  286. vinsertf128 ymm6,ymm6,xmm11,1
  287. vaddps ymm10,ymm10,ymm9
  288. vmovupd ymm1,ymmword ptr [rsp+80h]
  289. vdivps ymm10,ymm10,ymm1
  290. vxorps ymm1,ymm1,ymm1
  291. vsubps ymm1,ymm1,ymm6
  292. vsubps ymm6,ymm1,ymm9
  293. lea rcx,[rsp+60h]
  294. vmovupd ymmword ptr [rsp+40h],ymm7
  295. vmovss xmm1,dword ptr [00007ffe`3aa76ac8]
  296. lea r8,[rsp+40h]
  297. vextractf128 xmm7,ymm10,1
  298. vextractf128 xmm9,ymm6,1
  299. vextractf128 xmm11,ymm8,1
  300. call System.Numerics.Vector`1[[System.Single, mscorlib]].op_Multiply(Single, System.Numerics.Vector`1)
  301. vinsertf128 ymm8,ymm8,xmm11,1
  302. vinsertf128 ymm6,ymm6,xmm9,1
  303. vinsertf128 ymm10,ymm10,xmm7,1
  304. vmovupd ymm0,ymmword ptr [rsp+60h]
  305. vdivps ymm1,ymm6,ymm0
  306. Vector<float> tm = Vector.ConditionalSelect(Vector.LessThan(t1, t0), t1, t0);
  307. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  308. vmovaps ymm0,ymm1
  309. vcmpltps ymm0,ymm0,ymm10
  310. vpand ymm0,ymm1,ymm0
  311. vcmpltps ymm1,ymm1,ymm10
  312. vpandn ymm1,ymm1,ymm10
  313. vpor ymm0,ymm0,ymm1
  314. Vector<int> maskBiggerThanZero = Vector.GreaterThan(tm, Constants.zeros);
  315. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  316. mov rax,qword ptr [13465D10h]
  317. vmovupd ymm1,ymmword ptr [rax+8]
  318. vcmpltps ymm1,ymm1,ymm0
  319. Vector<int> maskSmallerThanT = Vector.LessThan(tm, this.t);
  320. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  321. vmovupd ymm2,ymmword ptr [rsi+80h]
  322. vmovaps ymm3,ymm0
  323. vcmpltps ymm3,ymm3,ymm2
  324. vpand ymm1,ymm1,ymm3
  325. vpand ymm1,ymm8,ymm1
  326. this.t = Vector.ConditionalSelect(
  327. ^^^^^^^
  328. mask, // the bit mask that allows us to choose.
  329. ^^^^^^^
  330. tm, // the smallest of the t's.
  331. ^^^^^^^
  332. t); // if the bit mask is false (0), then we get our original t.
  333. ^^^^^^^
  334. vpand ymm0,ymm0,ymm1
  335. vmovupd ymm2,ymmword ptr [rsi+80h]
  336. vpandn ymm3,ymm1,ymm2
  337. vpor ymm0,ymm0,ymm3
  338. vmovupd ymmword ptr [rsi+80h],ymm0
  339. return mask;
  340. ^^^^^^^^^^^^
  341. vmovupd ymmword ptr [rdi],ymm1
  342. mov rax,rdi
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement