Advertisement
Guest User

Untitled

a guest
Jan 4th, 2011
183
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.55 KB | None | 0 0
  1. --- c:\testproj\test\test\test.cpp ---------------------------------------------
  ; func( out, h ) -- debugger mixed source/disassembly listing (32-bit x86, Intel operand order).
  ; Register roles as used in this listing:
  ;   eax = h    (read as [eax] .. [eax+60h]; main loads eax with the address of its h array)
  ;   esi = out  (written as [esi] .. [esi+30h]; main loads esi with the address of its out array)
  ;   ebx = value of esp right after `push ebx`; used to undo the stack re-alignment on exit
  ;   ebp = frame pointer of the 16-byte re-aligned frame; locals and spills are addressed off ebp
  ;   edi = address of printf (edi is saved on entry and restored on exit)
  ; Neither argument is read from the stack, and [eax+70h] (h[7]) is never loaded.
  ; Constants referenced below (the `__fmode+NNh` names are whatever symbol the debugger chose):
  ;   0AA33C0h = second operand of every pmulhrsw (`a` in the source lines)
  ;   K10 = 0AA3380h, K20 = 0AA3390h, K30 = 0AA33A0h, K40 = 0AA33B0h = pmullw multipliers.
  ;   Judging by the R3/R4 sequences these are presumably i2, i1, i0, i3 respectively -- TODO confirm.
  ; NOTE(review): the interleaved source lines do not always sit next to the instructions that
  ; implement them (e.g. the R4 statement appears beside `push edi`).
  2. void func( __m128i out[4], __m128i h[8] )
  3. {
  ; Prologue: save ebx, align esp down to 16 bytes, then build an ebp frame inside the aligned
  ; area. [ebx+4] is the return address (pushed by the caller just before ebx); a copy of it is
  ; placed directly above the saved ebp.
  4. 00AA1000 push ebx
  5. 00AA1001 mov ebx,esp
  6. 00AA1003 sub esp,8
  7. 00AA1006 and esp,0FFFFFFF0h
  8. 00AA1009 add esp,4
  9. 00AA100C push ebp
  10. 00AA100D mov ebp,dword ptr [ebx+4]
  11. 00AA1010 mov dword ptr [esp+4],ebp
  12. 00AA1014 mov ebp,esp
  13. 00AA1016 sub esp,10Ch
  14. __m128i R4 = _mm_hadd_epi16( _mm_hadd_epi16( D4 * i0, D4 * i1 ), _mm_hadd_epi16( D4 * i2, D4 * i3 ) );
  15. 00AA101C push edi
  16. __m128i D0 = _mm_mulhrs_epi16( h[0], a );
  ; xmm5 = multiplier shared by all pmulhrsw instructions below.
  17. 00AA101D movdqa xmm5,xmmword ptr [__fmode+50h (0AA33C0h)]
  18. __m128i D1 = _mm_mulhrs_epi16( h[1], a );
  19. __m128i D2 = _mm_mulhrs_epi16( h[2], a );
  20. 00AA1025 movdqa xmm2,xmmword ptr [eax+20h]
  21. __m128i D3 = _mm_mulhrs_epi16( h[3], a );
  22. __m128i D4 = _mm_mulhrs_epi16( h[4], a );
  23. __m128i D5 = _mm_mulhrs_epi16( h[5], a );
  ; Loads: xmm4 = h[5], xmm0 = h[0], xmm1 = h[1], xmm3 = h[4] (xmm2 = h[2] was loaded above).
  24. 00AA102A movdqa xmm4,xmmword ptr [eax+50h]
  25. 00AA102F movdqa xmm0,xmmword ptr [eax]
  26. 00AA1033 movdqa xmm1,xmmword ptr [eax+10h]
  27. 00AA1038 movdqa xmm3,xmmword ptr [eax+40h]
  ; After these three: xmm1 = D1, xmm0 = D0, xmm2 = D2.
  28. 00AA103D pmulhrsw xmm1,xmm5
  29. 00AA1042 pmulhrsw xmm0,xmm5
  30. 00AA1047 pmulhrsw xmm2,xmm5
  31. __m128i D6 = _mm_mulhrs_epi16( h[6], a );
  32. __m128i D7 = _mm_mulhrs_epi16( h[7], a );
  33.  
  34. __m128i R0 = _mm_hadd_epi16( _mm_hadd_epi16( D0 * i0, D0 * i1 ), _mm_hadd_epi16( D0 * i2, D0 * i3 ) );
  ; The R0 arithmetic is interleaved with the remaining D-value setup:
  ;   xmm4 -> D5, spilled to [D5];  xmm2 -> D2 spilled to [D2], then reloaded as h[3] -> D3;
  ;   xmm3 -> D4;  xmm4 reloaded as h[6] -> D6.
  35. 00AA104C movdqa xmm6,xmm0
  36. 00AA1050 pmulhrsw xmm4,xmm5
  37. 00AA1055 movdqa xmmword ptr [D2],xmm2
  38. 00AA105A movdqa xmm2,xmmword ptr [eax+30h]
  39. 00AA105F pmulhrsw xmm2,xmm5
  40. 00AA1064 pmulhrsw xmm3,xmm5
  41. 00AA1069 movdqa xmmword ptr [D5],xmm4
  42. 00AA106E movdqa xmm4,xmmword ptr [eax+60h]
  43. 00AA1073 movdqa xmm7,xmm0
  ; xmm7 = D0 * K10
  44. 00AA1077 pmullw xmm7,xmmword ptr [__fmode+10h (0AA3380h)]
  45. 00AA107F pmulhrsw xmm4,xmm5
  ; xmm5 stops being the pmulhrsw multiplier here and holds K30 from now on.
  46. 00AA1084 movdqa xmm5,xmmword ptr [__fmode+30h (0AA33A0h)]
  47. 00AA108C pmullw xmm6,xmm5
  ; [expectedValue] is the debugger's name for the stack slot at ebp-10h (its other lanes are
  ; read as [ebp-0Eh] .. [ebp-2] further down). Here the slot is used as scratch for D0 * K30,
  ; before the expectedValue constant itself is stored into it.
  48. 00AA1090 movdqa xmmword ptr [expectedValue],xmm6
  49. 00AA1095 movdqa xmm6,xmm0
  50. 00AA1099 pmullw xmm6,xmmword ptr [__fmode+20h (0AA3390h)]
  51. 00AA10A1 phaddw xmm6,xmmword ptr [expectedValue]
  ; NOTE(review): xmm6 held hadd(D0*K20, D0*K30) after the phaddw above, but the next
  ; instruction overwrites xmm6 with K40, so that horizontal sum is never consumed.
  52. 00AA10A7 movdqa xmm6,xmmword ptr [__fmode+40h (0AA33B0h)]
  53. 00AA10AF pmullw xmm0,xmm6
  ; xmm7 = hadd(D0*K10, D0*K40)
  54. 00AA10B3 phaddw xmm7,xmm0
  ; NOTE(review): this reloads the raw product D0*K30 from the scratch slot, not a horizontal
  ; sum, so the value stored as R0 is hadd(D0*K30, hadd(D0*K10, D0*K40)). The R3/R4 sequences
  ; below combine two horizontal sums instead. This looks like a code-generation problem
  ; affecting R0 (and R1, same shape) -- confirm against the compiler and its options.
  55. 00AA10B8 movdqa xmm0,xmmword ptr [expectedValue]
  56. 00AA10BD phaddw xmm0,xmm7
  57. 00AA10C2 movdqa xmmword ptr [R0],xmm0
  58. __m128i R1 = _mm_hadd_epi16( _mm_hadd_epi16( D1 * i0, D1 * i1 ), _mm_hadd_epi16( D1 * i2, D1 * i3 ) );
  ; R1 from xmm1 = D1, using the same instruction shape as R0.
  59. 00AA10C7 movdqa xmm0,xmm1
  60. 00AA10CB movdqa xmm7,xmm1
  61. 00AA10CF pmullw xmm0,xmm5
  ; scratch slot = D1 * K30
  62. 00AA10D3 movdqa xmmword ptr [expectedValue],xmm0
  63. 00AA10D8 movdqa xmm0,xmm1
  64. 00AA10DC pmullw xmm0,xmmword ptr [__fmode+20h (0AA3390h)]
  65. 00AA10E4 phaddw xmm0,xmmword ptr [expectedValue]
  ; NOTE(review): as for R0, the phaddw result in xmm0 is overwritten (with K10) before use.
  66. 00AA10EA movdqa xmm0,xmmword ptr [__fmode+10h (0AA3380h)]
  67. 00AA10F2 pmullw xmm7,xmm0
  68. 00AA10F6 pmullw xmm1,xmm6
  ; xmm7 = hadd(D1*K10, D1*K40)
  69. 00AA10FA phaddw xmm7,xmm1
  ; xmm1 = raw D1*K30 reloaded from the scratch slot, then R1 = hadd(xmm1, xmm7).
  70. 00AA10FF movdqa xmm1,xmmword ptr [expectedValue]
  71. 00AA1104 phaddw xmm1,xmm7
  72. 00AA1109 movdqa xmmword ptr [R1],xmm1
  73. __m128i R2 = _mm_hadd_epi16( _mm_hadd_epi16( D2 * i0, D2 * i1 ), _mm_hadd_epi16( D2 * i2, D2 * i3 ) );
  ; Only a spill happens here: K20 is copied to [ebp-70h]. The R2 arithmetic itself is done
  ; after the second printf call, from the spilled [D2] and the spilled constants.
  74. 00AA1111 movdqa xmm1,xmmword ptr [__fmode+20h (0AA3390h)]
  75. 00AA1119 movdqa xmmword ptr [ebp-70h],xmm1
  76. __m128i R3 = _mm_hadd_epi16( _mm_hadd_epi16( D3 * i0, D3 * i1 ), _mm_hadd_epi16( D3 * i2, D3 * i3 ) );
  ; R3 from xmm2 = D3. Registers at this point: xmm5 = K30, xmm1 = K20, xmm0 = K10, xmm6 = K40.
  77. 00AA111E movdqa xmm7,xmm2
  78. 00AA1122 pmullw xmm7,xmm5
  79. 00AA1126 movdqa xmmword ptr [expectedValue],xmm7
  80. 00AA112B movdqa xmm7,xmm2
  81. 00AA112F pmullw xmm7,xmm1
  82. 00AA1133 movdqa xmm1,xmmword ptr [expectedValue]
  ; xmm1 = hadd(D3*K30, D3*K20)
  83. 00AA1138 phaddw xmm1,xmm7
  84. 00AA113D movdqa xmm7,xmm2
  85. 00AA1141 pmullw xmm7,xmm0
  86. 00AA1145 pmullw xmm2,xmm6
  ; xmm7 = hadd(D3*K10, D3*K40)
  87. 00AA1149 phaddw xmm7,xmm2
  88. __m128i R4 = _mm_hadd_epi16( _mm_hadd_epi16( D4 * i0, D4 * i1 ), _mm_hadd_epi16( D4 * i2, D4 * i3 ) );
  ; xmm2 is reloaded with K20; the phaddw that follows completes R3, which is then stored.
  89. 00AA114E movdqa xmm2,xmmword ptr [__fmode+20h (0AA3390h)]
  90. 00AA1156 phaddw xmm1,xmm7
  91. 00AA115B movdqa xmmword ptr [R3],xmm1
  ; R4 from xmm3 = D4: hadd( hadd(D4*K30, D4*K20), hadd(D4*K10, D4*K40) ).
  92. 00AA1163 movdqa xmm1,xmm3
  93. 00AA1167 movdqa xmm7,xmm3
  94. 00AA116B pmullw xmm7,xmm2
  95. 00AA116F pmullw xmm1,xmm5
  96. 00AA1173 phaddw xmm1,xmm7
  97. 00AA1178 movdqa xmm7,xmm3
  98. 00AA117C pmullw xmm7,xmm0
  99. 00AA1180 pmullw xmm3,xmm6
  100. 00AA1184 phaddw xmm7,xmm3
  101. 00AA1189 phaddw xmm1,xmm7
  ; Spill K40, K10 and K30 to the stack; these copies (plus [ebp-70h] = K20) are the memory
  ; operands of the R2 computation after the printf calls.
  102. 00AA118E movdqa xmmword ptr [ebp-30h],xmm6
  103. 00AA1193 movdqa xmmword ptr [ebp-0F0h],xmm0
  104. 00AA119B movdqa xmmword ptr [ebp-0B0h],xmm5
  105. 00AA11A3 movdqa xmmword ptr [R4],xmm1
  106. __m128i R5 = _mm_hadd_epi16( _mm_hadd_epi16( D5 * i0, D5 * i1 ), _mm_hadd_epi16( D5 * i2, D5 * i3 ) );
  ; A second set of spills (K40, K10, K20, K30) that the R5 computation reads back after the
  ; printf calls. No R5 arithmetic is performed at this point.
  107. 00AA11A8 movdqa xmmword ptr [ebp-0E0h],xmm6
  108. 00AA11B0 movdqa xmmword ptr [ebp-0C0h],xmm0
  109. 00AA11B8 movdqa xmmword ptr [ebp-0A0h],xmm2
  110. 00AA11C0 movdqa xmmword ptr [ebp-80h],xmm5
  111. __m128i R6 = _mm_hadd_epi16( _mm_hadd_epi16( D6 * i0, D6 * i1 ), _mm_hadd_epi16( D6 * i2, D6 * i3 ) );
  112.  
  113. __m128i expectedValue = _mm_set1_epi16( 3 );
  114. show( expectedValue );
  ; edi = printf, used for both indirect calls below.
  ; The R6 arithmetic (from xmm4 = D6) is interleaved with the argument setup for the call:
  ; xmm3 = D6*K20, xmm2 = D6*K10 here; xmm1 = D6*K30 and xmm4 = D6*K40 further down.
  115. 00AA11C5 mov edi,dword ptr [__imp__printf (0AA20A0h)]
  116. 00AA11CB movdqa xmm3,xmm4
  117. 00AA11CF pmullw xmm3,xmm2
  118. 00AA11D3 movdqa xmm2,xmm4
  119. 00AA11D7 pmullw xmm2,xmm0
  ; The expectedValue constant is loaded from 0AA2130h and stored into its stack slot.
  120. 00AA11DB movdqa xmm0,xmmword ptr ds:[0AA2130h]
  121. 00AA11E3 movdqa xmmword ptr [expectedValue],xmm0
  ; The eight 16-bit lanes are zero-extended and pushed lane 0 first, lane 7 last, so lane 7 is
  ; the first variadic argument after the format string.
  122. 00AA11E8 movzx eax,word ptr [expectedValue]
  123. 00AA11EC movzx ecx,word ptr [ebp-0Eh]
  124. 00AA11F0 movzx edx,word ptr [ebp-0Ch]
  125. 00AA11F4 push eax
  126. 00AA11F5 movzx eax,word ptr [ebp-0Ah]
  127. 00AA11F9 push ecx
  128. 00AA11FA movzx ecx,word ptr [ebp-8]
  129. 00AA11FE push edx
  130. 00AA11FF movzx edx,word ptr [ebp-6]
  131. 00AA1203 push eax
  132. 00AA1204 movzx eax,word ptr [ebp-4]
  133. 00AA1208 push ecx
  134. 00AA1209 movzx ecx,word ptr [ebp-2]
  135. 00AA120D push edx
  136. 00AA120E push eax
  137. 00AA120F movdqa xmm1,xmm4
  138. 00AA1213 pmullw xmm1,xmm5
  139. 00AA1217 pmullw xmm4,xmm6
  140. 00AA121B push ecx
  ; R6 = hadd( hadd(D6*K30, D6*K20), hadd(D6*K10, D6*K40) ); stored to [R6] just before the call.
  141. 00AA121C phaddw xmm1,xmm3
  142. 00AA1221 phaddw xmm2,xmm4
  143. 00AA1226 phaddw xmm1,xmm2
  144. 00AA122B push offset string "%04X %04X %04X %04X %04X %04X %0"... (0AA2104h)
  145. 00AA1230 movdqa xmmword ptr [R6],xmm1
  146. 00AA1238 call edi
  147. show( R0 );
  ; Same argument pattern for R0, whose lanes are read from its stack slot ([R0], then
  ; [ebp-1Eh] .. [ebp-12h]).
  148. 00AA123A movzx edx,word ptr [R0]
  149. 00AA123E movzx eax,word ptr [ebp-1Eh]
  150. 00AA1242 movzx ecx,word ptr [ebp-1Ch]
  ; Drops the 9 dwords (24h bytes) pushed for the previous printf call.
  151. 00AA1246 add esp,24h
  152. 00AA1249 push edx
  153. 00AA124A movzx edx,word ptr [ebp-1Ah]
  154. 00AA124E push eax
  155. 00AA124F movzx eax,word ptr [ebp-18h]
  156. 00AA1253 push ecx
  157. 00AA1254 movzx ecx,word ptr [ebp-16h]
  158. 00AA1258 push edx
  159. 00AA1259 movzx edx,word ptr [ebp-14h]
  160. 00AA125D push eax
  161. 00AA125E movzx eax,word ptr [ebp-12h]
  162. 00AA1262 push ecx
  163. 00AA1263 push edx
  164. 00AA1264 push eax
  165. 00AA1265 push offset string "%04X %04X %04X %04X %04X %04X %0"... (0AA2104h)
  166. 00AA126A call edi
  ; From here on every xmm value is reloaded from the stack; nothing held in an xmm register
  ; before the calls is reused.
  167. 00AA126C movdqa xmm1,xmmword ptr [D2]
  168.  
  169. out[0] = R0 * R1 * R2 * R3 * R4 * R5;
  ; xmm0 = R0 and xmm2 = R1 are reloaded; R2 (built in xmm3) and R5 (built in xmm5) are
  ; computed here from the spilled D2/D5 and the spilled constants, interleaved with the
  ; running product that accumulates in xmm0.
  170. 00AA1271 movdqa xmm0,xmmword ptr [R0]
  171. 00AA1276 movdqa xmm2,xmmword ptr [R1]
  172. 00AA127E movdqa xmm3,xmm1
  173. 00AA1282 pmullw xmm3,xmmword ptr [ebp-0B0h]
  174. 00AA128A movdqa xmm4,xmm1
  175. 00AA128E pmullw xmm4,xmmword ptr [ebp-70h]
  ; xmm3 = hadd(D2*K30, D2*K20)
  176. 00AA1293 phaddw xmm3,xmm4
  177. 00AA1298 movdqa xmm4,xmm1
  178. 00AA129C pmullw xmm1,xmmword ptr [ebp-30h]
  179. 00AA12A1 pmullw xmm4,xmmword ptr [ebp-0F0h]
  ; xmm4 = hadd(D2*K10, D2*K40)
  180. 00AA12A9 phaddw xmm4,xmm1
  181. 00AA12AE movdqa xmm1,xmmword ptr [D5]
  182. 00AA12B3 movdqa xmm5,xmm1
  183. 00AA12B7 pmullw xmm5,xmmword ptr [ebp-80h]
  184. 00AA12BC movdqa xmm6,xmm1
  185. 00AA12C0 pmullw xmm6,xmmword ptr [ebp-0A0h]
  ; xmm0 = R0 * R1
  186. 00AA12C8 pmullw xmm0,xmm2
  ; xmm3 = R2
  187. 00AA12CC phaddw xmm3,xmm4
  188. 00AA12D1 movdqa xmm4,xmmword ptr [R3]
  ; xmm5 = hadd(D5*K30, D5*K20)
  189. 00AA12D9 phaddw xmm5,xmm6
  190. 00AA12DE pmullw xmm0,xmm3
  191. 00AA12E2 movdqa xmm3,xmmword ptr [R4]
  192. 00AA12E7 movdqa xmm6,xmm1
  193. 00AA12EB pmullw xmm6,xmmword ptr [ebp-0C0h]
  194. 00AA12F3 pmullw xmm1,xmmword ptr [ebp-0E0h]
  195. 00AA12FB pmullw xmm0,xmm4
  ; Drops the 9 dwords pushed for the second printf call.
  196. 00AA12FF add esp,24h
  197. 00AA1302 pmullw xmm0,xmm3
  198. 00AA1306 phaddw xmm6,xmm1
  ; xmm5 = R5
  199. 00AA130B phaddw xmm5,xmm6
  200. 00AA1310 pmullw xmm0,xmm5
  ; out[0] = R0 * R1 * R2 * R3 * R4 * R5 (lane-wise low 16-bit products)
  201. 00AA1314 movdqa xmmword ptr [esi],xmm0
  202. out[1] = R3;
  203. out[2] = R4 * R6;
  ; xmm4 still holds R3, xmm3 holds R4 and xmm2 holds R1 from the loads above.
  204. 00AA1318 movdqa xmm0,xmmword ptr [R6]
  205. 00AA1320 pmullw xmm3,xmm0
  206. out[3] = R1 * R6;
  207. 00AA1324 pmullw xmm2,xmm0
  208. 00AA1328 movdqa xmmword ptr [esi+10h],xmm4
  209. 00AA132D movdqa xmmword ptr [esi+20h],xmm3
  210. 00AA1332 movdqa xmmword ptr [esi+30h],xmm2
  211. }
  ; Epilogue: restore edi, tear down the aligned ebp frame, then restore the original esp from
  ; ebx and pop the saved ebx.
  212. 00AA1337 pop edi
  213. 00AA1338 mov esp,ebp
  214. 00AA133A pop ebp
  215. 00AA133B mov esp,ebx
  216. 00AA133D pop ebx
  217. 00AA133E ret
  218. --- No source file -------------------------------------------------------------
  219. 00AA133F int 3
  220. --- c:\testproj\test\test\test.cpp ---------------------------------------------
  221.  
  ; main -- builds the h[8] input on a 16-byte aligned stack frame, calls func, returns 0.
  ; Frame layout after `push esi`: h occupies [esp+10h] .. [esp+8Fh], out starts at [esp+90h].
  ; func receives its arguments in registers in this listing: eax = h, esi = out.
  222. int main()
  223. {
  224. 00AA1340 push ebp
  225. 00AA1341 mov ebp,esp
  ; Align esp to 16 bytes. The 0CCh reservation plus the 4-byte `push esi` below totals 0D0h,
  ; so esp is 16-byte aligned again when the movdqa stores execute.
  226. 00AA1343 and esp,0FFFFFFF0h
  227. 00AA1346 sub esp,0CCh
  228. const __m128i zero = _mm_setzero_si128();
  229. __m128i h[8] = { _mm_set_epi16( 0, 0, 0, 6, 0, 0, 0, 6 ), zero, zero, zero, zero, zero, zero, zero };
  ; xmm1 = the h[0] constant loaded from 0AA2140h; xmm0 = all zero bits.
  230. 00AA134C movdqa xmm1,xmmword ptr ds:[0AA2140h]
  231. 00AA1354 pxor xmm0,xmm0
  ; esi is saved because it is overwritten with the out pointer below; restored before ret.
  232. 00AA1358 push esi
  233. __m128i out[4];
  234. func( out, h );
  ; eax = address of h, esi = address of out.
  235. 00AA1359 lea eax,[esp+10h]
  236. 00AA135D lea esi,[esp+90h]
  ; h[0] = xmm1, h[1] .. h[7] = zero.
  237. 00AA1364 movdqa xmmword ptr [esp+10h],xmm1
  238. 00AA136A movdqa xmmword ptr [esp+20h],xmm0
  239. 00AA1370 movdqa xmmword ptr [esp+30h],xmm0
  240. 00AA1376 movdqa xmmword ptr [esp+40h],xmm0
  241. 00AA137C movdqa xmmword ptr [esp+50h],xmm0
  242. 00AA1382 movdqa xmmword ptr [esp+60h],xmm0
  243. 00AA1388 movdqa xmmword ptr [esp+70h],xmm0
  244. 00AA138E movdqa xmmword ptr [esp+80h],xmm0
  ; Nothing is pushed for the call; the contents of out are not read afterwards.
  245. 00AA1397 call func (0AA1000h)
  246. return 0;
  247. 00AA139C xor eax,eax
  248. }
  249. 00AA139E pop esi
  250. 00AA139F mov esp,ebp
  251. 00AA13A1 pop ebp
  252. 00AA13A2 ret
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement