Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- --- c:\testproj\test\test\test.cpp ---------------------------------------------
- void func( __m128i out[4], __m128i h[8] )
- {
- 00AA1000 push ebx
- 00AA1001 mov ebx,esp
- 00AA1003 sub esp,8
- 00AA1006 and esp,0FFFFFFF0h
- 00AA1009 add esp,4
- 00AA100C push ebp
- 00AA100D mov ebp,dword ptr [ebx+4]
- 00AA1010 mov dword ptr [esp+4],ebp
- 00AA1014 mov ebp,esp
- 00AA1016 sub esp,10Ch
- __m128i R4 = _mm_hadd_epi16( _mm_hadd_epi16( D4 * i0, D4 * i1 ), _mm_hadd_epi16( D4 * i2, D4 * i3 ) );
- 00AA101C push edi
- __m128i D0 = _mm_mulhrs_epi16( h[0], a );
- 00AA101D movdqa xmm5,xmmword ptr [__fmode+50h (0AA33C0h)]
- __m128i D1 = _mm_mulhrs_epi16( h[1], a );
- __m128i D2 = _mm_mulhrs_epi16( h[2], a );
- 00AA1025 movdqa xmm2,xmmword ptr [eax+20h]
- __m128i D3 = _mm_mulhrs_epi16( h[3], a );
- __m128i D4 = _mm_mulhrs_epi16( h[4], a );
- __m128i D5 = _mm_mulhrs_epi16( h[5], a );
- 00AA102A movdqa xmm4,xmmword ptr [eax+50h]
- 00AA102F movdqa xmm0,xmmword ptr [eax]
- 00AA1033 movdqa xmm1,xmmword ptr [eax+10h]
- 00AA1038 movdqa xmm3,xmmword ptr [eax+40h]
- 00AA103D pmulhrsw xmm1,xmm5
- 00AA1042 pmulhrsw xmm0,xmm5
- 00AA1047 pmulhrsw xmm2,xmm5
- __m128i D6 = _mm_mulhrs_epi16( h[6], a );
- __m128i D7 = _mm_mulhrs_epi16( h[7], a );
- __m128i R0 = _mm_hadd_epi16( _mm_hadd_epi16( D0 * i0, D0 * i1 ), _mm_hadd_epi16( D0 * i2, D0 * i3 ) );
- 00AA104C movdqa xmm6,xmm0
- 00AA1050 pmulhrsw xmm4,xmm5
- 00AA1055 movdqa xmmword ptr [D2],xmm2
- 00AA105A movdqa xmm2,xmmword ptr [eax+30h]
- 00AA105F pmulhrsw xmm2,xmm5
- 00AA1064 pmulhrsw xmm3,xmm5
- 00AA1069 movdqa xmmword ptr [D5],xmm4
- 00AA106E movdqa xmm4,xmmword ptr [eax+60h]
- 00AA1073 movdqa xmm7,xmm0
- 00AA1077 pmullw xmm7,xmmword ptr [__fmode+10h (0AA3380h)]
- 00AA107F pmulhrsw xmm4,xmm5
- 00AA1084 movdqa xmm5,xmmword ptr [__fmode+30h (0AA33A0h)]
- 00AA108C pmullw xmm6,xmm5
- 00AA1090 movdqa xmmword ptr [expectedValue],xmm6
- 00AA1095 movdqa xmm6,xmm0
- 00AA1099 pmullw xmm6,xmmword ptr [__fmode+20h (0AA3390h)]
- 00AA10A1 phaddw xmm6,xmmword ptr [expectedValue]
- 00AA10A7 movdqa xmm6,xmmword ptr [__fmode+40h (0AA33B0h)]
- 00AA10AF pmullw xmm0,xmm6
- 00AA10B3 phaddw xmm7,xmm0
- 00AA10B8 movdqa xmm0,xmmword ptr [expectedValue]
- 00AA10BD phaddw xmm0,xmm7
- 00AA10C2 movdqa xmmword ptr [R0],xmm0
- __m128i R1 = _mm_hadd_epi16( _mm_hadd_epi16( D1 * i0, D1 * i1 ), _mm_hadd_epi16( D1 * i2, D1 * i3 ) );
- 00AA10C7 movdqa xmm0,xmm1
- 00AA10CB movdqa xmm7,xmm1
- 00AA10CF pmullw xmm0,xmm5
- 00AA10D3 movdqa xmmword ptr [expectedValue],xmm0
- 00AA10D8 movdqa xmm0,xmm1
- 00AA10DC pmullw xmm0,xmmword ptr [__fmode+20h (0AA3390h)]
- 00AA10E4 phaddw xmm0,xmmword ptr [expectedValue]
- 00AA10EA movdqa xmm0,xmmword ptr [__fmode+10h (0AA3380h)]
- 00AA10F2 pmullw xmm7,xmm0
- 00AA10F6 pmullw xmm1,xmm6
- 00AA10FA phaddw xmm7,xmm1
- 00AA10FF movdqa xmm1,xmmword ptr [expectedValue]
- 00AA1104 phaddw xmm1,xmm7
- 00AA1109 movdqa xmmword ptr [R1],xmm1
- __m128i R2 = _mm_hadd_epi16( _mm_hadd_epi16( D2 * i0, D2 * i1 ), _mm_hadd_epi16( D2 * i2, D2 * i3 ) );
- 00AA1111 movdqa xmm1,xmmword ptr [__fmode+20h (0AA3390h)]
- 00AA1119 movdqa xmmword ptr [ebp-70h],xmm1
- __m128i R3 = _mm_hadd_epi16( _mm_hadd_epi16( D3 * i0, D3 * i1 ), _mm_hadd_epi16( D3 * i2, D3 * i3 ) );
- 00AA111E movdqa xmm7,xmm2
- 00AA1122 pmullw xmm7,xmm5
- 00AA1126 movdqa xmmword ptr [expectedValue],xmm7
- 00AA112B movdqa xmm7,xmm2
- 00AA112F pmullw xmm7,xmm1
- 00AA1133 movdqa xmm1,xmmword ptr [expectedValue]
- 00AA1138 phaddw xmm1,xmm7
- 00AA113D movdqa xmm7,xmm2
- 00AA1141 pmullw xmm7,xmm0
- 00AA1145 pmullw xmm2,xmm6
- 00AA1149 phaddw xmm7,xmm2
- __m128i R4 = _mm_hadd_epi16( _mm_hadd_epi16( D4 * i0, D4 * i1 ), _mm_hadd_epi16( D4 * i2, D4 * i3 ) );
- 00AA114E movdqa xmm2,xmmword ptr [__fmode+20h (0AA3390h)]
- 00AA1156 phaddw xmm1,xmm7
- 00AA115B movdqa xmmword ptr [R3],xmm1
- 00AA1163 movdqa xmm1,xmm3
- 00AA1167 movdqa xmm7,xmm3
- 00AA116B pmullw xmm7,xmm2
- 00AA116F pmullw xmm1,xmm5
- 00AA1173 phaddw xmm1,xmm7
- 00AA1178 movdqa xmm7,xmm3
- 00AA117C pmullw xmm7,xmm0
- 00AA1180 pmullw xmm3,xmm6
- 00AA1184 phaddw xmm7,xmm3
- 00AA1189 phaddw xmm1,xmm7
- 00AA118E movdqa xmmword ptr [ebp-30h],xmm6
- 00AA1193 movdqa xmmword ptr [ebp-0F0h],xmm0
- 00AA119B movdqa xmmword ptr [ebp-0B0h],xmm5
- 00AA11A3 movdqa xmmword ptr [R4],xmm1
- __m128i R5 = _mm_hadd_epi16( _mm_hadd_epi16( D5 * i0, D5 * i1 ), _mm_hadd_epi16( D5 * i2, D5 * i3 ) );
- 00AA11A8 movdqa xmmword ptr [ebp-0E0h],xmm6
- 00AA11B0 movdqa xmmword ptr [ebp-0C0h],xmm0
- 00AA11B8 movdqa xmmword ptr [ebp-0A0h],xmm2
- 00AA11C0 movdqa xmmword ptr [ebp-80h],xmm5
- __m128i R6 = _mm_hadd_epi16( _mm_hadd_epi16( D6 * i0, D6 * i1 ), _mm_hadd_epi16( D6 * i2, D6 * i3 ) );
- __m128i expectedValue = _mm_set1_epi16( 3 );
- show( expectedValue );
- 00AA11C5 mov edi,dword ptr [__imp__printf (0AA20A0h)]
- 00AA11CB movdqa xmm3,xmm4
- 00AA11CF pmullw xmm3,xmm2
- 00AA11D3 movdqa xmm2,xmm4
- 00AA11D7 pmullw xmm2,xmm0
- 00AA11DB movdqa xmm0,xmmword ptr ds:[0AA2130h]
- 00AA11E3 movdqa xmmword ptr [expectedValue],xmm0
- 00AA11E8 movzx eax,word ptr [expectedValue]
- 00AA11EC movzx ecx,word ptr [ebp-0Eh]
- 00AA11F0 movzx edx,word ptr [ebp-0Ch]
- 00AA11F4 push eax
- 00AA11F5 movzx eax,word ptr [ebp-0Ah]
- 00AA11F9 push ecx
- 00AA11FA movzx ecx,word ptr [ebp-8]
- 00AA11FE push edx
- 00AA11FF movzx edx,word ptr [ebp-6]
- 00AA1203 push eax
- 00AA1204 movzx eax,word ptr [ebp-4]
- 00AA1208 push ecx
- 00AA1209 movzx ecx,word ptr [ebp-2]
- 00AA120D push edx
- 00AA120E push eax
- 00AA120F movdqa xmm1,xmm4
- 00AA1213 pmullw xmm1,xmm5
- 00AA1217 pmullw xmm4,xmm6
- 00AA121B push ecx
- 00AA121C phaddw xmm1,xmm3
- 00AA1221 phaddw xmm2,xmm4
- 00AA1226 phaddw xmm1,xmm2
- 00AA122B push offset string "%04X %04X %04X %04X %04X %04X %0"... (0AA2104h)
- 00AA1230 movdqa xmmword ptr [R6],xmm1
- 00AA1238 call edi
- show( R0 );
- 00AA123A movzx edx,word ptr [R0]
- 00AA123E movzx eax,word ptr [ebp-1Eh]
- 00AA1242 movzx ecx,word ptr [ebp-1Ch]
- 00AA1246 add esp,24h
- 00AA1249 push edx
- 00AA124A movzx edx,word ptr [ebp-1Ah]
- 00AA124E push eax
- 00AA124F movzx eax,word ptr [ebp-18h]
- 00AA1253 push ecx
- 00AA1254 movzx ecx,word ptr [ebp-16h]
- 00AA1258 push edx
- 00AA1259 movzx edx,word ptr [ebp-14h]
- 00AA125D push eax
- 00AA125E movzx eax,word ptr [ebp-12h]
- 00AA1262 push ecx
- 00AA1263 push edx
- 00AA1264 push eax
- 00AA1265 push offset string "%04X %04X %04X %04X %04X %04X %0"... (0AA2104h)
- 00AA126A call edi
- 00AA126C movdqa xmm1,xmmword ptr [D2]
- out[0] = R0 * R1 * R2 * R3 * R4 * R5;
- 00AA1271 movdqa xmm0,xmmword ptr [R0]
- 00AA1276 movdqa xmm2,xmmword ptr [R1]
- 00AA127E movdqa xmm3,xmm1
- 00AA1282 pmullw xmm3,xmmword ptr [ebp-0B0h]
- 00AA128A movdqa xmm4,xmm1
- 00AA128E pmullw xmm4,xmmword ptr [ebp-70h]
- 00AA1293 phaddw xmm3,xmm4
- 00AA1298 movdqa xmm4,xmm1
- 00AA129C pmullw xmm1,xmmword ptr [ebp-30h]
- 00AA12A1 pmullw xmm4,xmmword ptr [ebp-0F0h]
- 00AA12A9 phaddw xmm4,xmm1
- 00AA12AE movdqa xmm1,xmmword ptr [D5]
- 00AA12B3 movdqa xmm5,xmm1
- 00AA12B7 pmullw xmm5,xmmword ptr [ebp-80h]
- 00AA12BC movdqa xmm6,xmm1
- 00AA12C0 pmullw xmm6,xmmword ptr [ebp-0A0h]
- 00AA12C8 pmullw xmm0,xmm2
- 00AA12CC phaddw xmm3,xmm4
- 00AA12D1 movdqa xmm4,xmmword ptr [R3]
- 00AA12D9 phaddw xmm5,xmm6
- 00AA12DE pmullw xmm0,xmm3
- 00AA12E2 movdqa xmm3,xmmword ptr [R4]
- 00AA12E7 movdqa xmm6,xmm1
- 00AA12EB pmullw xmm6,xmmword ptr [ebp-0C0h]
- 00AA12F3 pmullw xmm1,xmmword ptr [ebp-0E0h]
- 00AA12FB pmullw xmm0,xmm4
- 00AA12FF add esp,24h
- 00AA1302 pmullw xmm0,xmm3
- 00AA1306 phaddw xmm6,xmm1
- 00AA130B phaddw xmm5,xmm6
- 00AA1310 pmullw xmm0,xmm5
- 00AA1314 movdqa xmmword ptr [esi],xmm0
- out[1] = R3;
- out[2] = R4 * R6;
- 00AA1318 movdqa xmm0,xmmword ptr [R6]
- 00AA1320 pmullw xmm3,xmm0
- out[3] = R1 * R6;
- 00AA1324 pmullw xmm2,xmm0
- 00AA1328 movdqa xmmword ptr [esi+10h],xmm4
- 00AA132D movdqa xmmword ptr [esi+20h],xmm3
- 00AA1332 movdqa xmmword ptr [esi+30h],xmm2
- }
- 00AA1337 pop edi
- 00AA1338 mov esp,ebp
- 00AA133A pop ebp
- 00AA133B mov esp,ebx
- 00AA133D pop ebx
- 00AA133E ret
- --- No source file -------------------------------------------------------------
- 00AA133F int 3
- --- c:\testproj\test\test\test.cpp ---------------------------------------------
- int main()
- {
- 00AA1340 push ebp
- 00AA1341 mov ebp,esp
- 00AA1343 and esp,0FFFFFFF0h
- 00AA1346 sub esp,0CCh
- const __m128i zero = _mm_setzero_si128();
- __m128i h[8] = { _mm_set_epi16( 0, 0, 0, 6, 0, 0, 0, 6 ), zero, zero, zero, zero, zero, zero, zero };
- 00AA134C movdqa xmm1,xmmword ptr ds:[0AA2140h]
- 00AA1354 pxor xmm0,xmm0
- 00AA1358 push esi
- __m128i out[4];
- func( out, h );
- 00AA1359 lea eax,[esp+10h]
- 00AA135D lea esi,[esp+90h]
- 00AA1364 movdqa xmmword ptr [esp+10h],xmm1
- 00AA136A movdqa xmmword ptr [esp+20h],xmm0
- 00AA1370 movdqa xmmword ptr [esp+30h],xmm0
- 00AA1376 movdqa xmmword ptr [esp+40h],xmm0
- 00AA137C movdqa xmmword ptr [esp+50h],xmm0
- 00AA1382 movdqa xmmword ptr [esp+60h],xmm0
- 00AA1388 movdqa xmmword ptr [esp+70h],xmm0
- 00AA138E movdqa xmmword ptr [esp+80h],xmm0
- 00AA1397 call func (0AA1000h)
- return 0;
- 00AA139C xor eax,eax
- }
- 00AA139E pop esi
- 00AA139F mov esp,ebp
- 00AA13A1 pop ebp
- 00AA13A2 ret
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement