Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Image::getConvolve -- fills pixels_ by resampling image_ at the sub-pixel
- // coordinates held in mapX_/mapY_ (refreshed first via updateCoordMaps(params)).
- // For every output pixel the coordinate is split into integer and fractional
- // parts; the fractional parts feed tap4Kernel() to obtain four horizontal and
- // four vertical weights, and the output is the weighted sum over the 4x4 block
- // of source pixels starting one row above / one column left of the integer
- // position.
- //
- // params: forwarded unchanged to updateCoordMaps(); not otherwise read here.
- //
- // NOTE(review): no bounds check on img + imgoffs is visible in this listing --
- // presumably updateCoordMaps() keeps every coordinate at least one pixel inside
- // the image with room for the 4x4 footprint; confirm against its definition.
- // NOTE(review): the signature in the pasted listing read "WarpParams ¶ms", an
- // HTML-entity decoding artefact of "&params"; restored below.
- void Image::getConvolve(const WarpParams &params)
- {
- 00409E60 push ebx
- 00409E61 mov ebx,esp
- 00409E63 sub esp,8
- 00409E66 and esp,0FFFFFFF0h
- 00409E69 add esp,4
- 00409E6C push ebp
- 00409E6D mov ebp,dword ptr [ebx+4]
- 00409E70 mov dword ptr [esp+4],ebp
- 00409E74 mov ebp,esp
- 00409E76 sub esp,98h
- 00409E7C push esi
- 00409E7D mov esi,dword ptr [ebx+8]
- 00409E80 push edi
- // Weight buffers have static storage (one copy shared by every call) and are
- // 16-byte aligned because kernelX is read back with the aligned _mm_load_ps.
- __declspec(align(16)) static float kernelX[4], kernelY[4];
- updateCoordMaps(params);
- 00409E81 mov edx,esi
- 00409E83 call cci::Image::updateCoordMaps (40AC30h)
- const Size2DI subSize = size();
- 00409E88 mov eax,dword ptr [esi+2Ch]
- 00409E8B mov ecx,dword ptr [eax+4]
- 00409E8E mov eax,dword ptr [eax]
- // Number of output pixels; also the trip count of the loop below.
- const size_t pxCount = subSize.count();
- const float
- *const mapX = mapX_.ptr<float>(),
- *const mapY = mapY_.ptr<float>(),
- *const img = image_->data().ptr<float>();
- 00409E90 mov edx,dword ptr [esi]
- 00409E92 imul eax,ecx
- 00409E95 mov edi,dword ptr [edx+14h]
- 00409E98 mov ecx,dword ptr [esi+4Ch]
- // Only `subset` is a pointer (the output buffer); x, y, xint and yint are
- // plain floats declared in the same statement.
- float *const subset = pixels_.ptr<float>(),
- x, y, xint, yint;
- // Source row stride in elements, used to step imgoffs from one row to the next.
- const ptrdiff_t imgw = image_->width();
- 00409E9B mov edx,dword ptr [edx+2Ch]
- 00409E9E mov dword ptr [img],edi
- 00409EA1 mov edi,dword ptr [esi+14h]
- 00409EA4 mov dword ptr [subset],edi
- 00409EA7 mov edi,dword ptr [edx+4]
- 00409EAA mov dword ptr [mapX],ecx
- 00409EAD mov ecx,dword ptr [esi+84h]
- 00409EB3 mov dword ptr [imgw],edi
- ptrdiff_t imgoffs;
- __m128 v_px, v_kernX, v_kernY, v_val;
- for (size_t idx = 0; idx < pxCount; ++idx)
- 00409EB6 test eax,eax
- 00409EB8 je cci::Image::getConvolve+275h (40A0D5h)
- // NOTE(review): the tap4Kernel line shown here, ahead of the loop's opening
- // brace, looks like debugger line attribution for the constant loads below
- // (six 16-byte constants copied to the stack once, before the loop), not a
- // separate statement in the original source -- confirm against the real file.
- tap4Kernel(y, kernelY);
- 00409EBE movaps xmm0,xmmword ptr [cci::Image::CV_PIXEL_MAT+10h (411980h)]
- 00409EC5 mov edx,dword ptr [mapX]
- 00409EC8 movaps xmmword ptr [ebp-80h],xmm0
- 00409ECC movaps xmm0,xmmword ptr [cci::Image::CV_PIXEL_MAT+30h (4119A0h)]
- 00409ED3 movaps xmmword ptr [ebp-50h],xmm0
- 00409ED7 movaps xmm0,xmmword ptr [cci::Image::CV_PIXEL_MAT+50h (4119C0h)]
- 00409EDE sub edx,ecx
- 00409EE0 movaps xmmword ptr [ebp-60h],xmm0
- 00409EE4 movaps xmm0,xmmword ptr [cci::Image::CV_PIXEL_MAT+20h (411990h)]
- 00409EEB mov dword ptr [ebp-20h],edx
- 00409EEE mov edx,dword ptr [subset]
- 00409EF1 movaps xmmword ptr [ebp-70h],xmm0
- 00409EF5 movaps xmm0,xmmword ptr [cci::Image::CV_PIXEL_MAT+40h (4119B0h)]
- 00409EFC movaps xmmword ptr [ebp-90h],xmm0
- 00409F03 movaps xmm0,xmmword ptr [cci::Image::CV_PIXEL_MAT+60h (4119D0h)]
- 00409F0A sub edx,ecx
- 00409F0C movaps xmmword ptr [ebp-40h],xmm0
- 00409F10 mov esi,ecx
- 00409F12 mov dword ptr [ebp-0Ch],edx
- 00409F15 mov dword ptr [mapX],eax
- {
- // Split the mapped coordinate: x/y receive the fractional part, xint/yint the
- // integral part. Each modf is an out-of-line call through the import table.
- x = modf(mapX[idx], &xint);
- 00409F18 mov eax,dword ptr [ebp-20h]
- 00409F1B fld dword ptr [eax+esi]
- 00409F1E lea ecx,[ebp-28h]
- 00409F21 fstp dword ptr [subset]
- 00409F24 push ecx
- 00409F25 fld dword ptr [subset]
- 00409F28 sub esp,8
- 00409F2B fstp qword ptr [esp]
- 00409F2E call dword ptr [__imp__modf (4111E8h)]
- 00409F34 fstp dword ptr [subset]
- 00409F37 add esp,0Ch
- 00409F3A fld dword ptr [subset]
- y = modf(mapY[idx], &yint);
- 00409F3D lea edx,[ebp-30h]
- 00409F40 fstp dword ptr [x]
- 00409F43 push edx
- 00409F44 fld dword ptr [esi]
- 00409F46 sub esp,8
- 00409F49 fstp dword ptr [subset]
- 00409F4C fld dword ptr [subset]
- 00409F4F fstp qword ptr [esp]
- 00409F52 call dword ptr [__imp__modf (4111E8h)]
- 00409F58 fstp dword ptr [subset]
- 00409F5B add esp,0Ch
- 00409F5E fld dword ptr [subset]
- 00409F61 fstp dword ptr [y]
- // Element offset of the top-left tap: one row above and one column to the
- // left of the integer sample position.
- imgoffs = (((ptrdiff_t)yint - 1) * imgw) + (ptrdiff_t)xint - 1;
- 00409F64 fld qword ptr [ebp-30h]
- 00409F67 fstp dword ptr [subset]
- 00409F6A fld dword ptr [subset]
- 00409F6D call _ftol2_sse (40ECB0h)
- 00409F72 fld qword ptr [ebp-28h]
- 00409F75 dec eax
- 00409F76 fstp dword ptr [subset]
- 00409F79 fld dword ptr [subset]
- 00409F7C imul eax,edi
- 00409F7F mov dword ptr [subset],eax
- 00409F82 call _ftol2_sse (40ECB0h)
- // Four horizontal weights from the fractional x. The inlined code broadcasts
- // x, subtracts a per-lane constant, masks with andnps, and evaluates a cubic
- // polynomial in the result before storing 16 bytes to kernelX.
- tap4Kernel(x, kernelX);
- 00409F87 movaps xmm3,xmmword ptr [ebp-80h]
- 00409F8B movss xmm1,dword ptr [x]
- 00409F90 movaps xmm6,xmmword ptr [ebp-70h]
- 00409F94 mov ecx,dword ptr [subset]
- 00409F97 shufps xmm1,xmm1,0
- 00409F9B subps xmm1,xmmword ptr [ebp-50h]
- 00409F9F movaps xmm0,xmm3
- 00409FA2 andnps xmm0,xmm1
- 00409FA5 movaps xmm2,xmm0
- 00409FA8 mulps xmm2,xmm0
- 00409FAB movaps xmm1,xmm2
- 00409FAE mulps xmm1,xmm0
- 00409FB1 mulps xmm1,xmmword ptr [ebp-60h]
- 00409FB5 movaps xmm7,xmm6
- 00409FB8 mulps xmm7,xmm2
- 00409FBB addps xmm1,xmm7
- 00409FBE movaps xmm7,xmmword ptr [ebp-90h]
- 00409FC5 movaps xmm2,xmm7
- 00409FC8 mulps xmm2,xmm0
- 00409FCB addps xmm2,xmm1
- 00409FCE addps xmm2,xmmword ptr [ebp-40h]
- // Four vertical weights from the fractional y (same computation, into kernelY).
- tap4Kernel(y, kernelY);
- 00409FD2 movss xmm1,dword ptr [y]
- 00409FD7 shufps xmm1,xmm1,0
- 00409FDB subps xmm1,xmmword ptr [ebp-50h]
- 00409FDF lea eax,[ecx+eax-1]
- 00409FE3 mov ecx,offset kernelX (416940h)
- 00409FE8 movaps xmmword ptr [ecx],xmm2
- // Row 0 of the footprint: four neighbouring source pixels (unaligned load)
- // times the horizontal weights, then scaled by the broadcast weight kernelY[0].
- v_kernX = _mm_load_ps(kernelX);
- v_kernY = _mm_set_ps1(kernelY[0]);
- v_px = _mm_loadu_ps(img + imgoffs);
- 00409FEB mov ecx,dword ptr [img]
- 00409FEE movaps xmm0,xmm3
- 00409FF1 andnps xmm0,xmm1
- 00409FF4 movaps xmm2,xmm0
- 00409FF7 mulps xmm2,xmm0
- 00409FFA movaps xmm1,xmm2
- 00409FFD mulps xmm1,xmm0
- 0040A000 mulps xmm1,xmmword ptr [ebp-60h]
- 0040A004 mulps xmm6,xmm2
- 0040A007 addps xmm1,xmm6
- 0040A00A mulps xmm7,xmm0
- 0040A00D addps xmm7,xmm1
- 0040A010 addps xmm7,xmmword ptr [ebp-40h]
- 0040A014 mov edx,offset kernelY (416930h)
- 0040A019 movaps xmmword ptr [edx],xmm7
- 0040A01C movaps xmm1,xmmword ptr [kernelX (416940h)]
- 0040A023 movups xmm2,xmmword ptr [ecx+eax*4]
- v_px = _mm_mul_ps(v_px, v_kernX);
- 0040A027 mulps xmm2,xmm1
- v_px = _mm_mul_ps(v_px, v_kernY);
- v_val = v_px;
- // Row 1: advance one source row and accumulate with kernelY[1].
- imgoffs += imgw;
- 0040A02A mov edi,dword ptr [imgw]
- 0040A02D movss xmm0,dword ptr [kernelY (416930h)]
- v_kernY = _mm_set_ps1(kernelY[1]);
- 0040A035 movss xmm3,dword ptr [kernelY+4 (416934h)]
- 0040A03D shufps xmm0,xmm0,0
- 0040A041 mulps xmm2,xmm0
- 0040A044 add eax,edi
- v_px = _mm_loadu_ps(img + imgoffs);
- 0040A046 movups xmm0,xmmword ptr [ecx+eax*4]
- 0040A04A shufps xmm3,xmm3,0
- v_px = _mm_mul_ps(v_px, v_kernX);
- 0040A04E mulps xmm0,xmm1
- v_px = _mm_mul_ps(v_px, v_kernY);
- 0040A051 mulps xmm0,xmm3
- v_val = _mm_add_ps(v_val, v_px);
- 0040A054 addps xmm0,xmm2
- // Row 2: same pattern with kernelY[2].
- imgoffs += imgw;
- v_kernY = _mm_set_ps1(kernelY[2]);
- 0040A057 movss xmm2,dword ptr [kernelY+8 (416938h)]
- 0040A05F add eax,edi
- v_px = _mm_loadu_ps(img + imgoffs);
- 0040A061 movups xmm3,xmmword ptr [ecx+eax*4]
- 0040A065 shufps xmm2,xmm2,0
- v_px = _mm_mul_ps(v_px, v_kernX);
- 0040A069 mulps xmm3,xmm1
- v_px = _mm_mul_ps(v_px, v_kernY);
- 0040A06C mulps xmm3,xmm2
- v_val = _mm_add_ps(v_val, v_px);
- // Row 3: same pattern with kernelY[3].
- imgoffs += imgw;
- v_kernY = _mm_set_ps1(kernelY[3]);
- v_px = _mm_loadu_ps(img + imgoffs);
- v_px = _mm_mul_ps(v_px, v_kernX);
- v_px = _mm_mul_ps(v_px, v_kernY);
- v_val = _mm_add_ps(v_val, v_px);
- // NOTE(review): this last increment has no later use in the loop body
- // (imgoffs is reassigned at the top of the next iteration); it appears to be dead.
- imgoffs += imgw;
- // Collapse the four lanes into one float. The inlined horizSum spills the
- // vector to a static buffer and adds the four elements on the x87 stack.
- subset[idx] = horizSum(v_val);
- 0040A06F add eax,edi
- 0040A071 movups xmm2,xmmword ptr [ecx+eax*4]
- 0040A075 mov eax,dword ptr [ebp-0Ch]
- 0040A078 addps xmm3,xmm0
- 0040A07B movss xmm0,dword ptr [kernelY+0Ch (41693Ch)]
- 0040A083 mulps xmm2,xmm1
- 0040A086 shufps xmm0,xmm0,0
- 0040A08A mov edx,offset `cci::Image::horizSum'::`2'::buf (416910h)
- 0040A08F mulps xmm2,xmm0
- 0040A092 addps xmm3,xmm2
- 0040A095 movaps xmmword ptr [edx],xmm3
- 0040A098 fld dword ptr [`cci::Image::horizSum'::`2'::buf+4 (416914h)]
- 0040A09E fadd dword ptr [`cci::Image::horizSum'::`2'::buf (416910h)]
- 0040A0A4 add esi,4
- 0040A0A7 dec dword ptr [mapX]
- 0040A0AA fadd dword ptr [`cci::Image::horizSum'::`2'::buf+8 (416918h)]
- 0040A0B0 fadd dword ptr [`cci::Image::horizSum'::`2'::buf+0Ch (41691Ch)]
- 0040A0B6 fstp dword ptr [eax+esi-4]
- 0040A0BA jne cci::Image::getConvolve+0B8h (409F18h)
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement