Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- 0x0000000000bcec8b: mov rdi,rbx
- 0x0000000000bcec8e: push rbx
- 0x0000000000bcec8f: mov ecx,edi
- 0x0000000000bcec91: xor ebx,ebx
- 0x0000000000bcec93: add ecx,0x7
- 0x0000000000bcec96: xor eax,eax
- 0x0000000000bcec98: and ecx,0xfffffff8
- 0x0000000000bcec9b: sub ecx,edi
- 0x0000000000bcec9d: je 0xbcecb2
- => 0x0000000000bcec9f: mov al,BYTE PTR [edi+ebx*1]
- 0x0000000000bceca3: add al,BYTE PTR [esi+ebx*1]
- 0x0000000000bceca7: inc ebx
- 0x0000000000bceca9: cmp ebx,ecx
- 0x0000000000bcecab: mov BYTE PTR [edi+ebx*1-0x1],al
- 0x0000000000bcecb0: jb 0xbcec9f
- 0x0000000000bcecb2: mov ecx,edx
- 0x0000000000bcecb4: sub edx,ebx
- 0x0000000000bcecb6: and edx,0x3f
- 0x0000000000bcecb9: sub ecx,edx
- 0x0000000000bcecbb: movq mm1,QWORD PTR [esi+ebx*1]
- 0x0000000000bcecc0: movq mm0,QWORD PTR [edi+ebx*1]
- 0x0000000000bcecc5: movq mm3,QWORD PTR [esi+ebx*1+0x8]
- 0x0000000000bceccb: paddb mm0,mm1
- 0x0000000000bcecce: movq mm2,QWORD PTR [edi+ebx*1+0x8]
- 0x0000000000bcecd4: movq QWORD PTR [edi+ebx*1],mm0
- 0x0000000000bcecd9: paddb mm2,mm3
- 0x0000000000bcecdc: movq mm5,QWORD PTR [esi+ebx*1+0x10]
- // Optimized code for PNG Up filter decoder
- void /* PRIVATE */
- png_read_filter_row_mmx_up(png_row_infop row_info, png_bytep row,
- png_bytep prev_row)
- {
- png_uint_32 len;
- len = row_info->rowbytes; // # of bytes to filter
- _asm {
- mov edi, row
- // get # of bytes needed to reach the next 8-byte boundary
- mov ecx, edi
- xor ebx, ebx
- add ecx, 0x7
- xor eax, eax
- and ecx, 0xfffffff8
- mov esi, prev_row
- sub ecx, edi
- jz dupgo
- // fix alignment
- duplp1:
- mov al, [edi+ebx]
- add al, [esi+ebx]
- inc ebx
- cmp ebx, ecx
- mov [edi + ebx-1], al // mov does not affect flags; -1 to offset inc ebx
- jb duplp1
- dupgo:
- mov ecx, len
- mov edx, ecx
- sub edx, ebx // subtract alignment fix
- and edx, 0x0000003f // calc bytes over mult of 64
- sub ecx, edx // drop over bytes from length
- // Unrolled loop - use all MMX registers and interleave to reduce
- // number of branch instructions (loops) and reduce partial stalls
- duploop:
- movq mm1, [esi+ebx]
- movq mm0, [edi+ebx]
- movq mm3, [esi+ebx+8]
- paddb mm0, mm1
- movq mm2, [edi+ebx+8]
- movq [edi+ebx], mm0
- paddb mm2, mm3
- movq mm5, [esi+ebx+16]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement