Advertisement
Guest User

Untitled

a guest
Jun 2nd, 2013
228
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.51 KB | None | 0 0
  1. 0x0000000000bcec8b: mov rdi,rbx
  2. 0x0000000000bcec8e: push rbx
  3. 0x0000000000bcec8f: mov ecx,edi
  4. 0x0000000000bcec91: xor ebx,ebx
  5. 0x0000000000bcec93: add ecx,0x7
  6. 0x0000000000bcec96: xor eax,eax
  7. 0x0000000000bcec98: and ecx,0xfffffff8
  8. 0x0000000000bcec9b: sub ecx,edi
  9. 0x0000000000bcec9d: je 0xbcecb2
  10. => 0x0000000000bcec9f: mov al,BYTE PTR [edi+ebx*1]
  11. 0x0000000000bceca3: add al,BYTE PTR [esi+ebx*1]
  12. 0x0000000000bceca7: inc ebx
  13. 0x0000000000bceca9: cmp ebx,ecx
  14. 0x0000000000bcecab: mov BYTE PTR [edi+ebx*1-0x1],al
  15. 0x0000000000bcecb0: jb 0xbcec9f
  16. 0x0000000000bcecb2: mov ecx,edx
  17. 0x0000000000bcecb4: sub edx,ebx
  18. 0x0000000000bcecb6: and edx,0x3f
  19. 0x0000000000bcecb9: sub ecx,edx
  20. 0x0000000000bcecbb: movq mm1,QWORD PTR [esi+ebx*1]
  21. 0x0000000000bcecc0: movq mm0,QWORD PTR [edi+ebx*1]
  22. 0x0000000000bcecc5: movq mm3,QWORD PTR [esi+ebx*1+0x8]
  23. 0x0000000000bceccb: paddb mm0,mm1
  24. 0x0000000000bcecce: movq mm2,QWORD PTR [edi+ebx*1+0x8]
  25. 0x0000000000bcecd4: movq QWORD PTR [edi+ebx*1],mm0
  26. 0x0000000000bcecd9: paddb mm2,mm3
  27. 0x0000000000bcecdc: movq mm5,QWORD PTR [esi+ebx*1+0x10]
  28.  
  29.  
  30. // Optimized code for PNG Up filter decoder
  31. void /* PRIVATE */
  32. png_read_filter_row_mmx_up(png_row_infop row_info, png_bytep row,
  33. png_bytep prev_row)
  34. {
  35. png_uint_32 len;
  36. len = row_info->rowbytes; // # of bytes to filter
  37. _asm {
  38. mov edi, row
  39. // get # of bytes to alignment
  40. mov ecx, edi
  41. xor ebx, ebx
  42. add ecx, 0x7
  43. xor eax, eax
  44. and ecx, 0xfffffff8
  45. mov esi, prev_row
  46. sub ecx, edi
  47. jz dupgo
  48. // fix alignment
  49. duplp1:
  50. mov al, [edi+ebx]
  51. add al, [esi+ebx]
  52. inc ebx
  53. cmp ebx, ecx
  54. mov [edi + ebx-1], al // mov does not affect flags; -1 to offset inc ebx
  55. jb duplp1
  56. dupgo:
  57. mov ecx, len
  58. mov edx, ecx
  59. sub edx, ebx // subtract alignment fix
  60. and edx, 0x0000003f // calc bytes over mult of 64
  61. sub ecx, edx // drop over bytes from length
  62. // Unrolled loop - use all MMX registers and interleave to reduce
  63. // number of branch instructions (loops) and reduce partial stalls
  64. duploop:
  65. movq mm1, [esi+ebx]
  66. movq mm0, [edi+ebx]
  67. movq mm3, [esi+ebx+8]
  68. paddb mm0, mm1
  69. movq mm2, [edi+ebx+8]
  70. movq [edi+ebx], mm0
  71. paddb mm2, mm3
  72. movq mm5, [esi+ebx+16]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement