;; Source: Pastebin paste "bilinear_x" by xiahanlu, Sep 1st, 2018 (plain text, 5.61 KB).
  1. ;; simple bilinear interpolation for IDirect3DSurface9 - RGB32 (Alpha empty) MASM32
  2. ;; not mapper center point (SrcX=(dstX+0.5)* (srcWidth/dstWidth) -0.5 ) - moecmks .
  3. ;; TODO: more opt. In fact, the code is not very good.
  4. ;; settings .
  5. ;;
  6. MOD_SFT equ 16 ;; n:= 16
  7. MOD_MAP equ 00010000h ;; 2^16
  8. MOD_MASK equ 0000FFFFh
  9.  
  10. ;;
  11. ;;
  12. ;; prototype
  13.  
  14. ;; void __cdecl bilinear_x (uint32_t *dstPtr, intptr_t dstPitch,
  15. ;; uint32_t *srcPtr, intptr_t srcPitch,
  16. ;; intptr_t dstW, intptr_t dstH,
  17. ;; intptr_t srcW, intptr_t srcH)
  18.  
  19.  
  20. .686 ; create 32 bit code
  21. .mmx
  22. .xmm
  23. .model flat, c ; 32 bit memory model
  24. option casemap :none ; case sensitive
  25.  
  26. .code
  27. align 16
  28. bilinear_x proc c
  29. option prologue:none, epilogue:none
  30.  
  31. ;; stack arg base offset typedef .
  32. dstPtr equ 4*1
  33. dstPitch equ 4*2
  34. srcPtr equ 4*3
  35. srcPitch equ 4*4
  36. dstW equ 4*5
  37. dstH equ 4*6
  38. srcW equ 4*7
  39. srcH equ 4*8
  40.  
  41. ;; first, save ctx
  42. push edi
  43. push esi
  44. push ebx
  45. push ebp ;; stack += 16
  46.  
  47. stk_skip equ 16
  48.  
  49. movd mm0, esp
  50.  
  51. ;; calc vptr and rva pitch
  52. ;; for target pitch, we can get rva
  53. ;; source pitch can't
  54.  
  55. mov edi, stk_skip[esp+dstPtr] ;; edi !! dstPtr
  56. mov ebx, stk_skip[esp+dstPitch]
  57. mov eax, stk_skip[esp+dstW] ;; eax !! dstW
  58. mov ebp, stk_skip[esp+dstH] ;; ebp !! dstH
  59.  
  60. cmp eax, 0
  61. jle V_EXIT
  62. cmp ebp, 0
  63. jle V_EXIT
  64. test edi, edi
  65. je V_EXIT
  66.  
  67. lea edx, [eax*4]
  68. sub ebx, edx ;; ebx !! target pitch rva .
  69.  
  70. mov edx, stk_skip[esp+srcH] ;; edx !!srcH
  71. mov ecx, stk_skip[esp+srcW] ;; ecx !!srcW
  72. mov esi, stk_skip[esp+srcPtr] ;; esi !!srcPtr
  73.  
  74. cmp edx, 0
  75. jle V_EXIT
  76. cmp ecx, 0
  77. jle V_EXIT
  78. test esi, esi
  79. je V_EXIT
  80.  
  81. ;; eax <_ dstW
  82. ;; ebx <_ target pitch rva .
  83. ;; ecx <_ srcW
  84. ;; edx <_ srcH
  85. ;; esi <_ source ptr poll
  86. ;; edi <_ target ptr
  87. ;; ebp <_ dstH
  88. ;; esp <_ temp
  89.  
  90. movss xmm4, dword ptr stk_skip[esp+srcPitch]
  91. mov esp, MOD_MAP
  92.  
  93. cvtsi2sd xmm3, edx ;; srcH
  94. cvtsi2sd xmm5, esp ;; MOD_MAP
  95. cvtsi2sd xmm6, ebp ;; dstH
  96. divsd xmm3, xmm6 ;; srcH/ dstH
  97. mulsd xmm3, xmm5 ;; * MOD_MAP
  98. cvttpd2dq xmm3, xmm3 ;; cast(int) rt_delta_y:DWORD
  99.  
  100. cvtsi2sd xmm6, ecx ;; srcW
  101. cvtsi2sd xmm0, eax ;; dstW
  102. divsd xmm6, xmm0 ;; srcW/ dstW
  103. mulsd xmm6, xmm5 ;; * MOD_MAP
  104. cvttpd2dq xmm6, xmm6 ;; cast(int) rt_delta_x:DWORD
  105.  
  106. ;; xmm6 <_ rt_delta_x_with rev
  107. pxor xmm1, xmm1
  108. pxor xmm7, xmm7
  109. pcmpeqd xmm0, xmm0
  110. movq xmm1, xmm0
  111. movlhps xmm7, xmm0
  112. pshuflw xmm2, xmm6, 0
  113. pshufd xmm2, xmm2, 0
  114. movd esp, xmm6
  115. movdqa xmm6, xmm2
  116. movd xmm2, eax
  117. movd xmm1, esi
  118. movd xmm0, ebx
  119. pxor xmm6, xmm7
  120. pshufd xmm6, xmm6, 00AH
  121. movdqa xmm7, xmm7
  122. movlhps xmm0, xmm0
  123. pcmpeqq xmm5, xmm5
  124. psrlw xmm5, 15
  125. movhlps xmm5, xmm1
  126. pshufd xmm5, xmm5, 00AH
  127. paddw xmm6, xmm5
  128.  
  129. xor ebx, ebx
  130. xor ecx, ecx
  131. xor edx, edx
  132.  
  133. ;; eax <_ dstW
  134. ;; ebx <_ rt-total-X
  135. ;; ecx <_ rt-total-Y
  136. ;; edx <_ spare
  137. ;; esi <_ source ptr poll
  138. ;; edi <_ target ptr
  139. ;; ebp <_ dstH
  140. ;; esp <_ rt_delta_x
  141. ;;
  142. ;; xmm7 <_ rt_poll .
  143. ;; xmm6 <_ rt_delta_x_with rev
  144. ;; xmm5 <_ temp use
  145. ;; xmm4 <_ srcPitch
  146. ;; xmm3 <_ rt_delta_y
  147. ;; xmm2 <_ dstW
  148. ;; xmm1 <_ source ptr base
  149. ;; xmm0 <_ Target RVA | LO-QWORD : temp use
  150. align 16
  151. doscanline:
  152. pmovzxbw xmm5, [esi+edx*4]
  153. pmulhuw xmm5, xmm7
  154. ;; 00 A1 00 R1 00 G1 00 B1 | 00 A0 00 R0 00 G0 00 B0
  155. movhlps xmm0, xmm5
  156. paddw xmm5, xmm0
  157. packuswb xmm5, xmm5
  158. movd dword ptr[edi], xmm5
  159. paddw xmm7, xmm6
  160. add ebx, esp
  161. mov edx, ebx
  162. shr edx, 16
  163. add edi, 4
  164. dec eax
  165. jne doscanline
  166. ;; reset inline doscanline data/status, outloop check set
  167. movd eax, xmm3
  168. movd ebx, xmm1
  169. movd edx, xmm4
  170.  
  171. pcmpeqd xmm7, xmm7
  172. pxor xmm5, xmm5
  173. movhlps xmm7, xmm5
  174. ;;
  175. ;;
  176. add ecx, eax
  177. mov eax, ecx
  178. shr eax, 16
  179. imul eax, edx
  180. lea esi, [ebx+eax]
  181. ;;
  182. ;;
  183. movhlps xmm5, xmm0
  184. movd ebx, xmm5
  185. movd eax, xmm2
  186.  
  187. add edi, ebx
  188. xor ebx, ebx
  189. xor edx, edx
  190.  
  191. dec ebp
  192. jne doscanline
  193. V_EXIT:
  194. movd esp, mm0
  195. emms
  196. pop ebp
  197. pop ebx
  198. pop esi
  199. pop edi
  200. ret
  201.  
  202. bilinear_x endp
  203. end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement