xiahanlu

bilinear

Aug 31st, 2018
131
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.51 KB | None | 0 0
  ;; Simple bilinear interpolation for IDirect3DSurface9 - RGB32 (alpha empty), MASM32.
  ;; Sampling is top-left aligned: srcX = dstX * (srcWidth / dstWidth).
  ;; The centre-aligned mapping srcX = (dstX + 0.5) * (srcWidth / dstWidth) - 0.5
  ;; is NOT used. - moecmks
  ;; FIXME: this code has not been debugged across scale ratios!
  ;;
  ;; settings
  ;;
  ;; Source coordinates are fixed point with MOD_SFT fractional bits. The other
  ;; constants are derived from MOD_SFT so they cannot drift out of sync.
  MOD_SFT     equ 12                  ;; n = 12 fractional bits
  MOD_MAP     equ (1 shl MOD_SFT)     ;; 2^n = 4096, i.e. 1.0 in fixed point
  MOD_SFT_MUL equ MOD_MAP*MOD_MAP     ;; 4096 * 4096 = 16777216 = 0x01000000, suited to pmulhuw
  MOD_MASK    equ (MOD_MAP - 1)       ;; 2^n - 1 = 4095, mask of the fractional bits

  ;;
  ;; prototype
  ;;
  ;; void __cdecl bilinear (uint32_t *dstPtr, intptr_t dstPitch,
  ;;                        uint32_t *srcPtr, intptr_t srcPitch,
  ;;                        intptr_t dstW, intptr_t dstH,
  ;;                        intptr_t srcW, intptr_t srcH)

  .686                      ; create 32 bit code
  .mmx
  .xmm
  .model flat, c            ; 32 bit memory model
  option casemap :none      ; case sensitive

  .code
  align 16
  ;;-----------------------------------------------------------------------
  ;; void __cdecl bilinear (uint32_t *dstPtr, intptr_t dstPitch,
  ;;                        uint32_t *srcPtr, intptr_t srcPitch,
  ;;                        intptr_t dstW, intptr_t dstH,
  ;;                        intptr_t srcW, intptr_t srcH)
  ;;
  ;; Scales a 32-bit-per-pixel image with bilinear filtering. All four bytes
  ;; of a pixel are filtered the same way. Pitches are in bytes.
  ;;
  ;; In:    eight dword arguments on the stack (32-bit C calling convention).
  ;; Out:   dstW x dstH pixels written to dstPtr; nothing is returned.
  ;;        Returns without writing when a pointer is null or a dimension <= 0.
  ;; Saves: edi, esi, ebx, ebp (pushed/popped), esp stays a valid stack pointer.
  ;; Clobb: eax, ecx, edx, xmm0-xmm7, flags.
  ;; Needs: SSE4.1 (pmovzxbw).
  ;;
  ;; Fixed point: coordinates carry MOD_SFT fractional bits and are accumulated
  ;; in 32-bit registers, so (dst size * step) must stay below 2^32. The steps
  ;; are trunc(MOD_MAP * src / dst) computed in single precision.
  ;;
  ;; Weights are reduced to 8 bits, so a pixel copied 1:1 comes out as
  ;; floor(c * 255 / 256) per byte.
  ;;
  ;; Edges: the row below the last source row is never read (the last row is
  ;; blended with itself). The pixel to the right of each sampled pixel IS
  ;; always fetched (8-byte load), so 8 bytes must be readable at the address
  ;; of every sampled source pixel, including the last pixel of a row.
  ;;-----------------------------------------------------------------------
  bilinear proc c
  option prologue:none, epilogue:none

  ;; stack argument offsets, relative to the return address
  dstPtr      equ 4*1
  dstPitch    equ 4*2
  srcPtr      equ 4*3
  srcPitch    equ 4*4
  dstW        equ 4*5
  dstH        equ 4*6
  srcW        equ 4*7
  srcH        equ 4*8

  ;; local slots, relative to esp once the prologue has run
  loc_xstep   equ 4*0                   ;; x step per destination pixel (fixed point)
  loc_ystep   equ 4*1                   ;; y step per destination row   (fixed point)
  loc_ypos    equ 4*2                   ;; y position of the current destination row
  loc_dstskip equ 4*3                   ;; bytes from the end of a dst row to the next row
  loc_bytes   equ 4*4

  ;; distance from esp to the return address: 4 saved registers + locals
  stk_skip    equ 16 + loc_bytes

          push    edi
          push    esi
          push    ebx
          push    ebp
          sub     esp, loc_bytes

          mov     edi, [esp+stk_skip+dstPtr]
          mov     eax, [esp+stk_skip+dstW]
          mov     ecx, [esp+stk_skip+dstH]        ;; ecx = destination rows left
          mov     esi, [esp+stk_skip+srcPtr]

          ;; reject empty rectangles and null surfaces
          test    eax, eax
          jle     V_EXIT
          test    ecx, ecx
          jle     V_EXIT
          test    edi, edi
          jz      V_EXIT
          cmp     dword ptr [esp+stk_skip+srcH], 0
          jle     V_EXIT
          cmp     dword ptr [esp+stk_skip+srcW], 0
          jle     V_EXIT
          test    esi, esi
          jz      V_EXIT

          ;; dst skip = dstPitch - dstW*4 (edi already advances 4 bytes per pixel)
          mov     ebx, [esp+stk_skip+dstPitch]
          lea     edx, [eax*4]
          sub     ebx, edx
          mov     [esp+loc_dstskip], ebx

          ;; y step = trunc (MOD_MAP * srcH / dstH), kept as a full 32-bit value
          mov     ebx, MOD_MAP
          cvtsi2ss xmm5, ebx
          cvtsi2ss xmm7, dword ptr [esp+stk_skip+srcH]
          cvtsi2ss xmm6, ecx
          divss   xmm7, xmm6
          mulss   xmm7, xmm5
          cvttss2si edx, xmm7
          mov     [esp+loc_ystep], edx

          ;; x step = trunc (MOD_MAP * srcW / dstW), kept as a full 32-bit value
          cvtsi2ss xmm6, dword ptr [esp+stk_skip+srcW]
          cvtsi2ss xmm0, eax
          divss   xmm6, xmm0
          mulss   xmm6, xmm5
          cvttss2si edx, xmm6
          mov     [esp+loc_xstep], edx

          ;; xmm6 = low word of the x step in all 8 words. Only the low MOD_SFT
          ;; bits matter here because the running fraction is masked after
          ;; every add.
          movd    xmm6, edx
          pshuflw xmm6, xmm6, 0
          pshufd  xmm6, xmm6, 0

          ;; xmm3 = MOD_MASK in words 0-3, zero in words 4-7
          ;; xmm7 = MOD_MASK in all 8 words
          mov     edx, MOD_MASK
          movd    xmm3, edx
          pshuflw xmm3, xmm3, 0
          pshufd  xmm7, xmm3, 0

          xor     edx, edx                        ;; first row starts at y = 0
          mov     [esp+loc_ypos], edx

  row_loop:
          ;; In:  edx = fixed-point y position of this destination row
          ;;      ecx = rows left, edi = destination write pointer

          ;; xmm5 = weight of the top source row (MOD_MASK - y fraction), all words
          mov     eax, edx
          and     eax, MOD_MASK
          xor     eax, MOD_MASK
          movd    xmm5, eax
          pshuflw xmm5, xmm5, 0
          pshufd  xmm5, xmm5, 0

          ;; esi = start of the top source row
          shr     edx, MOD_SFT                    ;; edx = source row index
          mov     ebp, [esp+stk_skip+srcPitch]
          mov     esi, edx
          imul    esi, ebp
          add     esi, [esp+stk_skip+srcPtr]

          ;; ebp = byte offset from the top row to the bottom row. On the last
          ;; source row there is no row below, so blend the row with itself
          ;; instead of reading past the surface.
          inc     edx
          cmp     edx, [esp+stk_skip+srcH]
          jb      have_bottom
          xor     ebp, ebp
  have_bottom:

          mov     edx, esi                        ;; edx = address of the top-left pixel
          xor     ebx, ebx                        ;; ebx = fixed-point x position
          pxor    xmm4, xmm4                      ;; xmm4 = x fraction, all words
          mov     eax, [esp+stk_skip+dstW]        ;; eax = pixels left in this row

          align 16
  pixel_loop:
          ;; -----------------------------------
          ;;  top-left    | top-right
          ;;  bottom-left | bottom-right
          ;; -----------------------------------
          ;; Each 8-byte load brings the left pixel into words 0-3 and the right
          ;; pixel into words 4-7.

          ;; xmm0 = x weights: words 0-3 = left (MOD_MASK - frac), words 4-7 = right (frac)
          movdqa  xmm0, xmm4
          pxor    xmm0, xmm3

          ;; top row: 8-bit weight = (x weight * top weight) >> 16
          movdqa  xmm1, xmm0
          pmulhuw xmm1, xmm5
          pmovzxbw xmm2, qword ptr [edx]          ;; top-left | top-right
          psllw   xmm2, 8                         ;; byte -> high byte so pmulhuw keeps 8 bits
          pmulhuw xmm2, xmm1

          ;; bottom row: bottom weight = MOD_MASK - top weight
          movdqa  xmm1, xmm5
          pxor    xmm1, xmm7
          pmulhuw xmm1, xmm0
          pmovzxbw xmm0, qword ptr [edx+ebp]      ;; bottom-left | bottom-right
          psllw   xmm0, 8
          pmulhuw xmm0, xmm1

          ;; sum the four weighted pixels and store one destination pixel
          paddusb xmm0, xmm2                      ;; top + bottom (every word is < 256)
          packuswb xmm0, xmm0                     ;; dword 0 = left sum, dword 1 = right sum
          pshufd  xmm1, xmm0, 1                   ;; bring the right sum down to dword 0
          paddusb xmm0, xmm1
          movd    dword ptr [edi], xmm0

          ;; advance the x fraction and the x position by one step
          paddw   xmm4, xmm6
          pand    xmm4, xmm7
          add     ebx, [esp+loc_xstep]
          mov     edx, ebx
          shr     edx, MOD_SFT                    ;; edx = source column index
          lea     edx, [esi+edx*4]
          add     edi, 4
          dec     eax
          jnz     pixel_loop

          ;; next destination row
          add     edi, [esp+loc_dstskip]
          mov     edx, [esp+loc_ypos]
          add     edx, [esp+loc_ystep]
          mov     [esp+loc_ypos], edx
          dec     ecx
          jnz     row_loop

  V_EXIT:
          add     esp, loc_bytes
          pop     ebp
          pop     ebx
          pop     esi
          pop     edi
          ret

  bilinear endp
  end
Add Comment
Please, Sign In to add comment