Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ;%include "libavutil/x86/x86inc.asm"
- %include "libavutil/x86/x86util.asm"
- SECTION_RODATA
- SECTION .text
- INIT_XMM sse4 ; adds ff_ and _sse4 to function name
- ;******************************************************************************
- ; void put_hevc_mc_pixels_8(int16_t *dst, ptrdiff_t dststride,
- ;                           uint8_t *_src, ptrdiff_t _srcstride,
- ;                           int width, int height, int mx, int my,
- ;                           int16_t *mcbuffer)
- ;
- ; Widens a block of 8-bit pixels into 16-bit intermediates:
- ;     dst[x] = src[x] << (14 - 8)      for 0 <= x < width, on each of
- ;                                      `height` rows.
- ;
- ; Register roles (x86inc names, assigned by cglobal below):
- ;   dstq        r0 : destination row pointer (int16_t elements)
- ;   dststrideq  r1 : added to dstq after every row
- ;   srcq        r2 : source row pointer (uint8_t elements)
- ;   srcstrideq  r3 : added to srcq after every row
- ;   widthd      r4 : pixels per row   (C int: only the low 32 bits are used)
- ;   heightd     r5 : rows, counted down to zero (C int: low 32 bits only)
- ;   cold        r6 : column index x within the current row
- ;   pixd/pixw   r7 : scratch for the pixel being converted
- ;
- ; mx, my and mcbuffer are not used and therefore not loaded.
- ; Returns immediately when width <= 0 or height <= 0.
- ; Needs 8 general-purpose registers (the previous version asked for 12).
- ;
- ; NOTE(review): both strides are applied as plain byte offsets, exactly as
- ; before. dst is int16_t*, so confirm that callers pass dststride in bytes
- ; and not in int16_t elements.
- ;
- ; TODO: this is a scalar loop. A vector path (movq / punpcklbw / psllw) must
- ; store to [dstq + 2*x] and needs a scalar tail for widths that are not a
- ; multiple of 8.
- ;******************************************************************************
- cglobal put_hevc_mc_pixels_8, 6, 8, 0, dst, dststride, src, srcstride, width, height, col, pix
-     test    widthd, widthd              ; nothing to do for empty rows
-     jle     .end
-     test    heightd, heightd            ; nothing to do without rows
-     jle     .end
- .loop_h:                                ; one iteration per row
-     xor     cold, cold                  ; x = 0 (also clears the upper half of colq)
- .loop_w:                                ; one iteration per pixel
-     movzx   pixd, byte [srcq+colq]      ; load exactly one 8-bit pixel, zero-extended
-     shl     pixd, 14 - 8                ; scale to 14-bit intermediate precision
-     mov     [dstq+colq*2], pixw         ; dst holds 16-bit elements: byte offset is 2*x
-     inc     cold
-     cmp     cold, widthd                ; 32-bit compare: width is an int
-     jl      .loop_w
-     add     dstq, dststrideq            ; dst += dststride
-     add     srcq, srcstrideq            ; src += srcstride
-     dec     heightd                     ; rows remaining
-     jg      .loop_h
- .end:
-     REP_RET
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement