Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- %macro FLAC_DECORRELATE_INDEP_32 2 ; Interleave %1 separate dword input planes into one output buffer, left-shifting every dword. %1 = plane count (paths exist for 2, 4 and 6), %2 = index of the SIMD register that holds the shift count
- cglobal flac_decorrelate_indep%1_32, 2, %1+2, %1+2, out, in0, in1, len, in2, in3, in4, in5 ; NOTE(review): per x86inc cglobal convention this is 2 auto-loaded args, %1+2 GPRs, %1+2 SIMD regs - confirm
- %if ARCH_X86_32
- movd m%2, r4m ; shift count comes from argument slot 4 in memory
- %if %1 == 6 ; 6 planes: every named register is needed for out/in0..in5, so len stays in its argument slot
- DEFINE_ARGS out, in0, in1, in2, in3, in4, in5 ; re-declare the register names without len
- %define lend dword r3m
- %else
- mov lend, lenm ; len is not one of the 2 auto-loaded args, so fetch it explicitly
- %endif
- %elif WIN64
- movd m%2, r4m ; shift count read from the memory slot of argument 4
- %else ; UNIX64
- movd m%2, r4d ; shift count read from register r4, before the loads below can reuse that register as in2
- %endif
- %assign %%i 1
- %rep %1-1 ; fetch the plane pointers stored at indices 1 .. %1-1 of the pointer array at in0q
- mov in %+ %%i %+ q, [in0q+%%i*gprsize]
- %assign %%i %%i+1
- %endrep
- mov in0q, [in0q] ; replace the array base by its element 0; done last because the loads above still needed the base
- mov outq, [outq] ; outq = the pointer stored at *out
- %assign %%i 1
- %rep %1-1 ; convert in1.. into byte offsets relative to in0q, so advancing in0q alone walks every plane
- sub in %+ %%i %+ q, in0q
- %assign %%i %%i+1
- %endrep
- align 16 ; align the loop entry point
- .loop:
- mova m0, [in0q] ; one register (mmsize bytes) from plane 0
- %assign %%i 1
- %rep %1-1 ; same position in each remaining plane, addressed as in0q + offset
- mova m %+ %%i, [in0q+in %+ %%i %+ q]
- %assign %%i %%i+1
- %endrep
- %assign %%i 0
- %rep %1 ; shift every loaded dword left by the count held in m%2
- pslld m %+ %%i, m%2
- %assign %%i %%i+1
- %endrep
- %if %1 == 6 ; 6 planes: six stores at hard-coded 16-byte steps (this path assumes 16-byte registers)
- SBUTTERFLY dq, 0, 1, 6 ; NOTE(review): x86util macro, not shown here - expected to interleave the dwords of m0/m1 (low half in m0, high half in m1) with m6 as scratch
- SBUTTERFLY dq, 2, 3, 6 ; same for m2/m3
- SBUTTERFLY dq, 4, 5, 6 ; same for m4/m5
- punpcklqdq m6, m0, m2 ; m6 = low qword of m0 : low qword of m2
- punpckhqdq m2, m4 ; m2 = high qword of m2 : high qword of m4
- shufps m4, m0, 0xe4 ; 0xe4 keeps the low qword of m4 and takes the high qword of m0
- mova [outq ], m6
- mova [outq+16], m4
- mova [outq+32], m2
- punpcklqdq m6, m1, m3 ; same three-way merge for the upper halves held in m1/m3/m5
- punpckhqdq m3, m5
- shufps m5, m1, 0xe4
- mova [outq+48], m6
- mova [outq+64], m5
- mova [outq+80], m3
- %else ; %1 == 2 || %1 == 4
- %if %1 == 4
- TRANSPOSE4x4D 0, 1, 2, 3, 4 ; NOTE(review): x86util macro, not shown here - expected to transpose m0..m3 as a 4x4 dword matrix with m4 as scratch
- %else ; %1 == 2
- SBUTTERFLY dq, 0, 1, 2 ; interleave the dwords of m0/m1, m2 as scratch (see NOTE above on this macro)
- %endif
- mova [outq ], m0
- mova [outq+1*mmsize], m1
- %if %1 > 2 ; only the 4-plane path has a third and fourth register to store
- mova [outq+2*mmsize], m2
- mova [outq+3*mmsize], m3
- %endif
- %endif ; %1 == 2 || %1 == 4 || %1 == 6
- add in0q, mmsize ; advance the shared input position by one register
- add outq, mmsize*%1 ; output advances by one register per plane
- sub lend, mmsize/4 ; mmsize/4 dwords were consumed from each plane
- jg .loop ; loop while the count is still positive: the body always runs at least once, and a len that is not a multiple of mmsize/4 still gets a full final iteration
- REP_RET ; NOTE(review): x86inc return macro, defined outside this view
- %endmacro
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement