Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
- index 076a225..282a58b 100644
- --- a/libavcodec/dcadec.c
- +++ b/libavcodec/dcadec.c
- @@ -1096,6 +1096,8 @@ static inline void int8x8_fmul_int32(DCADSPContext *dsp, float *dst,
- }
- #endif
- +#include "libavutil/timer.h"
- +
- static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
- {
- int k, l;
- @@ -1225,10 +1227,11 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
- "Stream with high frequencies VQ coding\n");
- s->debug_flag |= 0x01;
- }
- -
- +{START_TIMER
- int8x8_fmul_int32(&s->dcadsp, subband_samples[k][l],
- &high_freq_vq[hfvq][subsubframe * 8],
- s->scale_factor[k][l][0]);
- +STOP_TIMER("g")}
- }
- }
- diff --git a/libavcodec/x86/dca.h b/libavcodec/x86/dca.h
- index ab175b3..e2591e3 100644
- --- a/libavcodec/x86/dca.h
- +++ b/libavcodec/x86/dca.h
- @@ -20,15 +20,40 @@
- #include "config.h"
- -#if ARCH_X86_64 && HAVE_SSE2_INLINE
- -# include "libavutil/x86/asm.h"
- +#define HAVE_SSE2_INTRINSICS 0
- +
- +#if ARCH_X86_64 && (HAVE_SSE2_INTRINSICS || HAVE_SSE2_INLINE)
- # include "libavutil/mem.h"
- #include "libavcodec/dcadsp.h"
- +# if HAVE_SSE2_INTRINSICS
- +# include <emmintrin.h>
- +# else
- +# include "libavutil/x86/asm.h"
- +# endif
- +
- # define int8x8_fmul_int32 int8x8_fmul_int32
- static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp,
- float *dst, const int8_t *src, int scale)
- {
- +# if HAVE_SSE2_INTRINSICS
- + float tmp = scale * 1.0f/16; // should generate cvtsi2ss+mulss
- + __m128i in2, in1 = _mm_loadl_epi64((const __m128i*)src);
- + __m128 out1, out2, fscale = _mm_load_ss(&tmp);
- + in1 = _mm_unpacklo_epi8(in1, in1);
- + in2 = in1;
- + in1 = _mm_unpacklo_epi16(in1, in1);
- + in2 = _mm_unpackhi_epi16(in2, in2);
- + in1 = _mm_srai_epi32(in1, 24);
- + in2 = _mm_srai_epi32(in2, 24);
- + fscale = _mm_shuffle_ps(fscale, fscale, 0);
- + out1 = _mm_cvtepi32_ps(in1);
- + out2 = _mm_cvtepi32_ps(in2);
- + out1 = _mm_mul_ps(out1, fscale);
- + out2 = _mm_mul_ps(out2, fscale);
- + _mm_store_ps(dst+0, out1);
- + _mm_store_ps(dst+4, out2);
- +# elif HAVE_SSE2_INLINE
- DECLARE_ALIGNED(16, static const uint32_t, inverse16) = 0x3D800000;
- __asm__ volatile (
- "cvtsi2ss %2, %%xmm0 \n\t"
- @@ -50,6 +75,7 @@ static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp,
- :: "r"(dst), "r"(src), "m"(scale), "m"(inverse16)
- XMM_CLOBBERS_ONLY("xmm0", "xmm1", "xmm2")
- );
- +# endif
- }
- -#endif /* ARCH_X86_64 && HAVE_SSE2_INLINE */
- +#endif /* ARCH_X86_64 && (HAVE_SSE2_INTRINSICS || HAVE_SSE2_INLINE) */
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement