Untitled - pasted by a guest on Feb 8th, 2014 (diff, 2.71 KB)
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
index 076a225..282a58b 100644
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -1096,6 +1096,8 @@ static inline void int8x8_fmul_int32(DCADSPContext *dsp, float *dst,
 }
 #endif
 
+#include "libavutil/timer.h"
+
 static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
 {
     int k, l;
@@ -1225,10 +1227,11 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
                        "Stream with high frequencies VQ coding\n");
                 s->debug_flag |= 0x01;
             }
-
+{START_TIMER
             int8x8_fmul_int32(&s->dcadsp, subband_samples[k][l],
                               &high_freq_vq[hfvq][subsubframe * 8],
                               s->scale_factor[k][l][0]);
+STOP_TIMER("g")}
         }
     }
 
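Side note, separate from the patch itself: the dcadec.c hunk above wraps the int8x8_fmul_int32() call in the START_TIMER / STOP_TIMER macros from libavutil/timer.h, which read the CPU timestamp counter and periodically print cycle counts for the timed block, tagged here with the "g" label; the extra braces give the macros their own scope, since START_TIMER declares local variables. A minimal sketch of the same measuring pattern outside FFmpeg, using a hypothetical read_cycles() helper in place of FFmpeg's AV_READ_TIME, might look like:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for AV_READ_TIME, not part of the patch. */
static inline uint64_t read_cycles(void)
{
#if defined(__x86_64__) || defined(__i386__)
    uint32_t lo, hi;
    __asm__ volatile ("rdtsc" : "=a"(lo), "=d"(hi));   /* read timestamp counter */
    return ((uint64_t)hi << 32) | lo;
#else
    return 0; /* no cycle counter wired up for other targets in this sketch */
#endif
}

void timed_example(void)
{
    uint64_t start = read_cycles();
    /* ... code under test, e.g. the int8x8_fmul_int32() call ... */
    fprintf(stderr, "%" PRIu64 " cycles\n", read_cycles() - start);
}
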
diff --git a/libavcodec/x86/dca.h b/libavcodec/x86/dca.h
index ab175b3..e2591e3 100644
--- a/libavcodec/x86/dca.h
+++ b/libavcodec/x86/dca.h
@@ -20,15 +20,40 @@
 
 #include "config.h"
 
-#if ARCH_X86_64 && HAVE_SSE2_INLINE
-# include "libavutil/x86/asm.h"
+#define HAVE_SSE2_INTRINSICS   0
+
+#if ARCH_X86_64 && (HAVE_SSE2_INTRINSICS || HAVE_SSE2_INLINE)
 # include "libavutil/mem.h"
 #include "libavcodec/dcadsp.h"
 
+# if HAVE_SSE2_INTRINSICS
+#   include <emmintrin.h>
+# else
+#   include "libavutil/x86/asm.h"
+# endif
+
 # define int8x8_fmul_int32 int8x8_fmul_int32
 static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp,
                                      float *dst, const int8_t *src, int scale)
 {
+# if HAVE_SSE2_INTRINSICS
+    float tmp = scale * 1.0f/16; // should generate cvtsi2ss+mulss
+    __m128i in2, in1 = _mm_loadl_epi64((const __m128i*)src);
+    __m128 out1, out2, fscale = _mm_load_ss(&tmp);
+    in1    = _mm_unpacklo_epi8(in1, in1);
+    in2    = in1;
+    in1    = _mm_unpacklo_epi16(in1, in1);
+    in2    = _mm_unpackhi_epi16(in2, in2);
+    in1    = _mm_srai_epi32(in1, 24);
+    in2    = _mm_srai_epi32(in2, 24);
+    fscale = _mm_shuffle_ps(fscale, fscale, 0);
+    out1   = _mm_cvtepi32_ps(in1);
+    out2   = _mm_cvtepi32_ps(in2);
+    out1   = _mm_mul_ps(out1, fscale);
+    out2   = _mm_mul_ps(out2, fscale);
+    _mm_store_ps(dst+0, out1);
+    _mm_store_ps(dst+4, out2);
+# elif HAVE_SSE2_INLINE
     DECLARE_ALIGNED(16, static const uint32_t, inverse16) = 0x3D800000;
     __asm__ volatile (
         "cvtsi2ss        %2, %%xmm0 \n\t"
@@ -50,6 +75,7 @@ static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp,
         :: "r"(dst), "r"(src), "m"(scale), "m"(inverse16)
         XMM_CLOBBERS_ONLY("xmm0", "xmm1", "xmm2")
     );
+# endif
 }
 
 #endif /* ARCH_X86_64 && HAVE_SSE2_INLINE */
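
Side note, separate from the patch itself: the intrinsics branch computes dst[i] = src[i] * scale / 16 for eight int8_t inputs, which matches what the existing inline-asm path does via the 0x3D800000 (1.0f/16) constant. A self-contained sketch for sanity-checking that branch against a scalar reference follows; the vector body is copied from the hunk above, while main(), the reference loop, and the switch from _mm_store_ps to unaligned _mm_storeu_ps (the test buffers are not 16-byte aligned) are illustrative additions, not part of the patch.

#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>

/* Scalar reference: dst[i] = src[i] * scale / 16. */
static void int8x8_fmul_int32_ref(float *dst, const int8_t *src, int scale)
{
    for (int i = 0; i < 8; i++)
        dst[i] = src[i] * (scale * (1.0f / 16));
}

/* SSE2 intrinsics path from the patch, with unaligned stores for this test. */
static void int8x8_fmul_int32_sse2(float *dst, const int8_t *src, int scale)
{
    float tmp = scale * 1.0f/16;
    __m128i in2, in1 = _mm_loadl_epi64((const __m128i*)src); /* load 8 bytes */
    __m128 out1, out2, fscale = _mm_load_ss(&tmp);
    in1    = _mm_unpacklo_epi8(in1, in1);       /* duplicate bytes into 16-bit lanes */
    in2    = in1;
    in1    = _mm_unpacklo_epi16(in1, in1);      /* low four values into 32-bit lanes */
    in2    = _mm_unpackhi_epi16(in2, in2);      /* high four values into 32-bit lanes */
    in1    = _mm_srai_epi32(in1, 24);           /* arithmetic shift = sign extension */
    in2    = _mm_srai_epi32(in2, 24);
    fscale = _mm_shuffle_ps(fscale, fscale, 0); /* broadcast scale/16 to all lanes */
    out1   = _mm_cvtepi32_ps(in1);
    out2   = _mm_cvtepi32_ps(in2);
    out1   = _mm_mul_ps(out1, fscale);
    out2   = _mm_mul_ps(out2, fscale);
    _mm_storeu_ps(dst+0, out1);
    _mm_storeu_ps(dst+4, out2);
}

int main(void)
{
    int8_t src[8] = { -128, -1, 0, 1, 2, 64, 100, 127 };
    float ref[8], vec[8];
    int8x8_fmul_int32_ref(ref, src, 37);
    int8x8_fmul_int32_sse2(vec, src, 37);
    for (int i = 0; i < 8; i++)
        printf("%d: ref=%f sse2=%f\n", i, ref[i], vec[i]);
    return 0;
}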