diff options
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/aarch64/dcadsp_init.c | 7 | ||||
-rw-r--r-- | libavcodec/aarch64/dcadsp_neon.S | 60 | ||||
-rw-r--r-- | libavcodec/arm/dcadsp_init_arm.c | 7 | ||||
-rw-r--r-- | libavcodec/arm/dcadsp_neon.S | 29 | ||||
-rw-r--r-- | libavcodec/dcadata.c | 7 | ||||
-rw-r--r-- | libavcodec/dcadata.h | 1 | ||||
-rw-r--r-- | libavcodec/dcadec.c | 12 | ||||
-rw-r--r-- | libavcodec/dcadsp.c | 21 | ||||
-rw-r--r-- | libavcodec/dcadsp.h | 7 | ||||
-rw-r--r-- | libavcodec/x86/dcadsp.asm | 86 | ||||
-rw-r--r-- | libavcodec/x86/dcadsp_init.c | 20 |
11 files changed, 8 insertions, 249 deletions
diff --git a/libavcodec/aarch64/dcadsp_init.c b/libavcodec/aarch64/dcadsp_init.c index 769ab6ee1b..78642a5ed8 100644 --- a/libavcodec/aarch64/dcadsp_init.c +++ b/libavcodec/aarch64/dcadsp_init.c @@ -41,12 +41,6 @@ void ff_synth_filter_float_neon(FFTContext *imdct, float out[32], const float in[32], float scale); -void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8], - const int32_t vq_num[DCA_SUBBANDS], - const int8_t hf_vq[1024][32], intptr_t vq_offset, - int32_t scale[DCA_SUBBANDS][2], - intptr_t start, intptr_t end); - av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s) { int cpu_flags = av_get_cpu_flags(); @@ -54,7 +48,6 @@ av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s) if (have_neon(cpu_flags)) { s->lfe_fir[0] = ff_dca_lfe_fir0_neon; s->lfe_fir[1] = ff_dca_lfe_fir1_neon; - s->decode_hf = ff_decode_hf_neon; } } diff --git a/libavcodec/aarch64/dcadsp_neon.S b/libavcodec/aarch64/dcadsp_neon.S index d57aa1d6e7..0426dc6f46 100644 --- a/libavcodec/aarch64/dcadsp_neon.S +++ b/libavcodec/aarch64/dcadsp_neon.S @@ -21,66 +21,6 @@ #include "libavutil/aarch64/asm.S" -function ff_decode_hf_neon, export=1 - add x2, x2, x3 - add x0, x0, x5, lsl #5 - add x1, x1, x5, lsl #2 - add x4, x4, x5, lsl #3 - sub x6, x6, x5 - ldr w7, [x1], #4 - add x7, x2, x7, lsl #5 - subs x6, x6, #1 - b.eq 1f - b.gt 2f - ret -2: - ldr w8, [x1], #4 - subs x6, x6, #2 - add x8, x2, x8, lsl #5 - ld1 {v2.4s}, [x4], #16 - ld1 {v0.8b}, [x7] - ld1 {v4.8b}, [x8] - sxtl v3.8h, v0.8b - sxtl v7.8h, v4.8b - scvtf v2.4s, v2.4s, #4 - sxtl v0.4s, v3.4h - sxtl2 v1.4s, v3.8h - sxtl v4.4s, v7.4h - sxtl2 v5.4s, v7.8h - scvtf v0.4s, v0.4s - scvtf v1.4s, v1.4s - scvtf v4.4s, v4.4s - scvtf v5.4s, v5.4s - fmul v0.4s, v0.4s, v2.s[0] - fmul v1.4s, v1.4s, v2.s[0] - fmul v4.4s, v4.4s, v2.s[2] - fmul v5.4s, v5.4s, v2.s[2] - b.lt 10f - - ldr w7, [x1], #4 - add x7, x2, x7, lsl #5 - st1 {v0.4s,v1.4s}, [x0], #32 - st1 {v4.4s,v5.4s}, [x0], #32 - b.gt 2b -1: - ldr w9, [x4] - ld1 {v0.8b}, [x7] - scvtf s2, w9, #4 - sxtl v3.8h, v0.8b - sxtl v0.4s, v3.4h - sxtl2 v1.4s, v3.8h - scvtf v0.4s, v0.4s - scvtf v1.4s, v1.4s - fmul v0.4s, v0.4s, v2.s[0] - fmul v1.4s, v1.4s, v2.s[0] - st1 {v0.4s,v1.4s}, [x0] - ret -10: - st1 {v0.4s,v1.4s}, [x0], #32 - st1 {v4.4s,v5.4s}, [x0] - ret -endfunc - function ff_dca_lfe_fir0_neon, export=1 mov x3, #32 // decifactor sub x1, x1, #7*4 diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c index 40ad78745c..0f2e4c49c9 100644 --- a/libavcodec/arm/dcadsp_init_arm.c +++ b/libavcodec/arm/dcadsp_init_arm.c @@ -49,12 +49,6 @@ void ff_synth_filter_float_neon(FFTContext *imdct, float out[32], const float in[32], float scale); -void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8], - const int32_t vq_num[DCA_SUBBANDS], - const int8_t hf_vq[1024][32], intptr_t vq_offset, - int32_t scale[DCA_SUBBANDS][2], - intptr_t start, intptr_t end); - av_cold void ff_dcadsp_init_arm(DCADSPContext *s) { int cpu_flags = av_get_cpu_flags(); @@ -67,7 +61,6 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s) if (have_neon(cpu_flags)) { s->lfe_fir[0] = ff_dca_lfe_fir0_neon; s->lfe_fir[1] = ff_dca_lfe_fir1_neon; - s->decode_hf = ff_decode_hf_neon; } } diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S index cdc41367e9..101fee0884 100644 --- a/libavcodec/arm/dcadsp_neon.S +++ b/libavcodec/arm/dcadsp_neon.S @@ -20,35 +20,6 @@ #include "libavutil/arm/asm.S" -function ff_decode_hf_neon, export=1 - push {r4-r5,lr} - add r2, r2, r3 - ldr r3, [sp, #12] - ldrd r4, r5, [sp, #16] - add r3, r3, r4, lsl #3 - add r1, r1, r4, lsl #2 - add r0, r0, r4, lsl #5 - -1: ldr_post lr, r1, #4 - add r4, r4, #1 - add lr, r2, lr, lsl #5 - cmp r4, r5 - vld1.32 {d7}, [r3]! - vld1.8 {d0}, [lr,:64] - vcvt.f32.s32 d7, d7, #4 - vmovl.s8 q1, d0 - vmovl.s16 q0, d2 - vmovl.s16 q1, d3 - vcvt.f32.s32 q0, q0 - vcvt.f32.s32 q1, q1 - vmul.f32 q0, q0, d7[0] - vmul.f32 q1, q1, d7[0] - vst1.32 {q0-q1}, [r0,:128]! - bne 1b - - pop {r4-r5,pc} -endfunc - function ff_dca_lfe_fir0_neon, export=1 push {r4-r6,lr} mov r3, #32 @ decifactor diff --git a/libavcodec/dcadata.c b/libavcodec/dcadata.c index 5d7d5943a1..fc877aa19d 100644 --- a/libavcodec/dcadata.c +++ b/libavcodec/dcadata.c @@ -4189,13 +4189,6 @@ const uint32_t ff_dca_lossy_quant[32] = { 84, 42, 21, 0, 0, 0, 0, 0 }; -const float ff_dca_lossy_quant_d[32] = { - 0, 1.6, 1.0, 0.8, 0.59, 0.50, 0.42, 0.34, - 0.19, 0.11, 0.06, 0.035, 0.019, 0.011, 0.0065, 0.0040, - 0.0025, 0.0014, 0.0008, 0.00045, 0.00030, 0.00017, 0.00008, 0.00004, - 0.00002, 0.00001, 0.000005, 0, 0, 0, 0, 0 -}; - /* 20bits unsigned fractional binary codes */ const uint32_t ff_dca_lossless_quant[32] = { 0, 4194304, 2097152, 1384120, 1048576, 696254, 524288, 348127, diff --git a/libavcodec/dcadata.h b/libavcodec/dcadata.h index 1d3d605b9e..9964a929f5 100644 --- a/libavcodec/dcadata.h +++ b/libavcodec/dcadata.h @@ -35,7 +35,6 @@ extern const uint32_t ff_dca_scale_factor_quant6[64]; extern const uint32_t ff_dca_scale_factor_quant7[128]; extern const uint32_t ff_dca_lossy_quant[32]; -extern const float ff_dca_lossy_quant_d[32]; extern const uint32_t ff_dca_lossless_quant[32]; extern const float ff_dca_lossless_quant_d[32]; diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c index 258857a563..187e1728be 100644 --- a/libavcodec/dcadec.c +++ b/libavcodec/dcadec.c @@ -992,12 +992,12 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) s->debug_flag |= 0x01; } - s->dcadsp.decode_hf_int(subband_samples, s->dca_chan[k].high_freq_vq, - ff_dca_high_freq_vq, subsubframe * SAMPLES_PER_SUBBAND, - s->dca_chan[k].scale_factor, - s->audio_header.vq_start_subband[k], - s->audio_header.subband_activity[k]); - + s->dcadsp.decode_hf(subband_samples, s->dca_chan[k].high_freq_vq, + ff_dca_high_freq_vq, + subsubframe * SAMPLES_PER_SUBBAND, + s->dca_chan[k].scale_factor, + s->audio_header.vq_start_subband[k], + s->audio_header.subband_activity[k]); } } diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c index 412c1dcf1f..3fb70c6c4c 100644 --- a/libavcodec/dcadsp.c +++ b/libavcodec/dcadsp.c @@ -27,30 +27,12 @@ #include "dcadsp.h" #include "dcamath.h" -static void decode_hf_c(float dst[DCA_SUBBANDS][8], +static void decode_hf_c(int32_t dst[DCA_SUBBANDS][8], const int32_t vq_num[DCA_SUBBANDS], const int8_t hf_vq[1024][32], intptr_t vq_offset, int32_t scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end) { - int i, l; - - for (l = start; l < end; l++) { - /* 1 vector -> 32 samples but we only need the 8 samples - * for this subsubframe. */ - const int8_t *ptr = &hf_vq[vq_num[l]][vq_offset]; - float fscale = scale[l][0] * (1 / 16.0); - for (i = 0; i < 8; i++) - dst[l][i] = ptr[i] * fscale; - } -} - -static void decode_hf_int_c(int32_t dst[DCA_SUBBANDS][8], - const int32_t vq_num[DCA_SUBBANDS], - const int8_t hf_vq[1024][32], intptr_t vq_offset, - int32_t scale[DCA_SUBBANDS][2], - intptr_t start, intptr_t end) -{ int i, j; for (j = start; j < end; j++) { @@ -141,7 +123,6 @@ av_cold void ff_dcadsp_init(DCADSPContext *s) s->lfe_fir[1] = dca_lfe_fir1_c; s->qmf_32_subbands = dca_qmf_32_subbands; s->decode_hf = decode_hf_c; - s->decode_hf_int = decode_hf_int_c; s->dequantize = dequantize_c; if (ARCH_AARCH64) diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h index 24902cb1ca..ccb2955470 100644 --- a/libavcodec/dcadsp.h +++ b/libavcodec/dcadsp.h @@ -32,16 +32,11 @@ typedef struct DCADSPContext { int *synth_buf_offset, float synth_buf2[32], const float window[512], float *samples_out, float raXin[32], float scale); - void (*decode_hf)(float dst[DCA_SUBBANDS][8], + void (*decode_hf)(int32_t dst[DCA_SUBBANDS][8], const int32_t vq_num[DCA_SUBBANDS], const int8_t hf_vq[1024][32], intptr_t vq_offset, int32_t scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end); - void (*decode_hf_int)(int32_t dst[DCA_SUBBANDS][8], - const int32_t vq_num[DCA_SUBBANDS], - const int8_t hf_vq[1024][32], intptr_t vq_offset, - int32_t scale[DCA_SUBBANDS][2], - intptr_t start, intptr_t end); void (*dequantize)(int32_t *samples, uint32_t step_size, uint32_t scale); } DCADSPContext; diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm index 548cec10b1..502b70a4cb 100644 --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -26,92 +26,6 @@ pf_inv16: times 4 dd 0x3D800000 ; 1/16 SECTION .text -; void decode_hf(float dst[DCA_SUBBANDS][8], const int32_t vq_num[DCA_SUBBANDS], -; const int8_t hf_vq[1024][32], intptr_t vq_offset, -; int32_t scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end) - -%macro DECODE_HF 0 -cglobal decode_hf, 6,6,5, dst, num, src, offset, scale, start, end - lea srcq, [srcq + offsetq] - shl startq, 2 - mov offsetd, endm -%define DICT offsetq - shl offsetq, 2 - mov endm, offsetq -.loop: -%if ARCH_X86_64 - mov offsetd, [scaleq + 2 * startq] - cvtsi2ss m0, offsetd -%else - cvtsi2ss m0, [scaleq + 2 * startq] -%endif - mov offsetd, [numq + startq] - mulss m0, [pf_inv16] - shl DICT, 5 - shufps m0, m0, 0 -%if cpuflag(sse2) -%if cpuflag(sse4) - pmovsxbd m1, [srcq + DICT + 0] - pmovsxbd m2, [srcq + DICT + 4] -%else - movq m1, [srcq + DICT] - punpcklbw m1, m1 - mova m2, m1 - punpcklwd m1, m1 - punpckhwd m2, m2 - psrad m1, 24 - psrad m2, 24 -%endif - cvtdq2ps m1, m1 - cvtdq2ps m2, m2 -%else - movd mm0, [srcq + DICT + 0] - movd mm1, [srcq + DICT + 4] - punpcklbw mm0, mm0 - punpcklbw mm1, mm1 - movq mm2, mm0 - movq mm3, mm1 - punpcklwd mm0, mm0 - punpcklwd mm1, mm1 - punpckhwd mm2, mm2 - punpckhwd mm3, mm3 - psrad mm0, 24 - psrad mm1, 24 - psrad mm2, 24 - psrad mm3, 24 - cvtpi2ps m1, mm0 - cvtpi2ps m2, mm1 - cvtpi2ps m3, mm2 - cvtpi2ps m4, mm3 - shufps m0, m0, 0 - shufps m1, m3, q1010 - shufps m2, m4, q1010 -%endif - mulps m1, m0 - mulps m2, m0 - mova [dstq + 8 * startq + 0], m1 - mova [dstq + 8 * startq + 16], m2 - add startq, 4 - cmp startq, endm - jl .loop -.end: -%if notcpuflag(sse2) - emms -%endif - REP_RET -%endmacro - -%if ARCH_X86_32 -INIT_XMM sse -DECODE_HF -%endif - -INIT_XMM sse2 -DECODE_HF - -INIT_XMM sse4 -DECODE_HF - ; %1=v0/v1 %2=in1 %3=in2 %macro FIR_LOOP 2-3 .loop%1: diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index 1a19f6b807..1321dda652 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -23,15 +23,6 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/dcadsp.h" -void ff_decode_hf_sse(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS], - const int8_t hf_vq[1024][32], intptr_t vq_offset, - int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end); -void ff_decode_hf_sse2(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS], - const int8_t hf_vq[1024][32], intptr_t vq_offset, - int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end); -void ff_decode_hf_sse4(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS], - const int8_t hf_vq[1024][32], intptr_t vq_offset, - int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end); void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs); void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs); void ff_dca_lfe_fir0_fma3(float *out, const float *in, const float *coefs); @@ -41,21 +32,10 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s) int cpu_flags = av_get_cpu_flags(); if (EXTERNAL_SSE(cpu_flags)) { -#if ARCH_X86_32 - s->decode_hf = ff_decode_hf_sse; -#endif s->lfe_fir[0] = ff_dca_lfe_fir0_sse; s->lfe_fir[1] = ff_dca_lfe_fir1_sse; } - if (EXTERNAL_SSE2(cpu_flags)) { - s->decode_hf = ff_decode_hf_sse2; - } - - if (EXTERNAL_SSE4(cpu_flags)) { - s->decode_hf = ff_decode_hf_sse4; - } - if (EXTERNAL_FMA3(cpu_flags)) { s->lfe_fir[0] = ff_dca_lfe_fir0_fma3; } |