about | summary | refs | log | tree | commit | diff
path: root/libavcodec
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/aarch64/dcadsp_init.c 7
-rw-r--r-- libavcodec/aarch64/dcadsp_neon.S 60
-rw-r--r-- libavcodec/arm/dcadsp_init_arm.c 7
-rw-r--r-- libavcodec/arm/dcadsp_neon.S 29
-rw-r--r-- libavcodec/dcadata.c 7
-rw-r--r-- libavcodec/dcadata.h 1
-rw-r--r-- libavcodec/dcadec.c 12
-rw-r--r-- libavcodec/dcadsp.c 21
-rw-r--r-- libavcodec/dcadsp.h 7
-rw-r--r-- libavcodec/x86/dcadsp.asm 86
-rw-r--r-- libavcodec/x86/dcadsp_init.c 20
11 files changed, 8 insertions, 249 deletions
diff --git a/libavcodec/aarch64/dcadsp_init.c b/libavcodec/aarch64/dcadsp_init.c
index 769ab6ee1b..78642a5ed8 100644
--- a/libavcodec/aarch64/dcadsp_init.c
+++ b/libavcodec/aarch64/dcadsp_init.c
@@ -41,12 +41,6 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
float out[32], const float in[32],
float scale);
-void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8],
- const int32_t vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int32_t scale[DCA_SUBBANDS][2],
- intptr_t start, intptr_t end);
-
av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
{
int cpu_flags = av_get_cpu_flags();
@@ -54,7 +48,6 @@ av_cold void ff_dcadsp_init_aarch64(DCADSPContext *s)
if (have_neon(cpu_flags)) {
s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
- s->decode_hf = ff_decode_hf_neon;
}
}
diff --git a/libavcodec/aarch64/dcadsp_neon.S b/libavcodec/aarch64/dcadsp_neon.S
index d57aa1d6e7..0426dc6f46 100644
--- a/libavcodec/aarch64/dcadsp_neon.S
+++ b/libavcodec/aarch64/dcadsp_neon.S
@@ -21,66 +21,6 @@
#include "libavutil/aarch64/asm.S"
-function ff_decode_hf_neon, export=1
- add x2, x2, x3
- add x0, x0, x5, lsl #5
- add x1, x1, x5, lsl #2
- add x4, x4, x5, lsl #3
- sub x6, x6, x5
- ldr w7, [x1], #4
- add x7, x2, x7, lsl #5
- subs x6, x6, #1
- b.eq 1f
- b.gt 2f
- ret
-2:
- ldr w8, [x1], #4
- subs x6, x6, #2
- add x8, x2, x8, lsl #5
- ld1 {v2.4s}, [x4], #16
- ld1 {v0.8b}, [x7]
- ld1 {v4.8b}, [x8]
- sxtl v3.8h, v0.8b
- sxtl v7.8h, v4.8b
- scvtf v2.4s, v2.4s, #4
- sxtl v0.4s, v3.4h
- sxtl2 v1.4s, v3.8h
- sxtl v4.4s, v7.4h
- sxtl2 v5.4s, v7.8h
- scvtf v0.4s, v0.4s
- scvtf v1.4s, v1.4s
- scvtf v4.4s, v4.4s
- scvtf v5.4s, v5.4s
- fmul v0.4s, v0.4s, v2.s[0]
- fmul v1.4s, v1.4s, v2.s[0]
- fmul v4.4s, v4.4s, v2.s[2]
- fmul v5.4s, v5.4s, v2.s[2]
- b.lt 10f
-
- ldr w7, [x1], #4
- add x7, x2, x7, lsl #5
- st1 {v0.4s,v1.4s}, [x0], #32
- st1 {v4.4s,v5.4s}, [x0], #32
- b.gt 2b
-1:
- ldr w9, [x4]
- ld1 {v0.8b}, [x7]
- scvtf s2, w9, #4
- sxtl v3.8h, v0.8b
- sxtl v0.4s, v3.4h
- sxtl2 v1.4s, v3.8h
- scvtf v0.4s, v0.4s
- scvtf v1.4s, v1.4s
- fmul v0.4s, v0.4s, v2.s[0]
- fmul v1.4s, v1.4s, v2.s[0]
- st1 {v0.4s,v1.4s}, [x0]
- ret
-10:
- st1 {v0.4s,v1.4s}, [x0], #32
- st1 {v4.4s,v5.4s}, [x0]
- ret
-endfunc
-
function ff_dca_lfe_fir0_neon, export=1
mov x3, #32 // decifactor
sub x1, x1, #7*4
diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
index 40ad78745c..0f2e4c49c9 100644
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/dcadsp_init_arm.c
@@ -49,12 +49,6 @@ void ff_synth_filter_float_neon(FFTContext *imdct,
float out[32], const float in[32],
float scale);
-void ff_decode_hf_neon(float dst[DCA_SUBBANDS][8],
- const int32_t vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int32_t scale[DCA_SUBBANDS][2],
- intptr_t start, intptr_t end);
-
av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
{
int cpu_flags = av_get_cpu_flags();
@@ -67,7 +61,6 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
if (have_neon(cpu_flags)) {
s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
- s->decode_hf = ff_decode_hf_neon;
}
}
diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S
index cdc41367e9..101fee0884 100644
--- a/libavcodec/arm/dcadsp_neon.S
+++ b/libavcodec/arm/dcadsp_neon.S
@@ -20,35 +20,6 @@
#include "libavutil/arm/asm.S"
-function ff_decode_hf_neon, export=1
- push {r4-r5,lr}
- add r2, r2, r3
- ldr r3, [sp, #12]
- ldrd r4, r5, [sp, #16]
- add r3, r3, r4, lsl #3
- add r1, r1, r4, lsl #2
- add r0, r0, r4, lsl #5
-
-1: ldr_post lr, r1, #4
- add r4, r4, #1
- add lr, r2, lr, lsl #5
- cmp r4, r5
- vld1.32 {d7}, [r3]!
- vld1.8 {d0}, [lr,:64]
- vcvt.f32.s32 d7, d7, #4
- vmovl.s8 q1, d0
- vmovl.s16 q0, d2
- vmovl.s16 q1, d3
- vcvt.f32.s32 q0, q0
- vcvt.f32.s32 q1, q1
- vmul.f32 q0, q0, d7[0]
- vmul.f32 q1, q1, d7[0]
- vst1.32 {q0-q1}, [r0,:128]!
- bne 1b
-
- pop {r4-r5,pc}
-endfunc
-
function ff_dca_lfe_fir0_neon, export=1
push {r4-r6,lr}
mov r3, #32 @ decifactor
diff --git a/libavcodec/dcadata.c b/libavcodec/dcadata.c
index 5d7d5943a1..fc877aa19d 100644
--- a/libavcodec/dcadata.c
+++ b/libavcodec/dcadata.c
@@ -4189,13 +4189,6 @@ const uint32_t ff_dca_lossy_quant[32] = {
84, 42, 21, 0, 0, 0, 0, 0
};
-const float ff_dca_lossy_quant_d[32] = {
- 0, 1.6, 1.0, 0.8, 0.59, 0.50, 0.42, 0.34,
- 0.19, 0.11, 0.06, 0.035, 0.019, 0.011, 0.0065, 0.0040,
- 0.0025, 0.0014, 0.0008, 0.00045, 0.00030, 0.00017, 0.00008, 0.00004,
- 0.00002, 0.00001, 0.000005, 0, 0, 0, 0, 0
-};
-
/* 20bits unsigned fractional binary codes */
const uint32_t ff_dca_lossless_quant[32] = {
0, 4194304, 2097152, 1384120, 1048576, 696254, 524288, 348127,
diff --git a/libavcodec/dcadata.h b/libavcodec/dcadata.h
index 1d3d605b9e..9964a929f5 100644
--- a/libavcodec/dcadata.h
+++ b/libavcodec/dcadata.h
@@ -35,7 +35,6 @@ extern const uint32_t ff_dca_scale_factor_quant6[64];
extern const uint32_t ff_dca_scale_factor_quant7[128];
extern const uint32_t ff_dca_lossy_quant[32];
-extern const float ff_dca_lossy_quant_d[32];
extern const uint32_t ff_dca_lossless_quant[32];
extern const float ff_dca_lossless_quant_d[32];
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
index 258857a563..187e1728be 100644
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -992,12 +992,12 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
s->debug_flag |= 0x01;
}
- s->dcadsp.decode_hf_int(subband_samples, s->dca_chan[k].high_freq_vq,
- ff_dca_high_freq_vq, subsubframe * SAMPLES_PER_SUBBAND,
- s->dca_chan[k].scale_factor,
- s->audio_header.vq_start_subband[k],
- s->audio_header.subband_activity[k]);
-
+ s->dcadsp.decode_hf(subband_samples, s->dca_chan[k].high_freq_vq,
+ ff_dca_high_freq_vq,
+ subsubframe * SAMPLES_PER_SUBBAND,
+ s->dca_chan[k].scale_factor,
+ s->audio_header.vq_start_subband[k],
+ s->audio_header.subband_activity[k]);
}
}
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
index 412c1dcf1f..3fb70c6c4c 100644
--- a/libavcodec/dcadsp.c
+++ b/libavcodec/dcadsp.c
@@ -27,30 +27,12 @@
#include "dcadsp.h"
#include "dcamath.h"
-static void decode_hf_c(float dst[DCA_SUBBANDS][8],
+static void decode_hf_c(int32_t dst[DCA_SUBBANDS][8],
const int32_t vq_num[DCA_SUBBANDS],
const int8_t hf_vq[1024][32], intptr_t vq_offset,
int32_t scale[DCA_SUBBANDS][2],
intptr_t start, intptr_t end)
{
- int i, l;
-
- for (l = start; l < end; l++) {
- /* 1 vector -> 32 samples but we only need the 8 samples
- * for this subsubframe. */
- const int8_t *ptr = &hf_vq[vq_num[l]][vq_offset];
- float fscale = scale[l][0] * (1 / 16.0);
- for (i = 0; i < 8; i++)
- dst[l][i] = ptr[i] * fscale;
- }
-}
-
-static void decode_hf_int_c(int32_t dst[DCA_SUBBANDS][8],
- const int32_t vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int32_t scale[DCA_SUBBANDS][2],
- intptr_t start, intptr_t end)
-{
int i, j;
for (j = start; j < end; j++) {
@@ -141,7 +123,6 @@ av_cold void ff_dcadsp_init(DCADSPContext *s)
s->lfe_fir[1] = dca_lfe_fir1_c;
s->qmf_32_subbands = dca_qmf_32_subbands;
s->decode_hf = decode_hf_c;
- s->decode_hf_int = decode_hf_int_c;
s->dequantize = dequantize_c;
if (ARCH_AARCH64)
diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
index 24902cb1ca..ccb2955470 100644
--- a/libavcodec/dcadsp.h
+++ b/libavcodec/dcadsp.h
@@ -32,16 +32,11 @@ typedef struct DCADSPContext {
int *synth_buf_offset, float synth_buf2[32],
const float window[512], float *samples_out,
float raXin[32], float scale);
- void (*decode_hf)(float dst[DCA_SUBBANDS][8],
+ void (*decode_hf)(int32_t dst[DCA_SUBBANDS][8],
const int32_t vq_num[DCA_SUBBANDS],
const int8_t hf_vq[1024][32], intptr_t vq_offset,
int32_t scale[DCA_SUBBANDS][2],
intptr_t start, intptr_t end);
- void (*decode_hf_int)(int32_t dst[DCA_SUBBANDS][8],
- const int32_t vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int32_t scale[DCA_SUBBANDS][2],
- intptr_t start, intptr_t end);
void (*dequantize)(int32_t *samples, uint32_t step_size, uint32_t scale);
} DCADSPContext;
diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 548cec10b1..502b70a4cb 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -26,92 +26,6 @@ pf_inv16: times 4 dd 0x3D800000 ; 1/16
SECTION .text
-; void decode_hf(float dst[DCA_SUBBANDS][8], const int32_t vq_num[DCA_SUBBANDS],
-; const int8_t hf_vq[1024][32], intptr_t vq_offset,
-; int32_t scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end)
-
-%macro DECODE_HF 0
-cglobal decode_hf, 6,6,5, dst, num, src, offset, scale, start, end
- lea srcq, [srcq + offsetq]
- shl startq, 2
- mov offsetd, endm
-%define DICT offsetq
- shl offsetq, 2
- mov endm, offsetq
-.loop:
-%if ARCH_X86_64
- mov offsetd, [scaleq + 2 * startq]
- cvtsi2ss m0, offsetd
-%else
- cvtsi2ss m0, [scaleq + 2 * startq]
-%endif
- mov offsetd, [numq + startq]
- mulss m0, [pf_inv16]
- shl DICT, 5
- shufps m0, m0, 0
-%if cpuflag(sse2)
-%if cpuflag(sse4)
- pmovsxbd m1, [srcq + DICT + 0]
- pmovsxbd m2, [srcq + DICT + 4]
-%else
- movq m1, [srcq + DICT]
- punpcklbw m1, m1
- mova m2, m1
- punpcklwd m1, m1
- punpckhwd m2, m2
- psrad m1, 24
- psrad m2, 24
-%endif
- cvtdq2ps m1, m1
- cvtdq2ps m2, m2
-%else
- movd mm0, [srcq + DICT + 0]
- movd mm1, [srcq + DICT + 4]
- punpcklbw mm0, mm0
- punpcklbw mm1, mm1
- movq mm2, mm0
- movq mm3, mm1
- punpcklwd mm0, mm0
- punpcklwd mm1, mm1
- punpckhwd mm2, mm2
- punpckhwd mm3, mm3
- psrad mm0, 24
- psrad mm1, 24
- psrad mm2, 24
- psrad mm3, 24
- cvtpi2ps m1, mm0
- cvtpi2ps m2, mm1
- cvtpi2ps m3, mm2
- cvtpi2ps m4, mm3
- shufps m0, m0, 0
- shufps m1, m3, q1010
- shufps m2, m4, q1010
-%endif
- mulps m1, m0
- mulps m2, m0
- mova [dstq + 8 * startq + 0], m1
- mova [dstq + 8 * startq + 16], m2
- add startq, 4
- cmp startq, endm
- jl .loop
-.end:
-%if notcpuflag(sse2)
- emms
-%endif
- REP_RET
-%endmacro
-
-%if ARCH_X86_32
-INIT_XMM sse
-DECODE_HF
-%endif
-
-INIT_XMM sse2
-DECODE_HF
-
-INIT_XMM sse4
-DECODE_HF
-
; %1=v0/v1 %2=in1 %3=in2
%macro FIR_LOOP 2-3
.loop%1:
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index 1a19f6b807..1321dda652 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -23,15 +23,6 @@
#include "libavutil/x86/cpu.h"
#include "libavcodec/dcadsp.h"
-void ff_decode_hf_sse(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
-void ff_decode_hf_sse2(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
-void ff_decode_hf_sse4(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS],
- const int8_t hf_vq[1024][32], intptr_t vq_offset,
- int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end);
void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs);
void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs);
void ff_dca_lfe_fir0_fma3(float *out, const float *in, const float *coefs);
@@ -41,21 +32,10 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
int cpu_flags = av_get_cpu_flags();
if (EXTERNAL_SSE(cpu_flags)) {
-#if ARCH_X86_32
- s->decode_hf = ff_decode_hf_sse;
-#endif
s->lfe_fir[0] = ff_dca_lfe_fir0_sse;
s->lfe_fir[1] = ff_dca_lfe_fir1_sse;
}
- if (EXTERNAL_SSE2(cpu_flags)) {
- s->decode_hf = ff_decode_hf_sse2;
- }
-
- if (EXTERNAL_SSE4(cpu_flags)) {
- s->decode_hf = ff_decode_hf_sse4;
- }
-
if (EXTERNAL_FMA3(cpu_flags)) {
s->lfe_fir[0] = ff_dca_lfe_fir0_fma3;
}