about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- libavcodec/arm/dcadsp_init_arm.c 23
-rw-r--r-- libavcodec/arm/dcadsp_neon.S 18
-rw-r--r-- libavcodec/arm/dcadsp_vfp.S 32
-rw-r--r-- libavcodec/dcadec.c 10
-rw-r--r-- libavcodec/dcadsp.c 20
-rw-r--r-- libavcodec/dcadsp.h 4
6 files changed, 64 insertions, 43 deletions
diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
index d49a1765f6..2ea12895de 100644
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/dcadsp_init_arm.c
@@ -24,16 +24,22 @@
#include "libavutil/attributes.h"
#include "libavcodec/dcadsp.h"
-void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs,
- int decifactor, float scale);
+void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs,
+ float scale);
+void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs,
+ float scale);
+
+void ff_dca_lfe_fir32_vfp(float *out, const float *in, const float *coefs,
+ float scale);
+void ff_dca_lfe_fir64_vfp(float *out, const float *in, const float *coefs,
+ float scale);
+
void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
SynthFilterContext *synth, FFTContext *imdct,
float synth_buf_ptr[512],
int *synth_buf_offset, float synth_buf2[32],
const float window[512], float *samples_out,
float raXin[32], float scale);
-void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
- int decifactor, float scale);
void ff_synth_filter_float_vfp(FFTContext *imdct,
float *synth_buf_ptr, int *synth_buf_offset,
@@ -52,11 +58,14 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
int cpu_flags = av_get_cpu_flags();
if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) {
- s->lfe_fir = ff_dca_lfe_fir_vfp;
+ s->lfe_fir[0] = ff_dca_lfe_fir32_vfp;
+ s->lfe_fir[1] = ff_dca_lfe_fir64_vfp;
s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp;
}
- if (have_neon(cpu_flags))
- s->lfe_fir = ff_dca_lfe_fir_neon;
+ if (have_neon(cpu_flags)) {
+ s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
+ s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
+ }
}
av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S
index fe3aae801a..c798fea7f7 100644
--- a/libavcodec/arm/dcadsp_neon.S
+++ b/libavcodec/arm/dcadsp_neon.S
@@ -20,17 +20,23 @@
#include "libavutil/arm/asm.S"
-function ff_dca_lfe_fir_neon, export=1
+function ff_dca_lfe_fir0_neon, export=1
push {r4-r6,lr}
+NOVFP vmov s0, r3 @ scale
+ mov r3, #32 @ decifactor
+ mov r6, #256/32
+ b dca_lfe_fir
+endfunc
+function ff_dca_lfe_fir1_neon, export=1
+ push {r4-r6,lr}
+NOVFP vmov s0, r3 @ scale
+ mov r3, #64 @ decifactor
+ mov r6, #256/64
+dca_lfe_fir:
add r4, r0, r3, lsl #2 @ out2
add r5, r2, #256*4-16 @ cf1
sub r1, r1, #12
- cmp r3, #32
- ite eq
- moveq r6, #256/32
- movne r6, #256/64
-NOVFP vldr s0, [sp, #16] @ scale
mov lr, #-16
1:
vmov.f32 q2, #0.0 @ v0
diff --git a/libavcodec/arm/dcadsp_vfp.S b/libavcodec/arm/dcadsp_vfp.S
index 5892a84342..edabc29e23 100644
--- a/libavcodec/arm/dcadsp_vfp.S
+++ b/libavcodec/arm/dcadsp_vfp.S
@@ -24,7 +24,6 @@
POUT .req a1
PIN .req a2
PCOEF .req a3
-DECIFACTOR .req a4
OLDFPSCR .req a4
COUNTER .req ip
@@ -129,6 +128,15 @@ POST3 .req s27
.endm
.macro dca_lfe_fir decifactor
+function ff_dca_lfe_fir\decifactor\()_vfp, export=1
+NOVFP vmov s0, r3
+ fmrx OLDFPSCR, FPSCR
+ ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
+ fmxr FPSCR, ip
+ vldr IN0, [PIN, #-0*4]
+ vldr IN1, [PIN, #-1*4]
+ vldr IN2, [PIN, #-2*4]
+ vldr IN3, [PIN, #-3*4]
.if \decifactor == 32
.set JMAX, 8
vpush {s16-s31}
@@ -165,32 +173,16 @@ POST3 .req s27
.endif
fmxr FPSCR, OLDFPSCR
bx lr
+endfunc
.endm
-
-/* void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs,
- * int decifactor, float scale)
- */
-function ff_dca_lfe_fir_vfp, export=1
- teq DECIFACTOR, #32
- fmrx OLDFPSCR, FPSCR
- ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
- fmxr FPSCR, ip
-NOVFP vldr s0, [sp]
- vldr IN0, [PIN, #-0*4]
- vldr IN1, [PIN, #-1*4]
- vldr IN2, [PIN, #-2*4]
- vldr IN3, [PIN, #-3*4]
- beq 32f
-64: dca_lfe_fir 64
+ dca_lfe_fir 64
.ltorg
-32: dca_lfe_fir 32
-endfunc
+ dca_lfe_fir 32
.unreq POUT
.unreq PIN
.unreq PCOEF
- .unreq DECIFACTOR
.unreq OLDFPSCR
.unreq COUNTER
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
index 6ffb040aaa..723ed191dc 100644
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -957,23 +957,23 @@ static void lfe_interpolation_fir(DCAContext *s, int decimation_select,
* samples_out: An array holding interpolated samples
*/
- int decifactor;
+ int idx;
const float *prCoeff;
int deciindex;
/* Select decimation filter */
if (decimation_select == 1) {
- decifactor = 64;
+ idx = 1;
prCoeff = lfe_fir_128;
} else {
- decifactor = 32;
+ idx = 0;
prCoeff = lfe_fir_64;
}
/* Interpolation */
for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
- s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor, scale);
+ s->dcadsp.lfe_fir[idx](samples_out, samples_in, prCoeff, scale);
samples_in++;
- samples_out += 2 * decifactor;
+ samples_out += 2 * 32 * (1 + idx);
}
}
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
index 148f6dd607..8d242c5959 100644
--- a/libavcodec/dcadsp.c
+++ b/libavcodec/dcadsp.c
@@ -32,8 +32,9 @@ static void int8x8_fmul_int32_c(float *dst, const int8_t *src, int scale)
dst[i] = src[i] * fscale;
}
-static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
- int decifactor, float scale)
+static inline void
+dca_lfe_fir(float *out, const float *in, const float *coefs,
+ int decifactor, float scale)
{
float *out2 = out + decifactor;
const float *cf0 = coefs;
@@ -82,9 +83,22 @@ static void dca_qmf_32_subbands(float samples_in[32][8], int sb_act,
}
}
+static void dca_lfe_fir0_c(float *out, const float *in, const float *coefs,
+ float scale)
+{
+ dca_lfe_fir(out, in, coefs, 32, scale);
+}
+
+static void dca_lfe_fir1_c(float *out, const float *in, const float *coefs,
+ float scale)
+{
+ dca_lfe_fir(out, in, coefs, 64, scale);
+}
+
av_cold void ff_dcadsp_init(DCADSPContext *s)
{
- s->lfe_fir = dca_lfe_fir_c;
+ s->lfe_fir[0] = dca_lfe_fir0_c;
+ s->lfe_fir[1] = dca_lfe_fir1_c;
s->qmf_32_subbands = dca_qmf_32_subbands;
s->int8x8_fmul_int32 = int8x8_fmul_int32_c;
if (ARCH_ARM) ff_dcadsp_init_arm(s);
diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
index e2ad09adf6..3e04426a80 100644
--- a/libavcodec/dcadsp.h
+++ b/libavcodec/dcadsp.h
@@ -23,8 +23,8 @@
#include "synth_filter.h"
typedef struct DCADSPContext {
- void (*lfe_fir)(float *out, const float *in, const float *coefs,
- int decifactor, float scale);
+ void (*lfe_fir[2])(float *out, const float *in, const float *coefs,
+ float scale);
void (*qmf_32_subbands)(float samples_in[32][8], int sb_act,
SynthFilterContext *synth, FFTContext *imdct,
float synth_buf_ptr[512],