diff options
-rw-r--r-- | libavcodec/arm/dcadsp_init_arm.c | 23 | ||||
-rw-r--r-- | libavcodec/arm/dcadsp_neon.S | 18 | ||||
-rw-r--r-- | libavcodec/arm/dcadsp_vfp.S | 32 | ||||
-rw-r--r-- | libavcodec/dcadec.c | 10 | ||||
-rw-r--r-- | libavcodec/dcadsp.c | 20 | ||||
-rw-r--r-- | libavcodec/dcadsp.h | 4 |
6 files changed, 64 insertions, 43 deletions
diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
index d49a1765f6..2ea12895de 100644
--- a/libavcodec/arm/dcadsp_init_arm.c
+++ b/libavcodec/arm/dcadsp_init_arm.c
@@ -24,16 +24,22 @@
 #include "libavutil/attributes.h"
 #include "libavcodec/dcadsp.h"
 
-void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs,
-                        int decifactor, float scale);
+void ff_dca_lfe_fir0_neon(float *out, const float *in, const float *coefs,
+                          float scale);
+void ff_dca_lfe_fir1_neon(float *out, const float *in, const float *coefs,
+                          float scale);
+
+void ff_dca_lfe_fir32_vfp(float *out, const float *in, const float *coefs,
+                          float scale);
+void ff_dca_lfe_fir64_vfp(float *out, const float *in, const float *coefs,
+                          float scale);
+
 void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
                                 SynthFilterContext *synth, FFTContext *imdct,
                                 float synth_buf_ptr[512],
                                 int *synth_buf_offset, float synth_buf2[32],
                                 const float window[512], float *samples_out,
                                 float raXin[32], float scale);
-void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
-                         int decifactor, float scale);
 
 void ff_synth_filter_float_vfp(FFTContext *imdct,
                                float *synth_buf_ptr, int *synth_buf_offset,
@@ -52,11 +58,14 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
     int cpu_flags = av_get_cpu_flags();
 
     if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) {
-        s->lfe_fir = ff_dca_lfe_fir_vfp;
+        s->lfe_fir[0] = ff_dca_lfe_fir32_vfp;
+        s->lfe_fir[1] = ff_dca_lfe_fir64_vfp;
         s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp;
     }
-    if (have_neon(cpu_flags))
-        s->lfe_fir = ff_dca_lfe_fir_neon;
+    if (have_neon(cpu_flags)) {
+        s->lfe_fir[0] = ff_dca_lfe_fir0_neon;
+        s->lfe_fir[1] = ff_dca_lfe_fir1_neon;
+    }
 }
 
 av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
diff --git a/libavcodec/arm/dcadsp_neon.S b/libavcodec/arm/dcadsp_neon.S
index fe3aae801a..c798fea7f7 100644
--- a/libavcodec/arm/dcadsp_neon.S
+++ b/libavcodec/arm/dcadsp_neon.S
@@ -20,17 +20,23 @@
 
 #include "libavutil/arm/asm.S"
 
-function ff_dca_lfe_fir_neon, export=1
+function ff_dca_lfe_fir0_neon, export=1
         push {r4-r6,lr}
+NOVFP   vmov s0, r3 @ scale
+        mov r3, #32 @ decifactor
+        mov r6, #256/32
+        b dca_lfe_fir
+endfunc
 
+function ff_dca_lfe_fir1_neon, export=1
+        push {r4-r6,lr}
+NOVFP   vmov s0, r3 @ scale
+        mov r3, #64 @ decifactor
+        mov r6, #256/64
+dca_lfe_fir:
         add r4, r0, r3, lsl #2 @ out2
         add r5, r2, #256*4-16 @ cf1
         sub r1, r1, #12
-        cmp r3, #32
-        ite eq
-        moveq r6, #256/32
-        movne r6, #256/64
-NOVFP   vldr s0, [sp, #16] @ scale
         mov lr, #-16
 1:
         vmov.f32 q2, #0.0 @ v0
diff --git a/libavcodec/arm/dcadsp_vfp.S b/libavcodec/arm/dcadsp_vfp.S
index 5892a84342..edabc29e23 100644
--- a/libavcodec/arm/dcadsp_vfp.S
+++ b/libavcodec/arm/dcadsp_vfp.S
@@ -24,7 +24,6 @@
 POUT .req a1
 PIN .req a2
 PCOEF .req a3
-DECIFACTOR .req a4
 OLDFPSCR .req a4
 COUNTER .req ip
 
@@ -129,6 +128,15 @@ POST3 .req s27
 .endm
 
 .macro dca_lfe_fir decifactor
+function ff_dca_lfe_fir\decifactor\()_vfp, export=1
+NOVFP   vmov s0, r3
+        fmrx OLDFPSCR, FPSCR
+        ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
+        fmxr FPSCR, ip
+        vldr IN0, [PIN, #-0*4]
+        vldr IN1, [PIN, #-1*4]
+        vldr IN2, [PIN, #-2*4]
+        vldr IN3, [PIN, #-3*4]
 .if \decifactor == 32
 .set JMAX, 8
         vpush {s16-s31}
@@ -165,32 +173,16 @@ POST3 .req s27
 .endif
         fmxr FPSCR, OLDFPSCR
         bx lr
+endfunc
 .endm
 
-
-/* void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs,
- *                         int decifactor, float scale)
- */
-function ff_dca_lfe_fir_vfp, export=1
-        teq DECIFACTOR, #32
-        fmrx OLDFPSCR, FPSCR
-        ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
-        fmxr FPSCR, ip
-NOVFP   vldr s0, [sp]
-        vldr IN0, [PIN, #-0*4]
-        vldr IN1, [PIN, #-1*4]
-        vldr IN2, [PIN, #-2*4]
-        vldr IN3, [PIN, #-3*4]
-        beq 32f
-64:     dca_lfe_fir 64
+        dca_lfe_fir 64
         .ltorg
-32:     dca_lfe_fir 32
-endfunc
+        dca_lfe_fir 32
 
         .unreq POUT
         .unreq PIN
         .unreq PCOEF
-        .unreq DECIFACTOR
         .unreq OLDFPSCR
         .unreq COUNTER
 
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
index 6ffb040aaa..723ed191dc 100644
--- a/libavcodec/dcadec.c
+++ b/libavcodec/dcadec.c
@@ -957,23 +957,23 @@ static void lfe_interpolation_fir(DCAContext *s, int decimation_select,
      * samples_out: An array holding interpolated samples
      */
 
-    int decifactor;
+    int idx;
     const float *prCoeff;
     int deciindex;
 
     /* Select decimation filter */
     if (decimation_select == 1) {
-        decifactor = 64;
+        idx = 1;
         prCoeff = lfe_fir_128;
     } else {
-        decifactor = 32;
+        idx = 0;
         prCoeff = lfe_fir_64;
     }
     /* Interpolation */
     for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
-        s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor, scale);
+        s->dcadsp.lfe_fir[idx](samples_out, samples_in, prCoeff, scale);
         samples_in++;
-        samples_out += 2 * decifactor;
+        samples_out += 2 * 32 * (1 + idx);
     }
 }
 
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
index 148f6dd607..8d242c5959 100644
--- a/libavcodec/dcadsp.c
+++ b/libavcodec/dcadsp.c
@@ -32,8 +32,9 @@ static void int8x8_fmul_int32_c(float *dst, const int8_t *src, int scale)
         dst[i] = src[i] * fscale;
 }
 
-static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
-                          int decifactor, float scale)
+static inline void
+dca_lfe_fir(float *out, const float *in, const float *coefs,
+            int decifactor, float scale)
 {
     float *out2 = out + decifactor;
     const float *cf0 = coefs;
@@ -82,9 +83,22 @@ static void dca_qmf_32_subbands(float samples_in[32][8], int sb_act,
     }
 }
 
+static void dca_lfe_fir0_c(float *out, const float *in, const float *coefs,
+                           float scale)
+{
+    dca_lfe_fir(out, in, coefs, 32, scale);
+}
+
+static void dca_lfe_fir1_c(float *out, const float *in, const float *coefs,
+                           float scale)
+{
+    dca_lfe_fir(out, in, coefs, 64, scale);
+}
+
 av_cold void ff_dcadsp_init(DCADSPContext *s)
 {
-    s->lfe_fir = dca_lfe_fir_c;
+    s->lfe_fir[0] = dca_lfe_fir0_c;
+    s->lfe_fir[1] = dca_lfe_fir1_c;
     s->qmf_32_subbands = dca_qmf_32_subbands;
     s->int8x8_fmul_int32 = int8x8_fmul_int32_c;
     if (ARCH_ARM) ff_dcadsp_init_arm(s);
diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
index e2ad09adf6..3e04426a80 100644
--- a/libavcodec/dcadsp.h
+++ b/libavcodec/dcadsp.h
@@ -23,8 +23,8 @@
 #include "synth_filter.h"
 
 typedef struct DCADSPContext {
-    void (*lfe_fir)(float *out, const float *in, const float *coefs,
-                    int decifactor, float scale);
+    void (*lfe_fir[2])(float *out, const float *in, const float *coefs,
+                       float scale);
     void (*qmf_32_subbands)(float samples_in[32][8], int sb_act,
                             SynthFilterContext *synth, FFTContext *imdct,
                             float synth_buf_ptr[512],