diff options
-rw-r--r-- | libavcodec/ac3dec.c | 46 | ||||
-rw-r--r-- | libavcodec/ac3dec.h | 2 | ||||
-rw-r--r-- | libavcodec/ac3dec_fixed.c | 8 | ||||
-rw-r--r-- | libavcodec/ac3dsp.c | 16 | ||||
-rw-r--r-- | libavcodec/ac3dsp.h | 4 | ||||
-rw-r--r-- | libavcodec/x86/ac3dsp_init.c | 36 |
6 files changed, 64 insertions, 48 deletions
diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c index 003ce17a16..f9bab94db8 100644 --- a/libavcodec/ac3dec.c +++ b/libavcodec/ac3dec.c @@ -362,52 +362,62 @@ static int parse_frame_header(AC3DecodeContext *s) * Set stereo downmixing coefficients based on frame header info. * reference: Section 7.8.2 Downmixing Into Two Channels */ -static void set_downmix_coeffs(AC3DecodeContext *s) +static int set_downmix_coeffs(AC3DecodeContext *s) { int i; float cmix = gain_levels[s-> center_mix_level]; float smix = gain_levels[s->surround_mix_level]; float norm0, norm1; - float downmix_coeffs[AC3_MAX_CHANNELS][2]; + float downmix_coeffs[2][AC3_MAX_CHANNELS]; + + if (!s->downmix_coeffs[0]) { + s->downmix_coeffs[0] = av_malloc_array(2 * AC3_MAX_CHANNELS, + sizeof(**s->downmix_coeffs)); + if (!s->downmix_coeffs[0]) + return AVERROR(ENOMEM); + s->downmix_coeffs[1] = s->downmix_coeffs[0] + AC3_MAX_CHANNELS; + } for (i = 0; i < s->fbw_channels; i++) { - downmix_coeffs[i][0] = gain_levels[ac3_default_coeffs[s->channel_mode][i][0]]; - downmix_coeffs[i][1] = gain_levels[ac3_default_coeffs[s->channel_mode][i][1]]; + downmix_coeffs[0][i] = gain_levels[ac3_default_coeffs[s->channel_mode][i][0]]; + downmix_coeffs[1][i] = gain_levels[ac3_default_coeffs[s->channel_mode][i][1]]; } if (s->channel_mode > 1 && s->channel_mode & 1) { - downmix_coeffs[1][0] = downmix_coeffs[1][1] = cmix; + downmix_coeffs[0][1] = downmix_coeffs[1][1] = cmix; } if (s->channel_mode == AC3_CHMODE_2F1R || s->channel_mode == AC3_CHMODE_3F1R) { int nf = s->channel_mode - 2; - downmix_coeffs[nf][0] = downmix_coeffs[nf][1] = smix * LEVEL_MINUS_3DB; + downmix_coeffs[0][nf] = downmix_coeffs[1][nf] = smix * LEVEL_MINUS_3DB; } if (s->channel_mode == AC3_CHMODE_2F2R || s->channel_mode == AC3_CHMODE_3F2R) { int nf = s->channel_mode - 4; - downmix_coeffs[nf][0] = downmix_coeffs[nf+1][1] = smix; + downmix_coeffs[0][nf] = downmix_coeffs[1][nf+1] = smix; } /* renormalize */ norm0 = norm1 = 0.0; for (i = 0; i < s->fbw_channels; i++) { - norm0 += downmix_coeffs[i][0]; - norm1 += downmix_coeffs[i][1]; + norm0 += downmix_coeffs[0][i]; + norm1 += downmix_coeffs[1][i]; } norm0 = 1.0f / norm0; norm1 = 1.0f / norm1; for (i = 0; i < s->fbw_channels; i++) { - downmix_coeffs[i][0] *= norm0; - downmix_coeffs[i][1] *= norm1; + downmix_coeffs[0][i] *= norm0; + downmix_coeffs[1][i] *= norm1; } if (s->output_mode == AC3_CHMODE_MONO) { for (i = 0; i < s->fbw_channels; i++) - downmix_coeffs[i][0] = (downmix_coeffs[i][0] + - downmix_coeffs[i][1]) * LEVEL_MINUS_3DB; + downmix_coeffs[0][i] = (downmix_coeffs[0][i] + + downmix_coeffs[1][i]) * LEVEL_MINUS_3DB; } for (i = 0; i < s->fbw_channels; i++) { - s->downmix_coeffs[i][0] = FIXR12(downmix_coeffs[i][0]); - s->downmix_coeffs[i][1] = FIXR12(downmix_coeffs[i][1]); + s->downmix_coeffs[0][i] = FIXR12(downmix_coeffs[0][i]); + s->downmix_coeffs[1][i] = FIXR12(downmix_coeffs[1][i]); } + + return 0; } /** @@ -1562,7 +1572,10 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, /* set downmixing coefficients if needed */ if (s->channels != s->out_channels && !((s->output_mode & AC3_OUTPUT_LFEON) && s->fbw_channels == s->out_channels)) { - set_downmix_coeffs(s); + if ((ret = set_downmix_coeffs(s)) < 0) { + av_log(avctx, AV_LOG_ERROR, "error setting downmix coeffs\n"); + return ret; + } } } else if (!s->channels) { av_log(avctx, AV_LOG_ERROR, "unable to determine channel mode\n"); @@ -1685,6 +1698,7 @@ static av_cold int ac3_decode_end(AVCodecContext *avctx) ff_mdct_end(&s->imdct_512); ff_mdct_end(&s->imdct_256); av_freep(&s->fdsp); + av_freep(&s->downmix_coeffs[0]); return 0; } diff --git a/libavcodec/ac3dec.h b/libavcodec/ac3dec.h index 495e9a69a1..bac661c167 100644 --- a/libavcodec/ac3dec.h +++ b/libavcodec/ac3dec.h @@ -159,7 +159,7 @@ typedef struct AC3DecodeContext { int fbw_channels; ///< number of full-bandwidth channels int channels; ///< number of total channels int lfe_ch; ///< index of LFE channel - SHORTFLOAT downmix_coeffs[AC3_MAX_CHANNELS][2]; ///< stereo downmix coefficients + SHORTFLOAT *downmix_coeffs[2]; ///< stereo downmix coefficients int downmixed; ///< indicates if coeffs are currently downmixed int output_mode; ///< output channel configuration int out_channels; ///< number of output channels diff --git a/libavcodec/ac3dec_fixed.c b/libavcodec/ac3dec_fixed.c index 1f79adee34..682fe935b0 100644 --- a/libavcodec/ac3dec_fixed.c +++ b/libavcodec/ac3dec_fixed.c @@ -139,7 +139,7 @@ static void scale_coefs ( * Downmix samples from original signal to stereo or mono (this is for 16-bit samples * and fixed point decoder - original (for 32-bit samples) is in ac3dsp.c). */ -static void ac3_downmix_c_fixed16(int16_t **samples, int16_t (*matrix)[2], +static void ac3_downmix_c_fixed16(int16_t **samples, int16_t **matrix, int out_ch, int in_ch, int len) { int i, j; @@ -148,8 +148,8 @@ static void ac3_downmix_c_fixed16(int16_t **samples, int16_t (*matrix)[2], for (i = 0; i < len; i++) { v0 = v1 = 0; for (j = 0; j < in_ch; j++) { - v0 += samples[j][i] * matrix[j][0]; - v1 += samples[j][i] * matrix[j][1]; + v0 += samples[j][i] * matrix[0][j]; + v1 += samples[j][i] * matrix[1][j]; } samples[0][i] = (v0+2048)>>12; samples[1][i] = (v1+2048)>>12; @@ -158,7 +158,7 @@ static void ac3_downmix_c_fixed16(int16_t **samples, int16_t (*matrix)[2], for (i = 0; i < len; i++) { v0 = 0; for (j = 0; j < in_ch; j++) - v0 += samples[j][i] * matrix[j][0]; + v0 += samples[j][i] * matrix[0][j]; samples[0][i] = (v0+2048)>>12; } } diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c index 9902f905f0..23abc5685e 100644 --- a/libavcodec/ac3dsp.c +++ b/libavcodec/ac3dsp.c @@ -213,7 +213,7 @@ static void ac3_sum_square_butterfly_float_c(float sum[4], } } -static void ac3_downmix_c(float **samples, float (*matrix)[2], +static void ac3_downmix_c(float **samples, float **matrix, int out_ch, int in_ch, int len) { int i, j; @@ -222,8 +222,8 @@ static void ac3_downmix_c(float **samples, float (*matrix)[2], for (i = 0; i < len; i++) { v0 = v1 = 0.0f; for (j = 0; j < in_ch; j++) { - v0 += samples[j][i] * matrix[j][0]; - v1 += samples[j][i] * matrix[j][1]; + v0 += samples[j][i] * matrix[0][j]; + v1 += samples[j][i] * matrix[1][j]; } samples[0][i] = v0; samples[1][i] = v1; @@ -232,13 +232,13 @@ static void ac3_downmix_c(float **samples, float (*matrix)[2], for (i = 0; i < len; i++) { v0 = 0.0f; for (j = 0; j < in_ch; j++) - v0 += samples[j][i] * matrix[j][0]; + v0 += samples[j][i] * matrix[0][j]; samples[0][i] = v0; } } } -static void ac3_downmix_c_fixed(int32_t **samples, int16_t (*matrix)[2], +static void ac3_downmix_c_fixed(int32_t **samples, int16_t **matrix, int out_ch, int in_ch, int len) { int i, j; @@ -247,8 +247,8 @@ static void ac3_downmix_c_fixed(int32_t **samples, int16_t (*matrix)[2], for (i = 0; i < len; i++) { v0 = v1 = 0; for (j = 0; j < in_ch; j++) { - v0 += (int64_t)samples[j][i] * matrix[j][0]; - v1 += (int64_t)samples[j][i] * matrix[j][1]; + v0 += (int64_t)samples[j][i] * matrix[0][j]; + v1 += (int64_t)samples[j][i] * matrix[1][j]; } samples[0][i] = (v0+2048)>>12; samples[1][i] = (v1+2048)>>12; @@ -257,7 +257,7 @@ static void ac3_downmix_c_fixed(int32_t **samples, int16_t (*matrix)[2], for (i = 0; i < len; i++) { v0 = 0; for (j = 0; j < in_ch; j++) - v0 += (int64_t)samples[j][i] * matrix[j][0]; + v0 += (int64_t)samples[j][i] * matrix[0][j]; samples[0][i] = (v0+2048)>>12; } } diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h index ed98c8ce6a..b4de307053 100644 --- a/libavcodec/ac3dsp.h +++ b/libavcodec/ac3dsp.h @@ -132,10 +132,10 @@ typedef struct AC3DSPContext { void (*sum_square_butterfly_float)(float sum[4], const float *coef0, const float *coef1, int len); - void (*downmix)(float **samples, float (*matrix)[2], int out_ch, + void (*downmix)(float **samples, float **matrix, int out_ch, int in_ch, int len); - void (*downmix_fixed)(int32_t **samples, int16_t (*matrix)[2], int out_ch, + void (*downmix_fixed)(int32_t **samples, int16_t **matrix, int out_ch, int in_ch, int len); /** diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c index 9fd0aef83e..edb6c60e95 100644 --- a/libavcodec/x86/ac3dsp_init.c +++ b/libavcodec/x86/ac3dsp_init.c @@ -76,8 +76,8 @@ void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, #define MIX5(mono, stereo) \ __asm__ volatile ( \ "movss 0(%1), %%xmm5 \n" \ - "movss 8(%1), %%xmm6 \n" \ - "movss 24(%1), %%xmm7 \n" \ + "movss 4(%1), %%xmm6 \n" \ + "movss 12(%1), %%xmm7 \n" \ "shufps $0, %%xmm5, %%xmm5 \n" \ "shufps $0, %%xmm6, %%xmm6 \n" \ "shufps $0, %%xmm7, %%xmm7 \n" \ @@ -102,7 +102,7 @@ void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, "add $16, %0 \n" \ "jl 1b \n" \ : "+&r"(i) \ - : "r"(matrix), \ + : "r"(matrix[0]), \ "r"(samples[0] + len), \ "r"(samples[1] + len), \ "r"(samples[2] + len), \ @@ -146,22 +146,22 @@ void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input, : "memory" \ ); -static void ac3_downmix_sse(float **samples, float (*matrix)[2], +static void ac3_downmix_sse(float **samples, float **matrix, int out_ch, int in_ch, int len) { - int (*matrix_cmp)[2] = (int(*)[2])matrix; + int **matrix_cmp = (int **)matrix; intptr_t i, j, k, m; i = -len * sizeof(float); if (in_ch == 5 && out_ch == 2 && - !(matrix_cmp[0][1] | matrix_cmp[2][0] | - matrix_cmp[3][1] | matrix_cmp[4][0] | - (matrix_cmp[1][0] ^ matrix_cmp[1][1]) | - (matrix_cmp[0][0] ^ matrix_cmp[2][1]))) { + !(matrix_cmp[1][0] | matrix_cmp[0][2] | + matrix_cmp[1][3] | matrix_cmp[0][4] | + (matrix_cmp[0][1] ^ matrix_cmp[1][1]) | + (matrix_cmp[0][0] ^ matrix_cmp[1][2]))) { MIX5(IF0, IF1); } else if (in_ch == 5 && out_ch == 1 && - matrix_cmp[0][0] == matrix_cmp[2][0] && - matrix_cmp[3][0] == matrix_cmp[4][0]) { + matrix_cmp[0][0] == matrix_cmp[0][2] && + matrix_cmp[0][3] == matrix_cmp[0][4]) { MIX5(IF1, IF0); } else { LOCAL_ALIGNED(16, float, matrix_simd, [AC3_MAX_CHANNELS], [2][4]); @@ -171,18 +171,20 @@ static void ac3_downmix_sse(float **samples, float (*matrix)[2], samp[j] = samples[j] + len; j = 2 * in_ch * sizeof(float); + k = in_ch * sizeof(float); __asm__ volatile ( "1: \n" + "sub $4, %1 \n" "sub $8, %0 \n" - "movss (%2, %0), %%xmm4 \n" - "movss 4(%2, %0), %%xmm5 \n" + "movss (%3, %1), %%xmm4 \n" + "movss (%4, %1), %%xmm5 \n" "shufps $0, %%xmm4, %%xmm4 \n" "shufps $0, %%xmm5, %%xmm5 \n" - "movaps %%xmm4, (%1, %0, 4) \n" - "movaps %%xmm5, 16(%1, %0, 4) \n" + "movaps %%xmm4, (%2, %0, 4) \n" + "movaps %%xmm5, 16(%2, %0, 4) \n" "jg 1b \n" - : "+&r"(j) - : "r"(matrix_simd), "r"(matrix) + : "+&r"(j), "+&r"(k) + : "r"(matrix_simd), "r"(matrix[0]), "r"(matrix[1]) : "memory" ); if (out_ch == 2) { |