diff options
Diffstat (limited to 'libswresample')
42 files changed, 8219 insertions, 0 deletions
diff --git a/libswresample/Makefile b/libswresample/Makefile new file mode 100644 index 0000000000..120ee3385d --- /dev/null +++ b/libswresample/Makefile @@ -0,0 +1,24 @@ +include $(SUBDIR)../config.mak + +NAME = swresample +FFLIBS = avutil + +HEADERS = swresample.h \ + version.h \ + +OBJS = audioconvert.o \ + dither.o \ + options.o \ + rematrix.o \ + resample.o \ + resample_dsp.o \ + swresample.o \ + swresample_frame.o \ + +OBJS-$(CONFIG_LIBSOXR) += soxr_resample.o +OBJS-$(CONFIG_SHARED) += log2_tab.o + +# Windows resource file +SLIBOBJS-$(HAVE_GNU_WINDRES) += swresampleres.o + +TESTPROGS = swresample diff --git a/libswresample/aarch64/Makefile b/libswresample/aarch64/Makefile new file mode 100644 index 0000000000..320ed67e82 --- /dev/null +++ b/libswresample/aarch64/Makefile @@ -0,0 +1,5 @@ +OBJS += aarch64/audio_convert_init.o + +OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o + +NEON-OBJS += aarch64/audio_convert_neon.o diff --git a/libswresample/aarch64/audio_convert_init.c b/libswresample/aarch64/audio_convert_init.c new file mode 100644 index 0000000000..60e24adb1c --- /dev/null +++ b/libswresample/aarch64/audio_convert_init.c @@ -0,0 +1,67 @@ +/* + * This file is part of libswresample. + * + * libswresample is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libswresample is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with libswresample; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/aarch64/cpu.h" +#include "libavutil/samplefmt.h" +#include "libswresample/swresample_internal.h" +#include "libswresample/audioconvert.h" + +void swri_oldapi_conv_flt_to_s16_neon(int16_t *dst, const float *src, int len); +void swri_oldapi_conv_fltp_to_s16_2ch_neon(int16_t *dst, float *const *src, int len, int channels); +void swri_oldapi_conv_fltp_to_s16_nch_neon(int16_t *dst, float *const *src, int len, int channels); + +static void conv_flt_to_s16_neon(uint8_t **dst, const uint8_t **src, int len){ + swri_oldapi_conv_flt_to_s16_neon((int16_t*)*dst, (const float*)*src, len); +} + +static void conv_fltp_to_s16_2ch_neon(uint8_t **dst, const uint8_t **src, int len){ + swri_oldapi_conv_fltp_to_s16_2ch_neon((int16_t*)*dst, (float *const*)src, len, 2); +} + +static void conv_fltp_to_s16_nch_neon(uint8_t **dst, const uint8_t **src, int len){ + int channels; + for(channels=3; channels<SWR_CH_MAX && src[channels]; channels++) + ; + swri_oldapi_conv_fltp_to_s16_nch_neon((int16_t*)*dst, (float *const*)src, len, channels); +} + +av_cold void swri_audio_convert_init_aarch64(struct AudioConvert *ac, + enum AVSampleFormat out_fmt, + enum AVSampleFormat in_fmt, + int channels) +{ + int cpu_flags = av_get_cpu_flags(); + + ac->simd_f= NULL; + + if (have_neon(cpu_flags)) { + if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP) + ac->simd_f = conv_flt_to_s16_neon; + if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLTP && channels == 2) + ac->simd_f = conv_fltp_to_s16_2ch_neon; + if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLTP && 
channels > 2) + ac->simd_f = conv_fltp_to_s16_nch_neon; + if(ac->simd_f) + ac->in_simd_align_mask = ac->out_simd_align_mask = 15; + } +} diff --git a/libswresample/aarch64/audio_convert_neon.S b/libswresample/aarch64/audio_convert_neon.S new file mode 100644 index 0000000000..74feff448a --- /dev/null +++ b/libswresample/aarch64/audio_convert_neon.S @@ -0,0 +1,363 @@ +/* + * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> + * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/aarch64/asm.S"
+
+function swri_oldapi_conv_flt_to_s16_neon, export=1 // x0 = dst (int16), x1 = src (float), w2 = len
+        subs        x2, x2, #8                  // len -= 8; the first 8 samples are loaded just below
+        ld1         {v0.4s}, [x1], #16
+        fcvtzs      v4.4s, v0.4s, #31           // float -> fixed point with 31 fractional bits
+        ld1         {v1.4s}, [x1], #16
+        fcvtzs      v5.4s, v1.4s, #31
+        b.eq        3f                          // len was exactly 8: narrow and store at 3
+        ands        x12, x2, #~15               // x12 = remaining count rounded down to a multiple of 16
+        b.eq        2f
+1:      subs        x12, x12, #16               // main loop: 16 samples per iteration
+        sqrshrn     v4.4h, v4.4s, #16           // saturating rounding narrow: 32-bit -> int16
+        ld1         {v2.4s}, [x1], #16
+        fcvtzs      v6.4s, v2.4s, #31
+        sqrshrn2    v4.8h, v5.4s, #16
+        ld1         {v3.4s}, [x1], #16
+        fcvtzs      v7.4s, v3.4s, #31
+        sqrshrn     v6.4h, v6.4s, #16
+        st1         {v4.8h}, [x0], #16
+        sqrshrn2    v6.8h, v7.4s, #16
+        ld1         {v0.4s}, [x1], #16
+        fcvtzs      v4.4s, v0.4s, #31
+        ld1         {v1.4s}, [x1], #16
+        fcvtzs      v5.4s, v1.4s, #31
+        st1         {v6.8h}, [x0], #16
+        b.ne        1b
+        ands        x2, x2, #15                 // samples left over after the 16-sample iterations
+        b.eq        3f
+2:      ld1         {v2.4s}, [x1], #16          // tail: 16 samples, 8 of them already converted in v4/v5
+        sqrshrn     v4.4h, v4.4s, #16
+        fcvtzs      v6.4s, v2.4s, #31
+        ld1         {v3.4s}, [x1], #16
+        sqrshrn2    v4.8h, v5.4s, #16
+        fcvtzs      v7.4s, v3.4s, #31
+        sqrshrn     v6.4h, v6.4s, #16
+        st1         {v4.8h}, [x0], #16
+        sqrshrn2    v6.8h, v7.4s, #16
+        st1         {v6.8h}, [x0]
+        ret
+3:      sqrshrn     v4.4h, v4.4s, #16           // tail: the 8 samples pending in v4/v5
+        sqrshrn2    v4.8h, v5.4s, #16
+        st1         {v4.8h}, [x0]
+        ret
+endfunc
+
+function swri_oldapi_conv_fltp_to_s16_2ch_neon, export=1 // x0 = dst, x1 = array of plane pointers, w2 = len
+        ldp         x4, x5, [x1]                // x4 = src[0], x5 = src[1]
+        subs        x2, x2, #8
+        ld1         {v0.4s}, [x4], #16
+        fcvtzs      v4.4s, v0.4s, #31
+        ld1         {v1.4s}, [x4], #16
+        fcvtzs      v5.4s, v1.4s, #31
+        ld1         {v2.4s}, [x5], #16
+        fcvtzs      v6.4s, v2.4s, #31
+        ld1         {v3.4s}, [x5], #16
+        fcvtzs      v7.4s, v3.4s, #31
+        b.eq        3f
+        ands        x12, x2, #~15
+        b.eq        2f
+1:      subs        x12, x12, #16
+        ld1         {v16.4s}, [x4], #16
+        fcvtzs      v20.4s, v16.4s, #31
+        sri         v6.4s, v4.4s, #16           // each 32-bit lane: high half stays src[1], low half = top 16 bits of src[0]
+        ld1         {v17.4s}, [x4], #16
+        fcvtzs      v21.4s, v17.4s, #31
+        ld1         {v18.4s}, [x5], #16
+        fcvtzs      v22.4s, v18.4s, #31
+        ld1         {v19.4s}, [x5], #16
+        sri         v7.4s, v5.4s, #16
+        st1         {v6.4s}, [x0], #16
+        fcvtzs      v23.4s, v19.4s, #31
+        st1         {v7.4s}, [x0], #16
+        sri         v22.4s, v20.4s, #16
+        ld1         {v0.4s}, [x4], #16
+        sri         v23.4s, v21.4s, #16
+        st1         {v22.4s}, [x0], #16
+        fcvtzs      v4.4s, v0.4s, #31
+        ld1         {v1.4s}, [x4], #16
+        fcvtzs      v5.4s, v1.4s, #31
+        ld1         {v2.4s}, [x5], #16
+        fcvtzs      v6.4s, v2.4s, #31
+        ld1         {v3.4s}, [x5], #16
+        fcvtzs      v7.4s, v3.4s, #31
+        st1         {v23.4s}, [x0], #16
+        b.ne        1b
+        ands        x2, x2, #15
+        b.eq        3f
+2:      sri         v6.4s, v4.4s, #16
+        ld1         {v0.4s}, [x4], #16
+        fcvtzs      v0.4s, v0.4s, #31
+        ld1         {v1.4s}, [x4], #16
+        fcvtzs      v1.4s, v1.4s, #31
+        ld1         {v2.4s}, [x5], #16
+        fcvtzs      v2.4s, v2.4s, #31
+        sri         v7.4s, v5.4s, #16
+        ld1         {v3.4s}, [x5], #16
+        fcvtzs      v3.4s, v3.4s, #31
+        sri         v2.4s, v0.4s, #16
+        st1         {v6.4s,v7.4s}, [x0], #32
+        sri         v3.4s, v1.4s, #16
+        st1         {v2.4s,v3.4s}, [x0], #32
+        ret
+3:      sri         v6.4s, v4.4s, #16
+        sri         v7.4s, v5.4s, #16
+        st1         {v6.4s,v7.4s}, [x0]
+        ret
+endfunc
+
+function swri_oldapi_conv_fltp_to_s16_nch_neon, export=1 // x0 = dst, x1 = plane pointers, w2 = len, w3 = channels
+        cmp         w3, #2
+        b.eq        X(swri_oldapi_conv_fltp_to_s16_2ch_neon) // exactly 2 channels: tail-call the 2ch routine
+        b.gt        1f
+        ldr         x1, [x1]                    // fewer than 2 channels: pass src[0] to the flat routine
+        b           X(swri_oldapi_conv_flt_to_s16_neon)
+1:
+        cmp         w3, #4
+        lsl         x12, x3, #1                 // x12 = channels * 2, used as the post-increment of every store
+        b.lt        4f
+
+5:      // 4 channels
+        ldp         x4, x5, [x1], #16
+        ldp         x6, x7, [x1], #16
+        mov         w9, w2
+        mov         x8, x0
+        ld1         {v4.4s}, [x4], #16
+        fcvtzs      v4.4s, v4.4s, #31
+        ld1         {v5.4s}, [x5], #16
+        fcvtzs      v5.4s, v5.4s, #31
+        ld1         {v6.4s}, [x6], #16
+        fcvtzs      v6.4s, v6.4s, #31
+        ld1         {v7.4s}, [x7], #16
+        fcvtzs      v7.4s, v7.4s, #31
+6:
+        subs        w9, w9, #8
+        ld1         {v0.4s}, [x4], #16
+        fcvtzs      v0.4s, v0.4s, #31
+        sri         v5.4s, v4.4s, #16
+        ld1         {v1.4s}, [x5], #16
+        fcvtzs      v1.4s, v1.4s, #31
+        sri         v7.4s, v6.4s, #16
+        ld1         {v2.4s}, [x6], #16
+        fcvtzs      v2.4s, v2.4s, #31
+        zip1        v16.4s, v5.4s, v7.4s
+        ld1         {v3.4s}, [x7], #16
+        fcvtzs      v3.4s, v3.4s, #31
+        zip2        v17.4s, v5.4s, v7.4s
+        st1         {v16.d}[0], [x8], x12       // 8 bytes (four int16) per store
+        sri         v1.4s, v0.4s, #16
+        st1         {v16.d}[1], [x8], x12
+        sri         v3.4s, v2.4s, #16
+        st1         {v17.d}[0], [x8], x12
+        zip1        v18.4s, v1.4s, v3.4s
+        st1         {v17.d}[1], [x8], x12
+        zip2        v19.4s, v1.4s, v3.4s
+        b.eq        7f
+        ld1         {v4.4s}, [x4], #16
+        fcvtzs      v4.4s, v4.4s, #31
+        st1         {v18.d}[0], [x8], x12
+        ld1         {v5.4s}, [x5], #16
+        fcvtzs      v5.4s, v5.4s, #31
+        st1         {v18.d}[1], [x8], x12
+        ld1         {v6.4s}, [x6], #16
+        fcvtzs      v6.4s, v6.4s, #31
+        st1         {v19.d}[0], [x8], x12
+        ld1         {v7.4s}, [x7], #16
+        fcvtzs      v7.4s, v7.4s, #31
+        st1         {v19.d}[1], [x8], x12
+        b           6b
+7:
+        st1         {v18.d}[0], [x8], x12
+        st1         {v18.d}[1], [x8], x12
+        st1         {v19.d}[0], [x8], x12
+        st1         {v19.d}[1], [x8], x12
+        subs        w3, w3, #4                  // four channels done
+        b.eq        end
+        cmp         w3, #4
+        add         x0, x0, #8                  // step dst base past the four int16 columns just written
+        b.ge        5b
+
+4:      // 2 channels
+        cmp         w3, #2
+        b.lt        4f
+        ldp         x4, x5, [x1], #16
+        mov         w9, w2
+        mov         x8, x0
+        tst         w9, #8                      // Z set when (len & 8) == 0; consumed by b.eq 6f below
+        ld1         {v4.4s}, [x4], #16
+        fcvtzs      v4.4s, v4.4s, #31
+        ld1         {v5.4s}, [x5], #16
+        fcvtzs      v5.4s, v5.4s, #31
+        ld1         {v6.4s}, [x4], #16
+        fcvtzs      v6.4s, v6.4s, #31
+        ld1         {v7.4s}, [x5], #16
+        fcvtzs      v7.4s, v7.4s, #31
+        b.eq        6f
+        subs        w9, w9, #8
+        b.eq        7f
+        sri         v5.4s, v4.4s, #16
+        ld1         {v4.4s}, [x4], #16
+        fcvtzs      v4.4s, v4.4s, #31
+        st1         {v5.s}[0], [x8], x12        // 4 bytes (two int16) per store
+        sri         v7.4s, v6.4s, #16
+        st1         {v5.s}[1], [x8], x12
+        ld1         {v6.4s}, [x4], #16
+        fcvtzs      v6.4s, v6.4s, #31
+        st1         {v5.s}[2], [x8], x12
+        st1         {v5.s}[3], [x8], x12
+        st1         {v7.s}[0], [x8], x12
+        st1         {v7.s}[1], [x8], x12
+        ld1         {v5.4s}, [x5], #16
+        fcvtzs      v5.4s, v5.4s, #31
+        st1         {v7.s}[2], [x8], x12
+        st1         {v7.s}[3], [x8], x12
+        ld1         {v7.4s}, [x5], #16
+        fcvtzs      v7.4s, v7.4s, #31
+6:
+        subs        w9, w9, #16
+        ld1         {v0.4s}, [x4], #16
+        sri         v5.4s, v4.4s, #16
+        fcvtzs      v0.4s, v0.4s, #31
+        ld1         {v1.4s}, [x5], #16
+        sri         v7.4s, v6.4s, #16
+        st1         {v5.s}[0], [x8], x12
+        st1         {v5.s}[1], [x8], x12
+        fcvtzs      v1.4s, v1.4s, #31
+        st1         {v5.s}[2], [x8], x12
+        st1         {v5.s}[3], [x8], x12
+        ld1         {v2.4s}, [x4], #16
+        st1         {v7.s}[0], [x8], x12
+        fcvtzs      v2.4s, v2.4s, #31
+        st1         {v7.s}[1], [x8], x12
+        ld1         {v3.4s}, [x5], #16
+        st1         {v7.s}[2], [x8], x12
+        fcvtzs      v3.4s, v3.4s, #31
+        st1         {v7.s}[3], [x8], x12
+        sri         v1.4s, v0.4s, #16
+        sri         v3.4s, v2.4s, #16
+        b.eq        6f
+        ld1         {v4.4s}, [x4], #16
+        st1         {v1.s}[0], [x8], x12
+        fcvtzs      v4.4s, v4.4s, #31
+        st1         {v1.s}[1], [x8], x12
+        ld1         {v5.4s}, [x5], #16
+        st1         {v1.s}[2], [x8], x12
+        fcvtzs      v5.4s, v5.4s, #31
+        st1         {v1.s}[3], [x8], x12
+        ld1         {v6.4s}, [x4], #16
+        st1         {v3.s}[0], [x8], x12
+        fcvtzs      v6.4s, v6.4s, #31
+        st1         {v3.s}[1], [x8], x12
+        ld1         {v7.4s}, [x5], #16
+        st1         {v3.s}[2], [x8], x12
+        fcvtzs      v7.4s, v7.4s, #31
+        st1         {v3.s}[3], [x8], x12
+        b.gt        6b
+6:
+        st1         {v1.s}[0], [x8], x12
+        st1         {v1.s}[1], [x8], x12
+        st1         {v1.s}[2], [x8], x12
+        st1         {v1.s}[3], [x8], x12
+        st1         {v3.s}[0], [x8], x12
+        st1         {v3.s}[1], [x8], x12
+        st1         {v3.s}[2], [x8], x12
+        st1         {v3.s}[3], [x8], x12
+        b           8f
+7:
+        sri         v5.4s, v4.4s, #16
+        sri         v7.4s, v6.4s, #16
+        st1         {v5.s}[0], [x8], x12
+        st1         {v5.s}[1], [x8], x12
+        st1         {v5.s}[2], [x8], x12
+        st1         {v5.s}[3], [x8], x12
+        st1         {v7.s}[0], [x8], x12
+        st1         {v7.s}[1], [x8], x12
+        st1         {v7.s}[2], [x8], x12
+        st1         {v7.s}[3], [x8], x12
+8:
+        subs        w3, w3, #2                  // two channels done
+        add         x0, x0, #4                  // step dst base past the two int16 columns just written
+        b.eq        end
+
+4:      // 1 channel
+        ldr         x4, [x1]
+        tst         w2, #8                      // Z clear when (len & 8) != 0; consumed by b.ne 8f below
+        mov         w9, w2
+        mov         x5, x0
+        ld1         {v0.4s}, [x4], #16
+        fcvtzs      v0.4s, v0.4s, #31
+        ld1         {v1.4s}, [x4], #16
+        fcvtzs      v1.4s, v1.4s, #31
+        b.ne        8f
+6:
+        subs        w9, w9, #16
+        ld1         {v2.4s}, [x4], #16
+        fcvtzs      v2.4s, v2.4s, #31
+        ld1         {v3.4s}, [x4], #16
+        fcvtzs      v3.4s, v3.4s, #31
+        st1         {v0.h}[1], [x5], x12        // odd half-word lanes hold the top 16 bits of each 32-bit result
+        st1         {v0.h}[3], [x5], x12
+        st1         {v0.h}[5], [x5], x12
+        st1         {v0.h}[7], [x5], x12
+        st1         {v1.h}[1], [x5], x12
+        st1         {v1.h}[3], [x5], x12
+        st1         {v1.h}[5], [x5], x12
+        st1         {v1.h}[7], [x5], x12
+        b.eq        7f
+        ld1         {v0.4s}, [x4], #16
+        fcvtzs      v0.4s, v0.4s, #31
+        ld1         {v1.4s}, [x4], #16
+        fcvtzs      v1.4s, v1.4s, #31
+7:
+        st1         {v2.h}[1], [x5], x12
+        st1         {v2.h}[3], [x5], x12
+        st1         {v2.h}[5], [x5], x12
+        st1         {v2.h}[7], [x5], x12
+        st1         {v3.h}[1], [x5], x12
+        st1         {v3.h}[3], [x5], x12
+        st1         {v3.h}[5], [x5], x12
+        st1         {v3.h}[7], [x5], x12
+        b.gt        6b                          // flags still come from the subs at 6: (stores and loads leave them alone)
+        ret
+8:
+        subs        w9, w9, #8
+        st1         {v0.h}[1], [x5], x12
+        st1         {v0.h}[3], [x5], x12
+        st1         {v0.h}[5], [x5], x12
+        st1         {v0.h}[7], [x5], x12
+        st1         {v1.h}[1], [x5], x12
+        st1         {v1.h}[3], [x5], x12
+        st1         {v1.h}[5], [x5], x12
+        st1         {v1.h}[7], [x5], x12
+        b.eq        end
+        ld1         {v0.4s}, [x4], #16
+        fcvtzs      v0.4s, v0.4s, #31
+        ld1         {v1.4s}, [x4], #16
+        fcvtzs      v1.4s, v1.4s, #31
+        b           6b
+end:
+        ret
+endfunc
diff --git a/libswresample/aarch64/neontest.c b/libswresample/aarch64/neontest.c
new file mode 100644
index 0000000000..85c71bf4c9
--- /dev/null
+++ b/libswresample/aarch64/neontest.c
@@ -0,0 +1,29 @@
+/*
+ * check NEON registers for clobbers
+ * Copyright (c) 2013 Martin Storsjo
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libswresample/swresample.h"
+#include "libavutil/aarch64/neontest.h"
+
+wrap(swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
+                 const uint8_t **in , int in_count))
+{
+    testneonclobbers(swr_convert, s, out, out_count, in, in_count);
+}
diff --git a/libswresample/arm/Makefile b/libswresample/arm/Makefile
new file mode 100644
index 0000000000..53ab4626f4
--- /dev/null
+++ b/libswresample/arm/Makefile
@@ -0,0 +1,8 @@
+OBJS += arm/audio_convert_init.o \
+        arm/resample_init.o
+
+
+OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o
+
+NEON-OBJS += arm/audio_convert_neon.o \
+             arm/resample.o
diff --git a/libswresample/arm/audio_convert_init.c b/libswresample/arm/audio_convert_init.c
new file mode 100644
index 0000000000..ec9e62ede7
--- /dev/null
+++ b/libswresample/arm/audio_convert_init.c
@@ -0,0 +1,67 @@
+/*
+ * This file is part of libswresample.
+ * + * libswresample is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libswresample is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libswresample; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdint.h> + +#include "config.h" +#include "libavutil/attributes.h" +#include "libavutil/cpu.h" +#include "libavutil/arm/cpu.h" +#include "libavutil/samplefmt.h" +#include "libswresample/swresample_internal.h" +#include "libswresample/audioconvert.h" + +void swri_oldapi_conv_flt_to_s16_neon(int16_t *dst, const float *src, int len); +void swri_oldapi_conv_fltp_to_s16_2ch_neon(int16_t *dst, float *const *src, int len, int channels); +void swri_oldapi_conv_fltp_to_s16_nch_neon(int16_t *dst, float *const *src, int len, int channels); + +static void conv_flt_to_s16_neon(uint8_t **dst, const uint8_t **src, int len){ + swri_oldapi_conv_flt_to_s16_neon((int16_t*)*dst, (const float*)*src, len); +} + +static void conv_fltp_to_s16_2ch_neon(uint8_t **dst, const uint8_t **src, int len){ + swri_oldapi_conv_fltp_to_s16_2ch_neon((int16_t*)*dst, (float *const*)src, len, 2); +} + +static void conv_fltp_to_s16_nch_neon(uint8_t **dst, const uint8_t **src, int len){ + int channels; + for(channels=3; channels<SWR_CH_MAX && src[channels]; channels++) + ; + swri_oldapi_conv_fltp_to_s16_nch_neon((int16_t*)*dst, (float *const*)src, len, channels); +} + +av_cold void swri_audio_convert_init_arm(struct AudioConvert *ac, + enum 
AVSampleFormat out_fmt, + enum AVSampleFormat in_fmt, + int channels) +{ + int cpu_flags = av_get_cpu_flags(); + + ac->simd_f= NULL; + + if (have_neon(cpu_flags)) { + if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP) + ac->simd_f = conv_flt_to_s16_neon; + if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLTP && channels == 2) + ac->simd_f = conv_fltp_to_s16_2ch_neon; + if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLTP && channels > 2) + ac->simd_f = conv_fltp_to_s16_nch_neon; + if(ac->simd_f) + ac->in_simd_align_mask = ac->out_simd_align_mask = 15; + } +} diff --git a/libswresample/arm/audio_convert_neon.S b/libswresample/arm/audio_convert_neon.S new file mode 100644 index 0000000000..1f88316dde --- /dev/null +++ b/libswresample/arm/audio_convert_neon.S @@ -0,0 +1,363 @@ +/* + * Copyright (c) 2008 Mans Rullgard <mans@mansr.com> + * + * This file is part of libswresample. + * + * libswresample is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libswresample is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libswresample; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" +#include "libavutil/arm/asm.S" + +function swri_oldapi_conv_flt_to_s16_neon, export=1 + subs r2, r2, #8 + vld1.32 {q0}, [r1,:128]! + vcvt.s32.f32 q8, q0, #31 + vld1.32 {q1}, [r1,:128]! 
+ vcvt.s32.f32 q9, q1, #31 + beq 3f + bics r12, r2, #15 + beq 2f +1: subs r12, r12, #16 + vqrshrn.s32 d4, q8, #16 + vld1.32 {q0}, [r1,:128]! + vcvt.s32.f32 q0, q0, #31 + vqrshrn.s32 d5, q9, #16 + vld1.32 {q1}, [r1,:128]! + vcvt.s32.f32 q1, q1, #31 + vqrshrn.s32 d6, q0, #16 + vst1.16 {q2}, [r0,:128]! + vqrshrn.s32 d7, q1, #16 + vld1.32 {q8}, [r1,:128]! + vcvt.s32.f32 q8, q8, #31 + vld1.32 {q9}, [r1,:128]! + vcvt.s32.f32 q9, q9, #31 + vst1.16 {q3}, [r0,:128]! + bne 1b + ands r2, r2, #15 + beq 3f +2: vld1.32 {q0}, [r1,:128]! + vqrshrn.s32 d4, q8, #16 + vcvt.s32.f32 q0, q0, #31 + vld1.32 {q1}, [r1,:128]! + vqrshrn.s32 d5, q9, #16 + vcvt.s32.f32 q1, q1, #31 + vqrshrn.s32 d6, q0, #16 + vst1.16 {q2}, [r0,:128]! + vqrshrn.s32 d7, q1, #16 + vst1.16 {q3}, [r0,:128]! + bx lr +3: vqrshrn.s32 d4, q8, #16 + vqrshrn.s32 d5, q9, #16 + vst1.16 {q2}, [r0,:128]! + bx lr +endfunc + +function swri_oldapi_conv_fltp_to_s16_2ch_neon, export=1 + ldm r1, {r1, r3} + subs r2, r2, #8 + vld1.32 {q0}, [r1,:128]! + vcvt.s32.f32 q8, q0, #31 + vld1.32 {q1}, [r1,:128]! + vcvt.s32.f32 q9, q1, #31 + vld1.32 {q10}, [r3,:128]! + vcvt.s32.f32 q10, q10, #31 + vld1.32 {q11}, [r3,:128]! + vcvt.s32.f32 q11, q11, #31 + beq 3f + bics r12, r2, #15 + beq 2f +1: subs r12, r12, #16 + vld1.32 {q0}, [r1,:128]! + vcvt.s32.f32 q0, q0, #31 + vsri.32 q10, q8, #16 + vld1.32 {q1}, [r1,:128]! + vcvt.s32.f32 q1, q1, #31 + vld1.32 {q12}, [r3,:128]! + vcvt.s32.f32 q12, q12, #31 + vld1.32 {q13}, [r3,:128]! + vsri.32 q11, q9, #16 + vst1.16 {q10}, [r0,:128]! + vcvt.s32.f32 q13, q13, #31 + vst1.16 {q11}, [r0,:128]! + vsri.32 q12, q0, #16 + vld1.32 {q8}, [r1,:128]! + vsri.32 q13, q1, #16 + vst1.16 {q12}, [r0,:128]! + vcvt.s32.f32 q8, q8, #31 + vld1.32 {q9}, [r1,:128]! + vcvt.s32.f32 q9, q9, #31 + vld1.32 {q10}, [r3,:128]! + vcvt.s32.f32 q10, q10, #31 + vld1.32 {q11}, [r3,:128]! + vcvt.s32.f32 q11, q11, #31 + vst1.16 {q13}, [r0,:128]! + bne 1b + ands r2, r2, #15 + beq 3f +2: vsri.32 q10, q8, #16 + vld1.32 {q0}, [r1,:128]! 
+ vcvt.s32.f32 q0, q0, #31 + vld1.32 {q1}, [r1,:128]! + vcvt.s32.f32 q1, q1, #31 + vld1.32 {q12}, [r3,:128]! + vcvt.s32.f32 q12, q12, #31 + vsri.32 q11, q9, #16 + vld1.32 {q13}, [r3,:128]! + vcvt.s32.f32 q13, q13, #31 + vst1.16 {q10}, [r0,:128]! + vsri.32 q12, q0, #16 + vst1.16 {q11}, [r0,:128]! + vsri.32 q13, q1, #16 + vst1.16 {q12-q13},[r0,:128]! + bx lr +3: vsri.32 q10, q8, #16 + vsri.32 q11, q9, #16 + vst1.16 {q10-q11},[r0,:128]! + bx lr +endfunc + +function swri_oldapi_conv_fltp_to_s16_nch_neon, export=1 + cmp r3, #2 + itt lt + ldrlt r1, [r1] + blt X(swri_oldapi_conv_flt_to_s16_neon) + beq X(swri_oldapi_conv_fltp_to_s16_2ch_neon) + + push {r4-r8, lr} + cmp r3, #4 + lsl r12, r3, #1 + blt 4f + + @ 4 channels +5: ldm r1!, {r4-r7} + mov lr, r2 + mov r8, r0 + vld1.32 {q8}, [r4,:128]! + vcvt.s32.f32 q8, q8, #31 + vld1.32 {q9}, [r5,:128]! + vcvt.s32.f32 q9, q9, #31 + vld1.32 {q10}, [r6,:128]! + vcvt.s32.f32 q10, q10, #31 + vld1.32 {q11}, [r7,:128]! + vcvt.s32.f32 q11, q11, #31 +6: subs lr, lr, #8 + vld1.32 {q0}, [r4,:128]! + vcvt.s32.f32 q0, q0, #31 + vsri.32 q9, q8, #16 + vld1.32 {q1}, [r5,:128]! + vcvt.s32.f32 q1, q1, #31 + vsri.32 q11, q10, #16 + vld1.32 {q2}, [r6,:128]! + vcvt.s32.f32 q2, q2, #31 + vzip.32 d18, d22 + vld1.32 {q3}, [r7,:128]! + vcvt.s32.f32 q3, q3, #31 + vzip.32 d19, d23 + vst1.16 {d18}, [r8], r12 + vsri.32 q1, q0, #16 + vst1.16 {d22}, [r8], r12 + vsri.32 q3, q2, #16 + vst1.16 {d19}, [r8], r12 + vzip.32 d2, d6 + vst1.16 {d23}, [r8], r12 + vzip.32 d3, d7 + beq 7f + vld1.32 {q8}, [r4,:128]! + vcvt.s32.f32 q8, q8, #31 + vst1.16 {d2}, [r8], r12 + vld1.32 {q9}, [r5,:128]! + vcvt.s32.f32 q9, q9, #31 + vst1.16 {d6}, [r8], r12 + vld1.32 {q10}, [r6,:128]! + vcvt.s32.f32 q10, q10, #31 + vst1.16 {d3}, [r8], r12 + vld1.32 {q11}, [r7,:128]! 
+ vcvt.s32.f32 q11, q11, #31 + vst1.16 {d7}, [r8], r12 + b 6b +7: vst1.16 {d2}, [r8], r12 + vst1.16 {d6}, [r8], r12 + vst1.16 {d3}, [r8], r12 + vst1.16 {d7}, [r8], r12 + subs r3, r3, #4 + it eq + popeq {r4-r8, pc} + cmp r3, #4 + add r0, r0, #8 + bge 5b + + @ 2 channels +4: cmp r3, #2 + blt 4f + ldm r1!, {r4-r5} + mov lr, r2 + mov r8, r0 + tst lr, #8 + vld1.32 {q8}, [r4,:128]! + vcvt.s32.f32 q8, q8, #31 + vld1.32 {q9}, [r5,:128]! + vcvt.s32.f32 q9, q9, #31 + vld1.32 {q10}, [r4,:128]! + vcvt.s32.f32 q10, q10, #31 + vld1.32 {q11}, [r5,:128]! + vcvt.s32.f32 q11, q11, #31 + beq 6f + subs lr, lr, #8 + beq 7f + vsri.32 d18, d16, #16 + vsri.32 d19, d17, #16 + vld1.32 {q8}, [r4,:128]! + vcvt.s32.f32 q8, q8, #31 + vst1.32 {d18[0]}, [r8], r12 + vsri.32 d22, d20, #16 + vst1.32 {d18[1]}, [r8], r12 + vsri.32 d23, d21, #16 + vst1.32 {d19[0]}, [r8], r12 + vst1.32 {d19[1]}, [r8], r12 + vld1.32 {q9}, [r5,:128]! + vcvt.s32.f32 q9, q9, #31 + vst1.32 {d22[0]}, [r8], r12 + vst1.32 {d22[1]}, [r8], r12 + vld1.32 {q10}, [r4,:128]! + vcvt.s32.f32 q10, q10, #31 + vst1.32 {d23[0]}, [r8], r12 + vst1.32 {d23[1]}, [r8], r12 + vld1.32 {q11}, [r5,:128]! + vcvt.s32.f32 q11, q11, #31 +6: subs lr, lr, #16 + vld1.32 {q0}, [r4,:128]! + vcvt.s32.f32 q0, q0, #31 + vsri.32 d18, d16, #16 + vld1.32 {q1}, [r5,:128]! + vcvt.s32.f32 q1, q1, #31 + vsri.32 d19, d17, #16 + vld1.32 {q2}, [r4,:128]! + vcvt.s32.f32 q2, q2, #31 + vld1.32 {q3}, [r5,:128]! + vcvt.s32.f32 q3, q3, #31 + vst1.32 {d18[0]}, [r8], r12 + vsri.32 d22, d20, #16 + vst1.32 {d18[1]}, [r8], r12 + vsri.32 d23, d21, #16 + vst1.32 {d19[0]}, [r8], r12 + vsri.32 d2, d0, #16 + vst1.32 {d19[1]}, [r8], r12 + vsri.32 d3, d1, #16 + vst1.32 {d22[0]}, [r8], r12 + vsri.32 d6, d4, #16 + vst1.32 {d22[1]}, [r8], r12 + vsri.32 d7, d5, #16 + vst1.32 {d23[0]}, [r8], r12 + vst1.32 {d23[1]}, [r8], r12 + beq 6f + vld1.32 {q8}, [r4,:128]! + vcvt.s32.f32 q8, q8, #31 + vst1.32 {d2[0]}, [r8], r12 + vst1.32 {d2[1]}, [r8], r12 + vld1.32 {q9}, [r5,:128]! 
+ vcvt.s32.f32 q9, q9, #31 + vst1.32 {d3[0]}, [r8], r12 + vst1.32 {d3[1]}, [r8], r12 + vld1.32 {q10}, [r4,:128]! + vcvt.s32.f32 q10, q10, #31 + vst1.32 {d6[0]}, [r8], r12 + vst1.32 {d6[1]}, [r8], r12 + vld1.32 {q11}, [r5,:128]! + vcvt.s32.f32 q11, q11, #31 + vst1.32 {d7[0]}, [r8], r12 + vst1.32 {d7[1]}, [r8], r12 + bgt 6b +6: vst1.32 {d2[0]}, [r8], r12 + vst1.32 {d2[1]}, [r8], r12 + vst1.32 {d3[0]}, [r8], r12 + vst1.32 {d3[1]}, [r8], r12 + vst1.32 {d6[0]}, [r8], r12 + vst1.32 {d6[1]}, [r8], r12 + vst1.32 {d7[0]}, [r8], r12 + vst1.32 {d7[1]}, [r8], r12 + b 8f +7: vsri.32 d18, d16, #16 + vsri.32 d19, d17, #16 + vst1.32 {d18[0]}, [r8], r12 + vsri.32 d22, d20, #16 + vst1.32 {d18[1]}, [r8], r12 + vsri.32 d23, d21, #16 + vst1.32 {d19[0]}, [r8], r12 + vst1.32 {d19[1]}, [r8], r12 + vst1.32 {d22[0]}, [r8], r12 + vst1.32 {d22[1]}, [r8], r12 + vst1.32 {d23[0]}, [r8], r12 + vst1.32 {d23[1]}, [r8], r12 +8: subs r3, r3, #2 + add r0, r0, #4 + it eq + popeq {r4-r8, pc} + + @ 1 channel +4: ldr r4, [r1] + tst r2, #8 + mov lr, r2 + mov r5, r0 + vld1.32 {q0}, [r4,:128]! + vcvt.s32.f32 q0, q0, #31 + vld1.32 {q1}, [r4,:128]! + vcvt.s32.f32 q1, q1, #31 + bne 8f +6: subs lr, lr, #16 + vld1.32 {q2}, [r4,:128]! + vcvt.s32.f32 q2, q2, #31 + vld1.32 {q3}, [r4,:128]! + vcvt.s32.f32 q3, q3, #31 + vst1.16 {d0[1]}, [r5,:16], r12 + vst1.16 {d0[3]}, [r5,:16], r12 + vst1.16 {d1[1]}, [r5,:16], r12 + vst1.16 {d1[3]}, [r5,:16], r12 + vst1.16 {d2[1]}, [r5,:16], r12 + vst1.16 {d2[3]}, [r5,:16], r12 + vst1.16 {d3[1]}, [r5,:16], r12 + vst1.16 {d3[3]}, [r5,:16], r12 + beq 7f + vld1.32 {q0}, [r4,:128]! + vcvt.s32.f32 q0, q0, #31 + vld1.32 {q1}, [r4,:128]! 
+ vcvt.s32.f32 q1, q1, #31 +7: vst1.16 {d4[1]}, [r5,:16], r12 + vst1.16 {d4[3]}, [r5,:16], r12 + vst1.16 {d5[1]}, [r5,:16], r12 + vst1.16 {d5[3]}, [r5,:16], r12 + vst1.16 {d6[1]}, [r5,:16], r12 + vst1.16 {d6[3]}, [r5,:16], r12 + vst1.16 {d7[1]}, [r5,:16], r12 + vst1.16 {d7[3]}, [r5,:16], r12 + bgt 6b + pop {r4-r8, pc} +8: subs lr, lr, #8 + vst1.16 {d0[1]}, [r5,:16], r12 + vst1.16 {d0[3]}, [r5,:16], r12 + vst1.16 {d1[1]}, [r5,:16], r12 + vst1.16 {d1[3]}, [r5,:16], r12 + vst1.16 {d2[1]}, [r5,:16], r12 + vst1.16 {d2[3]}, [r5,:16], r12 + vst1.16 {d3[1]}, [r5,:16], r12 + vst1.16 {d3[3]}, [r5,:16], r12 + it eq + popeq {r4-r8, pc} + vld1.32 {q0}, [r4,:128]! + vcvt.s32.f32 q0, q0, #31 + vld1.32 {q1}, [r4,:128]! + vcvt.s32.f32 q1, q1, #31 + b 6b +endfunc diff --git a/libswresample/arm/neontest.c b/libswresample/arm/neontest.c new file mode 100644 index 0000000000..2abbbc2367 --- /dev/null +++ b/libswresample/arm/neontest.c @@ -0,0 +1,29 @@ +/* + * check NEON registers for clobbers + * Copyright (c) 2013 Martin Storsjo + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libswresample/swresample.h"
+#include "libavutil/arm/neontest.h"
+
+wrap(swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
+                 const uint8_t **in , int in_count))
+{
+    testneonclobbers(swr_convert, s, out, out_count, in, in_count);
+}
diff --git a/libswresample/arm/resample.S b/libswresample/arm/resample.S
new file mode 100644
index 0000000000..c231301b2b
--- /dev/null
+++ b/libswresample/arm/resample.S
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016 Matthieu Bouron <matthieu.bouron stupeflix.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_resample_common_apply_filter_x4_float_neon, export=1 @ r0 = acc (out), r1 = src, r2 = filter, r3 = length
+    vmov.f32        q0, #0.0                   @ four partial sums, all zero
+1:  vld1.32         {q1}, [r1]!                @ src[0..3], advance src
+    vld1.32         {q2}, [r2]!                @ filter[0..3], advance filter
+    vmla.f32        q0, q1, q2                 @ sum[k] += src[k] * filter[k], k = 0..3
+    subs            r3, #4                     @ filter_length -= 4
+    bgt             1b                         @ loop while filter_length > 0 (body runs at least once)
+    vpadd.f32       d0, d0, d1                 @ fold four partial sums into two
+    vpadd.f32       d0, d0, d0                 @ fold two into one, in d0[0]
+    vst1.32         {d0[0]}, [r0]              @ *acc = sum (overwrites, does not add to *acc)
+    bx              lr
+endfunc
+
+function ff_resample_common_apply_filter_x8_float_neon, export=1 @ r0 = acc (out), r1 = src, r2 = filter, r3 = length
+    vmov.f32        q0, #0.0                   @ four partial sums, all zero
+1:  vld1.32         {q1}, [r1]!                @ src[0..3]
+    vld1.32         {q2}, [r2]!                @ filter[0..3]
+    vld1.32         {q8}, [r1]!                @ src[4..7]
+    vld1.32         {q9}, [r2]!                @ filter[4..7]
+    vmla.f32        q0, q1, q2                 @ sum[k] += src[k] * filter[k], k = 0..3
+    vmla.f32        q0, q8, q9                 @ sum[k] += src[4+k] * filter[4+k], k = 0..3
+    subs            r3, #8                     @ filter_length -= 8
+    bgt             1b                         @ loop while filter_length > 0 (body runs at least once)
+    vpadd.f32       d0, d0, d1                 @ fold four partial sums into two
+    vpadd.f32       d0, d0, d0                 @ fold two into one, in d0[0]
+    vst1.32         {d0[0]}, [r0]              @ *acc = sum (overwrites, does not add to *acc)
+    bx              lr
+endfunc
+
+function ff_resample_common_apply_filter_x4_s16_neon, export=1 @ r0 = acc (out), r1 = src, r2 = filter, r3 = length
+    vmov.s32        q0, #0                     @ four 32-bit partial sums, all zero
+1:  vld1.16         {d2}, [r1]!                @ src[0..3] (int16)
+    vld1.16         {d4}, [r2]!                @ filter[0..3] (int16)
+    vmlal.s16       q0, d2, d4                 @ widening: sum[k] += src[k] * filter[k], k = 0..3
+    subs            r3, #4                     @ filter_length -= 4
+    bgt             1b                         @ loop while filter_length > 0 (body runs at least once)
+    vpadd.s32       d0, d0, d1                 @ fold four partial sums into two
+    vpadd.s32       d0, d0, d0                 @ fold two into one, in d0[0]
+    vst1.32         {d0[0]}, [r0]              @ *acc = sum (overwrites, does not add to *acc)
+    bx              lr
+endfunc
+
+function ff_resample_common_apply_filter_x8_s16_neon, export=1 @ r0 = acc (out), r1 = src, r2 = filter, r3 = length
+    vmov.s32        q0, #0                     @ four 32-bit partial sums, all zero
+1:  vld1.16         {q1}, [r1]!                @ src[0..7] (int16)
+    vld1.16         {q2}, [r2]!                @ filter[0..7] (int16)
+    vmlal.s16       q0, d2, d4                 @ widening: sum[k] += src[k] * filter[k], k = 0..3
+    vmlal.s16       q0, d3, d5                 @ widening: sum[k] += src[4+k] * filter[4+k], k = 0..3
+    subs            r3, #8                     @ filter_length -= 8
+    bgt             1b                         @ loop while filter_length > 0 (body runs at least once)
+    vpadd.s32       d0, d0, d1                 @ fold four partial sums into two
+    vpadd.s32       d0, d0, d0                 @ fold two into one, in d0[0]
+    vst1.32         {d0[0]}, [r0]              @ *acc = sum (overwrites, does not add to *acc)
+    bx              lr
+endfunc
diff --git a/libswresample/arm/resample_init.c b/libswresample/arm/resample_init.c
new file mode 100644
index 0000000000..003fafd29b
--- /dev/null
+++ b/libswresample/arm/resample_init.c
@@ -0,0 +1,122 @@
+/*
+ * Audio resampling
+ *
+ * Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/cpu.h"
+#include "libavutil/avassert.h"
+
+#include "libavutil/arm/cpu.h"
+#include "libswresample/resample.h"
+
+/*
+ * Declares the two NEON dot-product kernels for TYPE and defines
+ * ff_resample_common_<TYPE>_neon(), which produces n output samples.
+ *
+ *   TYPE    suffix used in the generated symbol names
+ *   DELEM   sample type read from source and written to dest
+ *   FELEM   filter coefficient type stored in c->filter_bank
+ *   FELEM2  accumulator type
+ *   OUT     statement macro OUT(d, v) that stores accumulator v into sample d
+ *
+ * For each output sample, the largest multiple of 8 taps (or, failing that,
+ * of 4 taps) is handed to the NEON kernel and the remaining taps are summed
+ * in C. The phase index and fraction are then advanced, and sample_index
+ * moves on each time index wraps past c->phase_count.
+ *
+ * The generated function returns the final sample_index. c->frac and
+ * c->index are written back only when update_ctx is non-zero.
+ */
+#define DECLARE_RESAMPLE_COMMON_TEMPLATE(TYPE, DELEM, FELEM, FELEM2, OUT) \
+ \
+void ff_resample_common_apply_filter_x4_##TYPE##_neon(FELEM2 *acc, const DELEM *src, \
+                                                      const FELEM *filter, int length); \
+ \
+void ff_resample_common_apply_filter_x8_##TYPE##_neon(FELEM2 *acc, const DELEM *src, \
+                                                      const FELEM *filter, int length); \
+ \
+static int ff_resample_common_##TYPE##_neon(ResampleContext *c, void *dest, const void *source, \
+                                            int n, int update_ctx) \
+{ \
+    DELEM *dst = dest; \
+    const DELEM *src = source; \
+    int dst_index; \
+    int index= c->index; \
+    int frac= c->frac; \
+    int sample_index = 0; \
+    int x4_aligned_filter_length = c->filter_length & ~3; \
+    int x8_aligned_filter_length = c->filter_length & ~7; \
+ \
+    while (index >= c->phase_count) { \
+        sample_index++; \
+        index -= c->phase_count; \
+    } \
+ \
+    for (dst_index = 0; dst_index < n; dst_index++) { \
+        FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index; \
+ \
+        FELEM2 val=0; \
+        int i = 0; \
+        if (x8_aligned_filter_length >= 8) { \
+            ff_resample_common_apply_filter_x8_##TYPE##_neon(&val, &src[sample_index], \
+                                                             filter, x8_aligned_filter_length); \
+            i += x8_aligned_filter_length; \
+ \
+        } else if (x4_aligned_filter_length >= 4) { \
+            ff_resample_common_apply_filter_x4_##TYPE##_neon(&val, &src[sample_index], \
+                                                             filter, x4_aligned_filter_length); \
+            i += x4_aligned_filter_length; \
+        } \
+        for (; i < c->filter_length; i++) { \
+            val += src[sample_index + i] * (FELEM2)filter[i]; \
+        } \
+        OUT(dst[dst_index], val); \
+ \
+        frac += c->dst_incr_mod; \
+        index += c->dst_incr_div; \
+        if (frac >= c->src_incr) { \
+            frac -= c->src_incr; \
+            index++; \
+        } \
+ \
+        while (index >= c->phase_count) { \
+            sample_index++; \
+            index -= c->phase_count; \
+        } \
+    } \
+ \
+    if(update_ctx){ \
+        c->frac= frac; \
+        c->index= index; \
+    } \
+ \
+    return sample_index; \
+} \
+
+/* float: store the accumulator as is. Wrapped in do/while(0) with
+ * parenthesised arguments so OUT() is a single statement wherever it is used. */
+#define OUT(d, v) do { (d) = (v); } while (0)
+DECLARE_RESAMPLE_COMMON_TEMPLATE(float, float, float, float, OUT)
+#undef OUT
+
+/* s16: round the accumulator, shift it right by 15 bits, then clip to int16.
+ * v is modified in place, exactly as before; the two statements are now
+ * grouped so they cannot be split by an unbraced if/else at the use site. */
+#define OUT(d, v) do { (v) = ((v) + (1<<(14)))>>15; (d) = av_clip_int16(v); } while (0)
+DECLARE_RESAMPLE_COMMON_TEMPLATE(s16, int16_t, int16_t, int32_t, OUT)
+#undef OUT
+
+/**
+ * Install the NEON resample routine into c->dsp.resample.
+ *
+ * Does nothing unless the CPU reports NEON. Only the non-linear path
+ * (c->linear == 0) of planar float and planar s16 is replaced; every other
+ * format leaves c->dsp untouched.
+ */
+av_cold void swri_resample_dsp_arm_init(ResampleContext *c)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (!have_neon(cpu_flags))
+        return;
+
+    switch(c->format) {
+    case AV_SAMPLE_FMT_FLTP:
+        if (!c->linear)
+            c->dsp.resample = ff_resample_common_float_neon;
+        break;
+    case AV_SAMPLE_FMT_S16P:
+        if (!c->linear)
+            c->dsp.resample = ff_resample_common_s16_neon;
+        break;
+    default:
+        /* no NEON routine for this format */
+        break;
+    }
+}
diff --git a/libswresample/audioconvert.c b/libswresample/audioconvert.c
new file mode 100644
index 0000000000..58b0bf33e9
--- /dev/null
+++ b/libswresample/audioconvert.c
@@ -0,0 +1,225 @@
+/*
+ * audio conversion
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * audio conversion
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/avassert.h"
+#include "libavutil/libm.h"
+#include "libavutil/samplefmt.h"
+#include "audioconvert.h"
+
+
+#define CONV_FUNC_NAME(dst_fmt, src_fmt) conv_ ## src_fmt ## _to_ ## dst_fmt
+
+/*
+ * Defines one scalar converter conv_<ifmt>_to_<ofmt>().
+ *
+ *   po, end  output cursor and one-past-the-end output pointer
+ *   pi       input cursor
+ *   is, os   input / output stride in bytes
+ *   expr     expression that reads the current sample through pi
+ *
+ * The main loop is unrolled four times; the second loop converts what is left.
+ */
+//FIXME rounding ?
+#define CONV_FUNC(ofmt, otype, ifmt, expr)\
+static void CONV_FUNC_NAME(ofmt, ifmt)(uint8_t *po, const uint8_t *pi, int is, int os, uint8_t *end)\
+{\
+    uint8_t *end2 = end - 3*os;\
+    while(po < end2){\
+        *(otype*)po = expr; pi += is; po += os;\
+        *(otype*)po = expr; pi += is; po += os;\
+        *(otype*)po = expr; pi += is; po += os;\
+        *(otype*)po = expr; pi += is; po += os;\
+    }\
+    while(po < end){\
+        *(otype*)po = expr; pi += is; po += os;\
+    }\
+}
+
+//FIXME put things below under ifdefs so we do not waste space for cases no codec will need
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 , *(const uint8_t*)pi)
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80U)<<8)
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80U)<<24)
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0f/ (1<<7)))
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, (*(const int16_t*)pi>>8) + 0x80)
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16, *(const int16_t*)pi)
+/* Multiply instead of shifting: left-shifting a negative sample is undefined
+ * behaviour, while int16 * 65536 always fits in int32. */
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16, *(const int16_t*)pi*(1<<16))
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_S16, *(const int16_t*)pi*(1.0f/ (1<<15)))
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16, *(const int16_t*)pi*(1.0 / (1<<15)))
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, (*(const int32_t*)pi>>24) + 0x80)
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32, *(const int32_t*)pi>>16)
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32, *(const int32_t*)pi)
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_S32, *(const int32_t*)pi*(1.0f/ (1U<<31)))
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32, *(const int32_t*)pi*(1.0 / (1U<<31)))
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8( lrintf(*(const float*)pi * (1<<7)) + 0x80))
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16( lrintf(*(const float*)pi * (1<<15))))
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(const float*)pi * (1U<<31))))
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_FLT, *(const float*)pi)
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, *(const float*)pi)
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8( lrint(*(const double*)pi * (1<<7)) + 0x80))
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, av_clip_int16( lrint(*(const double*)pi * (1<<15))))
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(const double*)pi * (1U<<31))))
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float , AV_SAMPLE_FMT_DBL, *(const double*)pi)
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, *(const double*)pi)
+
+#define FMT_PAIR_FUNC(out, in) [(out) + AV_SAMPLE_FMT_NB*(in)] = CONV_FUNC_NAME(out, in)
+
+/* Scalar converter lookup, indexed by out_fmt + AV_SAMPLE_FMT_NB * in_fmt.
+ * Entries not listed here stay NULL. */
+static conv_func_type * const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAMPLE_FMT_NB] = {
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8 , AV_SAMPLE_FMT_U8 ),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_U8 ),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_U8 ),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_U8 ),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_U8 ),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8 , AV_SAMPLE_FMT_S16),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S16),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8 , AV_SAMPLE_FMT_S32),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S32),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8 , AV_SAMPLE_FMT_FLT),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLT),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_FLT),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8 , AV_SAMPLE_FMT_DBL),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_DBL),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_DBL),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_DBL),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL),
+};
+
+/* Same-format bulk copies; len is in samples, the suffix is bytes per sample. */
+static void cpy1(uint8_t **dst, const uint8_t **src, int len){
+    memcpy(*dst, *src, len);
+}
+static void cpy2(uint8_t **dst, const uint8_t **src, int len){
+    memcpy(*dst, *src, 2*len);
+}
+static void cpy4(uint8_t **dst, const uint8_t **src, int len){
+    memcpy(*dst, *src, 4*len);
+}
+static void cpy8(uint8_t **dst, const uint8_t **src, int len){
+    memcpy(*dst, *src, 8*len);
+}
+
+/**
+ * Allocate a converter for the given format pair.
+ *
+ * The scalar routine is looked up from the packed variants of both formats.
+ * With a single channel both formats are switched to their planar variants
+ * before the copy / architecture-specific fast paths are selected.
+ * The flags argument is not referenced in this function.
+ *
+ * @return the new context, or NULL if the format pair has no scalar
+ *         converter or the allocation fails
+ */
+AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt,
+                                       enum AVSampleFormat in_fmt,
+                                       int channels, const int *ch_map,
+                                       int flags)
+{
+    AudioConvert *ctx;
+    conv_func_type *f = fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt) + AV_SAMPLE_FMT_NB*av_get_packed_sample_fmt(in_fmt)];
+
+    if (!f)
+        return NULL;
+    ctx = av_mallocz(sizeof(*ctx));
+    if (!ctx)
+        return NULL;
+
+    if(channels == 1){
+        in_fmt = av_get_planar_sample_fmt( in_fmt);
+        out_fmt = av_get_planar_sample_fmt(out_fmt);
+    }
+
+    ctx->channels = channels;
+    ctx->conv_f = f;
+    ctx->ch_map = ch_map;
+    /* unsigned 8-bit silence is 0x80; every other format keeps the zeroes
+     * left by av_mallocz() */
+    if (in_fmt == AV_SAMPLE_FMT_U8 || in_fmt == AV_SAMPLE_FMT_U8P)
+        memset(ctx->silence, 0x80, sizeof(ctx->silence));
+
+    if(out_fmt == in_fmt && !ch_map) {
+        switch(av_get_bytes_per_sample(in_fmt)){
+            case 1:ctx->simd_f = cpy1; break;
+            case 2:ctx->simd_f = cpy2; break;
+            case 4:ctx->simd_f = cpy4; break;
+            case 8:ctx->simd_f = cpy8; break;
+        }
+    }
+
+    if(HAVE_YASM && HAVE_MMX) swri_audio_convert_init_x86(ctx, out_fmt, in_fmt, channels);
+    if(ARCH_ARM) swri_audio_convert_init_arm(ctx, out_fmt, in_fmt, channels);
+    if(ARCH_AARCH64) swri_audio_convert_init_aarch64(ctx, out_fmt, in_fmt, channels);
+
+    return ctx;
+}
+
+/** Free the context and set *ctx to NULL (via av_freep). */
+void swri_audio_convert_free(AudioConvert **ctx)
+{
+    av_freep(ctx);
+}
+
+/**
+ * Convert len samples from in to out.
+ *
+ * When a fast routine is installed, no channel map is set and no plane
+ * pointer has bits set in the corresponding alignment mask, the first
+ * (len & ~15) samples go through ctx->simd_f. Whatever is left is converted
+ * per channel with the scalar ctx->conv_f. Output channels whose buffer is
+ * NULL are skipped; a negative ch_map entry reads ctx->silence with stride 0.
+ *
+ * @return 0
+ */
+int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len)
+{
+    int ch;
+    int off=0;
+    const int os= (out->planar ? 1 :out->ch_count) *out->bps;
+    unsigned misaligned = 0;
+
+    av_assert0(ctx->channels == out->ch_count);
+
+    /* OR all plane addresses together so one mask test covers every plane */
+    if (ctx->in_simd_align_mask) {
+        int planes = in->planar ? in->ch_count : 1;
+        unsigned m = 0;
+        for (ch = 0; ch < planes; ch++)
+            m |= (intptr_t)in->ch[ch];
+        misaligned |= m & ctx->in_simd_align_mask;
+    }
+    if (ctx->out_simd_align_mask) {
+        int planes = out->planar ? out->ch_count : 1;
+        unsigned m = 0;
+        for (ch = 0; ch < planes; ch++)
+            m |= (intptr_t)out->ch[ch];
+        misaligned |= m & ctx->out_simd_align_mask;
+    }
+
+    //FIXME optimize common cases
+
+    if(ctx->simd_f && !ctx->ch_map && !misaligned){
+        off = len&~15;
+        av_assert1(off>=0);
+        av_assert1(off<=len);
+        av_assert2(ctx->channels == SWR_CH_MAX || !in->ch[ctx->channels]);
+        if(off>0){
+            if(out->planar == in->planar){
+                int planes = out->planar ? out->ch_count : 1;
+                for(ch=0; ch<planes; ch++){
+                    ctx->simd_f(out->ch+ch, (const uint8_t **)in->ch+ch, off * (out->planar ? 1 :out->ch_count));
+                }
+            }else{
+                ctx->simd_f(out->ch, (const uint8_t **)in->ch, off);
+            }
+        }
+        if(off == len)
+            return 0;
+    }
+
+    /* scalar path: everything from sample off onwards */
+    for(ch=0; ch<ctx->channels; ch++){
+        const int ich= ctx->ch_map ? ctx->ch_map[ch] : ch;
+        const int is= ich < 0 ? 0 : (in->planar ? 1 : in->ch_count) * in->bps;
+        const uint8_t *pi= ich < 0 ? ctx->silence : in->ch[ich];
+        uint8_t *po= out->ch[ch];
+        uint8_t *end= po + os*len;
+        if(!po)
+            continue;
+        ctx->conv_f(po+off*os, pi+off*is, is, os, end);
+    }
+    return 0;
+}
diff --git a/libswresample/audioconvert.h b/libswresample/audioconvert.h
new file mode 100644
index 0000000000..1ca30c2a65
--- /dev/null
+++ b/libswresample/audioconvert.h
@@ -0,0 +1,78 @@
+/*
+ * audio conversion
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2008 Peter Ross
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef SWRESAMPLE_AUDIOCONVERT_H
+#define SWRESAMPLE_AUDIOCONVERT_H
+
+/**
+ * @file
+ * Audio format conversion routines
+ */
+
+
+#include "swresample_internal.h"
+#include "libavutil/cpu.h"
+
+
+/** Scalar converter: writes samples from po up to end, advancing po by os and pi by is bytes per sample. */
+typedef void (conv_func_type)(uint8_t *po, const uint8_t *pi, int is, int os, uint8_t *end);
+/** Bulk converter: takes arrays of plane pointers and a sample count. */
+typedef void (simd_func_type)(uint8_t **dst, const uint8_t **src, int len);
+
+typedef struct AudioConvert {
+    int channels;              ///< channel count given at allocation
+    int  in_simd_align_mask;   ///< address bits that must be clear in every input plane for simd_f to be used
+    int out_simd_align_mask;   ///< address bits that must be clear in every output plane for simd_f to be used
+    conv_func_type *conv_f;    ///< scalar converter, always set
+    simd_func_type *simd_f;    ///< optional fast converter, NULL if none
+    const int *ch_map;         ///< source channel per output channel, NULL for identity
+    uint8_t silence[8]; ///< silence input sample
+}AudioConvert;
+
+/**
+ * Create an audio sample format converter context
+ * @param out_fmt Output sample format
+ * @param in_fmt Input sample format
+ * @param channels Number of channels
+ * @param ch_map list of the channel ids to pick from the source stream, NULL
+ * if all channels must be selected
+ * @param flags See AV_CPU_FLAG_xx
+ * @return NULL on error
+ */
+AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt,
+                                       enum AVSampleFormat in_fmt,
+                                       int channels, const int *ch_map,
+                                       int flags);
+
+/**
+ * Free the audio sample format converter context
+ * and set the pointer to NULL.
+ */
+void swri_audio_convert_free(AudioConvert **ctx);
+
+/**
+ * Convert between audio sample formats
+ * @param[out] out array of output buffers for each channel. Set a buffer to NULL to skip processing of that channel.
+ * @param[in] in array of input buffers for each channel
+ * @param len length of the audio frame (measured in samples)
+ */
+int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len);
+
+#endif /* SWRESAMPLE_AUDIOCONVERT_H */
diff --git a/libswresample/dither.c b/libswresample/dither.c
new file mode 100644
index 0000000000..08c793d4cf
--- /dev/null
+++ b/libswresample/dither.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2012-2013 Michael Niedermayer (michaelni@gmx.at)
+ *
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "swresample_internal.h"
+
+#include "noise_shaping_data.c"
+
+/**
+ * Fill dst with len dither noise samples in format noise_fmt.
+ *
+ * A linear congruential generator seeded with seed produces len + TMP_EXTRA
+ * values: one draw minus 0.5 for SWR_DITHER_RECTANGULAR, the difference of
+ * two draws for every other method. SWR_DITHER_TRIANGULAR_HIGHPASS then
+ * combines three neighbouring values per output sample, which is what the
+ * TMP_EXTRA look-ahead is for. Each value is scaled by s->dither.noise_scale.
+ *
+ * @return 0 on success, AVERROR(ENOMEM) if the scratch buffer cannot be allocated
+ */
+int swri_get_dither(SwrContext *s, void *dst, int len, unsigned seed, enum AVSampleFormat noise_fmt) {
+    double scale = s->dither.noise_scale;
+#define TMP_EXTRA 2
+    double *tmp = av_malloc_array(len + TMP_EXTRA, sizeof(double));
+    int i;
+
+    if (!tmp)
+        return AVERROR(ENOMEM);
+
+    for(i=0; i<len + TMP_EXTRA; i++){
+        double v;
+        seed = seed* 1664525 + 1013904223;
+
+        switch(s->dither.method){
+            case SWR_DITHER_RECTANGULAR: v= ((double)seed) / UINT_MAX - 0.5; break;
+            default:
+                av_assert0(s->dither.method < SWR_DITHER_NB);
+                v = ((double)seed) / UINT_MAX;
+                seed = seed*1664525 + 1013904223;
+                v-= ((double)seed) / UINT_MAX;
+                break;
+        }
+        tmp[i] = v;
+    }
+
+    for(i=0; i<len; i++){
+        double v;
+
+        switch(s->dither.method){
+            default:
+                av_assert0(s->dither.method < SWR_DITHER_NB);
+                v = tmp[i];
+                break;
+            case SWR_DITHER_TRIANGULAR_HIGHPASS :
+                v = (- tmp[i] + 2*tmp[i+1] - tmp[i+2]) / sqrt(6);
+                break;
+        }
+
+        v*= scale;
+
+        switch(noise_fmt){
+            case AV_SAMPLE_FMT_S16P: ((int16_t*)dst)[i] = v; break;
+            case AV_SAMPLE_FMT_S32P: ((int32_t*)dst)[i] = v; break;
+            case AV_SAMPLE_FMT_FLTP: ((float *)dst)[i] = v; break;
+            case AV_SAMPLE_FMT_DBLP: ((double *)dst)[i] = v; break;
+            default: av_assert0(0);
+        }
+    }
+
+    av_free(tmp);
+    return 0;
+}
+
+/**
+ * Set up the dither / noise-shaping state in s->dither for the given
+ * format pair.
+ *
+ * Computes the noise scale from the (packed) input and output formats, looks
+ * for a noise-shaping filter whose rate is within 5% of s->out_sample_rate
+ * and whose name matches the requested method, and copies its coefficients.
+ * If a noise-shaping method was requested but no filter matches, a warning is
+ * logged and the method falls back to SWR_DITHER_TRIANGULAR_HIGHPASS.
+ *
+ * @return 0 on success, AVERROR(EINVAL) if the method lies in the range
+ *         (SWR_DITHER_TRIANGULAR_HIGHPASS, SWR_DITHER_NS]
+ */
+av_cold int swri_dither_init(SwrContext *s, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt)
+{
+    int i;
+    double scale = 0;
+
+    if (s->dither.method > SWR_DITHER_TRIANGULAR_HIGHPASS && s->dither.method <= SWR_DITHER_NS)
+        return AVERROR(EINVAL);
+
+    out_fmt = av_get_packed_sample_fmt(out_fmt);
+    in_fmt = av_get_packed_sample_fmt( in_fmt);
+
+    if(in_fmt == AV_SAMPLE_FMT_FLT || in_fmt == AV_SAMPLE_FMT_DBL){
+        if(out_fmt == AV_SAMPLE_FMT_S32) scale = 1.0/(1LL<<31);
+        if(out_fmt == AV_SAMPLE_FMT_S16) scale = 1.0/(1LL<<15);
+        if(out_fmt == AV_SAMPLE_FMT_U8 ) scale = 1.0/(1LL<< 7);
+    }
+    if(in_fmt == AV_SAMPLE_FMT_S32 && out_fmt == AV_SAMPLE_FMT_S32 && (s->dither.output_sample_bits&31)) scale = 1;
+    if(in_fmt == AV_SAMPLE_FMT_S32 && out_fmt == AV_SAMPLE_FMT_S16) scale = 1<<16;
+    if(in_fmt == AV_SAMPLE_FMT_S32 && out_fmt == AV_SAMPLE_FMT_U8 ) scale = 1<<24;
+    if(in_fmt == AV_SAMPLE_FMT_S16 && out_fmt == AV_SAMPLE_FMT_U8 ) scale = 1<<8;
+
+    scale *= s->dither.scale;
+
+    /* NOTE(review): this int shift is only defined for output_sample_bits in
+     * 2..32; confirm the option range enforces that. */
+    if (out_fmt == AV_SAMPLE_FMT_S32 && s->dither.output_sample_bits)
+        scale *= 1<<(32-s->dither.output_sample_bits);
+
+    s->dither.ns_pos = 0;
+    s->dither.noise_scale= scale;
+    s->dither.ns_scale = scale;
+    s->dither.ns_scale_1 = scale ? 1/scale : 0;
+    memset(s->dither.ns_errors, 0, sizeof(s->dither.ns_errors));
+    for (i=0; filters[i].coefs; i++) {
+        const filter_t *f = &filters[i];
+        if (llabs(s->out_sample_rate - f->rate)*20 <= f->rate && f->name == s->dither.method) {
+            int j;
+            s->dither.ns_taps = f->len;
+            for (j=0; j<f->len; j++)
+                s->dither.ns_coeffs[j] = f->coefs[j];
+            /* 2^(bits per output sample) is computed with ldexp(): the former
+             * int shift 1<<(8*bytes) is undefined for 4- and 8-byte formats.
+             * The value is unchanged for 1- and 2-byte formats. */
+            s->dither.ns_scale_1 *= 1 - exp(f->gain_cB * M_LN10 * 0.005) * 2 / ldexp(1.0, 8*av_get_bytes_per_sample(out_fmt));
+            break;
+        }
+    }
+    if (!filters[i].coefs && s->dither.method > SWR_DITHER_NS) {
+        av_log(s, AV_LOG_WARNING, "Requested noise shaping dither not available at this sampling rate, using triangular hp dither\n");
+        s->dither.method = SWR_DITHER_TRIANGULAR_HIGHPASS;
+    }
+
+    av_assert0(!s->preout.count);
+    s->dither.noise = s->preout;
+    s->dither.temp = s->preout;
+    /* noise shaping takes its noise as planar float with unit scale */
+    if (s->dither.method > SWR_DITHER_NS) {
+        s->dither.noise.bps = 4;
+        s->dither.noise.fmt = AV_SAMPLE_FMT_FLTP;
+        s->dither.noise_scale = 1;
+    }
+
+    return 0;
+}
+
+#define TEMPLATE_DITHER_S16
+#include "dither_template.c"
+#undef TEMPLATE_DITHER_S16
+
+#define TEMPLATE_DITHER_S32
+#include "dither_template.c"
+#undef TEMPLATE_DITHER_S32
+
+#define TEMPLATE_DITHER_FLT
+#include "dither_template.c"
+#undef TEMPLATE_DITHER_FLT
+
+#define TEMPLATE_DITHER_DBL
+#include "dither_template.c"
+#undef TEMPLATE_DITHER_DBL
diff --git a/libswresample/dither_template.c b/libswresample/dither_template.c
new file mode 100644
index 0000000000..1f535de3dc
--- /dev/null
+++ b/libswresample/dither_template.c
@@ -0,0 +1,84 @@
+/*
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Included once per sample type. The including file selects the type with a
+ * TEMPLATE_DITHER_* define; CLIP is a no-op for the floating-point types. */
+#if defined(TEMPLATE_DITHER_DBL)
+#    define RENAME(N) N ## _double
+#    define DELEM double
+#    define CLIP(v) while(0)
+
+#elif defined(TEMPLATE_DITHER_FLT)
+#    define RENAME(N) N ## _float
+#    define DELEM float
+#    define CLIP(v) while(0)
+
+#elif defined(TEMPLATE_DITHER_S32)
+#    define RENAME(N) N ## _int32
+#    define DELEM int32_t
+#    define CLIP(v) v = FFMAX(FFMIN(v, INT32_MAX), INT32_MIN)
+
+#elif defined(TEMPLATE_DITHER_S16)
+#    define RENAME(N) N ## _int16
+#    define DELEM int16_t
+#    define CLIP(v) v = FFMAX(FFMIN(v, INT16_MAX), INT16_MIN)
+
+#else
+ERROR
+#endif
+
+/**
+ * Noise-shaped requantisation of count samples per channel.
+ *
+ * For every sample the source value is scaled by ns_scale_1, the filtered
+ * history of earlier quantisation errors is subtracted, noise is added and
+ * the sum is rounded. The new error is stored twice (at pos and pos + taps)
+ * so the filter can always read taps consecutive entries. The result is
+ * scaled back by ns_scale, clipped for integer types and stored.
+ *
+ * The tap loop is unrolled by four, which is why the asserts rule out tap
+ * counts of the form 4k+2 and require a zero coefficient after the last tap
+ * for 4k+3. s->dither.ns_pos is updated from the last channel processed.
+ */
+void RENAME(swri_noise_shaping)(SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count){
+    int pos = s->dither.ns_pos;
+    int i, j, ch;
+    int taps = s->dither.ns_taps;
+    float S = s->dither.ns_scale;
+    float S_1 = s->dither.ns_scale_1;
+
+    av_assert2((taps&3) != 2);
+    av_assert2((taps&3) != 3 || s->dither.ns_coeffs[taps] == 0);
+
+    for (ch=0; ch<srcs->ch_count; ch++) {
+        const float *noise = ((const float *)noises->ch[ch]) + s->dither.noise_pos;
+        const DELEM *src = (const DELEM*)srcs->ch[ch];
+        DELEM *dst = (DELEM*)dsts->ch[ch];
+        float *ns_errors = s->dither.ns_errors[ch];
+        const float *ns_coeffs = s->dither.ns_coeffs;
+        pos = s->dither.ns_pos;
+        for (i=0; i<count; i++) {
+            double d1, d = src[i]*S_1;
+            for(j=0; j<taps-2; j+=4) {
+                d -= ns_coeffs[j ] * ns_errors[pos + j ]
+                    +ns_coeffs[j + 1] * ns_errors[pos + j + 1]
+                    +ns_coeffs[j + 2] * ns_errors[pos + j + 2]
+                    +ns_coeffs[j + 3] * ns_errors[pos + j + 3];
+            }
+            if(j < taps)
+                d -= ns_coeffs[j] * ns_errors[pos + j];
+            pos = pos ? pos - 1 : taps - 1;
+            d1 = rint(d + noise[i]);
+            ns_errors[pos + taps] = ns_errors[pos] = d1 - d;
+            d1 *= S;
+            CLIP(d1);
+            dst[i] = d1;
+        }
+    }
+
+    s->dither.ns_pos = pos;
+}
+
+#undef RENAME
+#undef DELEM
+#undef CLIP
diff --git a/libswresample/libswresample.v b/libswresample/libswresample.v
new file mode 100644
index 0000000000..3b3508d707
--- /dev/null
+++ b/libswresample/libswresample.v
@@ -0,0 +1,7 @@
+LIBSWRESAMPLE_MAJOR {
+    global:
+        swr_*;
+        swresample_*;
+    local:
+        *;
+};
diff --git a/libswresample/log2_tab.c b/libswresample/log2_tab.c
new file mode 100644
index 0000000000..47a1df03b7
--- /dev/null
+++ b/libswresample/log2_tab.c
@@ -0,0 +1 @@
+#include "libavutil/log2_tab.c"
diff --git a/libswresample/noise_shaping_data.c b/libswresample/noise_shaping_data.c
new file mode 100644
index 0000000000..77e0f2eafc
--- /dev/null
+++ b/libswresample/noise_shaping_data.c
@@ -0,0 +1,224 @@
+/* Effect: dither/noise-shape Copyright (c) 2008-9 robs@users.sourceforge.net
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ *
your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser + * General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +typedef struct { + int rate; + enum {fir, iir} type; + size_t len; + int gain_cB; /* Chosen so clips are few if any, but not guaranteed none. */ + double const * coefs; + enum SwrDitherType name; +} filter_t; + +static double const lip44[] = {2.033, -2.165, 1.959, -1.590, .6149}; +static double const fwe44[] = { + 2.412, -3.370, 3.937, -4.174, 3.353, -2.205, 1.281, -.569, .0847}; +static double const mew44[] = { + 1.662, -1.263, .4827, -.2913, .1268, -.1124, .03252, -.01265, -.03524}; +static double const iew44[] = { + 2.847, -4.685, 6.214, -7.184, 6.639, -5.032, 3.263, -1.632, .4191}; +static double const ges44[] = { + 2.2061, -.4706, -.2534, -.6214, 1.0587, .0676, -.6054, -.2738}; +static double const ges48[] = { + 2.2374, -.7339, -.1251, -.6033, .903, .0116, -.5853, -.2571}; + +static double const shi48[] = { + 2.8720729351043701172, -5.0413231849670410156, 6.2442994117736816406, + -5.8483986854553222656, 3.7067542076110839844, -1.0495119094848632812, + -1.1830236911773681641, 2.1126792430877685547, -1.9094531536102294922, + 0.99913084506988525391, -0.17090806365013122559, -0.32615602016448974609, + 0.39127644896507263184, -0.26876461505889892578, 0.097676105797290802002, + -0.023473845794796943665, +}; +static double const shi44[] = { + 2.6773197650909423828, -4.8308925628662109375, 6.570110321044921875, + -7.4572014808654785156, 6.7263274192810058594, -4.8481650352478027344, + 2.0412089824676513672, 0.7006359100341796875, 
-2.9537565708160400391, + 4.0800385475158691406, -4.1845216751098632812, 3.3311812877655029297, + -2.1179926395416259766, 0.879302978515625, -0.031759146600961685181, + -0.42382788658142089844, 0.47882103919982910156, -0.35490813851356506348, + 0.17496839165687561035, -0.060908168554306030273, +}; +static double const shi38[] = { + 1.6335992813110351562, -2.2615492343902587891, 2.4077029228210449219, + -2.6341717243194580078, 2.1440362930297851562, -1.8153258562088012695, + 1.0816224813461303711, -0.70302653312683105469, 0.15991993248462677002, + 0.041549518704414367676, -0.29416576027870178223, 0.2518316805362701416, + -0.27766478061676025391, 0.15785403549671173096, -0.10165894031524658203, + 0.016833892092108726501, +}; +static double const shi32[] = +{ /* dmaker 32000: bestmax=4.99659 (inverted) */ +0.82118552923202515, +-1.0063692331314087, +0.62341964244842529, +-1.0447187423706055, +0.64532512426376343, +-0.87615132331848145, +0.52219754457473755, +-0.67434263229370117, +0.44954317808151245, +-0.52557498216629028, +0.34567299485206604, +-0.39618203043937683, +0.26791760325431824, +-0.28936097025871277, +0.1883765310049057, +-0.19097308814525604, +0.10431359708309174, +-0.10633844882249832, +0.046832218766212463, +-0.039653312414884567, +}; +static double const shi22[] = +{ /* dmaker 22050: bestmax=5.77762 (inverted) */ +0.056581053882837296, +-0.56956905126571655, +-0.40727734565734863, +-0.33870288729667664, +-0.29810553789138794, +-0.19039161503314972, +-0.16510021686553955, +-0.13468159735202789, +-0.096633769571781158, +-0.081049129366874695, +-0.064953058958053589, +-0.054459091275930405, +-0.043378707021474838, +-0.03660014271736145, +-0.026256965473294258, +-0.018786206841468811, +-0.013387725688517094, +-0.0090983230620622635, +-0.0026585909072309732, +-0.00042083300650119781, +}; +static double const shi16[] = +{ /* dmaker 16000: bestmax=5.97128 (inverted) */ +-0.37251132726669312, +-0.81423574686050415, +-0.55010956525802612, +-0.47405767440795898, 
+-0.32624706625938416, +-0.3161766529083252, +-0.2286367267370224, +-0.22916607558727264, +-0.19565616548061371, +-0.18160104751586914, +-0.15423151850700378, +-0.14104481041431427, +-0.11844276636838913, +-0.097583092749118805, +-0.076493598520755768, +-0.068106919527053833, +-0.041881654411554337, +-0.036922425031661987, +-0.019364040344953537, +-0.014994367957115173, +}; +static double const shi11[] = +{ /* dmaker 11025: bestmax=5.9406 (inverted) */ +-0.9264228343963623, +-0.98695987462997437, +-0.631156325340271, +-0.51966935396194458, +-0.39738872647285461, +-0.35679301619529724, +-0.29720726609230042, +-0.26310476660728455, +-0.21719355881214142, +-0.18561814725399017, +-0.15404847264289856, +-0.12687471508979797, +-0.10339745879173279, +-0.083688631653785706, +-0.05875682458281517, +-0.046893671154975891, +-0.027950936928391457, +-0.020740609616041183, +-0.009366452693939209, +-0.0060260160826146603, +}; +static double const shi08[] = +{ /* dmaker 8000: bestmax=5.56234 (inverted) */ +-1.202863335609436, +-0.94103097915649414, +-0.67878556251525879, +-0.57650017738342285, +-0.50004476308822632, +-0.44349345564842224, +-0.37833768129348755, +-0.34028723835945129, +-0.29413089156150818, +-0.24994957447052002, +-0.21715600788593292, +-0.18792112171649933, +-0.15268312394618988, +-0.12135542929172516, +-0.099610626697540283, +-0.075273610651493073, +-0.048787496984004974, +-0.042586319148540497, +-0.028991291299462318, +-0.011869125068187714, +}; +static double const shl48[] = { + 2.3925774097442626953, -3.4350297451019287109, 3.1853709220886230469, + -1.8117271661758422852, -0.20124770700931549072, 1.4759907722473144531, + -1.7210904359817504883, 0.97746700048446655273, -0.13790138065814971924, + -0.38185903429985046387, 0.27421241998672485352, 0.066584214568138122559, + -0.35223302245140075684, 0.37672343850135803223, -0.23964276909828186035, + 0.068674825131893157959, +}; +static double const shl44[] = { + 2.0833916664123535156, -3.0418450832366943359, 
3.2047898769378662109, + -2.7571926116943359375, 1.4978630542755126953, -0.3427594602108001709, + -0.71733748912811279297, 1.0737057924270629883, -1.0225815773010253906, + 0.56649994850158691406, -0.20968692004680633545, -0.065378531813621520996, + 0.10322438180446624756, -0.067442022264003753662, -0.00495197344571352005, + 0, +}; +static double const shh44[] = { + 3.0259189605712890625, -6.0268716812133789062, 9.195003509521484375, + -11.824929237365722656, 12.767142295837402344, -11.917946815490722656, + 9.1739168167114257812, -5.3712320327758789062, 1.1393624544143676758, + 2.4484779834747314453, -4.9719839096069335938, 6.0392003059387207031, + -5.9359521865844726562, 4.903278350830078125, -3.5527443885803222656, + 2.1909697055816650391, -1.1672389507293701172, 0.4903914332389831543, + -0.16519790887832641602, 0.023217858746647834778, +}; + +static const filter_t filters[] = { + {44100, fir, 5, 210, lip44, SWR_DITHER_NS_LIPSHITZ}, + {46000, fir, 9, 276, fwe44, SWR_DITHER_NS_F_WEIGHTED}, + {46000, fir, 9, 160, mew44, SWR_DITHER_NS_MODIFIED_E_WEIGHTED}, + {46000, fir, 9, 321, iew44, SWR_DITHER_NS_IMPROVED_E_WEIGHTED}, +// {48000, iir, 4, 220, ges48, SWR_DITHER_NS_GESEMANN}, +// {44100, iir, 4, 230, ges44, SWR_DITHER_NS_GESEMANN}, + {48000, fir, 16, 301, shi48, SWR_DITHER_NS_SHIBATA}, + {44100, fir, 20, 333, shi44, SWR_DITHER_NS_SHIBATA}, + {37800, fir, 16, 240, shi38, SWR_DITHER_NS_SHIBATA}, + {32000, fir, 20, 240/*TBD*/, shi32, SWR_DITHER_NS_SHIBATA}, + {22050, fir, 20, 240/*TBD*/, shi22, SWR_DITHER_NS_SHIBATA}, + {16000, fir, 20, 240/*TBD*/, shi16, SWR_DITHER_NS_SHIBATA}, + {11025, fir, 20, 240/*TBD*/, shi11, SWR_DITHER_NS_SHIBATA}, + { 8000, fir, 20, 240/*TBD*/, shi08, SWR_DITHER_NS_SHIBATA}, + {48000, fir, 16, 250, shl48, SWR_DITHER_NS_LOW_SHIBATA}, + {44100, fir, 15, 250, shl44, SWR_DITHER_NS_LOW_SHIBATA}, + {44100, fir, 20, 383, shh44, SWR_DITHER_NS_HIGH_SHIBATA}, + { 0, fir, 0, 0, NULL, SWR_DITHER_NONE}, +}; diff --git a/libswresample/options.c 
b/libswresample/options.c new file mode 100644 index 0000000000..816ce47750 --- /dev/null +++ b/libswresample/options.c @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2011-2013 Michael Niedermayer (michaelni@gmx.at) + * + * This file is part of libswresample + * + * libswresample is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libswresample is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libswresample; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" +#include "swresample_internal.h" + +#include <float.h> + +#define C30DB M_SQRT2 +#define C15DB 1.189207115 +#define C__0DB 1.0 +#define C_15DB 0.840896415 +#define C_30DB M_SQRT1_2 +#define C_45DB 0.594603558 +#define C_60DB 0.5 + +#define OFFSET(x) offsetof(SwrContext,x) +#define PARAM AV_OPT_FLAG_AUDIO_PARAM + +static const AVOption options[]={ +{"ich" , "set input channel count" , OFFSET(user_in_ch_count ), AV_OPT_TYPE_INT, {.i64=0 }, 0 , SWR_CH_MAX, PARAM}, +{"in_channel_count" , "set input channel count" , OFFSET(user_in_ch_count ), AV_OPT_TYPE_INT, {.i64=0 }, 0 , SWR_CH_MAX, PARAM}, +{"och" , "set output channel count" , OFFSET(user_out_ch_count ), AV_OPT_TYPE_INT, {.i64=0 }, 0 , SWR_CH_MAX, PARAM}, +{"out_channel_count" , "set output channel count" , OFFSET(user_out_ch_count ), AV_OPT_TYPE_INT, {.i64=0 }, 0 , SWR_CH_MAX, PARAM}, +{"uch" , "set used channel count" , OFFSET(user_used_ch_count), AV_OPT_TYPE_INT, {.i64=0 }, 0 , 
SWR_CH_MAX, PARAM}, +{"used_channel_count" , "set used channel count" , OFFSET(user_used_ch_count), AV_OPT_TYPE_INT, {.i64=0 }, 0 , SWR_CH_MAX, PARAM}, +{"isr" , "set input sample rate" , OFFSET( in_sample_rate), AV_OPT_TYPE_INT , {.i64=0 }, 0 , INT_MAX , PARAM}, +{"in_sample_rate" , "set input sample rate" , OFFSET( in_sample_rate), AV_OPT_TYPE_INT , {.i64=0 }, 0 , INT_MAX , PARAM}, +{"osr" , "set output sample rate" , OFFSET(out_sample_rate), AV_OPT_TYPE_INT , {.i64=0 }, 0 , INT_MAX , PARAM}, +{"out_sample_rate" , "set output sample rate" , OFFSET(out_sample_rate), AV_OPT_TYPE_INT , {.i64=0 }, 0 , INT_MAX , PARAM}, +{"isf" , "set input sample format" , OFFSET( in_sample_fmt ), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1 , INT_MAX, PARAM}, +{"in_sample_fmt" , "set input sample format" , OFFSET( in_sample_fmt ), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1 , INT_MAX, PARAM}, +{"osf" , "set output sample format" , OFFSET(out_sample_fmt ), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1 , INT_MAX, PARAM}, +{"out_sample_fmt" , "set output sample format" , OFFSET(out_sample_fmt ), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1 , INT_MAX, PARAM}, +{"tsf" , "set internal sample format" , OFFSET(user_int_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1 , INT_MAX, PARAM}, +{"internal_sample_fmt" , "set internal sample format" , OFFSET(user_int_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1 , INT_MAX, PARAM}, +{"icl" , "set input channel layout" , OFFSET(user_in_ch_layout ), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0 }, INT64_MIN, INT64_MAX , PARAM, "channel_layout"}, +{"in_channel_layout" , "set input channel layout" , OFFSET(user_in_ch_layout ), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0 }, INT64_MIN, INT64_MAX , PARAM, "channel_layout"}, +{"ocl" , "set output channel layout" , OFFSET(user_out_ch_layout), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0 }, INT64_MIN, INT64_MAX , PARAM, "channel_layout"}, 
+{"out_channel_layout" , "set output channel layout" , OFFSET(user_out_ch_layout), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0 }, INT64_MIN, INT64_MAX , PARAM, "channel_layout"}, +{"clev" , "set center mix level" , OFFSET(clev ), AV_OPT_TYPE_FLOAT, {.dbl=C_30DB }, -32 , 32 , PARAM}, +{"center_mix_level" , "set center mix level" , OFFSET(clev ), AV_OPT_TYPE_FLOAT, {.dbl=C_30DB }, -32 , 32 , PARAM}, +{"slev" , "set surround mix level" , OFFSET(slev ), AV_OPT_TYPE_FLOAT, {.dbl=C_30DB }, -32 , 32 , PARAM}, +{"surround_mix_level" , "set surround mix Level" , OFFSET(slev ), AV_OPT_TYPE_FLOAT, {.dbl=C_30DB }, -32 , 32 , PARAM}, +{"lfe_mix_level" , "set LFE mix level" , OFFSET(lfe_mix_level ), AV_OPT_TYPE_FLOAT, {.dbl=0 }, -32 , 32 , PARAM}, +{"rmvol" , "set rematrix volume" , OFFSET(rematrix_volume), AV_OPT_TYPE_FLOAT, {.dbl=1.0 }, -1000 , 1000 , PARAM}, +{"rematrix_volume" , "set rematrix volume" , OFFSET(rematrix_volume), AV_OPT_TYPE_FLOAT, {.dbl=1.0 }, -1000 , 1000 , PARAM}, +{"rematrix_maxval" , "set rematrix maxval" , OFFSET(rematrix_maxval), AV_OPT_TYPE_FLOAT, {.dbl=0.0 }, 0 , 1000 , PARAM}, + +{"flags" , "set flags" , OFFSET(flags ), AV_OPT_TYPE_FLAGS, {.i64=0 }, 0 , UINT_MAX , PARAM, "flags"}, +{"swr_flags" , "set flags" , OFFSET(flags ), AV_OPT_TYPE_FLAGS, {.i64=0 }, 0 , UINT_MAX , PARAM, "flags"}, +{"res" , "force resampling" , 0 , AV_OPT_TYPE_CONST, {.i64=SWR_FLAG_RESAMPLE }, INT_MIN, INT_MAX , PARAM, "flags"}, + +{"dither_scale" , "set dither scale" , OFFSET(dither.scale ), AV_OPT_TYPE_FLOAT, {.dbl=1 }, 0 , INT_MAX , PARAM}, + +{"dither_method" , "set dither method" , OFFSET(dither.method ), AV_OPT_TYPE_INT , {.i64=0 }, 0 , SWR_DITHER_NB-1, PARAM, "dither_method"}, +{"rectangular" , "select rectangular dither" , 0 , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_RECTANGULAR}, INT_MIN, INT_MAX , PARAM, "dither_method"}, +{"triangular" , "select triangular dither" , 0 , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_TRIANGULAR }, INT_MIN, INT_MAX , PARAM, "dither_method"}, +{"triangular_hp" 
, "select triangular dither with high pass" , 0 , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_TRIANGULAR_HIGHPASS }, INT_MIN, INT_MAX, PARAM, "dither_method"}, +{"lipshitz" , "select Lipshitz noise shaping dither" , 0 , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_LIPSHITZ}, INT_MIN, INT_MAX, PARAM, "dither_method"}, +{"shibata" , "select Shibata noise shaping dither" , 0 , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"}, +{"low_shibata" , "select low Shibata noise shaping dither" , 0 , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_LOW_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"}, +{"high_shibata" , "select high Shibata noise shaping dither" , 0 , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_HIGH_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"}, +{"f_weighted" , "select f-weighted noise shaping dither" , 0 , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_F_WEIGHTED }, INT_MIN, INT_MAX, PARAM, "dither_method"}, +{"modified_e_weighted" , "select modified-e-weighted noise shaping dither" , 0 , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_MODIFIED_E_WEIGHTED }, INT_MIN, INT_MAX, PARAM, "dither_method"}, +{"improved_e_weighted" , "select improved-e-weighted noise shaping dither" , 0 , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_IMPROVED_E_WEIGHTED }, INT_MIN, INT_MAX, PARAM, "dither_method"}, + +{"filter_size" , "set swr resampling filter size", OFFSET(filter_size) , AV_OPT_TYPE_INT , {.i64=32 }, 0 , INT_MAX , PARAM }, +{"phase_shift" , "set swr resampling phase shift", OFFSET(phase_shift) , AV_OPT_TYPE_INT , {.i64=10 }, 0 , 24 , PARAM }, +{"linear_interp" , "enable linear interpolation" , OFFSET(linear_interp) , AV_OPT_TYPE_BOOL , {.i64=0 }, 0 , 1 , PARAM }, +{"exact_rational" , "enable exact rational" , OFFSET(exact_rational) , AV_OPT_TYPE_BOOL , {.i64=0 }, 0 , 1 , PARAM }, +{"cutoff" , "set cutoff frequency ratio" , OFFSET(cutoff) , AV_OPT_TYPE_DOUBLE,{.dbl=0. 
}, 0 , 1 , PARAM }, + +/* duplicate option in order to work with avconv */ +{"resample_cutoff" , "set cutoff frequency ratio" , OFFSET(cutoff) , AV_OPT_TYPE_DOUBLE,{.dbl=0. }, 0 , 1 , PARAM }, + +{"resampler" , "set resampling Engine" , OFFSET(engine) , AV_OPT_TYPE_INT , {.i64=0 }, 0 , SWR_ENGINE_NB-1, PARAM, "resampler"}, +{"swr" , "select SW Resampler" , 0 , AV_OPT_TYPE_CONST, {.i64=SWR_ENGINE_SWR }, INT_MIN, INT_MAX , PARAM, "resampler"}, +{"soxr" , "select SoX Resampler" , 0 , AV_OPT_TYPE_CONST, {.i64=SWR_ENGINE_SOXR }, INT_MIN, INT_MAX , PARAM, "resampler"}, +{"precision" , "set soxr resampling precision (in bits)" + , OFFSET(precision) , AV_OPT_TYPE_DOUBLE,{.dbl=20.0 }, 15.0 , 33.0 , PARAM }, +{"cheby" , "enable soxr Chebyshev passband & higher-precision irrational ratio approximation" + , OFFSET(cheby) , AV_OPT_TYPE_BOOL , {.i64=0 }, 0 , 1 , PARAM }, +{"min_comp" , "set minimum difference between timestamps and audio data (in seconds) below which no timestamp compensation of either kind is applied" + , OFFSET(min_compensation),AV_OPT_TYPE_FLOAT ,{.dbl=FLT_MAX }, 0 , FLT_MAX , PARAM }, +{"min_hard_comp" , "set minimum difference between timestamps and audio data (in seconds) to trigger padding/trimming the data." + , OFFSET(min_hard_compensation),AV_OPT_TYPE_FLOAT ,{.dbl=0.1 }, 0 , INT_MAX , PARAM }, +{"comp_duration" , "set duration (in seconds) over which data is stretched/squeezed to make it match the timestamps." + , OFFSET(soft_compensation_duration),AV_OPT_TYPE_FLOAT ,{.dbl=1 }, 0 , INT_MAX , PARAM }, +{"max_soft_comp" , "set maximum factor by which data is stretched/squeezed to make it match the timestamps." 
+ , OFFSET(max_soft_compensation),AV_OPT_TYPE_FLOAT ,{.dbl=0 }, INT_MIN, INT_MAX , PARAM }, +{"async" , "simplified 1 parameter audio timestamp matching, 0(disabled), 1(filling and trimming), >1(maximum stretch/squeeze in samples per second)" + , OFFSET(async) , AV_OPT_TYPE_FLOAT ,{.dbl=0 }, INT_MIN, INT_MAX , PARAM }, +{"first_pts" , "Assume the first pts should be this value (in samples)." + , OFFSET(firstpts_in_samples), AV_OPT_TYPE_INT64 ,{.i64=AV_NOPTS_VALUE }, INT64_MIN,INT64_MAX, PARAM }, + +{ "matrix_encoding" , "set matrixed stereo encoding" , OFFSET(matrix_encoding), AV_OPT_TYPE_INT ,{.i64 = AV_MATRIX_ENCODING_NONE}, AV_MATRIX_ENCODING_NONE, AV_MATRIX_ENCODING_NB-1, PARAM, "matrix_encoding" }, + { "none", "select none", 0, AV_OPT_TYPE_CONST, { .i64 = AV_MATRIX_ENCODING_NONE }, INT_MIN, INT_MAX, PARAM, "matrix_encoding" }, + { "dolby", "select Dolby", 0, AV_OPT_TYPE_CONST, { .i64 = AV_MATRIX_ENCODING_DOLBY }, INT_MIN, INT_MAX, PARAM, "matrix_encoding" }, + { "dplii", "select Dolby Pro Logic II", 0, AV_OPT_TYPE_CONST, { .i64 = AV_MATRIX_ENCODING_DPLII }, INT_MIN, INT_MAX, PARAM, "matrix_encoding" }, + +{ "filter_type" , "select swr filter type" , OFFSET(filter_type) , AV_OPT_TYPE_INT , { .i64 = SWR_FILTER_TYPE_KAISER }, SWR_FILTER_TYPE_CUBIC, SWR_FILTER_TYPE_KAISER, PARAM, "filter_type" }, + { "cubic" , "select cubic" , 0 , AV_OPT_TYPE_CONST, { .i64 = SWR_FILTER_TYPE_CUBIC }, INT_MIN, INT_MAX, PARAM, "filter_type" }, + { "blackman_nuttall", "select Blackman Nuttall windowed sinc", 0 , AV_OPT_TYPE_CONST, { .i64 = SWR_FILTER_TYPE_BLACKMAN_NUTTALL }, INT_MIN, INT_MAX, PARAM, "filter_type" }, + { "kaiser" , "select Kaiser windowed sinc" , 0 , AV_OPT_TYPE_CONST, { .i64 = SWR_FILTER_TYPE_KAISER }, INT_MIN, INT_MAX, PARAM, "filter_type" }, + +{ "kaiser_beta" , "set swr Kaiser window beta" , OFFSET(kaiser_beta) , AV_OPT_TYPE_DOUBLE , {.dbl=9 }, 2 , 16 , PARAM }, + +{ "output_sample_bits" , "set swr number of output sample bits", OFFSET(dither.output_sample_bits), 
AV_OPT_TYPE_INT , {.i64=0 }, 0 , 64 , PARAM }, +{0} +}; + +static const char* context_to_name(void* ptr) { + return "SWR"; +} + +static const AVClass av_class = { + .class_name = "SWResampler", + .item_name = context_to_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, + .log_level_offset_offset = OFFSET(log_level_offset), + .parent_log_context_offset = OFFSET(log_ctx), + .category = AV_CLASS_CATEGORY_SWRESAMPLER, +}; + +const AVClass *swr_get_class(void) +{ + return &av_class; +} + +av_cold struct SwrContext *swr_alloc(void){ + SwrContext *s= av_mallocz(sizeof(SwrContext)); + if(s){ + s->av_class= &av_class; + av_opt_set_defaults(s); + } + return s; +} diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c new file mode 100644 index 0000000000..ddba0433e8 --- /dev/null +++ b/libswresample/rematrix.c @@ -0,0 +1,542 @@ +/* + * Copyright (C) 2011-2012 Michael Niedermayer (michaelni@gmx.at) + * + * This file is part of libswresample + * + * libswresample is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libswresample is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with libswresample; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "swresample_internal.h" +#include "libavutil/avassert.h" +#include "libavutil/channel_layout.h" + +#define TEMPLATE_REMATRIX_FLT +#include "rematrix_template.c" +#undef TEMPLATE_REMATRIX_FLT + +#define TEMPLATE_REMATRIX_DBL +#include "rematrix_template.c" +#undef TEMPLATE_REMATRIX_DBL + +#define TEMPLATE_REMATRIX_S16 +#include "rematrix_template.c" +#define TEMPLATE_CLIP +#include "rematrix_template.c" +#undef TEMPLATE_CLIP +#undef TEMPLATE_REMATRIX_S16 + +#define TEMPLATE_REMATRIX_S32 +#include "rematrix_template.c" +#undef TEMPLATE_REMATRIX_S32 + +#define FRONT_LEFT 0 +#define FRONT_RIGHT 1 +#define FRONT_CENTER 2 +#define LOW_FREQUENCY 3 +#define BACK_LEFT 4 +#define BACK_RIGHT 5 +#define FRONT_LEFT_OF_CENTER 6 +#define FRONT_RIGHT_OF_CENTER 7 +#define BACK_CENTER 8 +#define SIDE_LEFT 9 +#define SIDE_RIGHT 10 +#define TOP_CENTER 11 +#define TOP_FRONT_LEFT 12 +#define TOP_FRONT_CENTER 13 +#define TOP_FRONT_RIGHT 14 +#define TOP_BACK_LEFT 15 +#define TOP_BACK_CENTER 16 +#define TOP_BACK_RIGHT 17 +#define NUM_NAMED_CHANNELS 18 + +int swr_set_matrix(struct SwrContext *s, const double *matrix, int stride) +{ + int nb_in, nb_out, in, out; + + if (!s || s->in_convert) // s needs to be allocated but not initialized + return AVERROR(EINVAL); + memset(s->matrix, 0, sizeof(s->matrix)); + nb_in = av_get_channel_layout_nb_channels(s->user_in_ch_layout); + nb_out = av_get_channel_layout_nb_channels(s->user_out_ch_layout); + for (out = 0; out < nb_out; out++) { + for (in = 0; in < nb_in; in++) + s->matrix[out][in] = matrix[in]; + matrix += stride; + } + s->rematrix_custom = 1; + return 0; +} + +static int even(int64_t layout){ + if(!layout) return 1; + if(layout&(layout-1)) return 1; + return 0; +} + +static int clean_layout(SwrContext 
*s, int64_t layout){ + if(layout && layout != AV_CH_FRONT_CENTER && !(layout&(layout-1))) { + char buf[128]; + av_get_channel_layout_string(buf, sizeof(buf), -1, layout); + av_log(s, AV_LOG_VERBOSE, "Treating %s as mono\n", buf); + return AV_CH_FRONT_CENTER; + } + + return layout; +} + +static int sane_layout(int64_t layout){ + if(!(layout & AV_CH_LAYOUT_SURROUND)) // at least 1 front speaker + return 0; + if(!even(layout & (AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT))) // no asymetric front + return 0; + if(!even(layout & (AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT))) // no asymetric side + return 0; + if(!even(layout & (AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT))) + return 0; + if(!even(layout & (AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER))) + return 0; + if(av_get_channel_layout_nb_channels(layout) >= SWR_CH_MAX) + return 0; + + return 1; +} + +av_cold static int auto_matrix(SwrContext *s) +{ + int i, j, out_i; + double matrix[NUM_NAMED_CHANNELS][NUM_NAMED_CHANNELS]={{0}}; + int64_t unaccounted, in_ch_layout, out_ch_layout; + double maxcoef=0; + char buf[128]; + const int matrix_encoding = s->matrix_encoding; + float maxval; + + in_ch_layout = clean_layout(s, s->in_ch_layout); + out_ch_layout = clean_layout(s, s->out_ch_layout); + + if( out_ch_layout == AV_CH_LAYOUT_STEREO_DOWNMIX + && (in_ch_layout & AV_CH_LAYOUT_STEREO_DOWNMIX) == 0 + ) + out_ch_layout = AV_CH_LAYOUT_STEREO; + + if( in_ch_layout == AV_CH_LAYOUT_STEREO_DOWNMIX + && (out_ch_layout & AV_CH_LAYOUT_STEREO_DOWNMIX) == 0 + ) + in_ch_layout = AV_CH_LAYOUT_STEREO; + + if(!sane_layout(in_ch_layout)){ + av_get_channel_layout_string(buf, sizeof(buf), -1, s->in_ch_layout); + av_log(s, AV_LOG_ERROR, "Input channel layout '%s' is not supported\n", buf); + return AVERROR(EINVAL); + } + + if(!sane_layout(out_ch_layout)){ + av_get_channel_layout_string(buf, sizeof(buf), -1, s->out_ch_layout); + av_log(s, AV_LOG_ERROR, "Output channel layout '%s' is not supported\n", buf); + return AVERROR(EINVAL); + } + + 
memset(s->matrix, 0, sizeof(s->matrix)); + for(i=0; i<FF_ARRAY_ELEMS(matrix); i++){ + if(in_ch_layout & out_ch_layout & (1ULL<<i)) + matrix[i][i]= 1.0; + } + + unaccounted= in_ch_layout & ~out_ch_layout; + +//FIXME implement dolby surround +//FIXME implement full ac3 + + + if(unaccounted & AV_CH_FRONT_CENTER){ + if((out_ch_layout & AV_CH_LAYOUT_STEREO) == AV_CH_LAYOUT_STEREO){ + if(in_ch_layout & AV_CH_LAYOUT_STEREO) { + matrix[ FRONT_LEFT][FRONT_CENTER]+= s->clev; + matrix[FRONT_RIGHT][FRONT_CENTER]+= s->clev; + } else { + matrix[ FRONT_LEFT][FRONT_CENTER]+= M_SQRT1_2; + matrix[FRONT_RIGHT][FRONT_CENTER]+= M_SQRT1_2; + } + }else + av_assert0(0); + } + if(unaccounted & AV_CH_LAYOUT_STEREO){ + if(out_ch_layout & AV_CH_FRONT_CENTER){ + matrix[FRONT_CENTER][ FRONT_LEFT]+= M_SQRT1_2; + matrix[FRONT_CENTER][FRONT_RIGHT]+= M_SQRT1_2; + if(in_ch_layout & AV_CH_FRONT_CENTER) + matrix[FRONT_CENTER][ FRONT_CENTER] = s->clev*sqrt(2); + }else + av_assert0(0); + } + + if(unaccounted & AV_CH_BACK_CENTER){ + if(out_ch_layout & AV_CH_BACK_LEFT){ + matrix[ BACK_LEFT][BACK_CENTER]+= M_SQRT1_2; + matrix[BACK_RIGHT][BACK_CENTER]+= M_SQRT1_2; + }else if(out_ch_layout & AV_CH_SIDE_LEFT){ + matrix[ SIDE_LEFT][BACK_CENTER]+= M_SQRT1_2; + matrix[SIDE_RIGHT][BACK_CENTER]+= M_SQRT1_2; + }else if(out_ch_layout & AV_CH_FRONT_LEFT){ + if (matrix_encoding == AV_MATRIX_ENCODING_DOLBY || + matrix_encoding == AV_MATRIX_ENCODING_DPLII) { + if (unaccounted & (AV_CH_BACK_LEFT | AV_CH_SIDE_LEFT)) { + matrix[FRONT_LEFT ][BACK_CENTER] -= s->slev * M_SQRT1_2; + matrix[FRONT_RIGHT][BACK_CENTER] += s->slev * M_SQRT1_2; + } else { + matrix[FRONT_LEFT ][BACK_CENTER] -= s->slev; + matrix[FRONT_RIGHT][BACK_CENTER] += s->slev; + } + } else { + matrix[ FRONT_LEFT][BACK_CENTER]+= s->slev*M_SQRT1_2; + matrix[FRONT_RIGHT][BACK_CENTER]+= s->slev*M_SQRT1_2; + } + }else if(out_ch_layout & AV_CH_FRONT_CENTER){ + matrix[ FRONT_CENTER][BACK_CENTER]+= s->slev*M_SQRT1_2; + }else + av_assert0(0); + } + if(unaccounted & 
AV_CH_BACK_LEFT){ + if(out_ch_layout & AV_CH_BACK_CENTER){ + matrix[BACK_CENTER][ BACK_LEFT]+= M_SQRT1_2; + matrix[BACK_CENTER][BACK_RIGHT]+= M_SQRT1_2; + }else if(out_ch_layout & AV_CH_SIDE_LEFT){ + if(in_ch_layout & AV_CH_SIDE_LEFT){ + matrix[ SIDE_LEFT][ BACK_LEFT]+= M_SQRT1_2; + matrix[SIDE_RIGHT][BACK_RIGHT]+= M_SQRT1_2; + }else{ + matrix[ SIDE_LEFT][ BACK_LEFT]+= 1.0; + matrix[SIDE_RIGHT][BACK_RIGHT]+= 1.0; + } + }else if(out_ch_layout & AV_CH_FRONT_LEFT){ + if (matrix_encoding == AV_MATRIX_ENCODING_DOLBY) { + matrix[FRONT_LEFT ][BACK_LEFT ] -= s->slev * M_SQRT1_2; + matrix[FRONT_LEFT ][BACK_RIGHT] -= s->slev * M_SQRT1_2; + matrix[FRONT_RIGHT][BACK_LEFT ] += s->slev * M_SQRT1_2; + matrix[FRONT_RIGHT][BACK_RIGHT] += s->slev * M_SQRT1_2; + } else if (matrix_encoding == AV_MATRIX_ENCODING_DPLII) { + matrix[FRONT_LEFT ][BACK_LEFT ] -= s->slev * SQRT3_2; + matrix[FRONT_LEFT ][BACK_RIGHT] -= s->slev * M_SQRT1_2; + matrix[FRONT_RIGHT][BACK_LEFT ] += s->slev * M_SQRT1_2; + matrix[FRONT_RIGHT][BACK_RIGHT] += s->slev * SQRT3_2; + } else { + matrix[ FRONT_LEFT][ BACK_LEFT] += s->slev; + matrix[FRONT_RIGHT][BACK_RIGHT] += s->slev; + } + }else if(out_ch_layout & AV_CH_FRONT_CENTER){ + matrix[ FRONT_CENTER][BACK_LEFT ]+= s->slev*M_SQRT1_2; + matrix[ FRONT_CENTER][BACK_RIGHT]+= s->slev*M_SQRT1_2; + }else + av_assert0(0); + } + + if(unaccounted & AV_CH_SIDE_LEFT){ + if(out_ch_layout & AV_CH_BACK_LEFT){ + /* if back channels do not exist in the input, just copy side + channels to back channels, otherwise mix side into back */ + if (in_ch_layout & AV_CH_BACK_LEFT) { + matrix[BACK_LEFT ][SIDE_LEFT ] += M_SQRT1_2; + matrix[BACK_RIGHT][SIDE_RIGHT] += M_SQRT1_2; + } else { + matrix[BACK_LEFT ][SIDE_LEFT ] += 1.0; + matrix[BACK_RIGHT][SIDE_RIGHT] += 1.0; + } + }else if(out_ch_layout & AV_CH_BACK_CENTER){ + matrix[BACK_CENTER][ SIDE_LEFT]+= M_SQRT1_2; + matrix[BACK_CENTER][SIDE_RIGHT]+= M_SQRT1_2; + }else if(out_ch_layout & AV_CH_FRONT_LEFT){ + if (matrix_encoding == 
AV_MATRIX_ENCODING_DOLBY) { + matrix[FRONT_LEFT ][SIDE_LEFT ] -= s->slev * M_SQRT1_2; + matrix[FRONT_LEFT ][SIDE_RIGHT] -= s->slev * M_SQRT1_2; + matrix[FRONT_RIGHT][SIDE_LEFT ] += s->slev * M_SQRT1_2; + matrix[FRONT_RIGHT][SIDE_RIGHT] += s->slev * M_SQRT1_2; + } else if (matrix_encoding == AV_MATRIX_ENCODING_DPLII) { + matrix[FRONT_LEFT ][SIDE_LEFT ] -= s->slev * SQRT3_2; + matrix[FRONT_LEFT ][SIDE_RIGHT] -= s->slev * M_SQRT1_2; + matrix[FRONT_RIGHT][SIDE_LEFT ] += s->slev * M_SQRT1_2; + matrix[FRONT_RIGHT][SIDE_RIGHT] += s->slev * SQRT3_2; + } else { + matrix[ FRONT_LEFT][ SIDE_LEFT] += s->slev; + matrix[FRONT_RIGHT][SIDE_RIGHT] += s->slev; + } + }else if(out_ch_layout & AV_CH_FRONT_CENTER){ + matrix[ FRONT_CENTER][SIDE_LEFT ]+= s->slev*M_SQRT1_2; + matrix[ FRONT_CENTER][SIDE_RIGHT]+= s->slev*M_SQRT1_2; + }else + av_assert0(0); + } + + if(unaccounted & AV_CH_FRONT_LEFT_OF_CENTER){ + if(out_ch_layout & AV_CH_FRONT_LEFT){ + matrix[ FRONT_LEFT][ FRONT_LEFT_OF_CENTER]+= 1.0; + matrix[FRONT_RIGHT][FRONT_RIGHT_OF_CENTER]+= 1.0; + }else if(out_ch_layout & AV_CH_FRONT_CENTER){ + matrix[ FRONT_CENTER][ FRONT_LEFT_OF_CENTER]+= M_SQRT1_2; + matrix[ FRONT_CENTER][FRONT_RIGHT_OF_CENTER]+= M_SQRT1_2; + }else + av_assert0(0); + } + /* mix LFE into front left/right or center */ + if (unaccounted & AV_CH_LOW_FREQUENCY) { + if (out_ch_layout & AV_CH_FRONT_CENTER) { + matrix[FRONT_CENTER][LOW_FREQUENCY] += s->lfe_mix_level; + } else if (out_ch_layout & AV_CH_FRONT_LEFT) { + matrix[FRONT_LEFT ][LOW_FREQUENCY] += s->lfe_mix_level * M_SQRT1_2; + matrix[FRONT_RIGHT][LOW_FREQUENCY] += s->lfe_mix_level * M_SQRT1_2; + } else + av_assert0(0); + } + + for(out_i=i=0; i<64; i++){ + double sum=0; + int in_i=0; + if((out_ch_layout & (1ULL<<i)) == 0) + continue; + for(j=0; j<64; j++){ + if((in_ch_layout & (1ULL<<j)) == 0) + continue; + if (i < FF_ARRAY_ELEMS(matrix) && j < FF_ARRAY_ELEMS(matrix[0])) + s->matrix[out_i][in_i]= matrix[i][j]; + else + s->matrix[out_i][in_i]= i == j && (in_ch_layout 
& out_ch_layout & (1ULL<<i)); + sum += fabs(s->matrix[out_i][in_i]); + in_i++; + } + maxcoef= FFMAX(maxcoef, sum); + out_i++; + } + if(s->rematrix_volume < 0) + maxcoef = -s->rematrix_volume; + + if (s->rematrix_maxval > 0) { + maxval = s->rematrix_maxval; + } else if ( av_get_packed_sample_fmt(s->out_sample_fmt) < AV_SAMPLE_FMT_FLT + || av_get_packed_sample_fmt(s->int_sample_fmt) < AV_SAMPLE_FMT_FLT) { + maxval = 1.0; + } else + maxval = INT_MAX; + + if(maxcoef > maxval || s->rematrix_volume < 0){ + maxcoef /= maxval; + for(i=0; i<SWR_CH_MAX; i++) + for(j=0; j<SWR_CH_MAX; j++){ + s->matrix[i][j] /= maxcoef; + } + } + + if(s->rematrix_volume > 0){ + for(i=0; i<SWR_CH_MAX; i++) + for(j=0; j<SWR_CH_MAX; j++){ + s->matrix[i][j] *= s->rematrix_volume; + } + } + + av_log(s, AV_LOG_DEBUG, "Matrix coefficients:\n"); + for(i=0; i<av_get_channel_layout_nb_channels(out_ch_layout); i++){ + const char *c = + av_get_channel_name(av_channel_layout_extract_channel(out_ch_layout, i)); + av_log(s, AV_LOG_DEBUG, "%s: ", c ? c : "?"); + for(j=0; j<av_get_channel_layout_nb_channels(in_ch_layout); j++){ + c = av_get_channel_name(av_channel_layout_extract_channel(in_ch_layout, j)); + av_log(s, AV_LOG_DEBUG, "%s:%f ", c ? 
c : "?", s->matrix[i][j]); + } + av_log(s, AV_LOG_DEBUG, "\n"); + } + return 0; +} + +av_cold int swri_rematrix_init(SwrContext *s){ + int i, j; + int nb_in = av_get_channel_layout_nb_channels(s->in_ch_layout); + int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout); + + s->mix_any_f = NULL; + + if (!s->rematrix_custom) { + int r = auto_matrix(s); + if (r) + return r; + } + if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){ + int maxsum = 0; + s->native_matrix = av_calloc(nb_in * nb_out, sizeof(int)); + s->native_one = av_mallocz(sizeof(int)); + if (!s->native_matrix || !s->native_one) + return AVERROR(ENOMEM); + for (i = 0; i < nb_out; i++) { + double rem = 0; + int sum = 0; + + for (j = 0; j < nb_in; j++) { + double target = s->matrix[i][j] * 32768 + rem; + ((int*)s->native_matrix)[i * nb_in + j] = lrintf(target); + rem += target - ((int*)s->native_matrix)[i * nb_in + j]; + sum += FFABS(((int*)s->native_matrix)[i * nb_in + j]); + } + maxsum = FFMAX(maxsum, sum); + } + *((int*)s->native_one) = 32768; + if (maxsum <= 32768) { + s->mix_1_1_f = (mix_1_1_func_type*)copy_s16; + s->mix_2_1_f = (mix_2_1_func_type*)sum2_s16; + s->mix_any_f = (mix_any_func_type*)get_mix_any_func_s16(s); + } else { + s->mix_1_1_f = (mix_1_1_func_type*)copy_clip_s16; + s->mix_2_1_f = (mix_2_1_func_type*)sum2_clip_s16; + s->mix_any_f = (mix_any_func_type*)get_mix_any_func_clip_s16(s); + } + }else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){ + s->native_matrix = av_calloc(nb_in * nb_out, sizeof(float)); + s->native_one = av_mallocz(sizeof(float)); + if (!s->native_matrix || !s->native_one) + return AVERROR(ENOMEM); + for (i = 0; i < nb_out; i++) + for (j = 0; j < nb_in; j++) + ((float*)s->native_matrix)[i * nb_in + j] = s->matrix[i][j]; + *((float*)s->native_one) = 1.0; + s->mix_1_1_f = (mix_1_1_func_type*)copy_float; + s->mix_2_1_f = (mix_2_1_func_type*)sum2_float; + s->mix_any_f = (mix_any_func_type*)get_mix_any_func_float(s); + }else if(s->midbuf.fmt == AV_SAMPLE_FMT_DBLP){ + s->native_matrix 
= av_calloc(nb_in * nb_out, sizeof(double)); + s->native_one = av_mallocz(sizeof(double)); + if (!s->native_matrix || !s->native_one) + return AVERROR(ENOMEM); + for (i = 0; i < nb_out; i++) + for (j = 0; j < nb_in; j++) + ((double*)s->native_matrix)[i * nb_in + j] = s->matrix[i][j]; + *((double*)s->native_one) = 1.0; + s->mix_1_1_f = (mix_1_1_func_type*)copy_double; + s->mix_2_1_f = (mix_2_1_func_type*)sum2_double; + s->mix_any_f = (mix_any_func_type*)get_mix_any_func_double(s); + }else if(s->midbuf.fmt == AV_SAMPLE_FMT_S32P){ + // Only for dithering currently +// s->native_matrix = av_calloc(nb_in * nb_out, sizeof(double)); + s->native_one = av_mallocz(sizeof(int)); + if (!s->native_one) + return AVERROR(ENOMEM); +// for (i = 0; i < nb_out; i++) +// for (j = 0; j < nb_in; j++) +// ((double*)s->native_matrix)[i * nb_in + j] = s->matrix[i][j]; + *((int*)s->native_one) = 32768; + s->mix_1_1_f = (mix_1_1_func_type*)copy_s32; + s->mix_2_1_f = (mix_2_1_func_type*)sum2_s32; + s->mix_any_f = (mix_any_func_type*)get_mix_any_func_s32(s); + }else + av_assert0(0); + //FIXME quantize for integeres + for (i = 0; i < SWR_CH_MAX; i++) { + int ch_in=0; + for (j = 0; j < SWR_CH_MAX; j++) { + s->matrix32[i][j]= lrintf(s->matrix[i][j] * 32768); + if(s->matrix[i][j]) + s->matrix_ch[i][++ch_in]= j; + } + s->matrix_ch[i][0]= ch_in; + } + + if(HAVE_YASM && HAVE_MMX) + return swri_rematrix_init_x86(s); + + return 0; +} + +av_cold void swri_rematrix_free(SwrContext *s){ + av_freep(&s->native_matrix); + av_freep(&s->native_one); + av_freep(&s->native_simd_matrix); + av_freep(&s->native_simd_one); +} + +int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mustcopy){ + int out_i, in_i, i, j; + int len1 = 0; + int off = 0; + + if(s->mix_any_f) { + s->mix_any_f(out->ch, (const uint8_t **)in->ch, s->native_matrix, len); + return 0; + } + + if(s->mix_2_1_simd || s->mix_1_1_simd){ + len1= len&~15; + off = len1 * out->bps; + } + + av_assert0(!s->out_ch_layout || 
out->ch_count == av_get_channel_layout_nb_channels(s->out_ch_layout)); + av_assert0(!s-> in_ch_layout || in ->ch_count == av_get_channel_layout_nb_channels(s-> in_ch_layout)); + + for(out_i=0; out_i<out->ch_count; out_i++){ + switch(s->matrix_ch[out_i][0]){ + case 0: + if(mustcopy) + memset(out->ch[out_i], 0, len * av_get_bytes_per_sample(s->int_sample_fmt)); + break; + case 1: + in_i= s->matrix_ch[out_i][1]; + if(s->matrix[out_i][in_i]!=1.0){ + if(s->mix_1_1_simd && len1) + s->mix_1_1_simd(out->ch[out_i] , in->ch[in_i] , s->native_simd_matrix, in->ch_count*out_i + in_i, len1); + if(len != len1) + s->mix_1_1_f (out->ch[out_i]+off, in->ch[in_i]+off, s->native_matrix, in->ch_count*out_i + in_i, len-len1); + }else if(mustcopy){ + memcpy(out->ch[out_i], in->ch[in_i], len*out->bps); + }else{ + out->ch[out_i]= in->ch[in_i]; + } + break; + case 2: { + int in_i1 = s->matrix_ch[out_i][1]; + int in_i2 = s->matrix_ch[out_i][2]; + if(s->mix_2_1_simd && len1) + s->mix_2_1_simd(out->ch[out_i] , in->ch[in_i1] , in->ch[in_i2] , s->native_simd_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len1); + else + s->mix_2_1_f (out->ch[out_i] , in->ch[in_i1] , in->ch[in_i2] , s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len1); + if(len != len1) + s->mix_2_1_f (out->ch[out_i]+off, in->ch[in_i1]+off, in->ch[in_i2]+off, s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len-len1); + break;} + default: + if(s->int_sample_fmt == AV_SAMPLE_FMT_FLTP){ + for(i=0; i<len; i++){ + float v=0; + for(j=0; j<s->matrix_ch[out_i][0]; j++){ + in_i= s->matrix_ch[out_i][1+j]; + v+= ((float*)in->ch[in_i])[i] * s->matrix[out_i][in_i]; + } + ((float*)out->ch[out_i])[i]= v; + } + }else if(s->int_sample_fmt == AV_SAMPLE_FMT_DBLP){ + for(i=0; i<len; i++){ + double v=0; + for(j=0; j<s->matrix_ch[out_i][0]; j++){ + in_i= s->matrix_ch[out_i][1+j]; + v+= ((double*)in->ch[in_i])[i] * s->matrix[out_i][in_i]; + } + ((double*)out->ch[out_i])[i]= v; + } + 
}else{ + for(i=0; i<len; i++){ + int v=0; + for(j=0; j<s->matrix_ch[out_i][0]; j++){ + in_i= s->matrix_ch[out_i][1+j]; + v+= ((int16_t*)in->ch[in_i])[i] * s->matrix32[out_i][in_i]; + } + ((int16_t*)out->ch[out_i])[i]= (v + 16384)>>15; + } + } + } + } + return 0; +} diff --git a/libswresample/rematrix_template.c b/libswresample/rematrix_template.c new file mode 100644 index 0000000000..add65e3155 --- /dev/null +++ b/libswresample/rematrix_template.c @@ -0,0 +1,111 @@ +/* + * Copyright (C) 2011-2012 Michael Niedermayer (michaelni@gmx.at) + * + * This file is part of libswresample + * + * libswresample is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libswresample is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libswresample; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/*
 * Per-type template parameters. Exactly one TEMPLATE_REMATRIX_* macro is
 * expected to be defined by the including file:
 *   SAMPLE  element type of the input and output sample buffers
 *   COEFF   element type of the coefficient table
 *   INTER   type the coefficient * sample products are computed in
 *   R(x)    converts an INTER result back to a SAMPLE value; the integer
 *           variants add 16384 and shift right by 15, and the
 *           TEMPLATE_CLIP variant additionally passes the result through
 *           av_clip_int16()
 *   RENAME  appends the per-type suffix to a function name
 */
#if defined(TEMPLATE_REMATRIX_FLT)
#    define R(x) x
#    define SAMPLE float
#    define COEFF float
#    define INTER float
#    define RENAME(x) x ## _float
#elif defined(TEMPLATE_REMATRIX_DBL)
#    define R(x) x
#    define SAMPLE double
#    define COEFF double
#    define INTER double
#    define RENAME(x) x ## _double
#elif defined(TEMPLATE_REMATRIX_S16)
#    define SAMPLE int16_t
#    define COEFF int
#    define INTER int
#    ifdef TEMPLATE_CLIP
#        define R(x) av_clip_int16(((x) + 16384)>>15)
#        define RENAME(x) x ## _clip_s16
#    else
#        define R(x) (((x) + 16384)>>15)
#        define RENAME(x) x ## _s16
#    endif
#elif defined(TEMPLATE_REMATRIX_S32)
#    define R(x) (((x) + 16384)>>15)
#    define SAMPLE int32_t
#    define COEFF int
#    define INTER int64_t
#    define RENAME(x) x ## _s32
#endif

/* Signature shared by the specialised whole-matrix mixers below. */
typedef void (RENAME(mix_any_func_type))(SAMPLE **out, const SAMPLE **in1, COEFF *coeffp, integer len);

/**
 * Weighted sum of two input channels:
 * out[i] = R(coeffp[index1] * in1[i] + coeffp[index2] * in2[i]) for i < len.
 */
static void RENAME(sum2)(SAMPLE *out, const SAMPLE *in1, const SAMPLE *in2, COEFF *coeffp, integer index1, integer index2, integer len){
    int i;
    INTER coeff1 = coeffp[index1];
    INTER coeff2 = coeffp[index2];

    for(i=0; i<len; i++)
        out[i] = R(coeff1*in1[i] + coeff2*in2[i]);
}

/**
 * Scaled copy of one channel: out[i] = R(coeffp[index] * in[i]) for i < len.
 */
static void RENAME(copy)(SAMPLE *out, const SAMPLE *in, COEFF *coeffp, integer index, integer len){
    int i;
    INTER coeff = coeffp[index];
    for(i=0; i<len; i++)
        out[i] = R(coeff*in[i]);
}

/**
 * Mix 6 input channels down to 2 output channels.
 *
 * coeffp is indexed as [output * 6 + input]. The contribution of inputs 2
 * and 3 is computed once, using the output-0 coefficients, and reused for
 * both outputs; output 0 additionally takes inputs 0 and 4, output 1 takes
 * inputs 1 and 5. All other coefficients are ignored.
 */
static void RENAME(mix6to2)(SAMPLE **out, const SAMPLE **in, COEFF *coeffp, integer len){
    int i;

    for(i=0; i<len; i++) {
        INTER t = in[2][i]*(INTER)coeffp[0*6+2] + in[3][i]*(INTER)coeffp[0*6+3];
        out[0][i] = R(t + in[0][i]*(INTER)coeffp[0*6+0] + in[4][i]*(INTER)coeffp[0*6+4]);
        out[1][i] = R(t + in[1][i]*(INTER)coeffp[1*6+1] + in[5][i]*(INTER)coeffp[1*6+5]);
    }
}

/**
 * Mix 8 input channels down to 2 output channels.
 *
 * Same scheme as mix6to2 with coeffp indexed as [output * 8 + input]:
 * inputs 2 and 3 are shared (output-0 coefficients), output 0 adds inputs
 * 0, 4 and 6, output 1 adds inputs 1, 5 and 7.
 */
static void RENAME(mix8to2)(SAMPLE **out, const SAMPLE **in, COEFF *coeffp, integer len){
    int i;

    for(i=0; i<len; i++) {
        INTER t = in[2][i]*(INTER)coeffp[0*8+2] + in[3][i]*(INTER)coeffp[0*8+3];
        out[0][i] = R(t + in[0][i]*(INTER)coeffp[0*8+0] + in[4][i]*(INTER)coeffp[0*8+4] + in[6][i]*(INTER)coeffp[0*8+6]);
        out[1][i] = R(t + in[1][i]*(INTER)coeffp[1*8+1] + in[5][i]*(INTER)coeffp[1*8+5] + in[7][i]*(INTER)coeffp[1*8+7]);
    }
}

/**
 * Select a specialised whole-matrix mixer for the context, if one applies.
 *
 * mix6to2 is returned for a stereo output with a 5.1 or 5.1(back) input, and
 * mix8to2 for a stereo output with a 7.1 input, but only when s->matrix has
 * exactly the shape those mixers assume: both outputs use the same
 * coefficient for inputs 2 and 3, and every coefficient the mixer ignores
 * is zero.
 *
 * @return the matching mixer, or NULL when no specialised mixer applies
 */
static RENAME(mix_any_func_type) *RENAME(get_mix_any_func)(SwrContext *s){
    if(   s->out_ch_layout == AV_CH_LAYOUT_STEREO && (s->in_ch_layout == AV_CH_LAYOUT_5POINT1 || s->in_ch_layout == AV_CH_LAYOUT_5POINT1_BACK)
       && s->matrix[0][2] == s->matrix[1][2] && s->matrix[0][3] == s->matrix[1][3]
       && !s->matrix[0][1] && !s->matrix[0][5] && !s->matrix[1][0] && !s->matrix[1][4]
    )
        return RENAME(mix6to2);

    if(   s->out_ch_layout == AV_CH_LAYOUT_STEREO && s->in_ch_layout == AV_CH_LAYOUT_7POINT1
       && s->matrix[0][2] == s->matrix[1][2] && s->matrix[0][3] == s->matrix[1][3]
       && !s->matrix[0][1] && !s->matrix[0][5] && !s->matrix[1][0] && !s->matrix[1][4]
       && !s->matrix[0][7] && !s->matrix[1][6]
    )
        return RENAME(mix8to2);

    return NULL;
}

/* Drop the template parameters so the file can be included again. */
#undef R
#undef SAMPLE
#undef COEFF
#undef INTER
#undef RENAME
diff --git a/libswresample/resample.c b/libswresample/resample.c
new file mode 100644
index 0000000000..b834248167
--- /dev/null
+++ b/libswresample/resample.c
@@ -0,0 +1,638 @@
/*
 * audio resampling
 * Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at>
 * bessel function: Copyright (c) 2006 Xiaogang Zhang
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * audio resampling + * @author Michael Niedermayer <michaelni@gmx.at> + */ + +#include "libavutil/avassert.h" +#include "resample.h" + +static inline double eval_poly(const double *coeff, int size, double x) { + double sum = coeff[size-1]; + int i; + for (i = size-2; i >= 0; --i) { + sum *= x; + sum += coeff[i]; + } + return sum; +} + +/** + * 0th order modified bessel function of the first kind. + * Algorithm taken from the Boost project, source: + * https://searchcode.com/codesearch/view/14918379/ + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0 (see notice below). + * Boost Software License - Version 1.0 - August 17th, 2003 +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + */ + +static double bessel(double x) { +// Modified Bessel function of the first kind of order zero +// minimax rational approximations on intervals, see +// Blair and Edwards, Chalk River Report AECL-4928, 1974 + static const double p1[] = { + -2.2335582639474375249e+15, + -5.5050369673018427753e+14, + -3.2940087627407749166e+13, + -8.4925101247114157499e+11, + -1.1912746104985237192e+10, + -1.0313066708737980747e+08, + -5.9545626019847898221e+05, + -2.4125195876041896775e+03, + -7.0935347449210549190e+00, + -1.5453977791786851041e-02, + -2.5172644670688975051e-05, + -3.0517226450451067446e-08, + -2.6843448573468483278e-11, + -1.5982226675653184646e-14, + -5.2487866627945699800e-18, + }; + static const double q1[] = { + -2.2335582639474375245e+15, + 7.8858692566751002988e+12, + -1.2207067397808979846e+10, + 1.0377081058062166144e+07, + -4.8527560179962773045e+03, + 1.0, + }; + static const double p2[] = { + -2.2210262233306573296e-04, + 1.3067392038106924055e-02, + -4.4700805721174453923e-01, + 5.5674518371240761397e+00, + -2.3517945679239481621e+01, + 3.1611322818701131207e+01, + -9.6090021968656180000e+00, + }; + static const double q2[] = { + -5.5194330231005480228e-04, + 3.2547697594819615062e-02, + -1.1151759188741312645e+00, + 1.3982595353892851542e+01, + -6.0228002066743340583e+01, + 8.5539563258012929600e+01, + -3.1446690275135491500e+01, + 1.0, + }; + double y, r, factor; + if (x == 0) + return 1.0; + x = fabs(x); + if (x <= 15) { + y = x * x; + return eval_poly(p1, 
FF_ARRAY_ELEMS(p1), y) / eval_poly(q1, FF_ARRAY_ELEMS(q1), y); + } + else { + y = 1 / x - 1.0 / 15; + r = eval_poly(p2, FF_ARRAY_ELEMS(p2), y) / eval_poly(q2, FF_ARRAY_ELEMS(q2), y); + factor = exp(x) / sqrt(x); + return factor * r; + } +} + +/** + * builds a polyphase filterbank. + * @param factor resampling factor + * @param scale wanted sum of coefficients for each filter + * @param filter_type filter type + * @param kaiser_beta kaiser window beta + * @return 0 on success, negative on error + */ +static int build_filter(ResampleContext *c, void *filter, double factor, int tap_count, int alloc, int phase_count, int scale, + int filter_type, double kaiser_beta){ + int ph, i; + int ph_nb = phase_count % 2 ? phase_count : phase_count / 2 + 1; + double x, y, w, t, s; + double *tab = av_malloc_array(tap_count+1, sizeof(*tab)); + double *sin_lut = av_malloc_array(ph_nb, sizeof(*sin_lut)); + const int center= (tap_count-1)/2; + + if (!tab || !sin_lut) + goto fail; + + /* if upsampling, only need to interpolate, no filter */ + if (factor > 1.0) + factor = 1.0; + + if (factor == 1.0) { + for (ph = 0; ph < ph_nb; ph++) + sin_lut[ph] = sin(M_PI * ph / phase_count); + } + for(ph = 0; ph < ph_nb; ph++) { + double norm = 0; + s = sin_lut[ph]; + for(i=0;i<=tap_count;i++) { + x = M_PI * ((double)(i - center) - (double)ph / phase_count) * factor; + if (x == 0) y = 1.0; + else if (factor == 1.0) + y = s / x; + else + y = sin(x) / x; + switch(filter_type){ + case SWR_FILTER_TYPE_CUBIC:{ + const float d= -0.5; //first order derivative = -0.5 + x = fabs(((double)(i - center) - (double)ph / phase_count) * factor); + if(x<1.0) y= 1 - 3*x*x + 2*x*x*x + d*( -x*x + x*x*x); + else y= d*(-4 + 8*x - 5*x*x + x*x*x); + break;} + case SWR_FILTER_TYPE_BLACKMAN_NUTTALL: + w = 2.0*x / (factor*tap_count); + t = -cos(w); + y *= 0.3635819 - 0.4891775 * t + 0.1365995 * (2*t*t-1) - 0.0106411 * (4*t*t*t - 3*t); + break; + case SWR_FILTER_TYPE_KAISER: + w = 2.0*x / (factor*tap_count*M_PI); + y *= 
bessel(kaiser_beta*sqrt(FFMAX(1-w*w, 0))); + break; + default: + av_assert0(0); + } + + tab[i] = y; + s = -s; + if (i < tap_count) + norm += y; + } + + /* normalize so that an uniform color remains the same */ + switch(c->format){ + case AV_SAMPLE_FMT_S16P: + for(i=0;i<tap_count;i++) + ((int16_t*)filter)[ph * alloc + i] = av_clip_int16(lrintf(tab[i] * scale / norm)); + if (phase_count % 2) break; + if (tap_count % 2 == 0 || tap_count == 1) { + for (i = 0; i < tap_count; i++) + ((int16_t*)filter)[(phase_count-ph) * alloc + tap_count-1-i] = ((int16_t*)filter)[ph * alloc + i]; + } + else { + for (i = 1; i <= tap_count; i++) + ((int16_t*)filter)[(phase_count-ph) * alloc + tap_count-i] = + av_clip_int16(lrintf(tab[i] * scale / (norm - tab[0] + tab[tap_count]))); + } + break; + case AV_SAMPLE_FMT_S32P: + for(i=0;i<tap_count;i++) + ((int32_t*)filter)[ph * alloc + i] = av_clipl_int32(llrint(tab[i] * scale / norm)); + if (phase_count % 2) break; + if (tap_count % 2 == 0 || tap_count == 1) { + for (i = 0; i < tap_count; i++) + ((int32_t*)filter)[(phase_count-ph) * alloc + tap_count-1-i] = ((int32_t*)filter)[ph * alloc + i]; + } + else { + for (i = 1; i <= tap_count; i++) + ((int32_t*)filter)[(phase_count-ph) * alloc + tap_count-i] = + av_clipl_int32(llrint(tab[i] * scale / (norm - tab[0] + tab[tap_count]))); + } + break; + case AV_SAMPLE_FMT_FLTP: + for(i=0;i<tap_count;i++) + ((float*)filter)[ph * alloc + i] = tab[i] * scale / norm; + if (phase_count % 2) break; + if (tap_count % 2 == 0 || tap_count == 1) { + for (i = 0; i < tap_count; i++) + ((float*)filter)[(phase_count-ph) * alloc + tap_count-1-i] = ((float*)filter)[ph * alloc + i]; + } + else { + for (i = 1; i <= tap_count; i++) + ((float*)filter)[(phase_count-ph) * alloc + tap_count-i] = tab[i] * scale / (norm - tab[0] + tab[tap_count]); + } + break; + case AV_SAMPLE_FMT_DBLP: + for(i=0;i<tap_count;i++) + ((double*)filter)[ph * alloc + i] = tab[i] * scale / norm; + if (phase_count % 2) break; + if (tap_count % 2 == 0 || 
tap_count == 1) { + for (i = 0; i < tap_count; i++) + ((double*)filter)[(phase_count-ph) * alloc + tap_count-1-i] = ((double*)filter)[ph * alloc + i]; + } + else { + for (i = 1; i <= tap_count; i++) + ((double*)filter)[(phase_count-ph) * alloc + tap_count-i] = tab[i] * scale / (norm - tab[0] + tab[tap_count]); + } + break; + } + } +#if 0 + { +#define LEN 1024 + int j,k; + double sine[LEN + tap_count]; + double filtered[LEN]; + double maxff=-2, minff=2, maxsf=-2, minsf=2; + for(i=0; i<LEN; i++){ + double ss=0, sf=0, ff=0; + for(j=0; j<LEN+tap_count; j++) + sine[j]= cos(i*j*M_PI/LEN); + for(j=0; j<LEN; j++){ + double sum=0; + ph=0; + for(k=0; k<tap_count; k++) + sum += filter[ph * tap_count + k] * sine[k+j]; + filtered[j]= sum / (1<<FILTER_SHIFT); + ss+= sine[j + center] * sine[j + center]; + ff+= filtered[j] * filtered[j]; + sf+= sine[j + center] * filtered[j]; + } + ss= sqrt(2*ss/LEN); + ff= sqrt(2*ff/LEN); + sf= 2*sf/LEN; + maxff= FFMAX(maxff, ff); + minff= FFMIN(minff, ff); + maxsf= FFMAX(maxsf, sf); + minsf= FFMIN(minsf, sf); + if(i%11==0){ + av_log(NULL, AV_LOG_ERROR, "i:%4d ss:%f ff:%13.6e-%13.6e sf:%13.6e-%13.6e\n", i, ss, maxff, minff, maxsf, minsf); + minff=minsf= 2; + maxff=maxsf= -2; + } + } + } +#endif + +fail: + av_free(tab); + av_free(sin_lut); + return 0; +} + +static ResampleContext *resample_init(ResampleContext *c, int out_rate, int in_rate, int filter_size, int phase_shift, int linear, + double cutoff0, enum AVSampleFormat format, enum SwrFilterType filter_type, double kaiser_beta, + double precision, int cheby, int exact_rational) +{ + double cutoff = cutoff0? 
cutoff0 : 0.97; + double factor= FFMIN(out_rate * cutoff / in_rate, 1.0); + int phase_count= 1<<phase_shift; + int phase_count_compensation = phase_count; + + if (exact_rational) { + int phase_count_exact, phase_count_exact_den; + + av_reduce(&phase_count_exact, &phase_count_exact_den, out_rate, in_rate, INT_MAX); + if (phase_count_exact <= phase_count) { + phase_count_compensation = phase_count_exact * (phase_count / phase_count_exact); + phase_count = phase_count_exact; + } + } + + if (!c || c->phase_count != phase_count || c->linear!=linear || c->factor != factor + || c->filter_length != FFMAX((int)ceil(filter_size/factor), 1) || c->format != format + || c->filter_type != filter_type || c->kaiser_beta != kaiser_beta) { + c = av_mallocz(sizeof(*c)); + if (!c) + return NULL; + + c->format= format; + + c->felem_size= av_get_bytes_per_sample(c->format); + + switch(c->format){ + case AV_SAMPLE_FMT_S16P: + c->filter_shift = 15; + break; + case AV_SAMPLE_FMT_S32P: + c->filter_shift = 30; + break; + case AV_SAMPLE_FMT_FLTP: + case AV_SAMPLE_FMT_DBLP: + c->filter_shift = 0; + break; + default: + av_log(NULL, AV_LOG_ERROR, "Unsupported sample format\n"); + av_assert0(0); + } + + if (filter_size/factor > INT32_MAX/256) { + av_log(NULL, AV_LOG_ERROR, "Filter length too large\n"); + goto error; + } + + c->phase_count = phase_count; + c->linear = linear; + c->factor = factor; + c->filter_length = FFMAX((int)ceil(filter_size/factor), 1); + c->filter_alloc = FFALIGN(c->filter_length, 8); + c->filter_bank = av_calloc(c->filter_alloc, (phase_count+1)*c->felem_size); + c->filter_type = filter_type; + c->kaiser_beta = kaiser_beta; + c->phase_count_compensation = phase_count_compensation; + if (!c->filter_bank) + goto error; + if (build_filter(c, (void*)c->filter_bank, factor, c->filter_length, c->filter_alloc, phase_count, 1<<c->filter_shift, filter_type, kaiser_beta)) + goto error; + memcpy(c->filter_bank + (c->filter_alloc*phase_count+1)*c->felem_size, c->filter_bank, 
(c->filter_alloc-1)*c->felem_size); + memcpy(c->filter_bank + (c->filter_alloc*phase_count )*c->felem_size, c->filter_bank + (c->filter_alloc - 1)*c->felem_size, c->felem_size); + } + + c->compensation_distance= 0; + if(!av_reduce(&c->src_incr, &c->dst_incr, out_rate, in_rate * (int64_t)phase_count, INT32_MAX/2)) + goto error; + while (c->dst_incr < (1<<20) && c->src_incr < (1<<20)) { + c->dst_incr *= 2; + c->src_incr *= 2; + } + c->ideal_dst_incr = c->dst_incr; + c->dst_incr_div = c->dst_incr / c->src_incr; + c->dst_incr_mod = c->dst_incr % c->src_incr; + + c->index= -phase_count*((c->filter_length-1)/2); + c->frac= 0; + + swri_resample_dsp_init(c); + + return c; +error: + av_freep(&c->filter_bank); + av_free(c); + return NULL; +} + +static void resample_free(ResampleContext **c){ + if(!*c) + return; + av_freep(&(*c)->filter_bank); + av_freep(c); +} + +static int rebuild_filter_bank_with_compensation(ResampleContext *c) +{ + uint8_t *new_filter_bank; + int new_src_incr, new_dst_incr; + int phase_count = c->phase_count_compensation; + int ret; + + if (phase_count == c->phase_count) + return 0; + + av_assert0(!c->frac && !c->dst_incr_mod && !c->compensation_distance); + + new_filter_bank = av_calloc(c->filter_alloc, (phase_count + 1) * c->felem_size); + if (!new_filter_bank) + return AVERROR(ENOMEM); + + ret = build_filter(c, new_filter_bank, c->factor, c->filter_length, c->filter_alloc, + phase_count, 1 << c->filter_shift, c->filter_type, c->kaiser_beta); + if (ret < 0) { + av_freep(&new_filter_bank); + return ret; + } + memcpy(new_filter_bank + (c->filter_alloc*phase_count+1)*c->felem_size, new_filter_bank, (c->filter_alloc-1)*c->felem_size); + memcpy(new_filter_bank + (c->filter_alloc*phase_count )*c->felem_size, new_filter_bank + (c->filter_alloc - 1)*c->felem_size, c->felem_size); + + if (!av_reduce(&new_src_incr, &new_dst_incr, c->src_incr, + c->dst_incr * (int64_t)(phase_count/c->phase_count), INT32_MAX/2)) + { + av_freep(&new_filter_bank); + return 
AVERROR(EINVAL); + } + + c->src_incr = new_src_incr; + c->dst_incr = new_dst_incr; + while (c->dst_incr < (1<<20) && c->src_incr < (1<<20)) { + c->dst_incr *= 2; + c->src_incr *= 2; + } + c->ideal_dst_incr = c->dst_incr; + c->dst_incr_div = c->dst_incr / c->src_incr; + c->dst_incr_mod = c->dst_incr % c->src_incr; + c->index *= phase_count / c->phase_count; + c->phase_count = phase_count; + av_freep(&c->filter_bank); + c->filter_bank = new_filter_bank; + return 0; +} + +static int set_compensation(ResampleContext *c, int sample_delta, int compensation_distance){ + int ret; + + if (compensation_distance) { + ret = rebuild_filter_bank_with_compensation(c); + if (ret < 0) + return ret; + } + + c->compensation_distance= compensation_distance; + if (compensation_distance) + c->dst_incr = c->ideal_dst_incr - c->ideal_dst_incr * (int64_t)sample_delta / compensation_distance; + else + c->dst_incr = c->ideal_dst_incr; + + c->dst_incr_div = c->dst_incr / c->src_incr; + c->dst_incr_mod = c->dst_incr % c->src_incr; + + return 0; +} + +static int swri_resample(ResampleContext *c, + uint8_t *dst, const uint8_t *src, int *consumed, + int src_size, int dst_size, int update_ctx) +{ + if (c->filter_length == 1 && c->phase_count == 1) { + int index= c->index; + int frac= c->frac; + int64_t index2= (1LL<<32)*c->frac/c->src_incr + (1LL<<32)*index; + int64_t incr= (1LL<<32) * c->dst_incr / c->src_incr; + int new_size = (src_size * (int64_t)c->src_incr - frac + c->dst_incr - 1) / c->dst_incr; + + dst_size= FFMIN(dst_size, new_size); + c->dsp.resample_one(dst, src, dst_size, index2, incr); + + index += dst_size * c->dst_incr_div; + index += (frac + dst_size * (int64_t)c->dst_incr_mod) / c->src_incr; + av_assert2(index >= 0); + *consumed= index; + if (update_ctx) { + c->frac = (frac + dst_size * (int64_t)c->dst_incr_mod) % c->src_incr; + c->index = 0; + } + } else { + int64_t end_index = (1LL + src_size - c->filter_length) * c->phase_count; + int64_t delta_frac = (end_index - c->index) * 
c->src_incr - c->frac; + int delta_n = (delta_frac + c->dst_incr - 1) / c->dst_incr; + + dst_size = FFMIN(dst_size, delta_n); + if (dst_size > 0) { + *consumed = c->dsp.resample(c, dst, src, dst_size, update_ctx); + } else { + *consumed = 0; + } + } + + return dst_size; +} + +static int multiple_resample(ResampleContext *c, AudioData *dst, int dst_size, AudioData *src, int src_size, int *consumed){ + int i, ret= -1; + int av_unused mm_flags = av_get_cpu_flags(); + int need_emms = c->format == AV_SAMPLE_FMT_S16P && ARCH_X86_32 && + (mm_flags & (AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_SSE2)) == AV_CPU_FLAG_MMX2; + int64_t max_src_size = (INT64_MAX/2 / c->phase_count) / c->src_incr; + + if (c->compensation_distance) + dst_size = FFMIN(dst_size, c->compensation_distance); + src_size = FFMIN(src_size, max_src_size); + + for(i=0; i<dst->ch_count; i++){ + ret= swri_resample(c, dst->ch[i], src->ch[i], + consumed, src_size, dst_size, i+1==dst->ch_count); + } + if(need_emms) + emms_c(); + + if (c->compensation_distance) { + c->compensation_distance -= ret; + if (!c->compensation_distance) { + c->dst_incr = c->ideal_dst_incr; + c->dst_incr_div = c->dst_incr / c->src_incr; + c->dst_incr_mod = c->dst_incr % c->src_incr; + } + } + + return ret; +} + +static int64_t get_delay(struct SwrContext *s, int64_t base){ + ResampleContext *c = s->resample; + int64_t num = s->in_buffer_count - (c->filter_length-1)/2; + num *= c->phase_count; + num -= c->index; + num *= c->src_incr; + num -= c->frac; + return av_rescale(num, base, s->in_sample_rate*(int64_t)c->src_incr * c->phase_count); +} + +static int64_t get_out_samples(struct SwrContext *s, int in_samples) { + ResampleContext *c = s->resample; + // The + 2 are added to allow implementations to be slightly inaccurate, they should not be needed currently. + // They also make it easier to proof that changes and optimizations do not + // break the upper bound. 
+ int64_t num = s->in_buffer_count + 2LL + in_samples; + num *= c->phase_count; + num -= c->index; + num = av_rescale_rnd(num, s->out_sample_rate, ((int64_t)s->in_sample_rate) * c->phase_count, AV_ROUND_UP) + 2; + + if (c->compensation_distance) { + if (num > INT_MAX) + return AVERROR(EINVAL); + + num = FFMAX(num, (num * c->ideal_dst_incr - 1) / c->dst_incr + 1); + } + return num; +} + +static int resample_flush(struct SwrContext *s) { + AudioData *a= &s->in_buffer; + int i, j, ret; + if((ret = swri_realloc_audio(a, s->in_buffer_index + 2*s->in_buffer_count)) < 0) + return ret; + av_assert0(a->planar); + for(i=0; i<a->ch_count; i++){ + for(j=0; j<s->in_buffer_count; j++){ + memcpy(a->ch[i] + (s->in_buffer_index+s->in_buffer_count+j )*a->bps, + a->ch[i] + (s->in_buffer_index+s->in_buffer_count-j-1)*a->bps, a->bps); + } + } + s->in_buffer_count += (s->in_buffer_count+1)/2; + return 0; +} + +// in fact the whole handle multiple ridiculously small buffers might need more thinking... +static int invert_initial_buffer(ResampleContext *c, AudioData *dst, const AudioData *src, + int in_count, int *out_idx, int *out_sz) +{ + int n, ch, num = FFMIN(in_count + *out_sz, c->filter_length + 1), res; + + if (c->index >= 0) + return 0; + + if ((res = swri_realloc_audio(dst, c->filter_length * 2 + 1)) < 0) + return res; + + // copy + for (n = *out_sz; n < num; n++) { + for (ch = 0; ch < src->ch_count; ch++) { + memcpy(dst->ch[ch] + ((c->filter_length + n) * c->felem_size), + src->ch[ch] + ((n - *out_sz) * c->felem_size), c->felem_size); + } + } + + // if not enough data is in, return and wait for more + if (num < c->filter_length + 1) { + *out_sz = num; + *out_idx = c->filter_length; + return INT_MAX; + } + + // else invert + for (n = 1; n <= c->filter_length; n++) { + for (ch = 0; ch < src->ch_count; ch++) { + memcpy(dst->ch[ch] + ((c->filter_length - n) * c->felem_size), + dst->ch[ch] + ((c->filter_length + n) * c->felem_size), + c->felem_size); + } + } + + res = num - *out_sz; + 
*out_idx = c->filter_length; + while (c->index < 0) { + --*out_idx; + c->index += c->phase_count; + } + *out_sz = FFMAX(*out_sz + c->filter_length, + 1 + c->filter_length * 2) - *out_idx; + + return FFMAX(res, 0); +} + +struct Resampler const swri_resampler={ + resample_init, + resample_free, + multiple_resample, + resample_flush, + set_compensation, + get_delay, + invert_initial_buffer, + get_out_samples, +}; diff --git a/libswresample/resample.h b/libswresample/resample.h new file mode 100644 index 0000000000..7fe9b97f7f --- /dev/null +++ b/libswresample/resample.h @@ -0,0 +1,65 @@ +/* + * audio resampling + * Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef SWRESAMPLE_RESAMPLE_H
#define SWRESAMPLE_RESAMPLE_H

#include "libavutil/log.h"
#include "libavutil/samplefmt.h"

#include "swresample_internal.h"

/**
 * State of the built-in polyphase resampler.
 */
typedef struct ResampleContext {
    const AVClass *av_class;
    uint8_t *filter_bank;       /* phase_count + 1 filters of filter_alloc elements each */
    int filter_length;          /* number of taps per filter */
    int filter_alloc;           /* filter_length rounded up to a multiple of 8; distance between filters, in elements */
    int ideal_dst_incr;         /* value of dst_incr when no compensation is active */
    int dst_incr;               /* position advance per output sample, in units of 1/src_incr phases */
    int dst_incr_div;           /* dst_incr / src_incr */
    int dst_incr_mod;           /* dst_incr % src_incr */
    int index;                  /* current position, in phases */
    int frac;                   /* remainder of the position, kept below src_incr */
    int src_incr;
    int compensation_distance;  /* output samples left during which compensation applies, 0 when inactive */
    int phase_count;            /* number of filter phases */
    int linear;                 /* non-zero selects the resample_linear variants */
    enum SwrFilterType filter_type;
    double kaiser_beta;
    double factor;              /* min(out_rate * cutoff / in_rate, 1.0) */
    enum AVSampleFormat format;
    int felem_size;             /* bytes per sample of format */
    int filter_shift;           /* 15 for S16P, 30 for S32P, 0 for FLTP and DBLP */
    int phase_count_compensation;      /* desired phase_count when compensation is enabled */

    /* per-format implementations, filled in by swri_resample_dsp_init() */
    struct {
        /* copies n samples from src, stepping a 32.32 fixed-point index by incr */
        void (*resample_one)(void *dst, const void *src,
                             int n, int64_t index, int64_t incr);
        /* writes n filtered samples and returns the number of input samples consumed */
        int (*resample)(struct ResampleContext *c, void *dst,
                        const void *src, int n, int update_ctx);
    } dsp;
} ResampleContext;

void swri_resample_dsp_init(ResampleContext *c);
void swri_resample_dsp_x86_init(ResampleContext *c);
void swri_resample_dsp_arm_init(ResampleContext *c);

#endif /* SWRESAMPLE_RESAMPLE_H */
diff --git a/libswresample/resample_dsp.c b/libswresample/resample_dsp.c
new file mode 100644
index 0000000000..41369f3f8a
--- /dev/null
+++ b/libswresample/resample_dsp.c
@@ -0,0 +1,69 @@
/*
 * audio resampling
 * Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * audio resampling + * @author Michael Niedermayer <michaelni@gmx.at> + */ + +#include "resample.h" + +#define TEMPLATE_RESAMPLE_S16 +#include "resample_template.c" +#undef TEMPLATE_RESAMPLE_S16 + +#define TEMPLATE_RESAMPLE_S32 +#include "resample_template.c" +#undef TEMPLATE_RESAMPLE_S32 + +#define TEMPLATE_RESAMPLE_FLT +#include "resample_template.c" +#undef TEMPLATE_RESAMPLE_FLT + +#define TEMPLATE_RESAMPLE_DBL +#include "resample_template.c" +#undef TEMPLATE_RESAMPLE_DBL + +void swri_resample_dsp_init(ResampleContext *c) +{ + switch(c->format){ + case AV_SAMPLE_FMT_S16P: + c->dsp.resample_one = resample_one_int16; + c->dsp.resample = c->linear ? resample_linear_int16 : resample_common_int16; + break; + case AV_SAMPLE_FMT_S32P: + c->dsp.resample_one = resample_one_int32; + c->dsp.resample = c->linear ? resample_linear_int32 : resample_common_int32; + break; + case AV_SAMPLE_FMT_FLTP: + c->dsp.resample_one = resample_one_float; + c->dsp.resample = c->linear ? resample_linear_float : resample_common_float; + break; + case AV_SAMPLE_FMT_DBLP: + c->dsp.resample_one = resample_one_double; + c->dsp.resample = c->linear ? 
resample_linear_double : resample_common_double; + break; + } + + if (ARCH_X86) swri_resample_dsp_x86_init(c); + else if (ARCH_ARM) swri_resample_dsp_arm_init(c); +} diff --git a/libswresample/resample_template.c b/libswresample/resample_template.c new file mode 100644 index 0000000000..1636f4e95d --- /dev/null +++ b/libswresample/resample_template.c @@ -0,0 +1,201 @@ +/* + * audio resampling + * Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * audio resampling
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

/*
 * Per-type template parameters, selected by the TEMPLATE_RESAMPLE_* macro
 * defined by the including file:
 *   RENAME        appends the per-type suffix to a function name
 *   FILTER_SHIFT  number of fractional bits of the filter coefficients
 *   DELEM         sample type read from the source and written to the destination
 *   FELEM         filter coefficient type
 *   FELEM2        accumulator type
 *   FELEML        (S16 only) wider type used for the linear interpolation product
 *   OUT(d, v)     stores accumulator v into d; the integer variants round,
 *                 shift right by FILTER_SHIFT and clip, and modify v
 */
#if defined(TEMPLATE_RESAMPLE_DBL)

#    define RENAME(N) N ## _double
#    define FILTER_SHIFT 0
#    define DELEM double
#    define FELEM double
#    define FELEM2 double
#    define OUT(d, v) d = v

#elif defined(TEMPLATE_RESAMPLE_FLT)

#    define RENAME(N) N ## _float
#    define FILTER_SHIFT 0
#    define DELEM float
#    define FELEM float
#    define FELEM2 float
#    define OUT(d, v) d = v

#elif defined(TEMPLATE_RESAMPLE_S32)

#    define RENAME(N) N ## _int32
#    define FILTER_SHIFT 30
#    define DELEM int32_t
#    define FELEM int32_t
#    define FELEM2 int64_t
#    define FELEM_MAX INT32_MAX
#    define FELEM_MIN INT32_MIN
#    define OUT(d, v) (v) = ((v) + (1<<(FILTER_SHIFT-1)))>>FILTER_SHIFT;\
                      (d) = av_clipl_int32(v)

#elif defined(TEMPLATE_RESAMPLE_S16)

#    define RENAME(N) N ## _int16
#    define FILTER_SHIFT 15
#    define DELEM int16_t
#    define FELEM int16_t
#    define FELEM2 int32_t
#    define FELEML int64_t
#    define FELEM_MAX INT16_MAX
#    define FELEM_MIN INT16_MIN
#    define OUT(d, v) (v) = ((v) + (1<<(FILTER_SHIFT-1)))>>FILTER_SHIFT;\
                      (d) = av_clip_int16(v)

#endif

/**
 * Unfiltered resampling: each output sample is the source sample at the
 * integer part of a 32.32 fixed-point position.
 *
 * @param index2 start position, integer part in the upper 32 bits
 * @param incr   position advance per output sample, same format
 */
static void RENAME(resample_one)(void *dest, const void *source,
                                 int dst_size, int64_t index2, int64_t incr)
{
    DELEM *dst = dest;
    const DELEM *src = source;
    int dst_index;

    for (dst_index = 0; dst_index < dst_size; dst_index++) {
        dst[dst_index] = src[index2 >> 32];
        index2 += incr;
    }
}

/**
 * Polyphase resampling using the filter of the current phase only.
 *
 * @param n          number of output samples to produce
 * @param update_ctx non-zero to store the advanced index/frac in the context
 * @return number of source samples the position advanced by
 */
static int RENAME(resample_common)(ResampleContext *c,
                                   void *dest, const void *source,
                                   int n, int update_ctx)
{
    DELEM *dst = dest;
    const DELEM *src = source;
    int dst_index;
    int index= c->index;
    int frac= c->frac;
    int sample_index = 0;

    /* split the position into whole source samples and a phase */
    while (index >= c->phase_count) {
        sample_index++;
        index -= c->phase_count;
    }

    for (dst_index = 0; dst_index < n; dst_index++) {
        FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;

        FELEM2 val=0;
        int i;
        for (i = 0; i < c->filter_length; i++) {
            val += src[sample_index + i] * (FELEM2)filter[i];
        }
        OUT(dst[dst_index], val);

        /* advance by dst_incr, carrying the remainder in frac */
        frac += c->dst_incr_mod;
        index += c->dst_incr_div;
        if (frac >= c->src_incr) {
            frac -= c->src_incr;
            index++;
        }

        while (index >= c->phase_count) {
            sample_index++;
            index -= c->phase_count;
        }
    }

    if(update_ctx){
        c->frac= frac;
        c->index= index;
    }

    return sample_index;
}

/**
 * Polyphase resampling with linear interpolation between the filter of the
 * current phase and the next filter in the bank, weighted by
 * frac / src_incr. Parameters and return value as for resample_common.
 */
static int RENAME(resample_linear)(ResampleContext *c,
                                   void *dest, const void *source,
                                   int n, int update_ctx)
{
    DELEM *dst = dest;
    const DELEM *src = source;
    int dst_index;
    int index= c->index;
    int frac= c->frac;
    int sample_index = 0;
#if FILTER_SHIFT == 0
    double inv_src_incr = 1.0 / c->src_incr;
#endif

    while (index >= c->phase_count) {
        sample_index++;
        index -= c->phase_count;
    }

    for (dst_index = 0; dst_index < n; dst_index++) {
        FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
        FELEM2 val=0, v2 = 0;

        int i;
        for (i = 0; i < c->filter_length; i++) {
            val += src[sample_index + i] * (FELEM2)filter[i];
            /* same taps of the following filter in the bank */
            v2 += src[sample_index + i] * (FELEM2)filter[i + c->filter_alloc];
        }
#ifdef FELEML
        val += (v2 - val) * (FELEML) frac / c->src_incr;
#else
#    if FILTER_SHIFT == 0
        val += (v2 - val) * inv_src_incr * frac;
#    else
        val += (v2 - val) / c->src_incr * frac;
#    endif
#endif
        OUT(dst[dst_index], val);

        frac += c->dst_incr_mod;
        index += c->dst_incr_div;
        if (frac >= c->src_incr) {
            frac -= c->src_incr;
            index++;
        }

        while (index >= c->phase_count) {
            sample_index++;
            index -= c->phase_count;
        }
    }

    if(update_ctx){
        c->frac= frac;
        c->index= index;
    }

    return sample_index;
}

#undef RENAME
#undef FILTER_SHIFT
#undef DELEM
#undef FELEM
#undef FELEM2
#undef FELEML
#undef FELEM_MAX
#undef FELEM_MIN
#undef OUT
diff --git a/libswresample/soxr_resample.c b/libswresample/soxr_resample.c
new file mode 100644
index 0000000000..b9c6735028
--- /dev/null
+++ b/libswresample/soxr_resample.c
@@ -0,0 +1,130 @@
/*
 * audio resampling with soxr
 * Copyright (c) 2012 Rob Sykes <robs@users.sourceforge.net>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * audio resampling with soxr
 */

#include "libavutil/log.h"
#include "swresample_internal.h"

#include <soxr.h>

/**
 * Create a soxr resampler; the returned pointer is a soxr_t cast to
 * struct ResampleContext *.
 *
 * Any context passed in is deleted first. filter_size, phase_shift,
 * filter_type, kaiser_beta and exact_rational are not used here.
 *
 * @return the new resampler, or NULL on failure (the soxr error is logged)
 */
static struct ResampleContext *create(struct ResampleContext *c, int out_rate, int in_rate, int filter_size, int phase_shift, int linear,
        double cutoff, enum AVSampleFormat format, enum SwrFilterType filter_type, double kaiser_beta, double precision, int cheby, int exact_rational){
    soxr_error_t error;

    /* map the sample format to the soxr datatype; formats not listed map to -1 */
    soxr_datatype_t type =
        format == AV_SAMPLE_FMT_S16P? SOXR_INT16_S :
        format == AV_SAMPLE_FMT_S16 ? SOXR_INT16_I :
        format == AV_SAMPLE_FMT_S32P? SOXR_INT32_S :
        format == AV_SAMPLE_FMT_S32 ? SOXR_INT32_I :
        format == AV_SAMPLE_FMT_FLTP? SOXR_FLOAT32_S :
        format == AV_SAMPLE_FMT_FLT ? SOXR_FLOAT32_I :
        format == AV_SAMPLE_FMT_DBLP? SOXR_FLOAT64_S :
        format == AV_SAMPLE_FMT_DBL ? SOXR_FLOAT64_I : (soxr_datatype_t)-1;

    soxr_io_spec_t io_spec = soxr_io_spec(type, type);

    soxr_quality_spec_t q_spec = soxr_quality_spec((int)((precision-2)/4), (SOXR_HI_PREC_CLOCK|SOXR_ROLLOFF_NONE)*!!cheby);
    q_spec.precision = linear? 0 : precision;
    /* a non-zero cutoff is clamped to [0.8, 0.995] before being passed on */
#if !defined SOXR_VERSION /* Deprecated @ March 2013: */
    q_spec.bw_pc = cutoff? FFMAX(FFMIN(cutoff,.995),.8)*100 : q_spec.bw_pc;
#else
    q_spec.passband_end = cutoff? FFMAX(FFMIN(cutoff,.995),.8) : q_spec.passband_end;
#endif

    soxr_delete((soxr_t)c);
    c = (struct ResampleContext *)
        soxr_create(in_rate, out_rate, 0, &error, &io_spec, &q_spec, 0);
    if (!c)
        av_log(NULL, AV_LOG_ERROR, "soxr_create: %s\n", error);
    return c;
}

/** Delete the soxr resampler and reset *c to NULL. */
static void destroy(struct ResampleContext * *c){
    soxr_delete((soxr_t)*c);
    *c = NULL;
}

/**
 * Flush the resampler and record in s->delayed_samples_fixup how much the
 * delay reported by soxr_delay() dropped across the flush.
 *
 * NOTE(review): the NULL-input soxr_process() call presumably signals end
 * of input to soxr - confirm against the soxr documentation.
 */
static int flush(struct SwrContext *s){
    s->delayed_samples_fixup = soxr_delay((soxr_t)s->resample);

    soxr_process((soxr_t)s->resample, NULL, 0, NULL, NULL, 0, NULL);

    {
        float f;
        size_t idone, odone;
        soxr_process((soxr_t)s->resample, &f, 0, &idone, &f, 0, &odone);
        s->delayed_samples_fixup -= soxr_delay((soxr_t)s->resample);
    }

    return 0;
}

/**
 * Resample src into dst.
 *
 * @param consumed receives the number of input samples consumed (0 if the
 *                 channel count could not be set)
 * @return number of output samples written, or -1 on error
 */
static int process(
        struct ResampleContext * c, AudioData *dst, int dst_size,
        AudioData *src, int src_size, int *consumed){
    size_t idone, odone;
    soxr_error_t error = soxr_set_error((soxr_t)c, soxr_set_num_channels((soxr_t)c, src->ch_count));
    if (!error)
        error = soxr_process((soxr_t)c, src->ch, (size_t)src_size,
                             &idone, dst->ch, (size_t)dst_size, &odone);
    else
        idone = 0;

    *consumed = (int)idone;
    return error? -1 : odone;
}

/**
 * Delay of the resampler in units of 1/base seconds: soxr_delay() (plus the
 * flush fixup once s->flushed is set) divided by the output sample rate,
 * scaled by base and rounded.
 */
static int64_t get_delay(struct SwrContext *s, int64_t base){
    double delayed_samples = soxr_delay((soxr_t)s->resample);
    double delay_s;

    if (s->flushed)
        delayed_samples += s->delayed_samples_fixup;

    delay_s = delayed_samples / s->out_sample_rate;

    return (int64_t)(delay_s * base + .5);
}

/** Nothing to do for soxr; always returns 0. */
static int invert_initial_buffer(struct ResampleContext *c, AudioData *dst, const AudioData *src,
                                 int in_count, int *out_idx, int *out_sz){
    return 0;
}

/**
 * Estimate of the number of output samples for in_samples more input
 * samples: the rate-converted sample count plus the current delay (with the
 * flush fixup once s->flushed is set), plus 1, rounded.
 */
static int64_t get_out_samples(struct SwrContext *s, int in_samples){
    double out_samples = (double)s->out_sample_rate / s->in_sample_rate * in_samples;
    double delayed_samples = soxr_delay((soxr_t)s->resample);

    if (s->flushed)
        delayed_samples += s->delayed_samples_fixup;

    return (int64_t)(out_samples + delayed_samples + 1 + .5);
}

struct Resampler const swri_soxr_resampler={
    create, destroy, process, flush, NULL /* set_compensation */, get_delay,
    invert_initial_buffer, get_out_samples
};

diff --git a/libswresample/swresample.c b/libswresample/swresample.c
new file mode 100644
index 0000000000..351623b9d6
--- /dev/null
+++ b/libswresample/swresample.c
@@ -0,0 +1,930 @@
/*
 * Copyright (C) 2011-2013 Michael Niedermayer (michaelni@gmx.at)
 *
 * This file is part of libswresample
 *
 * libswresample is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libswresample is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with libswresample; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/opt.h" +#include "swresample_internal.h" +#include "audioconvert.h" +#include "libavutil/avassert.h" +#include "libavutil/channel_layout.h" +#include "libavutil/internal.h" + +#include <float.h> + +#define ALIGN 32 + +#include "libavutil/ffversion.h" +const char swr_ffversion[] = "FFmpeg version " FFMPEG_VERSION; + +unsigned swresample_version(void) +{ + av_assert0(LIBSWRESAMPLE_VERSION_MICRO >= 100); + return LIBSWRESAMPLE_VERSION_INT; +} + +const char *swresample_configuration(void) +{ + return FFMPEG_CONFIGURATION; +} + +const char *swresample_license(void) +{ +#define LICENSE_PREFIX "libswresample license: " + return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1; +} + +int swr_set_channel_mapping(struct SwrContext *s, const int *channel_map){ + if(!s || s->in_convert) // s needs to be allocated but not initialized + return AVERROR(EINVAL); + s->channel_map = channel_map; + return 0; +} + +struct SwrContext *swr_alloc_set_opts(struct SwrContext *s, + int64_t out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate, + int64_t in_ch_layout, enum AVSampleFormat in_sample_fmt, int in_sample_rate, + int log_offset, void *log_ctx){ + if(!s) s= swr_alloc(); + if(!s) return NULL; + + s->log_level_offset= log_offset; + s->log_ctx= log_ctx; + + if (av_opt_set_int(s, "ocl", out_ch_layout, 0) < 0) + goto fail; + + if (av_opt_set_int(s, "osf", out_sample_fmt, 0) < 0) + goto fail; + + if (av_opt_set_int(s, "osr", out_sample_rate, 0) < 0) + goto fail; + + if (av_opt_set_int(s, "icl", in_ch_layout, 0) < 0) + goto fail; + + if (av_opt_set_int(s, "isf", in_sample_fmt, 0) < 0) + goto fail; + + if (av_opt_set_int(s, "isr", in_sample_rate, 0) < 0) + goto fail; + + if (av_opt_set_int(s, "tsf", 
AV_SAMPLE_FMT_NONE, 0) < 0) + goto fail; + + if (av_opt_set_int(s, "ich", av_get_channel_layout_nb_channels(s-> user_in_ch_layout), 0) < 0) + goto fail; + + if (av_opt_set_int(s, "och", av_get_channel_layout_nb_channels(s->user_out_ch_layout), 0) < 0) + goto fail; + + av_opt_set_int(s, "uch", 0, 0); + return s; +fail: + av_log(s, AV_LOG_ERROR, "Failed to set option\n"); + swr_free(&s); + return NULL; +} + +static void set_audiodata_fmt(AudioData *a, enum AVSampleFormat fmt){ + a->fmt = fmt; + a->bps = av_get_bytes_per_sample(fmt); + a->planar= av_sample_fmt_is_planar(fmt); + if (a->ch_count == 1) + a->planar = 1; +} + +static void free_temp(AudioData *a){ + av_free(a->data); + memset(a, 0, sizeof(*a)); +} + +static void clear_context(SwrContext *s){ + s->in_buffer_index= 0; + s->in_buffer_count= 0; + s->resample_in_constraint= 0; + memset(s->in.ch, 0, sizeof(s->in.ch)); + memset(s->out.ch, 0, sizeof(s->out.ch)); + free_temp(&s->postin); + free_temp(&s->midbuf); + free_temp(&s->preout); + free_temp(&s->in_buffer); + free_temp(&s->silence); + free_temp(&s->drop_temp); + free_temp(&s->dither.noise); + free_temp(&s->dither.temp); + swri_audio_convert_free(&s-> in_convert); + swri_audio_convert_free(&s->out_convert); + swri_audio_convert_free(&s->full_convert); + swri_rematrix_free(s); + + s->delayed_samples_fixup = 0; + s->flushed = 0; +} + +av_cold void swr_free(SwrContext **ss){ + SwrContext *s= *ss; + if(s){ + clear_context(s); + if (s->resampler) + s->resampler->free(&s->resample); + } + + av_freep(ss); +} + +av_cold void swr_close(SwrContext *s){ + clear_context(s); +} + +av_cold int swr_init(struct SwrContext *s){ + int ret; + char l1[1024], l2[1024]; + + clear_context(s); + + if(s-> in_sample_fmt >= AV_SAMPLE_FMT_NB){ + av_log(s, AV_LOG_ERROR, "Requested input sample format %d is invalid\n", s->in_sample_fmt); + return AVERROR(EINVAL); + } + if(s->out_sample_fmt >= AV_SAMPLE_FMT_NB){ + av_log(s, AV_LOG_ERROR, "Requested output sample format %d is invalid\n", 
s->out_sample_fmt); + return AVERROR(EINVAL); + } + + s->out.ch_count = s-> user_out_ch_count; + s-> in.ch_count = s-> user_in_ch_count; + s->used_ch_count = s->user_used_ch_count; + + s-> in_ch_layout = s-> user_in_ch_layout; + s->out_ch_layout = s->user_out_ch_layout; + + s->int_sample_fmt= s->user_int_sample_fmt; + + if(av_get_channel_layout_nb_channels(s-> in_ch_layout) > SWR_CH_MAX) { + av_log(s, AV_LOG_WARNING, "Input channel layout 0x%"PRIx64" is invalid or unsupported.\n", s-> in_ch_layout); + s->in_ch_layout = 0; + } + + if(av_get_channel_layout_nb_channels(s->out_ch_layout) > SWR_CH_MAX) { + av_log(s, AV_LOG_WARNING, "Output channel layout 0x%"PRIx64" is invalid or unsupported.\n", s->out_ch_layout); + s->out_ch_layout = 0; + } + + switch(s->engine){ +#if CONFIG_LIBSOXR + case SWR_ENGINE_SOXR: s->resampler = &swri_soxr_resampler; break; +#endif + case SWR_ENGINE_SWR : s->resampler = &swri_resampler; break; + default: + av_log(s, AV_LOG_ERROR, "Requested resampling engine is unavailable\n"); + return AVERROR(EINVAL); + } + + if(!s->used_ch_count) + s->used_ch_count= s->in.ch_count; + + if(s->used_ch_count && s-> in_ch_layout && s->used_ch_count != av_get_channel_layout_nb_channels(s-> in_ch_layout)){ + av_log(s, AV_LOG_WARNING, "Input channel layout has a different number of channels than the number of used channels, ignoring layout\n"); + s-> in_ch_layout= 0; + } + + if(!s-> in_ch_layout) + s-> in_ch_layout= av_get_default_channel_layout(s->used_ch_count); + if(!s->out_ch_layout) + s->out_ch_layout= av_get_default_channel_layout(s->out.ch_count); + + s->rematrix= s->out_ch_layout !=s->in_ch_layout || s->rematrix_volume!=1.0 || + s->rematrix_custom; + + if(s->int_sample_fmt == AV_SAMPLE_FMT_NONE){ + if( av_get_bytes_per_sample(s-> in_sample_fmt) <= 2 + && av_get_bytes_per_sample(s->out_sample_fmt) <= 2){ + s->int_sample_fmt= AV_SAMPLE_FMT_S16P; + }else if( av_get_bytes_per_sample(s-> in_sample_fmt) <= 2 + && !s->rematrix + && 
s->out_sample_rate==s->in_sample_rate + && !(s->flags & SWR_FLAG_RESAMPLE)){ + s->int_sample_fmt= AV_SAMPLE_FMT_S16P; + }else if( av_get_planar_sample_fmt(s-> in_sample_fmt) == AV_SAMPLE_FMT_S32P + && av_get_planar_sample_fmt(s->out_sample_fmt) == AV_SAMPLE_FMT_S32P + && !s->rematrix + && s->engine != SWR_ENGINE_SOXR){ + s->int_sample_fmt= AV_SAMPLE_FMT_S32P; + }else if(av_get_bytes_per_sample(s->in_sample_fmt) <= 4){ + s->int_sample_fmt= AV_SAMPLE_FMT_FLTP; + }else{ + s->int_sample_fmt= AV_SAMPLE_FMT_DBLP; + } + } + av_log(s, AV_LOG_DEBUG, "Using %s internally between filters\n", av_get_sample_fmt_name(s->int_sample_fmt)); + + if( s->int_sample_fmt != AV_SAMPLE_FMT_S16P + &&s->int_sample_fmt != AV_SAMPLE_FMT_S32P + &&s->int_sample_fmt != AV_SAMPLE_FMT_FLTP + &&s->int_sample_fmt != AV_SAMPLE_FMT_DBLP){ + av_log(s, AV_LOG_ERROR, "Requested sample format %s is not supported internally, S16/S32/FLT/DBL is supported\n", av_get_sample_fmt_name(s->int_sample_fmt)); + return AVERROR(EINVAL); + } + + set_audiodata_fmt(&s-> in, s-> in_sample_fmt); + set_audiodata_fmt(&s->out, s->out_sample_fmt); + + if (s->firstpts_in_samples != AV_NOPTS_VALUE) { + if (!s->async && s->min_compensation >= FLT_MAX/2) + s->async = 1; + s->firstpts = + s->outpts = s->firstpts_in_samples * s->out_sample_rate; + } else + s->firstpts = AV_NOPTS_VALUE; + + if (s->async) { + if (s->min_compensation >= FLT_MAX/2) + s->min_compensation = 0.001; + if (s->async > 1.0001) { + s->max_soft_compensation = s->async / (double) s->in_sample_rate; + } + } + + if (s->out_sample_rate!=s->in_sample_rate || (s->flags & SWR_FLAG_RESAMPLE)){ + s->resample = s->resampler->init(s->resample, s->out_sample_rate, s->in_sample_rate, s->filter_size, s->phase_shift, s->linear_interp, s->cutoff, s->int_sample_fmt, s->filter_type, s->kaiser_beta, s->precision, s->cheby, s->exact_rational); + if (!s->resample) { + av_log(s, AV_LOG_ERROR, "Failed to initialize resampler\n"); + return AVERROR(ENOMEM); + } + }else + 
s->resampler->free(&s->resample); + if( s->int_sample_fmt != AV_SAMPLE_FMT_S16P + && s->int_sample_fmt != AV_SAMPLE_FMT_S32P + && s->int_sample_fmt != AV_SAMPLE_FMT_FLTP + && s->int_sample_fmt != AV_SAMPLE_FMT_DBLP + && s->resample){ + av_log(s, AV_LOG_ERROR, "Resampling only supported with internal s16/s32/flt/dbl\n"); + ret = AVERROR(EINVAL); + goto fail; + } + +#define RSC 1 //FIXME finetune + if(!s-> in.ch_count) + s-> in.ch_count= av_get_channel_layout_nb_channels(s-> in_ch_layout); + if(!s->used_ch_count) + s->used_ch_count= s->in.ch_count; + if(!s->out.ch_count) + s->out.ch_count= av_get_channel_layout_nb_channels(s->out_ch_layout); + + if(!s-> in.ch_count){ + av_assert0(!s->in_ch_layout); + av_log(s, AV_LOG_ERROR, "Input channel count and layout are unset\n"); + ret = AVERROR(EINVAL); + goto fail; + } + + av_get_channel_layout_string(l1, sizeof(l1), s-> in.ch_count, s-> in_ch_layout); + av_get_channel_layout_string(l2, sizeof(l2), s->out.ch_count, s->out_ch_layout); + if (s->out_ch_layout && s->out.ch_count != av_get_channel_layout_nb_channels(s->out_ch_layout)) { + av_log(s, AV_LOG_ERROR, "Output channel layout %s mismatches specified channel count %d\n", l2, s->out.ch_count); + ret = AVERROR(EINVAL); + goto fail; + } + if (s->in_ch_layout && s->used_ch_count != av_get_channel_layout_nb_channels(s->in_ch_layout)) { + av_log(s, AV_LOG_ERROR, "Input channel layout %s mismatches specified channel count %d\n", l1, s->used_ch_count); + ret = AVERROR(EINVAL); + goto fail; + } + + if ((!s->out_ch_layout || !s->in_ch_layout) && s->used_ch_count != s->out.ch_count && !s->rematrix_custom) { + av_log(s, AV_LOG_ERROR, "Rematrix is needed between %s and %s " + "but there is not enough information to do it\n", l1, l2); + ret = AVERROR(EINVAL); + goto fail; + } + +av_assert0(s->used_ch_count); +av_assert0(s->out.ch_count); + s->resample_first= RSC*s->out.ch_count/s->in.ch_count - RSC < s->out_sample_rate/(float)s-> in_sample_rate - 1.0; + + s->in_buffer= s->in; + 
s->silence = s->in; + s->drop_temp= s->out; + + if(!s->resample && !s->rematrix && !s->channel_map && !s->dither.method){ + s->full_convert = swri_audio_convert_alloc(s->out_sample_fmt, + s-> in_sample_fmt, s-> in.ch_count, NULL, 0); + return 0; + } + + s->in_convert = swri_audio_convert_alloc(s->int_sample_fmt, + s-> in_sample_fmt, s->used_ch_count, s->channel_map, 0); + s->out_convert= swri_audio_convert_alloc(s->out_sample_fmt, + s->int_sample_fmt, s->out.ch_count, NULL, 0); + + if (!s->in_convert || !s->out_convert) { + ret = AVERROR(ENOMEM); + goto fail; + } + + s->postin= s->in; + s->preout= s->out; + s->midbuf= s->in; + + if(s->channel_map){ + s->postin.ch_count= + s->midbuf.ch_count= s->used_ch_count; + if(s->resample) + s->in_buffer.ch_count= s->used_ch_count; + } + if(!s->resample_first){ + s->midbuf.ch_count= s->out.ch_count; + if(s->resample) + s->in_buffer.ch_count = s->out.ch_count; + } + + set_audiodata_fmt(&s->postin, s->int_sample_fmt); + set_audiodata_fmt(&s->midbuf, s->int_sample_fmt); + set_audiodata_fmt(&s->preout, s->int_sample_fmt); + + if(s->resample){ + set_audiodata_fmt(&s->in_buffer, s->int_sample_fmt); + } + + if ((ret = swri_dither_init(s, s->out_sample_fmt, s->int_sample_fmt)) < 0) + goto fail; + + if(s->rematrix || s->dither.method) { + ret = swri_rematrix_init(s); + if (ret < 0) + goto fail; + } + + return 0; +fail: + swr_close(s); + return ret; + +} + +int swri_realloc_audio(AudioData *a, int count){ + int i, countb; + AudioData old; + + if(count < 0 || count > INT_MAX/2/a->bps/a->ch_count) + return AVERROR(EINVAL); + + if(a->count >= count) + return 0; + + count*=2; + + countb= FFALIGN(count*a->bps, ALIGN); + old= *a; + + av_assert0(a->bps); + av_assert0(a->ch_count); + + a->data= av_mallocz_array(countb, a->ch_count); + if(!a->data) + return AVERROR(ENOMEM); + for(i=0; i<a->ch_count; i++){ + a->ch[i]= a->data + i*(a->planar ? 
countb : a->bps); + if(a->planar) memcpy(a->ch[i], old.ch[i], a->count*a->bps); + } + if(!a->planar) memcpy(a->ch[0], old.ch[0], a->count*a->ch_count*a->bps); + av_freep(&old.data); + a->count= count; + + return 1; +} + +static void copy(AudioData *out, AudioData *in, + int count){ + av_assert0(out->planar == in->planar); + av_assert0(out->bps == in->bps); + av_assert0(out->ch_count == in->ch_count); + if(out->planar){ + int ch; + for(ch=0; ch<out->ch_count; ch++) + memcpy(out->ch[ch], in->ch[ch], count*out->bps); + }else + memcpy(out->ch[0], in->ch[0], count*out->ch_count*out->bps); +} + +static void fill_audiodata(AudioData *out, uint8_t *in_arg [SWR_CH_MAX]){ + int i; + if(!in_arg){ + memset(out->ch, 0, sizeof(out->ch)); + }else if(out->planar){ + for(i=0; i<out->ch_count; i++) + out->ch[i]= in_arg[i]; + }else{ + for(i=0; i<out->ch_count; i++) + out->ch[i]= in_arg[0] + i*out->bps; + } +} + +static void reversefill_audiodata(AudioData *out, uint8_t *in_arg [SWR_CH_MAX]){ + int i; + if(out->planar){ + for(i=0; i<out->ch_count; i++) + in_arg[i]= out->ch[i]; + }else{ + in_arg[0]= out->ch[0]; + } +} + +/** + * + * out may be equal in. + */ +static void buf_set(AudioData *out, AudioData *in, int count){ + int ch; + if(in->planar){ + for(ch=0; ch<out->ch_count; ch++) + out->ch[ch]= in->ch[ch] + count*out->bps; + }else{ + for(ch=out->ch_count-1; ch>=0; ch--) + out->ch[ch]= in->ch[0] + (ch + count*out->ch_count) * out->bps; + } +} + +/** + * + * @return number of samples output per channel + */ +static int resample(SwrContext *s, AudioData *out_param, int out_count, + const AudioData * in_param, int in_count){ + AudioData in, out, tmp; + int ret_sum=0; + int border=0; + int padless = ARCH_X86 && s->engine == SWR_ENGINE_SWR ? 
7 : 0; + + av_assert1(s->in_buffer.ch_count == in_param->ch_count); + av_assert1(s->in_buffer.planar == in_param->planar); + av_assert1(s->in_buffer.fmt == in_param->fmt); + + tmp=out=*out_param; + in = *in_param; + + border = s->resampler->invert_initial_buffer(s->resample, &s->in_buffer, + &in, in_count, &s->in_buffer_index, &s->in_buffer_count); + if (border == INT_MAX) { + return 0; + } else if (border < 0) { + return border; + } else if (border) { + buf_set(&in, &in, border); + in_count -= border; + s->resample_in_constraint = 0; + } + + do{ + int ret, size, consumed; + if(!s->resample_in_constraint && s->in_buffer_count){ + buf_set(&tmp, &s->in_buffer, s->in_buffer_index); + ret= s->resampler->multiple_resample(s->resample, &out, out_count, &tmp, s->in_buffer_count, &consumed); + out_count -= ret; + ret_sum += ret; + buf_set(&out, &out, ret); + s->in_buffer_count -= consumed; + s->in_buffer_index += consumed; + + if(!in_count) + break; + if(s->in_buffer_count <= border){ + buf_set(&in, &in, -s->in_buffer_count); + in_count += s->in_buffer_count; + s->in_buffer_count=0; + s->in_buffer_index=0; + border = 0; + } + } + + if((s->flushed || in_count > padless) && !s->in_buffer_count){ + s->in_buffer_index=0; + ret= s->resampler->multiple_resample(s->resample, &out, out_count, &in, FFMAX(in_count-padless, 0), &consumed); + out_count -= ret; + ret_sum += ret; + buf_set(&out, &out, ret); + in_count -= consumed; + buf_set(&in, &in, consumed); + } + + //TODO is this check sane considering the advanced copy avoidance below + size= s->in_buffer_index + s->in_buffer_count + in_count; + if( size > s->in_buffer.count + && s->in_buffer_count + in_count <= s->in_buffer_index){ + buf_set(&tmp, &s->in_buffer, s->in_buffer_index); + copy(&s->in_buffer, &tmp, s->in_buffer_count); + s->in_buffer_index=0; + }else + if((ret=swri_realloc_audio(&s->in_buffer, size)) < 0) + return ret; + + if(in_count){ + int count= in_count; + if(s->in_buffer_count && s->in_buffer_count+2 < count && 
out_count) count= s->in_buffer_count+2; + + buf_set(&tmp, &s->in_buffer, s->in_buffer_index + s->in_buffer_count); + copy(&tmp, &in, /*in_*/count); + s->in_buffer_count += count; + in_count -= count; + border += count; + buf_set(&in, &in, count); + s->resample_in_constraint= 0; + if(s->in_buffer_count != count || in_count) + continue; + if (padless) { + padless = 0; + continue; + } + } + break; + }while(1); + + s->resample_in_constraint= !!out_count; + + return ret_sum; +} + +static int swr_convert_internal(struct SwrContext *s, AudioData *out, int out_count, + AudioData *in , int in_count){ + AudioData *postin, *midbuf, *preout; + int ret/*, in_max*/; + AudioData preout_tmp, midbuf_tmp; + + if(s->full_convert){ + av_assert0(!s->resample); + swri_audio_convert(s->full_convert, out, in, in_count); + return out_count; + } + +// in_max= out_count*(int64_t)s->in_sample_rate / s->out_sample_rate + resample_filter_taps; +// in_count= FFMIN(in_count, in_in + 2 - s->hist_buffer_count); + + if((ret=swri_realloc_audio(&s->postin, in_count))<0) + return ret; + if(s->resample_first){ + av_assert0(s->midbuf.ch_count == s->used_ch_count); + if((ret=swri_realloc_audio(&s->midbuf, out_count))<0) + return ret; + }else{ + av_assert0(s->midbuf.ch_count == s->out.ch_count); + if((ret=swri_realloc_audio(&s->midbuf, in_count))<0) + return ret; + } + if((ret=swri_realloc_audio(&s->preout, out_count))<0) + return ret; + + postin= &s->postin; + + midbuf_tmp= s->midbuf; + midbuf= &midbuf_tmp; + preout_tmp= s->preout; + preout= &preout_tmp; + + if(s->int_sample_fmt == s-> in_sample_fmt && s->in.planar && !s->channel_map) + postin= in; + + if(s->resample_first ? !s->resample : !s->rematrix) + midbuf= postin; + + if(s->resample_first ? 
!s->rematrix : !s->resample) + preout= midbuf; + + if(s->int_sample_fmt == s->out_sample_fmt && s->out.planar + && !(s->out_sample_fmt==AV_SAMPLE_FMT_S32P && (s->dither.output_sample_bits&31))){ + if(preout==in){ + out_count= FFMIN(out_count, in_count); //TODO check at the end if this is needed or redundant + av_assert0(s->in.planar); //we only support planar internally so it has to be, we support copying non planar though + copy(out, in, out_count); + return out_count; + } + else if(preout==postin) preout= midbuf= postin= out; + else if(preout==midbuf) preout= midbuf= out; + else preout= out; + } + + if(in != postin){ + swri_audio_convert(s->in_convert, postin, in, in_count); + } + + if(s->resample_first){ + if(postin != midbuf) + out_count= resample(s, midbuf, out_count, postin, in_count); + if(midbuf != preout) + swri_rematrix(s, preout, midbuf, out_count, preout==out); + }else{ + if(postin != midbuf) + swri_rematrix(s, midbuf, postin, in_count, midbuf==out); + if(midbuf != preout) + out_count= resample(s, preout, out_count, midbuf, in_count); + } + + if(preout != out && out_count){ + AudioData *conv_src = preout; + if(s->dither.method){ + int ch; + int dither_count= FFMAX(out_count, 1<<16); + + if (preout == in) { + conv_src = &s->dither.temp; + if((ret=swri_realloc_audio(&s->dither.temp, dither_count))<0) + return ret; + } + + if((ret=swri_realloc_audio(&s->dither.noise, dither_count))<0) + return ret; + if(ret) + for(ch=0; ch<s->dither.noise.ch_count; ch++) + if((ret=swri_get_dither(s, s->dither.noise.ch[ch], s->dither.noise.count, (12345678913579ULL*ch + 3141592) % 2718281828U, s->dither.noise.fmt))<0) + return ret; + av_assert0(s->dither.noise.ch_count == preout->ch_count); + + if(s->dither.noise_pos + out_count > s->dither.noise.count) + s->dither.noise_pos = 0; + + if (s->dither.method < SWR_DITHER_NS){ + if (s->mix_2_1_simd) { + int len1= out_count&~15; + int off = len1 * preout->bps; + + if(len1) + for(ch=0; ch<preout->ch_count; ch++) + 
s->mix_2_1_simd(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_simd_one, 0, 0, len1); + if(out_count != len1) + for(ch=0; ch<preout->ch_count; ch++) + s->mix_2_1_f(conv_src->ch[ch] + off, preout->ch[ch] + off, s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos + off + len1, s->native_one, 0, 0, out_count - len1); + } else { + for(ch=0; ch<preout->ch_count; ch++) + s->mix_2_1_f(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_one, 0, 0, out_count); + } + } else { + switch(s->int_sample_fmt) { + case AV_SAMPLE_FMT_S16P :swri_noise_shaping_int16(s, conv_src, preout, &s->dither.noise, out_count); break; + case AV_SAMPLE_FMT_S32P :swri_noise_shaping_int32(s, conv_src, preout, &s->dither.noise, out_count); break; + case AV_SAMPLE_FMT_FLTP :swri_noise_shaping_float(s, conv_src, preout, &s->dither.noise, out_count); break; + case AV_SAMPLE_FMT_DBLP :swri_noise_shaping_double(s,conv_src, preout, &s->dither.noise, out_count); break; + } + } + s->dither.noise_pos += out_count; + } +//FIXME packed doesn't need more than 1 chan here! 
+ swri_audio_convert(s->out_convert, out, conv_src, out_count); + } + return out_count; +} + +int swr_is_initialized(struct SwrContext *s) { + return !!s->in_buffer.ch_count; +} + +int attribute_align_arg swr_convert(struct SwrContext *s, uint8_t *out_arg[SWR_CH_MAX], int out_count, + const uint8_t *in_arg [SWR_CH_MAX], int in_count){ + AudioData * in= &s->in; + AudioData *out= &s->out; + int av_unused max_output; + + if (!swr_is_initialized(s)) { + av_log(s, AV_LOG_ERROR, "Context has not been initialized\n"); + return AVERROR(EINVAL); + } +#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >1 + max_output = swr_get_out_samples(s, in_count); +#endif + + while(s->drop_output > 0){ + int ret; + uint8_t *tmp_arg[SWR_CH_MAX]; +#define MAX_DROP_STEP 16384 + if((ret=swri_realloc_audio(&s->drop_temp, FFMIN(s->drop_output, MAX_DROP_STEP)))<0) + return ret; + + reversefill_audiodata(&s->drop_temp, tmp_arg); + s->drop_output *= -1; //FIXME find a less hackish solution + ret = swr_convert(s, tmp_arg, FFMIN(-s->drop_output, MAX_DROP_STEP), in_arg, in_count); //FIXME optimize but this is as good as never called so maybe it doesn't matter + s->drop_output *= -1; + in_count = 0; + if(ret>0) { + s->drop_output -= ret; + if (!s->drop_output && !out_arg) + return 0; + continue; + } + + av_assert0(s->drop_output); + return 0; + } + + if(!in_arg){ + if(s->resample){ + if (!s->flushed) + s->resampler->flush(s); + s->resample_in_constraint = 0; + s->flushed = 1; + }else if(!s->in_buffer_count){ + return 0; + } + }else + fill_audiodata(in , (void*)in_arg); + + fill_audiodata(out, out_arg); + + if(s->resample){ + int ret = swr_convert_internal(s, out, out_count, in, in_count); + if(ret>0 && !s->drop_output) + s->outpts += ret * (int64_t)s->in_sample_rate; + + av_assert2(max_output < 0 || ret < 0 || ret <= max_output); + + return ret; + }else{ + AudioData tmp= *in; + int ret2=0; + int ret, size; + size = FFMIN(out_count, s->in_buffer_count); + if(size){ + buf_set(&tmp, &s->in_buffer, 
s->in_buffer_index); + ret= swr_convert_internal(s, out, size, &tmp, size); + if(ret<0) + return ret; + ret2= ret; + s->in_buffer_count -= ret; + s->in_buffer_index += ret; + buf_set(out, out, ret); + out_count -= ret; + if(!s->in_buffer_count) + s->in_buffer_index = 0; + } + + if(in_count){ + size= s->in_buffer_index + s->in_buffer_count + in_count - out_count; + + if(in_count > out_count) { //FIXME move after swr_convert_internal + if( size > s->in_buffer.count + && s->in_buffer_count + in_count - out_count <= s->in_buffer_index){ + buf_set(&tmp, &s->in_buffer, s->in_buffer_index); + copy(&s->in_buffer, &tmp, s->in_buffer_count); + s->in_buffer_index=0; + }else + if((ret=swri_realloc_audio(&s->in_buffer, size)) < 0) + return ret; + } + + if(out_count){ + size = FFMIN(in_count, out_count); + ret= swr_convert_internal(s, out, size, in, size); + if(ret<0) + return ret; + buf_set(in, in, ret); + in_count -= ret; + ret2 += ret; + } + if(in_count){ + buf_set(&tmp, &s->in_buffer, s->in_buffer_index + s->in_buffer_count); + copy(&tmp, in, in_count); + s->in_buffer_count += in_count; + } + } + if(ret2>0 && !s->drop_output) + s->outpts += ret2 * (int64_t)s->in_sample_rate; + av_assert2(max_output < 0 || ret2 < 0 || ret2 <= max_output); + return ret2; + } +} + +int swr_drop_output(struct SwrContext *s, int count){ + const uint8_t *tmp_arg[SWR_CH_MAX]; + s->drop_output += count; + + if(s->drop_output <= 0) + return 0; + + av_log(s, AV_LOG_VERBOSE, "discarding %d audio samples\n", count); + return swr_convert(s, NULL, s->drop_output, tmp_arg, 0); +} + +int swr_inject_silence(struct SwrContext *s, int count){ + int ret, i; + uint8_t *tmp_arg[SWR_CH_MAX]; + + if(count <= 0) + return 0; + +#define MAX_SILENCE_STEP 16384 + while (count > MAX_SILENCE_STEP) { + if ((ret = swr_inject_silence(s, MAX_SILENCE_STEP)) < 0) + return ret; + count -= MAX_SILENCE_STEP; + } + + if((ret=swri_realloc_audio(&s->silence, count))<0) + return ret; + + if(s->silence.planar) for(i=0; 
i<s->silence.ch_count; i++) { + memset(s->silence.ch[i], s->silence.bps==1 ? 0x80 : 0, count*s->silence.bps); + } else + memset(s->silence.ch[0], s->silence.bps==1 ? 0x80 : 0, count*s->silence.bps*s->silence.ch_count); + + reversefill_audiodata(&s->silence, tmp_arg); + av_log(s, AV_LOG_VERBOSE, "adding %d audio samples of silence\n", count); + ret = swr_convert(s, NULL, 0, (const uint8_t**)tmp_arg, count); + return ret; +} + +int64_t swr_get_delay(struct SwrContext *s, int64_t base){ + if (s->resampler && s->resample){ + return s->resampler->get_delay(s, base); + }else{ + return (s->in_buffer_count*base + (s->in_sample_rate>>1))/ s->in_sample_rate; + } +} + +int swr_get_out_samples(struct SwrContext *s, int in_samples) +{ + int64_t out_samples; + + if (in_samples < 0) + return AVERROR(EINVAL); + + if (s->resampler && s->resample) { + if (!s->resampler->get_out_samples) + return AVERROR(ENOSYS); + out_samples = s->resampler->get_out_samples(s, in_samples); + } else { + out_samples = s->in_buffer_count + in_samples; + av_assert0(s->out_sample_rate == s->in_sample_rate); + } + + if (out_samples > INT_MAX) + return AVERROR(EINVAL); + + return out_samples; +} + +int swr_set_compensation(struct SwrContext *s, int sample_delta, int compensation_distance){ + int ret; + + if (!s || compensation_distance < 0) + return AVERROR(EINVAL); + if (!compensation_distance && sample_delta) + return AVERROR(EINVAL); + if (!s->resample) { + s->flags |= SWR_FLAG_RESAMPLE; + ret = swr_init(s); + if (ret < 0) + return ret; + } + if (!s->resampler->set_compensation){ + return AVERROR(EINVAL); + }else{ + return s->resampler->set_compensation(s->resample, sample_delta, compensation_distance); + } +} + +int64_t swr_next_pts(struct SwrContext *s, int64_t pts){ + if(pts == INT64_MIN) + return s->outpts; + + if (s->firstpts == AV_NOPTS_VALUE) + s->outpts = s->firstpts = pts; + + if(s->min_compensation >= FLT_MAX) { + return (s->outpts = pts - swr_get_delay(s, s->in_sample_rate * 
(int64_t)s->out_sample_rate)); + } else { + int64_t delta = pts - swr_get_delay(s, s->in_sample_rate * (int64_t)s->out_sample_rate) - s->outpts + s->drop_output*(int64_t)s->in_sample_rate; + double fdelta = delta /(double)(s->in_sample_rate * (int64_t)s->out_sample_rate); + + if(fabs(fdelta) > s->min_compensation) { + if(s->outpts == s->firstpts || fabs(fdelta) > s->min_hard_compensation){ + int ret; + if(delta > 0) ret = swr_inject_silence(s, delta / s->out_sample_rate); + else ret = swr_drop_output (s, -delta / s-> in_sample_rate); + if(ret<0){ + av_log(s, AV_LOG_ERROR, "Failed to compensate for timestamp delta of %f\n", fdelta); + } + } else if(s->soft_compensation_duration && s->max_soft_compensation) { + int duration = s->out_sample_rate * s->soft_compensation_duration; + double max_soft_compensation = s->max_soft_compensation / (s->max_soft_compensation < 0 ? -s->in_sample_rate : 1); + int comp = av_clipf(fdelta, -max_soft_compensation, max_soft_compensation) * duration ; + av_log(s, AV_LOG_VERBOSE, "compensating audio timestamp drift:%f compensation:%d in:%d\n", fdelta, comp, duration); + swr_set_compensation(s, comp, duration); + } + } + + return s->outpts; + } +} diff --git a/libswresample/swresample.h b/libswresample/swresample.h new file mode 100644 index 0000000000..10eaebc439 --- /dev/null +++ b/libswresample/swresample.h @@ -0,0 +1,553 @@ +/* + * Copyright (C) 2011-2013 Michael Niedermayer (michaelni@gmx.at) + * + * This file is part of libswresample + * + * libswresample is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libswresample is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libswresample; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef SWRESAMPLE_SWRESAMPLE_H +#define SWRESAMPLE_SWRESAMPLE_H + +/** + * @file + * @ingroup lswr + * libswresample public header + */ + +/** + * @defgroup lswr Libswresample + * @{ + * + * Libswresample (lswr) is a library that handles audio resampling, sample + * format conversion and mixing. + * + * Interaction with lswr is done through SwrContext, which is + * allocated with swr_alloc() or swr_alloc_set_opts(). It is opaque, so all parameters + * must be set with the @ref avoptions API. + * + * The first thing you will need to do in order to use lswr is to allocate + * SwrContext. This can be done with swr_alloc() or swr_alloc_set_opts(). If you + * are using the former, you must set options through the @ref avoptions API. + * The latter function provides the same feature, but it allows you to set some + * common options in the same statement. + * + * For example the following code will setup conversion from planar float sample + * format to interleaved signed 16-bit integer, downsampling from 48kHz to + * 44.1kHz and downmixing from 5.1 channels to stereo (using the default mixing + * matrix). This is using the swr_alloc() function. 
+ * @code + * SwrContext *swr = swr_alloc(); + * av_opt_set_channel_layout(swr, "in_channel_layout", AV_CH_LAYOUT_5POINT1, 0); + * av_opt_set_channel_layout(swr, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0); + * av_opt_set_int(swr, "in_sample_rate", 48000, 0); + * av_opt_set_int(swr, "out_sample_rate", 44100, 0); + * av_opt_set_sample_fmt(swr, "in_sample_fmt", AV_SAMPLE_FMT_FLTP, 0); + * av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16, 0); + * @endcode + * + * The same job can be done using swr_alloc_set_opts() as well: + * @code + * SwrContext *swr = swr_alloc_set_opts(NULL, // we're allocating a new context + * AV_CH_LAYOUT_STEREO, // out_ch_layout + * AV_SAMPLE_FMT_S16, // out_sample_fmt + * 44100, // out_sample_rate + * AV_CH_LAYOUT_5POINT1, // in_ch_layout + * AV_SAMPLE_FMT_FLTP, // in_sample_fmt + * 48000, // in_sample_rate + * 0, // log_offset + * NULL); // log_ctx + * @endcode + * + * Once all values have been set, it must be initialized with swr_init(). If + * you need to change the conversion parameters, you can change the parameters + * using @ref AVOptions, as described above in the first example; or by using + * swr_alloc_set_opts(), but with the first argument the allocated context. + * You must then call swr_init() again. + * + * The conversion itself is done by repeatedly calling swr_convert(). + * Note that the samples may get buffered in swr if you provide insufficient + * output space or if sample rate conversion is done, which requires "future" + * samples. Samples that do not require future input can be retrieved at any + * time by using swr_convert() (in_count can be set to 0). + * At the end of conversion the resampling buffer can be flushed by calling + * swr_convert() with NULL in and 0 in_count. + * + * The samples used in the conversion process can be managed with the libavutil + * @ref lavu_sampmanip "samples manipulation" API, including av_samples_alloc() + * function used in the following example. 
+ * + * The delay between input and output, can at any time be found by using + * swr_get_delay(). + * + * The following code demonstrates the conversion loop assuming the parameters + * from above and caller-defined functions get_input() and handle_output(): + * @code + * uint8_t **input; + * int in_samples; + * + * while (get_input(&input, &in_samples)) { + * uint8_t *output; + * int out_samples = av_rescale_rnd(swr_get_delay(swr, 48000) + + * in_samples, 44100, 48000, AV_ROUND_UP); + * av_samples_alloc(&output, NULL, 2, out_samples, + * AV_SAMPLE_FMT_S16, 0); + * out_samples = swr_convert(swr, &output, out_samples, + * input, in_samples); + * handle_output(output, out_samples); + * av_freep(&output); + * } + * @endcode + * + * When the conversion is finished, the conversion + * context and everything associated with it must be freed with swr_free(). + * A swr_close() function is also available, but it exists mainly for + * compatibility with libavresample, and is not required to be called. + * + * There will be no memory leak if the data is not completely flushed before + * swr_free(). + */ + +#include <stdint.h> +#include "libavutil/frame.h" +#include "libavutil/samplefmt.h" + +#include "libswresample/version.h" + +#if LIBSWRESAMPLE_VERSION_MAJOR < 1 +#define SWR_CH_MAX 32 ///< Maximum number of channels +#endif + +/** + * @name Option constants + * These constants are used for the @ref avoptions interface for lswr. + * @{ + * + */ + +#define SWR_FLAG_RESAMPLE 1 ///< Force resampling even if equal sample rate +//TODO use int resample ? +//long term TODO can we enable this dynamically? 
+ +/** Dithering algorithms */ +enum SwrDitherType { + SWR_DITHER_NONE = 0, + SWR_DITHER_RECTANGULAR, + SWR_DITHER_TRIANGULAR, + SWR_DITHER_TRIANGULAR_HIGHPASS, + + SWR_DITHER_NS = 64, ///< not part of API/ABI + SWR_DITHER_NS_LIPSHITZ, + SWR_DITHER_NS_F_WEIGHTED, + SWR_DITHER_NS_MODIFIED_E_WEIGHTED, + SWR_DITHER_NS_IMPROVED_E_WEIGHTED, + SWR_DITHER_NS_SHIBATA, + SWR_DITHER_NS_LOW_SHIBATA, + SWR_DITHER_NS_HIGH_SHIBATA, + SWR_DITHER_NB, ///< not part of API/ABI +}; + +/** Resampling Engines */ +enum SwrEngine { + SWR_ENGINE_SWR, /**< SW Resampler */ + SWR_ENGINE_SOXR, /**< SoX Resampler */ + SWR_ENGINE_NB, ///< not part of API/ABI +}; + +/** Resampling Filter Types */ +enum SwrFilterType { + SWR_FILTER_TYPE_CUBIC, /**< Cubic */ + SWR_FILTER_TYPE_BLACKMAN_NUTTALL, /**< Blackman Nuttall windowed sinc */ + SWR_FILTER_TYPE_KAISER, /**< Kaiser windowed sinc */ +}; + +/** + * @} + */ + +/** + * The libswresample context. Unlike libavcodec and libavformat, this structure + * is opaque. This means that if you would like to set options, you must use + * the @ref avoptions API and cannot directly set values to members of the + * structure. + */ +typedef struct SwrContext SwrContext; + +/** + * Get the AVClass for SwrContext. It can be used in combination with + * AV_OPT_SEARCH_FAKE_OBJ for examining options. + * + * @see av_opt_find(). + * @return the AVClass of SwrContext + */ +const AVClass *swr_get_class(void); + +/** + * @name SwrContext constructor functions + * @{ + */ + +/** + * Allocate SwrContext. + * + * If you use this function you will need to set the parameters (manually or + * with swr_alloc_set_opts()) before calling swr_init(). + * + * @see swr_alloc_set_opts(), swr_init(), swr_free() + * @return NULL on error, allocated context otherwise + */ +struct SwrContext *swr_alloc(void); + +/** + * Initialize context after user parameters have been set. + * @note The context must be configured using the AVOption API. 
+ * + * @see av_opt_set_int() + * @see av_opt_set_dict() + * + * @param[in,out] s Swr context to initialize + * @return AVERROR error code in case of failure. + */ +int swr_init(struct SwrContext *s); + +/** + * Check whether an swr context has been initialized or not. + * + * @param[in] s Swr context to check + * @see swr_init() + * @return positive if it has been initialized, 0 if not initialized + */ +int swr_is_initialized(struct SwrContext *s); + +/** + * Allocate SwrContext if needed and set/reset common parameters. + * + * This function does not require s to be allocated with swr_alloc(). On the + * other hand, swr_alloc() can use swr_alloc_set_opts() to set the parameters + * on the allocated context. + * + * @param s existing Swr context if available, or NULL if not + * @param out_ch_layout output channel layout (AV_CH_LAYOUT_*) + * @param out_sample_fmt output sample format (AV_SAMPLE_FMT_*). + * @param out_sample_rate output sample rate (frequency in Hz) + * @param in_ch_layout input channel layout (AV_CH_LAYOUT_*) + * @param in_sample_fmt input sample format (AV_SAMPLE_FMT_*). + * @param in_sample_rate input sample rate (frequency in Hz) + * @param log_offset logging level offset + * @param log_ctx parent logging context, can be NULL + * + * @see swr_init(), swr_free() + * @return NULL on error, allocated context otherwise + */ +struct SwrContext *swr_alloc_set_opts(struct SwrContext *s, + int64_t out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate, + int64_t in_ch_layout, enum AVSampleFormat in_sample_fmt, int in_sample_rate, + int log_offset, void *log_ctx); + +/** + * @} + * + * @name SwrContext destructor functions + * @{ + */ + +/** + * Free the given SwrContext and set the pointer to NULL. + * + * @param[in] s a pointer to a pointer to Swr context + */ +void swr_free(struct SwrContext **s); + +/** + * Closes the context so that swr_is_initialized() returns 0. 
+ * + * The context can be brought back to life by running swr_init(), + * swr_init() can also be used without swr_close(). + * This function is mainly provided for simplifying the usecase + * where one tries to support libavresample and libswresample. + * + * @param[in,out] s Swr context to be closed + */ +void swr_close(struct SwrContext *s); + +/** + * @} + * + * @name Core conversion functions + * @{ + */ + +/** Convert audio. + * + * in and in_count can be set to 0 to flush the last few samples out at the + * end. + * + * If more input is provided than output space, then the input will be buffered. + * You can avoid this buffering by using swr_get_out_samples() to retrieve an + * upper bound on the required number of output samples for the given number of + * input samples. Conversion will run directly without copying whenever possible. + * + * @param s allocated Swr context, with parameters set + * @param out output buffers, only the first one need be set in case of packed audio + * @param out_count amount of space available for output in samples per channel + * @param in input buffers, only the first one need to be set in case of packed audio + * @param in_count number of input samples available in one channel + * + * @return number of samples output per channel, negative value on error + */ +int swr_convert(struct SwrContext *s, uint8_t **out, int out_count, + const uint8_t **in , int in_count); + +/** + * Convert the next timestamp from input to output + * timestamps are in 1/(in_sample_rate * out_sample_rate) units. + * + * @note There are 2 slightly differently behaving modes. + * @li When automatic timestamp compensation is not used, (min_compensation >= FLT_MAX) + * in this case timestamps will be passed through with delays compensated + * @li When automatic timestamp compensation is used, (min_compensation < FLT_MAX) + * in this case the output timestamps will match output sample numbers. + * See ffmpeg-resampler(1) for the two modes of compensation. 
+ * + * @param[in] s initialized Swr context + * @param[in] pts timestamp for the next input sample, INT64_MIN if unknown + * @see swr_set_compensation(), swr_drop_output(), and swr_inject_silence() are + * functions used internally for timestamp compensation. + * @return the output timestamp for the next output sample + */ +int64_t swr_next_pts(struct SwrContext *s, int64_t pts); + +/** + * @} + * + * @name Low-level option setting functions + * These functions provide a means to set low-level options that is not possible + * with the AVOption API. + * @{ + */ + +/** + * Activate resampling compensation ("soft" compensation). This function is + * internally called when needed in swr_next_pts(). + * + * @param[in,out] s allocated Swr context. If it is not initialized, + * or SWR_FLAG_RESAMPLE is not set, swr_init() is + * called with the flag set. + * @param[in] sample_delta delta in PTS per sample + * @param[in] compensation_distance number of samples to compensate for + * @return >= 0 on success, AVERROR error codes if: + * @li @c s is NULL, + * @li @c compensation_distance is less than 0, + * @li @c compensation_distance is 0 but sample_delta is not, + * @li compensation unsupported by resampler, or + * @li swr_init() fails when called. + */ +int swr_set_compensation(struct SwrContext *s, int sample_delta, int compensation_distance); + +/** + * Set a customized input channel mapping. + * + * @param[in,out] s allocated Swr context, not yet initialized + * @param[in] channel_map customized input channel mapping (array of channel + * indexes, -1 for a muted channel) + * @return >= 0 on success, or AVERROR error code in case of failure. + */ +int swr_set_channel_mapping(struct SwrContext *s, const int *channel_map); + +/** + * Set a customized remix matrix.
+ * + * @param s allocated Swr context, not yet initialized + * @param matrix remix coefficients; matrix[i + stride * o] is + * the weight of input channel i in output channel o + * @param stride offset between lines of the matrix + * @return >= 0 on success, or AVERROR error code in case of failure. + */ +int swr_set_matrix(struct SwrContext *s, const double *matrix, int stride); + +/** + * @} + * + * @name Sample handling functions + * @{ + */ + +/** + * Drops the specified number of output samples. + * + * This function, along with swr_inject_silence(), is called by swr_next_pts() + * if needed for "hard" compensation. + * + * @param s allocated Swr context + * @param count number of samples to be dropped + * + * @return >= 0 on success, or a negative AVERROR code on failure + */ +int swr_drop_output(struct SwrContext *s, int count); + +/** + * Injects the specified number of silence samples. + * + * This function, along with swr_drop_output(), is called by swr_next_pts() + * if needed for "hard" compensation. + * + * @param s allocated Swr context + * @param count number of silence samples to be injected + * + * @return >= 0 on success, or a negative AVERROR code on failure + */ +int swr_inject_silence(struct SwrContext *s, int count); + +/** + * Gets the delay the next input sample will experience relative to the next output sample. + * + * Swresample can buffer data if more input has been provided than available + * output space, also converting between sample rates needs a delay. + * This function returns the sum of all such delays. + * The exact delay is not necessarily an integer value in either input or + * output sample rate. Especially when downsampling by a large value, the + * output sample rate may be a poor choice to represent the delay, similarly + * for upsampling and the input sample rate.
+ * + * @param s swr context + * @param base timebase in which the returned delay will be: + * @li if it's set to 1 the returned delay is in seconds + * @li if it's set to 1000 the returned delay is in milliseconds + * @li if it's set to the input sample rate then the returned + * delay is in input samples + * @li if it's set to the output sample rate then the returned + * delay is in output samples + * @li if it's the least common multiple of in_sample_rate and + * out_sample_rate then an exact rounding-free delay will be + * returned + * @returns the delay in 1 / @c base units. + */ +int64_t swr_get_delay(struct SwrContext *s, int64_t base); + +/** + * Find an upper bound on the number of samples that the next swr_convert + * call will output, if called with in_samples of input samples. This + * depends on the internal state, and anything changing the internal state + * (like further swr_convert() calls) may change the number of samples + * swr_get_out_samples() returns for the same number of input samples. + * + * @param in_samples number of input samples. + * @note any call to swr_inject_silence(), swr_convert(), swr_next_pts() + * or swr_set_compensation() invalidates this limit + * @note it is recommended to pass the correct available buffer size + * to all functions like swr_convert() even if swr_get_out_samples() + * indicates that less would be used. + * @returns an upper bound on the number of samples that the next swr_convert + * will output or a negative value to indicate an error + */ +int swr_get_out_samples(struct SwrContext *s, int in_samples); + +/** + * @} + * + * @name Configuration accessors + * @{ + */ + +/** + * Return the @ref LIBSWRESAMPLE_VERSION_INT constant. + * + * This is useful to check if the build-time libswresample has the same version + * as the run-time one. + * + * @returns the unsigned int-typed version + */ +unsigned swresample_version(void); + +/** + * Return the swr build-time configuration.
+ * + * @returns the build-time @c ./configure flags + */ +const char *swresample_configuration(void); + +/** + * Return the swr license. + * + * @returns the license of libswresample, determined at build-time + */ +const char *swresample_license(void); + +/** + * @} + * + * @name AVFrame based API + * @{ + */ + +/** + * Convert the samples in the input AVFrame and write them to the output AVFrame. + * + * Input and output AVFrames must have channel_layout, sample_rate and format set. + * + * If the output AVFrame does not have the data pointers allocated, the nb_samples + * field will be set and av_frame_get_buffer() + * is called to allocate the frame. + * + * The output AVFrame can be NULL or have fewer allocated samples than required. + * In this case, any remaining samples not written to the output will be added + * to an internal FIFO buffer, to be returned at the next call to this function + * or to swr_convert(). + * + * If converting sample rate, there may be data remaining in the internal + * resampling delay buffer. swr_get_delay() tells the number of + * remaining samples. To get this data as output, call this function or + * swr_convert() with NULL input. + * + * If the SwrContext configuration does not match the output and + * input AVFrame settings the conversion does not take place and depending on + * which AVFrame is not matching AVERROR_OUTPUT_CHANGED, AVERROR_INPUT_CHANGED + * or the result of a bitwise-OR of them is returned. + * + * @see swr_convert() + * @see swr_get_delay() + * + * @param swr audio resample context + * @param output output AVFrame + * @param input input AVFrame + * @return 0 on success, AVERROR on failure or nonmatching + * configuration. + */ +int swr_convert_frame(SwrContext *swr, + AVFrame *output, const AVFrame *input); + +/** + * Configure or reconfigure the SwrContext using the information + * provided by the AVFrames. + * + * The original resampling context is reset even on failure.
+ * The function calls swr_close() internally if the context is open. + * + * @see swr_close() + * + * @param swr audio resample context + * @param out output AVFrame + * @param in input AVFrame + * @return 0 on success, AVERROR on failure. + */ +int swr_config_frame(SwrContext *swr, const AVFrame *out, const AVFrame *in); + +/** + * @} + * @} + */ + +#endif /* SWRESAMPLE_SWRESAMPLE_H */ diff --git a/libswresample/swresample_frame.c b/libswresample/swresample_frame.c new file mode 100644 index 0000000000..71d3ed711a --- /dev/null +++ b/libswresample/swresample_frame.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2014 Luca Barbato <lu_zero@gentoo.org> + * Copyright (c) 2014 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "swresample_internal.h" +#include "libavutil/frame.h" +#include "libavutil/opt.h" + +int swr_config_frame(SwrContext *s, const AVFrame *out, const AVFrame *in) +{ + swr_close(s); + + if (in) { + if (av_opt_set_int(s, "icl", in->channel_layout, 0) < 0) + goto fail; + if (av_opt_set_int(s, "isf", in->format, 0) < 0) + goto fail; + if (av_opt_set_int(s, "isr", in->sample_rate, 0) < 0) + goto fail; + } + + if (out) { + if (av_opt_set_int(s, "ocl", out->channel_layout, 0) < 0) + goto fail; + if (av_opt_set_int(s, "osf", out->format, 0) < 0) + goto fail; + if (av_opt_set_int(s, "osr", out->sample_rate, 0) < 0) + goto fail; + } + + return 0; +fail: + av_log(s, AV_LOG_ERROR, "Failed to set option\n"); + return AVERROR(EINVAL); +} + +static int config_changed(SwrContext *s, + const AVFrame *out, const AVFrame *in) +{ + int ret = 0; + + if (in) { + if (s->in_ch_layout != in->channel_layout || + s->in_sample_rate != in->sample_rate || + s->in_sample_fmt != in->format) { + ret |= AVERROR_INPUT_CHANGED; + } + } + + if (out) { + if (s->out_ch_layout != out->channel_layout || + s->out_sample_rate != out->sample_rate || + s->out_sample_fmt != out->format) { + ret |= AVERROR_OUTPUT_CHANGED; + } + } + + return ret; +} + +static inline int convert_frame(SwrContext *s, + AVFrame *out, const AVFrame *in) +{ + int ret; + uint8_t **out_data = NULL; + const uint8_t **in_data = NULL; + int out_nb_samples = 0, in_nb_samples = 0; + + if (out) { + out_data = out->extended_data; + out_nb_samples = out->nb_samples; + } + + if (in) { + in_data = (const uint8_t **)in->extended_data; + in_nb_samples = in->nb_samples; + } + + ret = swr_convert(s, out_data, out_nb_samples, in_data, in_nb_samples); + + if (ret < 0) { + if (out) + out->nb_samples = 0; + return ret; + } 
+ + if (out) + out->nb_samples = ret; + + return 0; +} + +static inline int available_samples(AVFrame *out) +{ + int bytes_per_sample = av_get_bytes_per_sample(out->format); + int samples = out->linesize[0] / bytes_per_sample; + + if (av_sample_fmt_is_planar(out->format)) { + return samples; + } else { + int channels = av_get_channel_layout_nb_channels(out->channel_layout); + return samples / channels; + } +} + +int swr_convert_frame(SwrContext *s, + AVFrame *out, const AVFrame *in) +{ + int ret, setup = 0; + + if (!swr_is_initialized(s)) { + if ((ret = swr_config_frame(s, out, in)) < 0) + return ret; + if ((ret = swr_init(s)) < 0) + return ret; + setup = 1; + } else { + // return as is or reconfigure for input changes? + if ((ret = config_changed(s, out, in))) + return ret; + } + + if (out) { + if (!out->linesize[0]) { + out->nb_samples = swr_get_delay(s, s->out_sample_rate) + + in->nb_samples*(int64_t)s->out_sample_rate / s->in_sample_rate + + 3; + if ((ret = av_frame_get_buffer(out, 0)) < 0) { + if (setup) + swr_close(s); + return ret; + } + } else { + if (!out->nb_samples) + out->nb_samples = available_samples(out); + } + } + + return convert_frame(s, out, in); +} + diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h new file mode 100644 index 0000000000..3828b722cc --- /dev/null +++ b/libswresample/swresample_internal.h @@ -0,0 +1,220 @@ +/* + * Copyright (C) 2011-2013 Michael Niedermayer (michaelni@gmx.at) + * + * This file is part of libswresample + * + * libswresample is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libswresample is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libswresample; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef SWRESAMPLE_SWRESAMPLE_INTERNAL_H +#define SWRESAMPLE_SWRESAMPLE_INTERNAL_H + +#include "swresample.h" +#include "libavutil/channel_layout.h" +#include "config.h" + +#define SWR_CH_MAX 64 + +#define SQRT3_2 1.22474487139158904909 /* sqrt(3/2) */ + +#define NS_TAPS 20 + +#if ARCH_X86_64 +typedef int64_t integer; +#else +typedef int integer; +#endif + +typedef void (mix_1_1_func_type)(void *out, const void *in, void *coeffp, integer index, integer len); +typedef void (mix_2_1_func_type)(void *out, const void *in1, const void *in2, void *coeffp, integer index1, integer index2, integer len); + +typedef void (mix_any_func_type)(uint8_t **out, const uint8_t **in1, void *coeffp, integer len); + +typedef struct AudioData{ + uint8_t *ch[SWR_CH_MAX]; ///< samples buffer per channel + uint8_t *data; ///< samples buffer + int ch_count; ///< number of channels + int bps; ///< bytes per sample + int count; ///< number of samples + int planar; ///< 1 if planar audio, 0 otherwise + enum AVSampleFormat fmt; ///< sample format +} AudioData; + +struct DitherContext { + int method; + int noise_pos; + float scale; + float noise_scale; ///< Noise scale + int ns_taps; ///< Noise shaping dither taps + float ns_scale; ///< Noise shaping dither scale + float ns_scale_1; ///< Noise shaping dither scale^-1 + int ns_pos; ///< Noise shaping dither position + float ns_coeffs[NS_TAPS]; ///< Noise shaping filter coefficients + float ns_errors[SWR_CH_MAX][2*NS_TAPS]; + AudioData noise; ///< noise used for dithering + AudioData temp; ///< temporary storage when writing into the input buffer isn't possible + int output_sample_bits; ///< the number of used output bits, needed to scale dither correctly 
+}; + +typedef struct ResampleContext * (* resample_init_func)(struct ResampleContext *c, int out_rate, int in_rate, int filter_size, int phase_shift, int linear, + double cutoff, enum AVSampleFormat format, enum SwrFilterType filter_type, double kaiser_beta, double precision, int cheby, int exact_rational); +typedef void (* resample_free_func)(struct ResampleContext **c); +typedef int (* multiple_resample_func)(struct ResampleContext *c, AudioData *dst, int dst_size, AudioData *src, int src_size, int *consumed); +typedef int (* resample_flush_func)(struct SwrContext *c); +typedef int (* set_compensation_func)(struct ResampleContext *c, int sample_delta, int compensation_distance); +typedef int64_t (* get_delay_func)(struct SwrContext *s, int64_t base); +typedef int (* invert_initial_buffer_func)(struct ResampleContext *c, AudioData *dst, const AudioData *src, int src_size, int *dst_idx, int *dst_count); +typedef int64_t (* get_out_samples_func)(struct SwrContext *s, int in_samples); + +struct Resampler { + resample_init_func init; + resample_free_func free; + multiple_resample_func multiple_resample; + resample_flush_func flush; + set_compensation_func set_compensation; + get_delay_func get_delay; + invert_initial_buffer_func invert_initial_buffer; + get_out_samples_func get_out_samples; +}; + +extern struct Resampler const swri_resampler; +extern struct Resampler const swri_soxr_resampler; + +struct SwrContext { + const AVClass *av_class; ///< AVClass used for AVOption and av_log() + int log_level_offset; ///< logging level offset + void *log_ctx; ///< parent logging context + enum AVSampleFormat in_sample_fmt; ///< input sample format + enum AVSampleFormat int_sample_fmt; ///< internal sample format (AV_SAMPLE_FMT_FLTP or AV_SAMPLE_FMT_S16P) + enum AVSampleFormat out_sample_fmt; ///< output sample format + int64_t in_ch_layout; ///< input channel layout + int64_t out_ch_layout; ///< output channel layout + int in_sample_rate; ///< input sample rate + int 
out_sample_rate; ///< output sample rate + int flags; ///< miscellaneous flags such as SWR_FLAG_RESAMPLE + float slev; ///< surround mixing level + float clev; ///< center mixing level + float lfe_mix_level; ///< LFE mixing level + float rematrix_volume; ///< rematrixing volume coefficient + float rematrix_maxval; ///< maximum value for rematrixing output + int matrix_encoding; /**< matrixed stereo encoding */ + const int *channel_map; ///< channel index (or -1 if muted channel) map + int used_ch_count; ///< number of used input channels (mapped channel count if channel_map, otherwise in.ch_count) + int engine; + + int user_in_ch_count; ///< User set input channel count + int user_out_ch_count; ///< User set output channel count + int user_used_ch_count; ///< User set used channel count + int64_t user_in_ch_layout; ///< User set input channel layout + int64_t user_out_ch_layout; ///< User set output channel layout + enum AVSampleFormat user_int_sample_fmt; ///< User set internal sample format + + struct DitherContext dither; + + int filter_size; /**< length of each FIR filter in the resampling filterbank relative to the cutoff frequency */ + int phase_shift; /**< log2 of the number of entries in the resampling polyphase filterbank */ + int linear_interp; /**< if 1 then the resampling FIR filter will be linearly interpolated */ + int exact_rational; /**< if 1 then enable non power of 2 phase_count */ + double cutoff; /**< resampling cutoff frequency (swr: 6dB point; soxr: 0dB point). 
1.0 corresponds to half the output sample rate */ + int filter_type; /**< swr resampling filter type */ + double kaiser_beta; /**< swr beta value for Kaiser window (only applicable if filter_type == SWR_FILTER_TYPE_KAISER) */ + double precision; /**< soxr resampling precision (in bits) */ + int cheby; /**< soxr: if 1 then passband rolloff will be none (Chebyshev) & irrational ratio approximation precision will be higher */ + + float min_compensation; ///< swr minimum below which no compensation will happen + float min_hard_compensation; ///< swr minimum below which no silence inject / sample drop will happen + float soft_compensation_duration; ///< swr duration over which soft compensation is applied + float max_soft_compensation; ///< swr maximum soft compensation in seconds over soft_compensation_duration + float async; ///< swr simple 1 parameter async, similar to ffmpeg's -async + int64_t firstpts_in_samples; ///< swr first pts in samples + + int resample_first; ///< 1 if resampling must come first, 0 if rematrixing + int rematrix; ///< flag to indicate if rematrixing is needed (basically if input and output layouts mismatch) + int rematrix_custom; ///< flag to indicate that a custom matrix has been defined + + AudioData in; ///< input audio data + AudioData postin; ///< post-input audio data: used for rematrix/resample + AudioData midbuf; ///< intermediate audio data (postin/preout) + AudioData preout; ///< pre-output audio data: used for rematrix/resample + AudioData out; ///< converted output audio data + AudioData in_buffer; ///< cached audio data (convert and resample purpose) + AudioData silence; ///< temporary with silence + AudioData drop_temp; ///< temporary used to discard output + int in_buffer_index; ///< cached buffer position + int in_buffer_count; ///< cached buffer length + int resample_in_constraint; ///< 1 if the input end was reached before the output end, 0 otherwise + int flushed; ///< 1 if data is to be flushed and no further input is expected
+ int64_t outpts; ///< output PTS + int64_t firstpts; ///< first PTS + int drop_output; ///< number of output samples to drop + double delayed_samples_fixup; ///< soxr 0.1.1: needed to fixup delayed_samples after flush has been called. + + struct AudioConvert *in_convert; ///< input conversion context + struct AudioConvert *out_convert; ///< output conversion context + struct AudioConvert *full_convert; ///< full conversion context (single conversion for input and output) + struct ResampleContext *resample; ///< resampling context + struct Resampler const *resampler; ///< resampler virtual function table + + float matrix[SWR_CH_MAX][SWR_CH_MAX]; ///< floating point rematrixing coefficients + uint8_t *native_matrix; + uint8_t *native_one; + uint8_t *native_simd_one; + uint8_t *native_simd_matrix; + int32_t matrix32[SWR_CH_MAX][SWR_CH_MAX]; ///< 17.15 fixed point rematrixing coefficients + uint8_t matrix_ch[SWR_CH_MAX][SWR_CH_MAX+1]; ///< Lists of input channels per output channel that have non zero rematrixing coefficients + mix_1_1_func_type *mix_1_1_f; + mix_1_1_func_type *mix_1_1_simd; + + mix_2_1_func_type *mix_2_1_f; + mix_2_1_func_type *mix_2_1_simd; + + mix_any_func_type *mix_any_f; + + /* TODO: callbacks for ASM optimizations */ +}; + +av_warn_unused_result +int swri_realloc_audio(AudioData *a, int count); + +void swri_noise_shaping_int16 (SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count); +void swri_noise_shaping_int32 (SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count); +void swri_noise_shaping_float (SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count); +void swri_noise_shaping_double(SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count); + +av_warn_unused_result +int swri_rematrix_init(SwrContext *s); +void swri_rematrix_free(SwrContext *s); +int swri_rematrix(SwrContext *s, AudioData *out, AudioData 
*in, int len, int mustcopy); +int swri_rematrix_init_x86(struct SwrContext *s); + +av_warn_unused_result +int swri_get_dither(SwrContext *s, void *dst, int len, unsigned seed, enum AVSampleFormat noise_fmt); +av_warn_unused_result +int swri_dither_init(SwrContext *s, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt); + +void swri_audio_convert_init_aarch64(struct AudioConvert *ac, + enum AVSampleFormat out_fmt, + enum AVSampleFormat in_fmt, + int channels); +void swri_audio_convert_init_arm(struct AudioConvert *ac, + enum AVSampleFormat out_fmt, + enum AVSampleFormat in_fmt, + int channels); +void swri_audio_convert_init_x86(struct AudioConvert *ac, + enum AVSampleFormat out_fmt, + enum AVSampleFormat in_fmt, + int channels); + +#endif diff --git a/libswresample/swresampleres.rc b/libswresample/swresampleres.rc new file mode 100644 index 0000000000..1320f78b9a --- /dev/null +++ b/libswresample/swresampleres.rc @@ -0,0 +1,55 @@ +/* + * Windows resource file for libswresample + * + * Copyright (C) 2012 James Almer + * Copyright (C) 2013 Tiancheng "Timothy" Gu + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <windows.h> +#include "libswresample/version.h" +#include "libavutil/ffversion.h" +#include "config.h" + +1 VERSIONINFO +FILEVERSION LIBSWRESAMPLE_VERSION_MAJOR, LIBSWRESAMPLE_VERSION_MINOR, LIBSWRESAMPLE_VERSION_MICRO, 0 +PRODUCTVERSION LIBSWRESAMPLE_VERSION_MAJOR, LIBSWRESAMPLE_VERSION_MINOR, LIBSWRESAMPLE_VERSION_MICRO, 0 +FILEFLAGSMASK VS_FFI_FILEFLAGSMASK +FILEOS VOS_NT_WINDOWS32 +FILETYPE VFT_DLL +{ + BLOCK "StringFileInfo" + { + BLOCK "040904B0" + { + VALUE "CompanyName", "FFmpeg Project" + VALUE "FileDescription", "FFmpeg audio resampling library" + VALUE "FileVersion", AV_STRINGIFY(LIBSWRESAMPLE_VERSION) + VALUE "InternalName", "libswresample" + VALUE "LegalCopyright", "Copyright (C) 2000-" AV_STRINGIFY(CONFIG_THIS_YEAR) " FFmpeg Project" + VALUE "OriginalFilename", "swresample" BUILDSUF "-" AV_STRINGIFY(LIBSWRESAMPLE_VERSION_MAJOR) SLIBSUF + VALUE "ProductName", "FFmpeg" + VALUE "ProductVersion", FFMPEG_VERSION + } + } + + BLOCK "VarFileInfo" + { + VALUE "Translation", 0x0409, 0x04B0 + } +} diff --git a/libswresample/tests/.gitignore b/libswresample/tests/.gitignore new file mode 100644 index 0000000000..2dc986bd0e --- /dev/null +++ b/libswresample/tests/.gitignore @@ -0,0 +1 @@ +/swresample diff --git a/libswresample/tests/swresample.c b/libswresample/tests/swresample.c new file mode 100644 index 0000000000..53896585e2 --- /dev/null +++ b/libswresample/tests/swresample.c @@ -0,0 +1,422 @@ +/* + * Copyright (C) 2011-2012 Michael Niedermayer (michaelni@gmx.at) + * Copyright (c) 2002 Fabrice Bellard + * + * This file is part of libswresample + * + * libswresample is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; 
either version 2 of the License, or + * (at your option) any later version. + * + * libswresample is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with libswresample; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/avassert.h" +#include "libavutil/channel_layout.h" +#include "libavutil/common.h" +#include "libavutil/opt.h" + +#include "libswresample/swresample.h" + +#undef time +#include <time.h> +#undef fprintf + +#define SAMPLES 1000 + +#define SWR_CH_MAX 32 + +#define ASSERT_LEVEL 2 + +static double get(uint8_t *a[], int ch, int index, int ch_count, enum AVSampleFormat f){ + const uint8_t *p; + if(av_sample_fmt_is_planar(f)){ + f= av_get_alt_sample_fmt(f, 0); + p= a[ch]; + }else{ + p= a[0]; + index= ch + index*ch_count; + } + + switch(f){ + case AV_SAMPLE_FMT_U8 : return ((const uint8_t*)p)[index]/127.0-1.0; + case AV_SAMPLE_FMT_S16: return ((const int16_t*)p)[index]/32767.0; + case AV_SAMPLE_FMT_S32: return ((const int32_t*)p)[index]/2147483647.0; + case AV_SAMPLE_FMT_FLT: return ((const float *)p)[index]; + case AV_SAMPLE_FMT_DBL: return ((const double *)p)[index]; + default: av_assert0(0); + } +} + +static void set(uint8_t *a[], int ch, int index, int ch_count, enum AVSampleFormat f, double v){ + uint8_t *p; + if(av_sample_fmt_is_planar(f)){ + f= av_get_alt_sample_fmt(f, 0); + p= a[ch]; + }else{ + p= a[0]; + index= ch + index*ch_count; + } + switch(f){ + case AV_SAMPLE_FMT_U8 : ((uint8_t*)p)[index]= av_clip_uint8 (lrint((v+1.0)*127)); break; + case AV_SAMPLE_FMT_S16: ((int16_t*)p)[index]= av_clip_int16 (lrint(v*32767)); break; + case AV_SAMPLE_FMT_S32: ((int32_t*)p)[index]= 
av_clipl_int32(llrint(v*2147483647)); break; + case AV_SAMPLE_FMT_FLT: ((float *)p)[index]= v; break; + case AV_SAMPLE_FMT_DBL: ((double *)p)[index]= v; break; + default: av_assert2(0); + } +} + +static void shift(uint8_t *a[], int index, int ch_count, enum AVSampleFormat f){ + int ch; + + if(av_sample_fmt_is_planar(f)){ + f= av_get_alt_sample_fmt(f, 0); + for(ch= 0; ch<ch_count; ch++) + a[ch] += index*av_get_bytes_per_sample(f); + }else{ + a[0] += index*ch_count*av_get_bytes_per_sample(f); + } +} + +static const enum AVSampleFormat formats[] = { + AV_SAMPLE_FMT_S16, + AV_SAMPLE_FMT_FLTP, + AV_SAMPLE_FMT_S16P, + AV_SAMPLE_FMT_FLT, + AV_SAMPLE_FMT_S32P, + AV_SAMPLE_FMT_S32, + AV_SAMPLE_FMT_U8P, + AV_SAMPLE_FMT_U8, + AV_SAMPLE_FMT_DBLP, + AV_SAMPLE_FMT_DBL, +}; + +static const int rates[] = { + 8000, + 11025, + 16000, + 22050, + 32000, + 48000, +}; + +static const uint64_t layouts[]={ + AV_CH_LAYOUT_MONO , + AV_CH_LAYOUT_STEREO , + AV_CH_LAYOUT_2_1 , + AV_CH_LAYOUT_SURROUND , + AV_CH_LAYOUT_4POINT0 , + AV_CH_LAYOUT_2_2 , + AV_CH_LAYOUT_QUAD , + AV_CH_LAYOUT_5POINT0 , + AV_CH_LAYOUT_5POINT1 , + AV_CH_LAYOUT_5POINT0_BACK , + AV_CH_LAYOUT_5POINT1_BACK , + AV_CH_LAYOUT_7POINT0 , + AV_CH_LAYOUT_7POINT1 , + AV_CH_LAYOUT_7POINT1_WIDE , +}; + +static void setup_array(uint8_t *out[SWR_CH_MAX], uint8_t *in, enum AVSampleFormat format, int samples){ + if(av_sample_fmt_is_planar(format)){ + int i; + int plane_size= av_get_bytes_per_sample(format&0xFF)*samples; + format&=0xFF; + for(i=0; i<SWR_CH_MAX; i++){ + out[i]= in + i*plane_size; + } + }else{ + out[0]= in; + } +} + +static int cmp(const void *a, const void *b){ + return *(const int *)a - *(const int *)b; +} + +static void audiogen(void *data, enum AVSampleFormat sample_fmt, + int channels, int sample_rate, int nb_samples) +{ + int i, ch, k; + double v, f, a, ampa; + double tabf1[SWR_CH_MAX]; + double tabf2[SWR_CH_MAX]; + double taba[SWR_CH_MAX]; + unsigned static rnd; + +#define PUT_SAMPLE set(data, ch, k, channels, 
sample_fmt, v); +#define uint_rand(x) ((x) = (x) * 1664525 + 1013904223) +#define dbl_rand(x) (uint_rand(x)*2.0 / (double)UINT_MAX - 1) + k = 0; + + /* 1 second of single freq sinus at 1000 Hz */ + a = 0; + for (i = 0; i < 1 * sample_rate && k < nb_samples; i++, k++) { + v = sin(a) * 0.30; + for (ch = 0; ch < channels; ch++) + PUT_SAMPLE + a += M_PI * 1000.0 * 2.0 / sample_rate; + } + + /* 1 second of varying frequency between 100 and 10000 Hz */ + a = 0; + for (i = 0; i < 1 * sample_rate && k < nb_samples; i++, k++) { + v = sin(a) * 0.30; + for (ch = 0; ch < channels; ch++) + PUT_SAMPLE + f = 100.0 + (((10000.0 - 100.0) * i) / sample_rate); + a += M_PI * f * 2.0 / sample_rate; + } + + /* 0.5 second of low amplitude white noise */ + for (i = 0; i < sample_rate / 2 && k < nb_samples; i++, k++) { + v = dbl_rand(rnd) * 0.30; + for (ch = 0; ch < channels; ch++) + PUT_SAMPLE + } + + /* 0.5 second of high amplitude white noise */ + for (i = 0; i < sample_rate / 2 && k < nb_samples; i++, k++) { + v = dbl_rand(rnd); + for (ch = 0; ch < channels; ch++) + PUT_SAMPLE + } + + /* 1 second of unrelated ramps for each channel */ + for (ch = 0; ch < channels; ch++) { + taba[ch] = 0; + tabf1[ch] = 100 + uint_rand(rnd) % 5000; + tabf2[ch] = 100 + uint_rand(rnd) % 5000; + } + for (i = 0; i < 1 * sample_rate && k < nb_samples; i++, k++) { + for (ch = 0; ch < channels; ch++) { + v = sin(taba[ch]) * 0.30; + PUT_SAMPLE + f = tabf1[ch] + (((tabf2[ch] - tabf1[ch]) * i) / sample_rate); + taba[ch] += M_PI * f * 2.0 / sample_rate; + } + } + + /* 2 seconds of 500 Hz with varying volume */ + a = 0; + ampa = 0; + for (i = 0; i < 2 * sample_rate && k < nb_samples; i++, k++) { + for (ch = 0; ch < channels; ch++) { + double amp = (1.0 + sin(ampa)) * 0.15; + if (ch & 1) + amp = 0.30 - amp; + v = sin(a) * amp; + PUT_SAMPLE + a += M_PI * 500.0 * 2.0 / sample_rate; + ampa += M_PI * 2.0 / sample_rate; + } + } +} + +int main(int argc, char **argv){ + int in_sample_rate, out_sample_rate, ch ,i, 
flush_count; + uint64_t in_ch_layout, out_ch_layout; + enum AVSampleFormat in_sample_fmt, out_sample_fmt; + uint8_t array_in[SAMPLES*8*8]; + uint8_t array_mid[SAMPLES*8*8*3]; + uint8_t array_out[SAMPLES*8*8+100]; + uint8_t *ain[SWR_CH_MAX]; + uint8_t *aout[SWR_CH_MAX]; + uint8_t *amid[SWR_CH_MAX]; + int flush_i=0; + int mode; + int num_tests = 10000; + uint32_t seed = 0; + uint32_t rand_seed = 0; + int remaining_tests[FF_ARRAY_ELEMS(rates) * FF_ARRAY_ELEMS(layouts) * FF_ARRAY_ELEMS(formats) * FF_ARRAY_ELEMS(layouts) * FF_ARRAY_ELEMS(formats)]; + int max_tests = FF_ARRAY_ELEMS(remaining_tests); + int test; + int specific_test= -1; + + struct SwrContext * forw_ctx= NULL; + struct SwrContext *backw_ctx= NULL; + + if (argc > 1) { + if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { + av_log(NULL, AV_LOG_INFO, "Usage: swresample-test [<num_tests>[ <test>]] \n" + "num_tests Default is %d\n", num_tests); + return 0; + } + num_tests = strtol(argv[1], NULL, 0); + if(num_tests < 0) { + num_tests = -num_tests; + rand_seed = time(0); + } + if(num_tests<= 0 || num_tests>max_tests) + num_tests = max_tests; + if(argc > 2) { + specific_test = strtol(argv[1], NULL, 0); + } + } + + for(i=0; i<max_tests; i++) + remaining_tests[i] = i; + + for(test=0; test<num_tests; test++){ + unsigned r; + uint_rand(seed); + r = (seed * (uint64_t)(max_tests - test)) >>32; + FFSWAP(int, remaining_tests[r], remaining_tests[max_tests - test - 1]); + } + qsort(remaining_tests + max_tests - num_tests, num_tests, sizeof(remaining_tests[0]), cmp); + in_sample_rate=16000; + for(test=0; test<num_tests; test++){ + char in_layout_string[256]; + char out_layout_string[256]; + unsigned vector= remaining_tests[max_tests - test - 1]; + int in_ch_count; + int out_count, mid_count, out_ch_count; + + in_ch_layout = layouts[vector % FF_ARRAY_ELEMS(layouts)]; vector /= FF_ARRAY_ELEMS(layouts); + out_ch_layout = layouts[vector % FF_ARRAY_ELEMS(layouts)]; vector /= FF_ARRAY_ELEMS(layouts); + in_sample_fmt = 
formats[vector % FF_ARRAY_ELEMS(formats)]; vector /= FF_ARRAY_ELEMS(formats); + out_sample_fmt = formats[vector % FF_ARRAY_ELEMS(formats)]; vector /= FF_ARRAY_ELEMS(formats); + out_sample_rate = rates [vector % FF_ARRAY_ELEMS(rates )]; vector /= FF_ARRAY_ELEMS(rates); + av_assert0(!vector); + + if(specific_test == 0){ + if(out_sample_rate != in_sample_rate || in_ch_layout != out_ch_layout) + continue; + } + + in_ch_count= av_get_channel_layout_nb_channels(in_ch_layout); + out_ch_count= av_get_channel_layout_nb_channels(out_ch_layout); + av_get_channel_layout_string( in_layout_string, sizeof( in_layout_string), in_ch_count, in_ch_layout); + av_get_channel_layout_string(out_layout_string, sizeof(out_layout_string), out_ch_count, out_ch_layout); + fprintf(stderr, "TEST: %s->%s, rate:%5d->%5d, fmt:%s->%s\n", + in_layout_string, out_layout_string, + in_sample_rate, out_sample_rate, + av_get_sample_fmt_name(in_sample_fmt), av_get_sample_fmt_name(out_sample_fmt)); + forw_ctx = swr_alloc_set_opts(forw_ctx, out_ch_layout, out_sample_fmt, out_sample_rate, + in_ch_layout, in_sample_fmt, in_sample_rate, + 0, 0); + backw_ctx = swr_alloc_set_opts(backw_ctx, in_ch_layout, in_sample_fmt, in_sample_rate, + out_ch_layout, out_sample_fmt, out_sample_rate, + 0, 0); + if(!forw_ctx) { + fprintf(stderr, "Failed to init forw_cts\n"); + return 1; + } + if(!backw_ctx) { + fprintf(stderr, "Failed to init backw_ctx\n"); + return 1; + } + if (uint_rand(rand_seed) % 3 == 0) + av_opt_set_int(forw_ctx, "ich", 0, 0); + if (uint_rand(rand_seed) % 3 == 0) + av_opt_set_int(forw_ctx, "och", 0, 0); + + if(swr_init( forw_ctx) < 0) + fprintf(stderr, "swr_init(->) failed\n"); + if(swr_init(backw_ctx) < 0) + fprintf(stderr, "swr_init(<-) failed\n"); + //FIXME test planar + setup_array(ain , array_in , in_sample_fmt, SAMPLES); + setup_array(amid, array_mid, out_sample_fmt, 3*SAMPLES); + setup_array(aout, array_out, in_sample_fmt , SAMPLES); +#if 0 + for(ch=0; ch<in_ch_count; ch++){ + for(i=0; i<SAMPLES; 
i++) + set(ain, ch, i, in_ch_count, in_sample_fmt, sin(i*i*3/SAMPLES)); + } +#else + audiogen(ain, in_sample_fmt, in_ch_count, SAMPLES/6+1, SAMPLES); +#endif + mode = uint_rand(rand_seed) % 3; + if(mode==0 /*|| out_sample_rate == in_sample_rate*/) { + mid_count= swr_convert(forw_ctx, amid, 3*SAMPLES, (const uint8_t **)ain, SAMPLES); + } else if(mode==1){ + mid_count= swr_convert(forw_ctx, amid, 0, (const uint8_t **)ain, SAMPLES); + mid_count+=swr_convert(forw_ctx, amid, 3*SAMPLES, (const uint8_t **)ain, 0); + } else { + int tmp_count; + mid_count= swr_convert(forw_ctx, amid, 0, (const uint8_t **)ain, 1); + av_assert0(mid_count==0); + shift(ain, 1, in_ch_count, in_sample_fmt); + mid_count+=swr_convert(forw_ctx, amid, 3*SAMPLES, (const uint8_t **)ain, 0); + shift(amid, mid_count, out_ch_count, out_sample_fmt); tmp_count = mid_count; + mid_count+=swr_convert(forw_ctx, amid, 2, (const uint8_t **)ain, 2); + shift(amid, mid_count-tmp_count, out_ch_count, out_sample_fmt); tmp_count = mid_count; + shift(ain, 2, in_ch_count, in_sample_fmt); + mid_count+=swr_convert(forw_ctx, amid, 1, (const uint8_t **)ain, SAMPLES-3); + shift(amid, mid_count-tmp_count, out_ch_count, out_sample_fmt); tmp_count = mid_count; + shift(ain, -3, in_ch_count, in_sample_fmt); + mid_count+=swr_convert(forw_ctx, amid, 3*SAMPLES, (const uint8_t **)ain, 0); + shift(amid, -tmp_count, out_ch_count, out_sample_fmt); + } + out_count= swr_convert(backw_ctx,aout, SAMPLES, (const uint8_t **)amid, mid_count); + + for(ch=0; ch<in_ch_count; ch++){ + double sse, maxdiff=0; + double sum_a= 0; + double sum_b= 0; + double sum_aa= 0; + double sum_bb= 0; + double sum_ab= 0; + for(i=0; i<out_count; i++){ + double a= get(ain , ch, i, in_ch_count, in_sample_fmt); + double b= get(aout, ch, i, in_ch_count, in_sample_fmt); + sum_a += a; + sum_b += b; + sum_aa+= a*a; + sum_bb+= b*b; + sum_ab+= a*b; + maxdiff= FFMAX(maxdiff, fabs(a-b)); + } + sse= sum_aa + sum_bb - 2*sum_ab; + if(sse < 0 && sse > -0.00001) sse=0; //fix 
rounding error + + fprintf(stderr, "[e:%f c:%f max:%f] len:%5d\n", out_count ? sqrt(sse/out_count) : 0, sum_ab/(sqrt(sum_aa*sum_bb)), maxdiff, out_count); + } + + flush_i++; + flush_i%=21; + flush_count = swr_convert(backw_ctx,aout, flush_i, 0, 0); + shift(aout, flush_i, in_ch_count, in_sample_fmt); + flush_count+= swr_convert(backw_ctx,aout, SAMPLES-flush_i, 0, 0); + shift(aout, -flush_i, in_ch_count, in_sample_fmt); + if(flush_count){ + for(ch=0; ch<in_ch_count; ch++){ + double sse, maxdiff=0; + double sum_a= 0; + double sum_b= 0; + double sum_aa= 0; + double sum_bb= 0; + double sum_ab= 0; + for(i=0; i<flush_count; i++){ + double a= get(ain , ch, i+out_count, in_ch_count, in_sample_fmt); + double b= get(aout, ch, i, in_ch_count, in_sample_fmt); + sum_a += a; + sum_b += b; + sum_aa+= a*a; + sum_bb+= b*b; + sum_ab+= a*b; + maxdiff= FFMAX(maxdiff, fabs(a-b)); + } + sse= sum_aa + sum_bb - 2*sum_ab; + if(sse < 0 && sse > -0.00001) sse=0; //fix rounding error + + fprintf(stderr, "[e:%f c:%f max:%f] len:%5d F:%3d\n", sqrt(sse/flush_count), sum_ab/(sqrt(sum_aa*sum_bb)), maxdiff, flush_count, flush_i); + } + } + + + fprintf(stderr, "\n"); + } + + return 0; +} diff --git a/libswresample/version.h b/libswresample/version.h new file mode 100644 index 0000000000..b8e32c0e41 --- /dev/null +++ b/libswresample/version.h @@ -0,0 +1,45 @@ +/* + * Version macros. + * + * This file is part of libswresample + * + * libswresample is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libswresample is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with libswresample; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef SWRESAMPLE_VERSION_H +#define SWRESAMPLE_VERSION_H + +/** + * @file + * Libswresample version macros + */ + +#include "libavutil/avutil.h" + +#define LIBSWRESAMPLE_VERSION_MAJOR 2 +#define LIBSWRESAMPLE_VERSION_MINOR 1 +#define LIBSWRESAMPLE_VERSION_MICRO 100 + +#define LIBSWRESAMPLE_VERSION_INT AV_VERSION_INT(LIBSWRESAMPLE_VERSION_MAJOR, \ + LIBSWRESAMPLE_VERSION_MINOR, \ + LIBSWRESAMPLE_VERSION_MICRO) +#define LIBSWRESAMPLE_VERSION AV_VERSION(LIBSWRESAMPLE_VERSION_MAJOR, \ + LIBSWRESAMPLE_VERSION_MINOR, \ + LIBSWRESAMPLE_VERSION_MICRO) +#define LIBSWRESAMPLE_BUILD LIBSWRESAMPLE_VERSION_INT + +#define LIBSWRESAMPLE_IDENT "SwR" AV_STRINGIFY(LIBSWRESAMPLE_VERSION) + +#endif /* SWRESAMPLE_VERSION_H */ diff --git a/libswresample/x86/Makefile b/libswresample/x86/Makefile new file mode 100644 index 0000000000..be44df56aa --- /dev/null +++ b/libswresample/x86/Makefile @@ -0,0 +1,9 @@ +YASM-OBJS += x86/audio_convert.o\ + x86/rematrix.o\ + x86/resample.o\ + +OBJS += x86/audio_convert_init.o\ + x86/rematrix_init.o\ + x86/resample_init.o\ + +OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm new file mode 100644 index 0000000000..d441636d3c --- /dev/null +++ b/libswresample/x86/audio_convert.asm @@ -0,0 +1,739 @@ +;****************************************************************************** +;* Copyright (c) 2012 Michael Niedermayer +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. 
+;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. +;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +SECTION_RODATA 32 +flt2pm31: times 8 dd 4.6566129e-10 +flt2p31 : times 8 dd 2147483648.0 +flt2p15 : times 8 dd 32768.0 + +word_unpack_shuf : db 0, 1, 4, 5, 8, 9,12,13, 2, 3, 6, 7,10,11,14,15 + +SECTION .text + + +;to, from, a/u, log2_outsize, log_intsize, const +%macro PACK_2CH 5-7 +cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2 + mov src2q , [srcq+gprsize] + mov srcq , [srcq] + mov dstq , [dstq] +%ifidn %3, a + test dstq, mmsize-1 + jne pack_2ch_%2_to_%1_u_int %+ SUFFIX + test srcq, mmsize-1 + jne pack_2ch_%2_to_%1_u_int %+ SUFFIX + test src2q, mmsize-1 + jne pack_2ch_%2_to_%1_u_int %+ SUFFIX +%else +pack_2ch_%2_to_%1_u_int %+ SUFFIX: +%endif + lea srcq , [srcq + (1<<%5)*lenq] + lea src2q, [src2q + (1<<%5)*lenq] + lea dstq , [dstq + (2<<%4)*lenq] + neg lenq + %7 m0,m1,m2,m3,m4,m5 +.next: +%if %4 >= %5 + mov%3 m0, [ srcq +(1<<%5)*lenq] + mova m1, m0 + mov%3 m2, [ src2q+(1<<%5)*lenq] +%if %5 == 1 + punpcklwd m0, m2 + punpckhwd m1, m2 +%else + punpckldq m0, m2 + punpckhdq m1, m2 +%endif + %6 m0,m1,m2,m3,m4,m5 +%else + mov%3 m0, [ srcq +(1<<%5)*lenq] + mov%3 m1, [mmsize + srcq +(1<<%5)*lenq] + mov%3 m2, [ src2q+(1<<%5)*lenq] + mov%3 m3, [mmsize + src2q+(1<<%5)*lenq] + %6 m0,m1,m2,m3,m4,m5 + mova m2, m0 + punpcklwd m0, m1 + punpckhwd m2, m1 + SWAP 1,2 +%endif + mov%3 [ dstq+(2<<%4)*lenq], m0 + mov%3 [ mmsize + dstq+(2<<%4)*lenq], m1 +%if %4 > %5 + mov%3 [2*mmsize + 
dstq+(2<<%4)*lenq], m2 + mov%3 [3*mmsize + dstq+(2<<%4)*lenq], m3 + add lenq, 4*mmsize/(2<<%4) +%else + add lenq, 2*mmsize/(2<<%4) +%endif + jl .next + REP_RET +%endmacro + +%macro UNPACK_2CH 5-7 +cglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2 + mov dst2q , [dstq+gprsize] + mov srcq , [srcq] + mov dstq , [dstq] +%ifidn %3, a + test dstq, mmsize-1 + jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX + test srcq, mmsize-1 + jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX + test dst2q, mmsize-1 + jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX +%else +unpack_2ch_%2_to_%1_u_int %+ SUFFIX: +%endif + lea srcq , [srcq + (2<<%5)*lenq] + lea dstq , [dstq + (1<<%4)*lenq] + lea dst2q, [dst2q + (1<<%4)*lenq] + neg lenq + %7 m0,m1,m2,m3,m4,m5 + mova m6, [word_unpack_shuf] +.next: + mov%3 m0, [ srcq +(2<<%5)*lenq] + mov%3 m2, [ mmsize + srcq +(2<<%5)*lenq] +%if %5 == 1 +%ifidn SUFFIX, _ssse3 + pshufb m0, m6 + mova m1, m0 + pshufb m2, m6 + punpcklqdq m0,m2 + punpckhqdq m1,m2 +%else + mova m1, m0 + punpcklwd m0,m2 + punpckhwd m1,m2 + + mova m2, m0 + punpcklwd m0,m1 + punpckhwd m2,m1 + + mova m1, m0 + punpcklwd m0,m2 + punpckhwd m1,m2 +%endif +%else + mova m1, m0 + shufps m0, m2, 10001000b + shufps m1, m2, 11011101b +%endif +%if %4 < %5 + mov%3 m2, [2*mmsize + srcq +(2<<%5)*lenq] + mova m3, m2 + mov%3 m4, [3*mmsize + srcq +(2<<%5)*lenq] + shufps m2, m4, 10001000b + shufps m3, m4, 11011101b + SWAP 1,2 +%endif + %6 m0,m1,m2,m3,m4,m5 + mov%3 [ dstq+(1<<%4)*lenq], m0 +%if %4 > %5 + mov%3 [ dst2q+(1<<%4)*lenq], m2 + mov%3 [ mmsize + dstq+(1<<%4)*lenq], m1 + mov%3 [ mmsize + dst2q+(1<<%4)*lenq], m3 + add lenq, 2*mmsize/(1<<%4) +%else + mov%3 [ dst2q+(1<<%4)*lenq], m1 + add lenq, mmsize/(1<<%4) +%endif + jl .next + REP_RET +%endmacro + +%macro CONV 5-7 +cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len + mov srcq , [srcq] + mov dstq , [dstq] +%ifidn %3, a + test dstq, mmsize-1 + jne %2_to_%1_u_int %+ SUFFIX + test srcq, mmsize-1 + jne %2_to_%1_u_int %+ SUFFIX +%else +%2_to_%1_u_int %+ SUFFIX: +%endif + lea 
srcq , [srcq + (1<<%5)*lenq] + lea dstq , [dstq + (1<<%4)*lenq] + neg lenq + %7 m0,m1,m2,m3,m4,m5 +.next: + mov%3 m0, [ srcq +(1<<%5)*lenq] + mov%3 m1, [ mmsize + srcq +(1<<%5)*lenq] +%if %4 < %5 + mov%3 m2, [2*mmsize + srcq +(1<<%5)*lenq] + mov%3 m3, [3*mmsize + srcq +(1<<%5)*lenq] +%endif + %6 m0,m1,m2,m3,m4,m5 + mov%3 [ dstq+(1<<%4)*lenq], m0 + mov%3 [ mmsize + dstq+(1<<%4)*lenq], m1 +%if %4 > %5 + mov%3 [2*mmsize + dstq+(1<<%4)*lenq], m2 + mov%3 [3*mmsize + dstq+(1<<%4)*lenq], m3 + add lenq, 4*mmsize/(1<<%4) +%else + add lenq, 2*mmsize/(1<<%4) +%endif + jl .next +%if mmsize == 8 + emms + RET +%else + REP_RET +%endif +%endmacro + +%macro PACK_6CH 8 +cglobal pack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, src1, src2, src3, src4, src5, len +%if ARCH_X86_64 + mov lend, r2d +%else + %define lend dword r2m +%endif + mov src1q, [srcq+1*gprsize] + mov src2q, [srcq+2*gprsize] + mov src3q, [srcq+3*gprsize] + mov src4q, [srcq+4*gprsize] + mov src5q, [srcq+5*gprsize] + mov srcq, [srcq] + mov dstq, [dstq] +%ifidn %3, a + test dstq, mmsize-1 + jne pack_6ch_%2_to_%1_u_int %+ SUFFIX + test srcq, mmsize-1 + jne pack_6ch_%2_to_%1_u_int %+ SUFFIX + test src1q, mmsize-1 + jne pack_6ch_%2_to_%1_u_int %+ SUFFIX + test src2q, mmsize-1 + jne pack_6ch_%2_to_%1_u_int %+ SUFFIX + test src3q, mmsize-1 + jne pack_6ch_%2_to_%1_u_int %+ SUFFIX + test src4q, mmsize-1 + jne pack_6ch_%2_to_%1_u_int %+ SUFFIX + test src5q, mmsize-1 + jne pack_6ch_%2_to_%1_u_int %+ SUFFIX +%else +pack_6ch_%2_to_%1_u_int %+ SUFFIX: +%endif + sub src1q, srcq + sub src2q, srcq + sub src3q, srcq + sub src4q, srcq + sub src5q, srcq + %8 x,x,x,x,m7,x +.loop: + mov%3 m0, [srcq ] + mov%3 m1, [srcq+src1q] + mov%3 m2, [srcq+src2q] + mov%3 m3, [srcq+src3q] + mov%3 m4, [srcq+src4q] + mov%3 m5, [srcq+src5q] +%if cpuflag(sse) + SBUTTERFLYPS 0, 1, 6 + SBUTTERFLYPS 2, 3, 6 + SBUTTERFLYPS 4, 5, 6 + +%if cpuflag(avx) + blendps m6, m4, m0, 1100b +%else + movaps m6, m4 + shufps m4, m0, q3210 + SWAP 4,6 +%endif + movlhps m0, m2 + movhlps 
m4, m2 +%if cpuflag(avx) + blendps m2, m5, m1, 1100b +%else + movaps m2, m5 + shufps m5, m1, q3210 + SWAP 2,5 +%endif + movlhps m1, m3 + movhlps m5, m3 + + %7 m0,m6,x,x,m7,m3 + %7 m4,m1,x,x,m7,m3 + %7 m2,m5,x,x,m7,m3 + + mov %+ %3 %+ ps [dstq ], m0 + mov %+ %3 %+ ps [dstq+16], m6 + mov %+ %3 %+ ps [dstq+32], m4 + mov %+ %3 %+ ps [dstq+48], m1 + mov %+ %3 %+ ps [dstq+64], m2 + mov %+ %3 %+ ps [dstq+80], m5 +%else ; mmx + SBUTTERFLY dq, 0, 1, 6 + SBUTTERFLY dq, 2, 3, 6 + SBUTTERFLY dq, 4, 5, 6 + + movq [dstq ], m0 + movq [dstq+ 8], m2 + movq [dstq+16], m4 + movq [dstq+24], m1 + movq [dstq+32], m3 + movq [dstq+40], m5 +%endif + add srcq, mmsize + add dstq, mmsize*6 + sub lend, mmsize/4 + jg .loop +%if mmsize == 8 + emms + RET +%else + REP_RET +%endif +%endmacro + +%macro UNPACK_6CH 8 +cglobal unpack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, dst1, dst2, dst3, dst4, dst5, len +%if ARCH_X86_64 + mov lend, r2d +%else + %define lend dword r2m +%endif + mov dst1q, [dstq+1*gprsize] + mov dst2q, [dstq+2*gprsize] + mov dst3q, [dstq+3*gprsize] + mov dst4q, [dstq+4*gprsize] + mov dst5q, [dstq+5*gprsize] + mov dstq, [dstq] + mov srcq, [srcq] +%ifidn %3, a + test dstq, mmsize-1 + jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX + test srcq, mmsize-1 + jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX + test dst1q, mmsize-1 + jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX + test dst2q, mmsize-1 + jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX + test dst3q, mmsize-1 + jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX + test dst4q, mmsize-1 + jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX + test dst5q, mmsize-1 + jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX +%else +unpack_6ch_%2_to_%1_u_int %+ SUFFIX: +%endif + sub dst1q, dstq + sub dst2q, dstq + sub dst3q, dstq + sub dst4q, dstq + sub dst5q, dstq + %8 x,x,x,x,m7,x +.loop: + mov%3 m0, [srcq ] + mov%3 m1, [srcq+16] + mov%3 m2, [srcq+32] + mov%3 m3, [srcq+48] + mov%3 m4, [srcq+64] + mov%3 m5, [srcq+80] + + SBUTTERFLYPS 0, 3, 6 + SBUTTERFLYPS 1, 4, 6 + SBUTTERFLYPS 2, 5, 6 + SBUTTERFLYPS 0, 4, 6 
+ SBUTTERFLYPS 3, 2, 6 + SBUTTERFLYPS 1, 5, 6 + SWAP 1, 4 + SWAP 2, 3 + + %7 m0,m1,x,x,m7,m6 + %7 m2,m3,x,x,m7,m6 + %7 m4,m5,x,x,m7,m6 + + mov %+ %3 %+ ps [dstq ], m0 + mov %+ %3 %+ ps [dstq+dst1q], m1 + mov %+ %3 %+ ps [dstq+dst2q], m2 + mov %+ %3 %+ ps [dstq+dst3q], m3 + mov %+ %3 %+ ps [dstq+dst4q], m4 + mov %+ %3 %+ ps [dstq+dst5q], m5 + + add srcq, mmsize*6 + add dstq, mmsize + sub lend, mmsize/4 + jg .loop + REP_RET +%endmacro + +%define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32) + +%macro PACK_8CH 8 +cglobal pack_8ch_%2_to_%1_%3, 2, PACK_8CH_GPRS, %6, ARCH_X86_32*48, dst, src, len, src1, src2, src3, src4, src5, src6, src7 + mov dstq, [dstq] +%if ARCH_X86_32 + DEFINE_ARGS dst, src, src2, src3, src4, src5, src6 + %define lend dword r2m + %define src1q r0q + %define src1m dword [rsp+32] +%if HAVE_ALIGNED_STACK == 0 + DEFINE_ARGS dst, src, src2, src3, src5, src6 + %define src4q r0q + %define src4m dword [rsp+36] +%endif + %define src7q r0q + %define src7m dword [rsp+40] + mov dstm, dstq +%endif + mov src7q, [srcq+7*gprsize] + mov src6q, [srcq+6*gprsize] +%if ARCH_X86_32 + mov src7m, src7q +%endif + mov src5q, [srcq+5*gprsize] + mov src4q, [srcq+4*gprsize] + mov src3q, [srcq+3*gprsize] +%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0 + mov src4m, src4q +%endif + mov src2q, [srcq+2*gprsize] + mov src1q, [srcq+1*gprsize] + mov srcq, [srcq] +%ifidn %3, a +%if ARCH_X86_32 + test dstmp, mmsize-1 +%else + test dstq, mmsize-1 +%endif + jne pack_8ch_%2_to_%1_u_int %+ SUFFIX + test srcq, mmsize-1 + jne pack_8ch_%2_to_%1_u_int %+ SUFFIX + test src1q, mmsize-1 + jne pack_8ch_%2_to_%1_u_int %+ SUFFIX + test src2q, mmsize-1 + jne pack_8ch_%2_to_%1_u_int %+ SUFFIX + test src3q, mmsize-1 + jne pack_8ch_%2_to_%1_u_int %+ SUFFIX +%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0 + test src4m, mmsize-1 +%else + test src4q, mmsize-1 +%endif + jne pack_8ch_%2_to_%1_u_int %+ SUFFIX + test src5q, mmsize-1 + jne pack_8ch_%2_to_%1_u_int %+ SUFFIX + test src6q, 
mmsize-1 + jne pack_8ch_%2_to_%1_u_int %+ SUFFIX +%if ARCH_X86_32 + test src7m, mmsize-1 +%else + test src7q, mmsize-1 +%endif + jne pack_8ch_%2_to_%1_u_int %+ SUFFIX +%else +pack_8ch_%2_to_%1_u_int %+ SUFFIX: +%endif + sub src1q, srcq + sub src2q, srcq + sub src3q, srcq +%if ARCH_X86_64 || HAVE_ALIGNED_STACK + sub src4q, srcq +%else + sub src4m, srcq +%endif + sub src5q, srcq + sub src6q, srcq +%if ARCH_X86_64 + sub src7q, srcq +%else + mov src1m, src1q + sub src7m, srcq +%endif + +%if ARCH_X86_64 + %8 x,x,x,x,m9,x +%elifidn %1, int32 + %define m9 [flt2p31] +%else + %define m9 [flt2pm31] +%endif + +.loop: + mov%3 m0, [srcq ] + mov%3 m1, [srcq+src1q] + mov%3 m2, [srcq+src2q] +%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0 + mov src4q, src4m +%endif + mov%3 m3, [srcq+src3q] + mov%3 m4, [srcq+src4q] + mov%3 m5, [srcq+src5q] +%if ARCH_X86_32 + mov src7q, src7m +%endif + mov%3 m6, [srcq+src6q] + mov%3 m7, [srcq+src7q] + +%if ARCH_X86_64 + TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, 8 + + %7 m0,m1,x,x,m9,m8 + %7 m2,m3,x,x,m9,m8 + %7 m4,m5,x,x,m9,m8 + %7 m6,m7,x,x,m9,m8 + + mov%3 [dstq], m0 +%else + mov dstq, dstm + + TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp+16], 1 + + %7 m0,m1,x,x,m9,m2 + mova m2, [rsp] + mov%3 [dstq], m0 + %7 m2,m3,x,x,m9,m0 + %7 m4,m5,x,x,m9,m0 + %7 m6,m7,x,x,m9,m0 + +%endif + + mov%3 [dstq+16], m1 + mov%3 [dstq+32], m2 + mov%3 [dstq+48], m3 + mov%3 [dstq+64], m4 + mov%3 [dstq+80], m5 + mov%3 [dstq+96], m6 + mov%3 [dstq+112], m7 + + add srcq, mmsize + add dstq, mmsize*8 +%if ARCH_X86_32 + mov dstm, dstq + mov src1q, src1m +%endif + sub lend, mmsize/4 + jg .loop + REP_RET +%endmacro + +%macro INT16_TO_INT32_N 6 + pxor m2, m2 + pxor m3, m3 + punpcklwd m2, m1 + punpckhwd m3, m1 + SWAP 4,0 + pxor m0, m0 + pxor m1, m1 + punpcklwd m0, m4 + punpckhwd m1, m4 +%endmacro + +%macro INT32_TO_INT16_N 6 + psrad m0, 16 + psrad m1, 16 + psrad m2, 16 + psrad m3, 16 + packssdw m0, m1 + packssdw m2, m3 + SWAP 1,2 +%endmacro + +%macro INT32_TO_FLOAT_INIT 6 + mova %5, 
[flt2pm31] +%endmacro +%macro INT32_TO_FLOAT_N 6 + cvtdq2ps %1, %1 + cvtdq2ps %2, %2 + mulps %1, %1, %5 + mulps %2, %2, %5 +%endmacro + +%macro FLOAT_TO_INT32_INIT 6 + mova %5, [flt2p31] +%endmacro +%macro FLOAT_TO_INT32_N 6 + mulps %1, %5 + mulps %2, %5 + cvtps2dq %6, %1 + cmpps %1, %1, %5, 5 + paddd %1, %6 + cvtps2dq %6, %2 + cmpps %2, %2, %5, 5 + paddd %2, %6 +%endmacro + +%macro INT16_TO_FLOAT_INIT 6 + mova m5, [flt2pm31] +%endmacro +%macro INT16_TO_FLOAT_N 6 + INT16_TO_INT32_N %1,%2,%3,%4,%5,%6 + cvtdq2ps m0, m0 + cvtdq2ps m1, m1 + cvtdq2ps m2, m2 + cvtdq2ps m3, m3 + mulps m0, m0, m5 + mulps m1, m1, m5 + mulps m2, m2, m5 + mulps m3, m3, m5 +%endmacro + +%macro FLOAT_TO_INT16_INIT 6 + mova m5, [flt2p15] +%endmacro +%macro FLOAT_TO_INT16_N 6 + mulps m0, m5 + mulps m1, m5 + mulps m2, m5 + mulps m3, m5 + cvtps2dq m0, m0 + cvtps2dq m1, m1 + packssdw m0, m1 + cvtps2dq m1, m2 + cvtps2dq m3, m3 + packssdw m1, m3 +%endmacro + +%macro NOP_N 0-6 +%endmacro + +INIT_MMX mmx +CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N +CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N +CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N +CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N + +PACK_6CH float, float, u, 2, 2, 0, NOP_N, NOP_N +PACK_6CH float, float, a, 2, 2, 0, NOP_N, NOP_N + +INIT_XMM sse +PACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N +PACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N + +UNPACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N +UNPACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N + +INIT_XMM sse2 +CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N +CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N +CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N +CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N + +PACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N +PACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N +PACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N +PACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N +PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N +PACK_2CH int32, 
int16, a, 2, 1, INT16_TO_INT32_N, NOP_N +PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N +PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N + +UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N +UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N +UNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N +UNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N +UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N +UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N +UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N +UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N + +CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +CONV float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT +CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT +CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT +CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT + +PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +PACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +PACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT +PACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT +PACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT +PACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT + +UNPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +UNPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +UNPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +UNPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +UNPACK_2CH 
float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT +UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT +UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT +UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT + +PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT + +UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT + +PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N +PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N + +PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT + +INIT_XMM ssse3 +UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N +UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N +UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N +UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N +UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT +UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT + +%if HAVE_AVX_EXTERNAL +INIT_XMM avx +PACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N +PACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N + +UNPACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N +UNPACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N + +PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +PACK_6CH float, 
int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT + +UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT + +PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N +PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N + +PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT + +INIT_YMM avx +CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT +%endif + +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT +%endif diff --git a/libswresample/x86/audio_convert_init.c b/libswresample/x86/audio_convert_init.c new file mode 100644 index 0000000000..bb89cf604b --- /dev/null +++ b/libswresample/x86/audio_convert_init.c @@ -0,0 +1,181 @@ +/* + * Copyright (C) 2012 Michael Niedermayer (michaelni@gmx.at) + * + * This file is part of libswresample + * + * libswresample is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * libswresample is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libswresample; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/x86/cpu.h" +#include "libswresample/swresample_internal.h" +#include "libswresample/audioconvert.h" + +#define PROTO(pre, in, out, cap) void ff ## pre ## in## _to_ ##out## _a_ ##cap(uint8_t **dst, const uint8_t **src, int len); +#define PROTO2(pre, out, cap) PROTO(pre, int16, out, cap) PROTO(pre, int32, out, cap) PROTO(pre, float, out, cap) +#define PROTO3(pre, cap) PROTO2(pre, int16, cap) PROTO2(pre, int32, cap) PROTO2(pre, float, cap) +#define PROTO4(pre) PROTO3(pre, mmx) PROTO3(pre, sse) PROTO3(pre, sse2) PROTO3(pre, ssse3) PROTO3(pre, sse4) PROTO3(pre, avx) PROTO3(pre, avx2) +PROTO4(_) +PROTO4(_pack_2ch_) +PROTO4(_pack_6ch_) +PROTO4(_pack_8ch_) +PROTO4(_unpack_2ch_) +PROTO4(_unpack_6ch_) + +av_cold void swri_audio_convert_init_x86(struct AudioConvert *ac, + enum AVSampleFormat out_fmt, + enum AVSampleFormat in_fmt, + int channels){ + int mm_flags = av_get_cpu_flags(); + + ac->simd_f= NULL; + +//FIXME add memcpy case + +#define MULTI_CAPS_FUNC(flag, cap) \ + if (EXTERNAL_##flag(mm_flags)) {\ + if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16P)\ + ac->simd_f = ff_int16_to_int32_a_ ## cap;\ + if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32P)\ + ac->simd_f = ff_int32_to_int16_a_ ## cap;\ + } + +MULTI_CAPS_FUNC(MMX, mmx) +MULTI_CAPS_FUNC(SSE2, sse2) + + if(EXTERNAL_MMX(mm_flags)) { + if(channels == 6) { 
+ if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_6ch_float_to_float_a_mmx; + } + } + if(EXTERNAL_SSE(mm_flags)) { + if(channels == 6) { + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_6ch_float_to_float_a_sse; + + if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32) + ac->simd_f = ff_unpack_6ch_float_to_float_a_sse; + } + } + if(EXTERNAL_SSE2(mm_flags)) { + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_int32_to_float_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16P) + ac->simd_f = ff_int16_to_float_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP) + ac->simd_f = ff_float_to_int32_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP) + ac->simd_f = ff_float_to_int16_a_sse2; + + if(channels == 2) { + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_2ch_int32_to_int32_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_S16P) + ac->simd_f = ff_pack_2ch_int16_to_int16_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S16P) + ac->simd_f = ff_pack_2ch_int16_to_int32_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_2ch_int32_to_int16_a_sse2; + + if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == 
AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32) + ac->simd_f = ff_unpack_2ch_int32_to_int32_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S16) + ac->simd_f = ff_unpack_2ch_int16_to_int16_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16) + ac->simd_f = ff_unpack_2ch_int16_to_int32_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32) + ac->simd_f = ff_unpack_2ch_int32_to_int16_a_sse2; + + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_2ch_int32_to_float_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP) + ac->simd_f = ff_pack_2ch_float_to_int32_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S16P) + ac->simd_f = ff_pack_2ch_int16_to_float_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLTP) + ac->simd_f = ff_pack_2ch_float_to_int16_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32) + ac->simd_f = ff_unpack_2ch_int32_to_float_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLT) + ac->simd_f = ff_unpack_2ch_float_to_int32_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16) + ac->simd_f = ff_unpack_2ch_int16_to_float_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLT) + ac->simd_f = ff_unpack_2ch_float_to_int16_a_sse2; + } + if(channels == 6) { + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_6ch_int32_to_float_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP) + ac->simd_f = ff_pack_6ch_float_to_int32_a_sse2; + + if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32) + ac->simd_f = ff_unpack_6ch_int32_to_float_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLT) + ac->simd_f = ff_unpack_6ch_float_to_int32_a_sse2; + } + if(channels == 8) { + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == 
AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_8ch_float_to_float_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_8ch_int32_to_float_a_sse2; + if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP) + ac->simd_f = ff_pack_8ch_float_to_int32_a_sse2; + } + } + if(EXTERNAL_SSSE3(mm_flags)) { + if(channels == 2) { + if( out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S16) + ac->simd_f = ff_unpack_2ch_int16_to_int16_a_ssse3; + if( out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16) + ac->simd_f = ff_unpack_2ch_int16_to_int32_a_ssse3; + if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16) + ac->simd_f = ff_unpack_2ch_int16_to_float_a_ssse3; + } + } + if(EXTERNAL_AVX_FAST(mm_flags)) { + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_int32_to_float_a_avx; + } + if(EXTERNAL_AVX(mm_flags)) { + if(channels == 6) { + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_6ch_float_to_float_a_avx; + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_6ch_int32_to_float_a_avx; + if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP) + ac->simd_f = ff_pack_6ch_float_to_int32_a_avx; + + if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32) + ac->simd_f = ff_unpack_6ch_float_to_float_a_avx; + if( out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32) + ac->simd_f = ff_unpack_6ch_int32_to_float_a_avx; + if( out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLT) + ac->simd_f = ff_unpack_6ch_float_to_int32_a_avx; + } + if(channels == 8) { + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP 
|| out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_8ch_float_to_float_a_avx; + if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32P) + ac->simd_f = ff_pack_8ch_int32_to_float_a_avx; + if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLTP) + ac->simd_f = ff_pack_8ch_float_to_int32_a_avx; + } + } + if(EXTERNAL_AVX2_FAST(mm_flags)) { + if( out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP) + ac->simd_f = ff_float_to_int32_a_avx2; + } +} diff --git a/libswresample/x86/rematrix.asm b/libswresample/x86/rematrix.asm new file mode 100644 index 0000000000..7984b9a729 --- /dev/null +++ b/libswresample/x86/rematrix.asm @@ -0,0 +1,250 @@ +;****************************************************************************** +;* Copyright (c) 2012 Michael Niedermayer +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************

%include "libavutil/x86/x86util.asm"


SECTION_RODATA 32
dw1: times 8  dd 1          ; dwords of 1: seed for the 32-bit rounding term
w1 : times 16 dw 1          ; words of 1: rounding seed / multiplier for it

SECTION .text

; Conventions shared by the four macros below:
;  * %1 is "a" or "u".  The "a" entry point tests every buffer pointer
;    against mmsize-1 and jumps into the body of the matching "u" function
;    (label mix_*_u_int<SUFFIX>) when any of them is misaligned; otherwise
;    it uses mova / memory-operand forms.
;  * len is scaled to a byte count, all buffer pointers are advanced to the
;    end of their buffers and len is negated, so the loop counts upwards
;    towards zero.
;  * Each iteration handles 2*mmsize bytes per buffer and there is no tail
;    code; callers presumably pass a suitably rounded len -- TODO confirm.

;------------------------------------------------------------------------------
; mix_2_1_<a|u>_float(out, in1, in2, coeffp, index1, index2, len)
; out[i] = in1[i] * coeffp[index1] + in2[i] * coeffp[index2]   (float)
;------------------------------------------------------------------------------
%macro MIX2_FLT 1
cglobal mix_2_1_%1_float, 7, 7, 6, out, in1, in2, coeffp, index1, index2, len
%ifidn %1, a
    test in1q, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
    test in2q, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
%else
mix_2_1_float_u_int %+ SUFFIX:
%endif
    VBROADCASTSS m4, [coeffpq + 4*index1q]     ; m4 = coefficient for in1
    VBROADCASTSS m5, [coeffpq + 4*index2q]     ; m5 = coefficient for in2
    shl          lend, 2                       ; samples -> bytes
    add          in1q, lenq
    add          in2q, lenq
    add          outq, lenq
    neg          lenq
.next:
%ifidn %1, a
    mulps        m0, m4, [in1q + lenq         ]
    mulps        m1, m5, [in2q + lenq         ]
    mulps        m2, m4, [in1q + lenq + mmsize]
    mulps        m3, m5, [in2q + lenq + mmsize]
%else
    movu         m0, [in1q + lenq         ]
    movu         m1, [in2q + lenq         ]
    movu         m2, [in1q + lenq + mmsize]
    movu         m3, [in2q + lenq + mmsize]
    mulps        m0, m0, m4
    mulps        m1, m1, m5
    mulps        m2, m2, m4
    mulps        m3, m3, m5
%endif
    addps        m0, m0, m1
    addps        m2, m2, m3
    mov%1        [outq + lenq         ], m0
    mov%1        [outq + lenq + mmsize], m2
    add          lenq, mmsize*2
        jl .next
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; mix_1_1_<a|u>_float(out, in, coeffp, index, len)
; out[i] = in[i] * coeffp[index]                               (float)
;------------------------------------------------------------------------------
%macro MIX1_FLT 1
cglobal mix_1_1_%1_float, 5, 5, 3, out, in, coeffp, index, len
%ifidn %1, a
    test inq, mmsize-1
        jne mix_1_1_float_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_1_1_float_u_int %+ SUFFIX
%else
mix_1_1_float_u_int %+ SUFFIX:
%endif
    VBROADCASTSS m2, [coeffpq + 4*indexq]      ; m2 = coefficient
    shl          lenq, 2                       ; samples -> bytes
    add          inq , lenq
    add          outq, lenq
    neg          lenq
.next:
%ifidn %1, a
    mulps        m0, m2, [inq + lenq         ]
    mulps        m1, m2, [inq + lenq + mmsize]
%else
    movu         m0, [inq + lenq         ]
    movu         m1, [inq + lenq + mmsize]
    mulps        m0, m0, m2
    mulps        m1, m1, m2
%endif
    mov%1        [outq + lenq         ], m0
    mov%1        [outq + lenq + mmsize], m1
    add          lenq, mmsize*2
        jl .next
    REP_RET
%endmacro

;------------------------------------------------------------------------------
; mix_1_1_<a|u>_int16(out, in, coeffp, index, len)
; Each coeffp entry is 32 bits: low word = coefficient, high word = shift.
; out[i] = clip16((in[i] * coeff + ((1 << shift) >> 1)) >> shift)
; The rounding term is folded into the multiply: samples are interleaved
; with 1 and the coefficient with the rounding word, so one pmaddwd yields
; in*coeff + 1*round.
;------------------------------------------------------------------------------
%macro MIX1_INT16 1
cglobal mix_1_1_%1_int16, 5, 5, 6, out, in, coeffp, index, len
%ifidn %1, a
    test inq, mmsize-1
        jne mix_1_1_int16_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_1_1_int16_u_int %+ SUFFIX
%else
mix_1_1_int16_u_int %+ SUFFIX:
%endif
    movd         m4, [coeffpq + 4*indexq]
    SPLATW       m5, m4                        ; m5 = coefficient in every word
    psllq        m4, 32
    psrlq        m4, 48                        ; m4 = shift (high word of entry)
    mova         m0, [w1]
    psllw        m0, m4
    psrlw        m0, 1                         ; m0 = (1 << shift) >> 1 per word
    punpcklwd    m5, m0                        ; m5 = {coeff, round} pairs
    add          lenq, lenq                    ; samples -> bytes
    add          inq , lenq
    add          outq, lenq
    neg          lenq
.next:
    mov%1        m0, [inq + lenq         ]
    mov%1        m2, [inq + lenq + mmsize]
    mova         m1, m0
    mova         m3, m2
    punpcklwd    m0, [w1]                      ; {sample, 1} pairs
    punpckhwd    m1, [w1]
    punpcklwd    m2, [w1]
    punpckhwd    m3, [w1]
    pmaddwd      m0, m5
    pmaddwd      m1, m5
    pmaddwd      m2, m5
    pmaddwd      m3, m5
    psrad        m0, m4
    psrad        m1, m4
    psrad        m2, m4
    psrad        m3, m4
    packssdw     m0, m1                        ; saturate back to int16
    packssdw     m2, m3
    mov%1        [outq + lenq         ], m0
    mov%1        [outq + lenq + mmsize], m2
    add          lenq, mmsize*2
        jl .next
%if mmsize == 8
    emms
    RET
%else
    REP_RET
%endif
%endmacro

;------------------------------------------------------------------------------
; mix_2_1_<a|u>_int16(out, in1, in2, coeffp, index1, index2, len)
; out[i] = clip16((in1[i]*c1 + in2[i]*c2 + ((1 << shift) >> 1)) >> shift)
; c1/c2 are the low words of coeffp[index1]/coeffp[index2]; the shift is
; taken from the coeffp[index1] entry only.
;------------------------------------------------------------------------------
%macro MIX2_INT16 1
cglobal mix_2_1_%1_int16, 7, 7, 8, out, in1, in2, coeffp, index1, index2, len
%ifidn %1, a
    test in1q, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
    test in2q, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
%else
mix_2_1_int16_u_int %+ SUFFIX:
%endif
    movd         m4, [coeffpq + 4*index1q]
    movd         m6, [coeffpq + 4*index2q]
    SPLATW       m5, m4                        ; c1 in every word
    SPLATW       m6, m6                        ; c2 in every word
    psllq        m4, 32
    psrlq        m4, 48                        ; m4 = shift
    mova         m7, [dw1]
    pslld        m7, m4
    psrld        m7, 1                         ; m7 = (1 << shift) >> 1 per dword
    punpcklwd    m5, m6                        ; m5 = {c1, c2} pairs
    add          lend, lend                    ; samples -> bytes
    add          in1q, lenq
    add          in2q, lenq
    add          outq, lenq
    neg          lenq
.next:
    mov%1        m0, [in1q + lenq         ]
    mov%1        m2, [in2q + lenq         ]
    mova         m1, m0
    punpcklwd    m0, m2                        ; {in1, in2} pairs
    punpckhwd    m1, m2

    mov%1        m2, [in1q + lenq + mmsize]
    mov%1        m6, [in2q + lenq + mmsize]
    mova         m3, m2
    punpcklwd    m2, m6
    punpckhwd    m3, m6

    pmaddwd      m0, m5
    pmaddwd      m1, m5
    pmaddwd      m2, m5
    pmaddwd      m3, m5
    paddd        m0, m7
    paddd        m1, m7
    paddd        m2, m7
    paddd        m3, m7
    psrad        m0, m4
    psrad        m1, m4
    psrad        m2, m4
    psrad        m3, m4
    packssdw     m0, m1                        ; saturate back to int16
    packssdw     m2, m3
    mov%1        [outq + lenq         ], m0
    mov%1        [outq + lenq + mmsize], m2
    add          lenq, mmsize*2
        jl .next
%if mmsize == 8
    emms
    RET
%else
    REP_RET
%endif
%endmacro


; In every group the "u" flavour comes first; the "a" flavour refers to the
; label the "u" flavour defines.
INIT_MMX mmx
MIX1_INT16 u
MIX1_INT16 a
MIX2_INT16 u
MIX2_INT16 a

INIT_XMM sse
MIX2_FLT u
MIX2_FLT a
MIX1_FLT u
MIX1_FLT a

INIT_XMM sse2
MIX1_INT16 u
MIX1_INT16 a
MIX2_INT16 u
MIX2_INT16 a

%if HAVE_AVX_EXTERNAL
INIT_YMM avx
MIX2_FLT u
MIX2_FLT a
MIX1_FLT u
MIX1_FLT a
%endif
diff --git a/libswresample/x86/rematrix_init.c b/libswresample/x86/rematrix_init.c new file mode 100644 index 0000000000..5f2c5fe170 --- /dev/null +++ b/libswresample/x86/rematrix_init.c @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2012 Michael Niedermayer (michaelni@gmx.at) + * + * This file is part of libswresample + * + * libswresample is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libswresample is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with libswresample; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/x86/cpu.h" +#include "libswresample/swresample_internal.h" + +#define D(type, simd) \ +mix_1_1_func_type ff_mix_1_1_a_## type ## _ ## simd;\ +mix_2_1_func_type ff_mix_2_1_a_## type ## _ ## simd; + +D(float, sse) +D(float, avx) +D(int16, mmx) +D(int16, sse2) + +av_cold int swri_rematrix_init_x86(struct SwrContext *s){ +#if HAVE_YASM + int mm_flags = av_get_cpu_flags(); + int nb_in = av_get_channel_layout_nb_channels(s->in_ch_layout); + int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout); + int num = nb_in * nb_out; + int i,j; + + s->mix_1_1_simd = NULL; + s->mix_2_1_simd = NULL; + + if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){ + if(EXTERNAL_MMX(mm_flags)) { + s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx; + s->mix_2_1_simd = ff_mix_2_1_a_int16_mmx; + } + if(EXTERNAL_SSE2(mm_flags)) { + s->mix_1_1_simd = ff_mix_1_1_a_int16_sse2; + s->mix_2_1_simd = ff_mix_2_1_a_int16_sse2; + } + s->native_simd_matrix = av_mallocz_array(num, 2 * sizeof(int16_t)); + s->native_simd_one = av_mallocz(2 * sizeof(int16_t)); + if (!s->native_simd_matrix || !s->native_simd_one) + return AVERROR(ENOMEM); + + for(i=0; i<nb_out; i++){ + int sh = 0; + for(j=0; j<nb_in; j++) + sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j])); + sh = FFMAX(av_log2(sh) - 14, 0); + for(j=0; j<nb_in; j++) { + ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh; + ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] = + ((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh; + } + } + ((int16_t*)s->native_simd_one)[1] = 14; + ((int16_t*)s->native_simd_one)[0] = 16384; + } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){ + if(EXTERNAL_SSE(mm_flags)) { + s->mix_1_1_simd = ff_mix_1_1_a_float_sse; + s->mix_2_1_simd = 
ff_mix_2_1_a_float_sse; + } + if(EXTERNAL_AVX_FAST(mm_flags)) { + s->mix_1_1_simd = ff_mix_1_1_a_float_avx; + s->mix_2_1_simd = ff_mix_2_1_a_float_avx; + } + s->native_simd_matrix = av_mallocz_array(num, sizeof(float)); + s->native_simd_one = av_mallocz(sizeof(float)); + if (!s->native_simd_matrix || !s->native_simd_one) + return AVERROR(ENOMEM); + memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float)); + memcpy(s->native_simd_one, s->native_one, sizeof(float)); + } +#endif + + return 0; +} diff --git a/libswresample/x86/resample.asm b/libswresample/x86/resample.asm new file mode 100644 index 0000000000..4163df1aa1 --- /dev/null +++ b/libswresample/x86/resample.asm @@ -0,0 +1,610 @@ +;****************************************************************************** +;* Copyright (c) 2012 Michael Niedermayer +;* Copyright (c) 2014 James Almer <jamrial <at> gmail.com> +;* Copyright (c) 2014 Ronald S. Bultje <rsbultje@gmail.com> +;* +;* This file is part of FFmpeg. +;* +;* FFmpeg is free software; you can redistribute it and/or +;* modify it under the terms of the GNU Lesser General Public +;* License as published by the Free Software Foundation; either +;* version 2.1 of the License, or (at your option) any later version. +;* +;* FFmpeg is distributed in the hope that it will be useful, +;* but WITHOUT ANY WARRANTY; without even the implied warranty of +;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;* Lesser General Public License for more details. 
+;* +;* You should have received a copy of the GNU Lesser General Public +;* License along with FFmpeg; if not, write to the Free Software +;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +;****************************************************************************** + +%include "libavutil/x86/x86util.asm" + +%if ARCH_X86_64 +%define pointer resq +%else +%define pointer resd +%endif + +struc ResampleContext + .av_class: pointer 1 + .filter_bank: pointer 1 + .filter_length: resd 1 + .filter_alloc: resd 1 + .ideal_dst_incr: resd 1 + .dst_incr: resd 1 + .dst_incr_div: resd 1 + .dst_incr_mod: resd 1 + .index: resd 1 + .frac: resd 1 + .src_incr: resd 1 + .compensation_distance: resd 1 + .phase_count: resd 1 + + ; there's a few more here but we only care about the first few +endstruc + +SECTION_RODATA + +pf_1: dd 1.0 +pdbl_1: dq 1.0 +pd_0x4000: dd 0x4000 + +SECTION .text + +; FIXME remove unneeded variables (index_incr, phase_mask) +%macro RESAMPLE_FNS 3-5 ; format [float or int16], bps, log2_bps, float op suffix [s or d], 1.0 constant +; int resample_common_$format(ResampleContext *ctx, $format *dst, +; const $format *src, int size, int update_ctx) +%if ARCH_X86_64 ; unix64 and win64 +cglobal resample_common_%1, 0, 15, 2, ctx, dst, src, phase_count, index, frac, \ + dst_incr_mod, size, min_filter_count_x4, \ + min_filter_len_x4, dst_incr_div, src_incr, \ + phase_mask, dst_end, filter_bank + + ; use red-zone for variable storage +%define ctx_stackq [rsp-0x8] +%define src_stackq [rsp-0x10] +%if WIN64 +%define update_context_stackd r4m +%else ; unix64 +%define update_context_stackd [rsp-0x14] +%endif + + ; load as many variables in registers as possible; for the rest, store + ; on stack so that we have 'ctx' available as one extra register + mov sized, r3d +%if UNIX64 + mov update_context_stackd, r4d +%endif + mov indexd, [ctxq+ResampleContext.index] + mov fracd, [ctxq+ResampleContext.frac] + mov dst_incr_modd, 
[ctxq+ResampleContext.dst_incr_mod] + mov filter_bankq, [ctxq+ResampleContext.filter_bank] + mov src_incrd, [ctxq+ResampleContext.src_incr] + mov ctx_stackq, ctxq + mov min_filter_len_x4d, [ctxq+ResampleContext.filter_length] + mov dst_incr_divd, [ctxq+ResampleContext.dst_incr_div] + shl min_filter_len_x4d, %3 + lea dst_endq, [dstq+sizeq*%2] + +%if UNIX64 + mov ecx, [ctxq+ResampleContext.phase_count] + mov edi, [ctxq+ResampleContext.filter_alloc] + + DEFINE_ARGS filter_alloc, dst, src, phase_count, index, frac, dst_incr_mod, \ + filter, min_filter_count_x4, min_filter_len_x4, dst_incr_div, \ + src_incr, phase_mask, dst_end, filter_bank +%elif WIN64 + mov R9d, [ctxq+ResampleContext.filter_alloc] + mov ecx, [ctxq+ResampleContext.phase_count] + + DEFINE_ARGS phase_count, dst, src, filter_alloc, index, frac, dst_incr_mod, \ + filter, min_filter_count_x4, min_filter_len_x4, dst_incr_div, \ + src_incr, phase_mask, dst_end, filter_bank +%endif + + neg min_filter_len_x4q + sub filter_bankq, min_filter_len_x4q + sub srcq, min_filter_len_x4q + mov src_stackq, srcq +%else ; x86-32 +cglobal resample_common_%1, 1, 7, 2, ctx, phase_count, dst, frac, \ + index, min_filter_length_x4, filter_bank + + ; push temp variables to stack +%define ctx_stackq r0mp +%define src_stackq r2mp +%define update_context_stackd r4m + + mov dstq, r1mp + mov r3, r3mp + lea r3, [dstq+r3*%2] + PUSH dword [ctxq+ResampleContext.dst_incr_div] + PUSH dword [ctxq+ResampleContext.dst_incr_mod] + PUSH dword [ctxq+ResampleContext.filter_alloc] + PUSH r3 + PUSH dword [ctxq+ResampleContext.phase_count] ; unneeded replacement for phase_mask + PUSH dword [ctxq+ResampleContext.src_incr] + mov min_filter_length_x4d, [ctxq+ResampleContext.filter_length] + mov indexd, [ctxq+ResampleContext.index] + shl min_filter_length_x4d, %3 + mov fracd, [ctxq+ResampleContext.frac] + neg min_filter_length_x4q + mov filter_bankq, [ctxq+ResampleContext.filter_bank] + sub r2mp, min_filter_length_x4q + sub filter_bankq, 
min_filter_length_x4q + PUSH min_filter_length_x4q + PUSH filter_bankq + mov phase_countd, [ctxq+ResampleContext.phase_count] + + DEFINE_ARGS src, phase_count, dst, frac, index, min_filter_count_x4, filter + +%define filter_bankq dword [rsp+0x0] +%define min_filter_length_x4q dword [rsp+0x4] +%define src_incrd dword [rsp+0x8] +%define phase_maskd dword [rsp+0xc] +%define dst_endq dword [rsp+0x10] +%define filter_allocd dword [rsp+0x14] +%define dst_incr_modd dword [rsp+0x18] +%define dst_incr_divd dword [rsp+0x1c] + + mov srcq, r2mp +%endif + +.loop: + mov filterd, filter_allocd + imul filterd, indexd +%if ARCH_X86_64 + mov min_filter_count_x4q, min_filter_len_x4q + lea filterq, [filter_bankq+filterq*%2] +%else ; x86-32 + mov min_filter_count_x4q, filter_bankq + lea filterq, [min_filter_count_x4q+filterq*%2] + mov min_filter_count_x4q, min_filter_length_x4q +%endif +%ifidn %1, int16 + movd m0, [pd_0x4000] +%else ; float/double + xorps m0, m0, m0 +%endif + + align 16 +.inner_loop: + movu m1, [srcq+min_filter_count_x4q*1] +%ifidn %1, int16 +%if cpuflag(xop) + vpmadcswd m0, m1, [filterq+min_filter_count_x4q*1], m0 +%else + pmaddwd m1, [filterq+min_filter_count_x4q*1] + paddd m0, m1 +%endif +%else ; float/double +%if cpuflag(fma4) || cpuflag(fma3) + fmaddp%4 m0, m1, [filterq+min_filter_count_x4q*1], m0 +%else + mulp%4 m1, m1, [filterq+min_filter_count_x4q*1] + addp%4 m0, m0, m1 +%endif ; cpuflag +%endif + add min_filter_count_x4q, mmsize + js .inner_loop + +%ifidn %1, int16 + HADDD m0, m1 + psrad m0, 15 + add fracd, dst_incr_modd + packssdw m0, m0 + add indexd, dst_incr_divd + movd [dstq], m0 +%else ; float/double + ; horizontal sum & store +%if mmsize == 32 + vextractf128 xm1, m0, 0x1 + addps xm0, xm1 +%endif + movhlps xm1, xm0 +%ifidn %1, float + addps xm0, xm1 + shufps xm1, xm0, xm0, q0001 +%endif + add fracd, dst_incr_modd + addp%4 xm0, xm1 + add indexd, dst_incr_divd + movs%4 [dstq], xm0 +%endif + cmp fracd, src_incrd + jl .skip + sub fracd, src_incrd + inc indexd 
+ +%if UNIX64 + DEFINE_ARGS filter_alloc, dst, src, phase_count, index, frac, dst_incr_mod, \ + index_incr, min_filter_count_x4, min_filter_len_x4, dst_incr_div, \ + src_incr, phase_mask, dst_end, filter_bank +%elif WIN64 + DEFINE_ARGS phase_count, dst, src, filter_alloc, index, frac, dst_incr_mod, \ + index_incr, min_filter_count_x4, min_filter_len_x4, dst_incr_div, \ + src_incr, phase_mask, dst_end, filter_bank +%else ; x86-32 + DEFINE_ARGS src, phase_count, dst, frac, index, index_incr +%endif + +.skip: + add dstq, %2 + cmp indexd, phase_countd + jb .index_skip +.index_while: + sub indexd, phase_countd + lea srcq, [srcq+%2] + cmp indexd, phase_countd + jnb .index_while +.index_skip: + cmp dstq, dst_endq + jne .loop + +%if ARCH_X86_64 + DEFINE_ARGS ctx, dst, src, phase_count, index, frac +%else ; x86-32 + DEFINE_ARGS src, ctx, update_context, frac, index +%endif + + cmp dword update_context_stackd, 0 + jz .skip_store + ; strictly speaking, the function should always return the consumed + ; number of bytes; however, we only use the value if update_context + ; is true, so let's just leave it uninitialized otherwise + mov ctxq, ctx_stackq + movifnidn rax, srcq + mov [ctxq+ResampleContext.frac ], fracd + sub rax, src_stackq + mov [ctxq+ResampleContext.index], indexd + shr rax, %3 + +.skip_store: +%if ARCH_X86_32 + ADD rsp, 0x20 +%endif + RET + +; int resample_linear_$format(ResampleContext *ctx, float *dst, +; const float *src, int size, int update_ctx) +%if ARCH_X86_64 ; unix64 and win64 +%if UNIX64 +cglobal resample_linear_%1, 0, 15, 5, ctx, dst, phase_mask, phase_count, index, frac, \ + size, dst_incr_mod, min_filter_count_x4, \ + min_filter_len_x4, dst_incr_div, src_incr, \ + src, dst_end, filter_bank + + mov srcq, r2mp +%else ; win64 +cglobal resample_linear_%1, 0, 15, 5, ctx, phase_mask, src, phase_count, index, frac, \ + size, dst_incr_mod, min_filter_count_x4, \ + min_filter_len_x4, dst_incr_div, src_incr, \ + dst, dst_end, filter_bank + + mov dstq, r1mp 
+%endif + + ; use red-zone for variable storage +%define ctx_stackq [rsp-0x8] +%define src_stackq [rsp-0x10] +%define phase_mask_stackd [rsp-0x14] +%if WIN64 +%define update_context_stackd r4m +%else ; unix64 +%define update_context_stackd [rsp-0x18] +%endif + + ; load as many variables in registers as possible; for the rest, store + ; on stack so that we have 'ctx' available as one extra register + mov sized, r3d +%if UNIX64 + mov update_context_stackd, r4d +%endif + mov indexd, [ctxq+ResampleContext.index] + mov fracd, [ctxq+ResampleContext.frac] + mov dst_incr_modd, [ctxq+ResampleContext.dst_incr_mod] + mov filter_bankq, [ctxq+ResampleContext.filter_bank] + mov src_incrd, [ctxq+ResampleContext.src_incr] + mov ctx_stackq, ctxq + mov min_filter_len_x4d, [ctxq+ResampleContext.filter_length] +%ifidn %1, int16 + movd m4, [pd_0x4000] +%else ; float/double + cvtsi2s%4 xm0, src_incrd + movs%4 xm4, [%5] + divs%4 xm4, xm0 +%endif + mov dst_incr_divd, [ctxq+ResampleContext.dst_incr_div] + shl min_filter_len_x4d, %3 + lea dst_endq, [dstq+sizeq*%2] + +%if UNIX64 + mov ecx, [ctxq+ResampleContext.phase_count] + mov edi, [ctxq+ResampleContext.filter_alloc] + + DEFINE_ARGS filter_alloc, dst, filter2, phase_count, index, frac, filter1, \ + dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \ + dst_incr_div, src_incr, src, dst_end, filter_bank +%elif WIN64 + mov R9d, [ctxq+ResampleContext.filter_alloc] + mov ecx, [ctxq+ResampleContext.phase_count] + + DEFINE_ARGS phase_count, filter2, src, filter_alloc, index, frac, filter1, \ + dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \ + dst_incr_div, src_incr, dst, dst_end, filter_bank +%endif + + neg min_filter_len_x4q + sub filter_bankq, min_filter_len_x4q + sub srcq, min_filter_len_x4q + mov src_stackq, srcq +%else ; x86-32 +cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \ + frac, index, dst, filter_bank + + ; push temp variables to stack +%define ctx_stackq r0mp +%define src_stackq r2mp +%define 
update_context_stackd r4m + + mov dstq, r1mp + mov r3, r3mp + lea r3, [dstq+r3*%2] + PUSH dword [ctxq+ResampleContext.dst_incr_div] + PUSH r3 + mov r3, dword [ctxq+ResampleContext.filter_alloc] + PUSH dword [ctxq+ResampleContext.dst_incr_mod] + PUSH r3 + shl r3, %3 + PUSH r3 + mov r3, dword [ctxq+ResampleContext.src_incr] + PUSH dword [ctxq+ResampleContext.phase_count] ; unneeded replacement of phase_mask + PUSH r3d +%ifidn %1, int16 + movd m4, [pd_0x4000] +%else ; float/double + cvtsi2s%4 xm0, r3d + movs%4 xm4, [%5] + divs%4 xm4, xm0 +%endif + mov min_filter_length_x4d, [ctxq+ResampleContext.filter_length] + mov indexd, [ctxq+ResampleContext.index] + shl min_filter_length_x4d, %3 + mov fracd, [ctxq+ResampleContext.frac] + neg min_filter_length_x4q + mov filter_bankq, [ctxq+ResampleContext.filter_bank] + sub r2mp, min_filter_length_x4q + sub filter_bankq, min_filter_length_x4q + PUSH min_filter_length_x4q + PUSH filter_bankq + PUSH dword [ctxq+ResampleContext.phase_count] + + DEFINE_ARGS filter1, min_filter_count_x4, filter2, frac, index, dst, src + +%define phase_count_stackd dword [rsp+0x0] +%define filter_bankq dword [rsp+0x4] +%define min_filter_length_x4q dword [rsp+0x8] +%define src_incrd dword [rsp+0xc] +%define phase_mask_stackd dword [rsp+0x10] +%define filter_alloc_x4q dword [rsp+0x14] +%define filter_allocd dword [rsp+0x18] +%define dst_incr_modd dword [rsp+0x1c] +%define dst_endq dword [rsp+0x20] +%define dst_incr_divd dword [rsp+0x24] + + mov srcq, r2mp +%endif + +.loop: + mov filter1d, filter_allocd + imul filter1d, indexd +%if ARCH_X86_64 + mov min_filter_count_x4q, min_filter_len_x4q + lea filter1q, [filter_bankq+filter1q*%2] + lea filter2q, [filter1q+filter_allocq*%2] +%else ; x86-32 + mov min_filter_count_x4q, filter_bankq + lea filter1q, [min_filter_count_x4q+filter1q*%2] + mov min_filter_count_x4q, min_filter_length_x4q + mov filter2q, filter1q + add filter2q, filter_alloc_x4q +%endif +%ifidn %1, int16 + mova m0, m4 + mova m2, m4 +%else ; 
float/double + xorps m0, m0, m0 + xorps m2, m2, m2 +%endif + + align 16 +.inner_loop: + movu m1, [srcq+min_filter_count_x4q*1] +%ifidn %1, int16 +%if cpuflag(xop) + vpmadcswd m2, m1, [filter2q+min_filter_count_x4q*1], m2 + vpmadcswd m0, m1, [filter1q+min_filter_count_x4q*1], m0 +%else + pmaddwd m3, m1, [filter2q+min_filter_count_x4q*1] + pmaddwd m1, [filter1q+min_filter_count_x4q*1] + paddd m2, m3 + paddd m0, m1 +%endif ; cpuflag +%else ; float/double +%if cpuflag(fma4) || cpuflag(fma3) + fmaddp%4 m2, m1, [filter2q+min_filter_count_x4q*1], m2 + fmaddp%4 m0, m1, [filter1q+min_filter_count_x4q*1], m0 +%else + mulp%4 m3, m1, [filter2q+min_filter_count_x4q*1] + mulp%4 m1, m1, [filter1q+min_filter_count_x4q*1] + addp%4 m2, m2, m3 + addp%4 m0, m0, m1 +%endif ; cpuflag +%endif + add min_filter_count_x4q, mmsize + js .inner_loop + +%ifidn %1, int16 +%if mmsize == 16 +%if cpuflag(xop) + vphadddq m2, m2 + vphadddq m0, m0 +%endif + pshufd m3, m2, q0032 + pshufd m1, m0, q0032 + paddd m2, m3 + paddd m0, m1 +%endif +%if notcpuflag(xop) + PSHUFLW m3, m2, q0032 + PSHUFLW m1, m0, q0032 + paddd m2, m3 + paddd m0, m1 +%endif + psubd m2, m0 + ; This is probably a really bad idea on atom and other machines with a + ; long transfer latency between GPRs and XMMs (atom). However, it does + ; make the clip a lot simpler... 
+ movd eax, m2 + add indexd, dst_incr_divd + imul fracd + idiv src_incrd + movd m1, eax + add fracd, dst_incr_modd + paddd m0, m1 + psrad m0, 15 + packssdw m0, m0 + movd [dstq], m0 + + ; note that for imul/idiv, I need to move filter to edx/eax for each: + ; - 32bit: eax=r0[filter1], edx=r2[filter2] + ; - win64: eax=r6[filter1], edx=r1[todo] + ; - unix64: eax=r6[filter1], edx=r2[todo] +%else ; float/double + ; val += (v2 - val) * (FELEML) frac / c->src_incr; +%if mmsize == 32 + vextractf128 xm1, m0, 0x1 + vextractf128 xm3, m2, 0x1 + addps xm0, xm1 + addps xm2, xm3 +%endif + cvtsi2s%4 xm1, fracd + subp%4 xm2, xm0 + mulp%4 xm1, xm4 + shufp%4 xm1, xm1, q0000 +%if cpuflag(fma4) || cpuflag(fma3) + fmaddp%4 xm0, xm2, xm1, xm0 +%else + mulp%4 xm2, xm1 + addp%4 xm0, xm2 +%endif ; cpuflag + + ; horizontal sum & store + movhlps xm1, xm0 +%ifidn %1, float + addps xm0, xm1 + shufps xm1, xm0, xm0, q0001 +%endif + add fracd, dst_incr_modd + addp%4 xm0, xm1 + add indexd, dst_incr_divd + movs%4 [dstq], xm0 +%endif + cmp fracd, src_incrd + jl .skip + sub fracd, src_incrd + inc indexd + +%if UNIX64 + DEFINE_ARGS filter_alloc, dst, filter2, phase_count, index, frac, index_incr, \ + dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \ + dst_incr_div, src_incr, src, dst_end, filter_bank +%elif WIN64 + DEFINE_ARGS phase_count, filter2, src, filter_alloc, index, frac, index_incr, \ + dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \ + dst_incr_div, src_incr, dst, dst_end, filter_bank +%else ; x86-32 + DEFINE_ARGS filter1, phase_count, index_incr, frac, index, dst, src +%endif + +.skip: +%if ARCH_X86_32 + mov phase_countd, phase_count_stackd +%endif + add dstq, %2 + cmp indexd, phase_countd + jb .index_skip +.index_while: + sub indexd, phase_countd + lea srcq, [srcq+%2] + cmp indexd, phase_countd + jnb .index_while +.index_skip: + cmp dstq, dst_endq + jne .loop + +%if UNIX64 + DEFINE_ARGS ctx, dst, filter2, phase_count, index, frac, index_incr, \ + dst_incr_mod, 
min_filter_count_x4, min_filter_len_x4, \ + dst_incr_div, src_incr, src, dst_end, filter_bank +%elif WIN64 + DEFINE_ARGS ctx, filter2, src, phase_count, index, frac, index_incr, \ + dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \ + dst_incr_div, src_incr, dst, dst_end, filter_bank +%else ; x86-32 + DEFINE_ARGS filter1, ctx, update_context, frac, index, dst, src +%endif + + cmp dword update_context_stackd, 0 + jz .skip_store + ; strictly speaking, the function should always return the consumed + ; number of bytes; however, we only use the value if update_context + ; is true, so let's just leave it uninitialized otherwise + mov ctxq, ctx_stackq + movifnidn rax, srcq + mov [ctxq+ResampleContext.frac ], fracd + sub rax, src_stackq + mov [ctxq+ResampleContext.index], indexd + shr rax, %3 + +.skip_store: +%if ARCH_X86_32 + ADD rsp, 0x28 +%endif + RET +%endmacro + +INIT_XMM sse +RESAMPLE_FNS float, 4, 2, s, pf_1 + +%if HAVE_AVX_EXTERNAL +INIT_YMM avx +RESAMPLE_FNS float, 4, 2, s, pf_1 +%endif +%if HAVE_FMA3_EXTERNAL +INIT_YMM fma3 +RESAMPLE_FNS float, 4, 2, s, pf_1 +%endif +%if HAVE_FMA4_EXTERNAL +INIT_XMM fma4 +RESAMPLE_FNS float, 4, 2, s, pf_1 +%endif + +%if ARCH_X86_32 +INIT_MMX mmxext +RESAMPLE_FNS int16, 2, 1 +%endif + +INIT_XMM sse2 +RESAMPLE_FNS int16, 2, 1 +%if HAVE_XOP_EXTERNAL +INIT_XMM xop +RESAMPLE_FNS int16, 2, 1 +%endif + +INIT_XMM sse2 +RESAMPLE_FNS double, 8, 3, d, pdbl_1 diff --git a/libswresample/x86/resample_init.c b/libswresample/x86/resample_init.c new file mode 100644 index 0000000000..9d7d5cf89e --- /dev/null +++ b/libswresample/x86/resample_init.c @@ -0,0 +1,90 @@ +/* + * audio resampling + * Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at> + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * audio resampling + * @author Michael Niedermayer <michaelni@gmx.at> + */ + +#include "libavutil/x86/cpu.h" +#include "libswresample/resample.h" +/* + * RESAMPLE_FUNCS(type, opt) declares the prototypes of one pair of resample + * kernels, ff_resample_common_<type>_<opt> and ff_resample_linear_<type>_<opt>. + * Both take (ResampleContext *c, void *dst, const void *src, int sz, int upd) + * and return int. The macro body ends without a semicolon; each invocation + * below supplies it. + * NOTE(review): presumably these are the symbols emitted by the RESAMPLE_FNS + * instantiations in x86/resample.asm, with sz/upd corresponding to that + * file's size/update_ctx arguments - confirm against the assembly. + */ +#define RESAMPLE_FUNCS(type, opt) \ +int ff_resample_common_##type##_##opt(ResampleContext *c, void *dst, \ + const void *src, int sz, int upd); \ +int ff_resample_linear_##type##_##opt(ResampleContext *c, void *dst, \ + const void *src, int sz, int upd) + +RESAMPLE_FUNCS(int16, mmxext); +RESAMPLE_FUNCS(int16, sse2); +RESAMPLE_FUNCS(int16, xop); +RESAMPLE_FUNCS(float, sse); +RESAMPLE_FUNCS(float, avx); +RESAMPLE_FUNCS(float, fma3); +RESAMPLE_FUNCS(float, fma4); +RESAMPLE_FUNCS(double, sse2); +/** + * Select the x86 resample kernel for a ResampleContext. + * + * Reads the CPU flags with av_get_cpu_flags() and, depending on c->format, + * stores a kernel in c->dsp.resample: the ff_resample_linear_* variant when + * c->linear is non-zero, the ff_resample_common_* variant otherwise. + * + * Within each case the checks are independent ifs evaluated in source order, + * so when several of them succeed the last matching assignment wins + * (S16P: EXTERNAL_MMXEXT, EXTERNAL_SSE2, EXTERNAL_XOP; + * FLTP: EXTERNAL_SSE, EXTERNAL_AVX_FAST, EXTERNAL_FMA3_FAST, EXTERNAL_FMA4; + * DBLP: EXTERNAL_SSE2). + * The MMXEXT kernels are only considered when ARCH_X86_32 is non-zero. + * + * The switch has no default: for any other format, or when no check + * matches, c->dsp.resample is left unchanged. + * + * @param c context whose format and linear fields are read and whose + * dsp.resample pointer may be overwritten + */ +av_cold void swri_resample_dsp_x86_init(ResampleContext *c) +{ + int av_unused mm_flags = av_get_cpu_flags(); +/* Later checks overwrite the pointer stored by earlier ones, so the order of + * the ifs inside each case is significant. + * NOTE(review): mm_flags is tagged av_unused, presumably because the + * EXTERNAL_* macros can compile to constants that never read it - confirm. */ + switch(c->format){ + case AV_SAMPLE_FMT_S16P: + if (ARCH_X86_32 && EXTERNAL_MMXEXT(mm_flags)) { + c->dsp.resample = c->linear ? ff_resample_linear_int16_mmxext + : ff_resample_common_int16_mmxext; + } + if (EXTERNAL_SSE2(mm_flags)) { + c->dsp.resample = c->linear ? ff_resample_linear_int16_sse2 + : ff_resample_common_int16_sse2; + } + if (EXTERNAL_XOP(mm_flags)) { + c->dsp.resample = c->linear ?
ff_resample_linear_int16_xop + : ff_resample_common_int16_xop; + } + break; + case AV_SAMPLE_FMT_FLTP: + if (EXTERNAL_SSE(mm_flags)) { + c->dsp.resample = c->linear ? ff_resample_linear_float_sse + : ff_resample_common_float_sse; + } + if (EXTERNAL_AVX_FAST(mm_flags)) { + c->dsp.resample = c->linear ? ff_resample_linear_float_avx + : ff_resample_common_float_avx; + } + if (EXTERNAL_FMA3_FAST(mm_flags)) { + c->dsp.resample = c->linear ? ff_resample_linear_float_fma3 + : ff_resample_common_float_fma3; + } + if (EXTERNAL_FMA4(mm_flags)) { + c->dsp.resample = c->linear ? ff_resample_linear_float_fma4 + : ff_resample_common_float_fma4; + } + break; + case AV_SAMPLE_FMT_DBLP: + if (EXTERNAL_SSE2(mm_flags)) { + c->dsp.resample = c->linear ? ff_resample_linear_double_sse2 + : ff_resample_common_double_sse2; + } + break; + } +} diff --git a/libswresample/x86/w64xmmtest.c b/libswresample/x86/w64xmmtest.c new file mode 100644 index 0000000000..9cddb4a858 --- /dev/null +++ b/libswresample/x86/w64xmmtest.c @@ -0,0 +1,29 @@ +/* + * check XMM registers for clobbers on Win64 + * Copyright (c) 2013 Martin Storsjo + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libswresample/swresample.h" +#include "libavutil/x86/w64xmmtest.h" +/* + * Wrapper around swr_convert() built from the wrap() and testxmmclobbers() + * macros, which are not defined in this file (presumably provided by + * libavutil/x86/w64xmmtest.h). All five arguments (s, out, out_count, in, + * in_count) are forwarded unchanged. + * NOTE(review): per this file's header the purpose is checking XMM registers + * for clobbers on Win64; how the check is performed and how the return value + * is propagated is decided by those macros - confirm in w64xmmtest.h. + */ +wrap(swr_convert(struct SwrContext *s, uint8_t **out, int out_count, + const uint8_t **in , int in_count)) +{ + testxmmclobbers(swr_convert, s, out, out_count, in, in_count); +} |