42 files changed, 8219 insertions, 0 deletions
diff --git a/libswresample/Makefile b/libswresample/Makefile
new file mode 100644
index 0000000000..120ee3385d
--- /dev/null
+++ b/libswresample/Makefile
@@ -0,0 +1,24 @@
+include $(SUBDIR)../config.mak
+
+NAME = swresample
+FFLIBS = avutil
+
+HEADERS = swresample.h                       \
+          version.h                          \
+
+OBJS = audioconvert.o                        \
+       dither.o                              \
+       options.o                             \
+       rematrix.o                            \
+       resample.o                            \
+       resample_dsp.o                        \
+       swresample.o                          \
+       swresample_frame.o                    \
+
+OBJS-$(CONFIG_LIBSOXR) += soxr_resample.o
+OBJS-$(CONFIG_SHARED)  += log2_tab.o
+
+# Windows resource file
+SLIBOBJS-$(HAVE_GNU_WINDRES) += swresampleres.o
+
+TESTPROGS = swresample
diff --git a/libswresample/aarch64/Makefile b/libswresample/aarch64/Makefile
new file mode 100644
index 0000000000..320ed67e82
--- /dev/null
+++ b/libswresample/aarch64/Makefile
@@ -0,0 +1,5 @@
+OBJS                             += aarch64/audio_convert_init.o
+
+OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o
+
+NEON-OBJS                        += aarch64/audio_convert_neon.o
diff --git a/libswresample/aarch64/audio_convert_init.c b/libswresample/aarch64/audio_convert_init.c
new file mode 100644
index 0000000000..60e24adb1c
--- /dev/null
+++ b/libswresample/aarch64/audio_convert_init.c
@@ -0,0 +1,67 @@
+/*
+ * This file is part of libswresample.
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/aarch64/cpu.h"
+#include "libavutil/samplefmt.h"
+#include "libswresample/swresample_internal.h"
+#include "libswresample/audioconvert.h"
+
+void swri_oldapi_conv_flt_to_s16_neon(int16_t *dst, const float *src, int len);
+void swri_oldapi_conv_fltp_to_s16_2ch_neon(int16_t *dst, float *const *src, int len, int channels);
+void swri_oldapi_conv_fltp_to_s16_nch_neon(int16_t *dst, float *const *src, int len, int channels);
+
+static void conv_flt_to_s16_neon(uint8_t **dst, const uint8_t **src, int len){
+    swri_oldapi_conv_flt_to_s16_neon((int16_t*)*dst, (const float*)*src, len);
+}
+
+static void conv_fltp_to_s16_2ch_neon(uint8_t **dst, const uint8_t **src, int len){
+    swri_oldapi_conv_fltp_to_s16_2ch_neon((int16_t*)*dst, (float *const*)src, len, 2);
+}
+
+static void conv_fltp_to_s16_nch_neon(uint8_t **dst, const uint8_t **src, int len){
+    int channels;
+    for(channels=3; channels<SWR_CH_MAX && src[channels]; channels++)
+        ;
+    swri_oldapi_conv_fltp_to_s16_nch_neon((int16_t*)*dst, (float *const*)src, len, channels);
+}
+
+av_cold void swri_audio_convert_init_aarch64(struct AudioConvert *ac,
+                                       enum AVSampleFormat out_fmt,
+                                       enum AVSampleFormat in_fmt,
+                                       int channels)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    ac->simd_f= NULL;
+
+    if (have_neon(cpu_flags)) {
+        if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
+            ac->simd_f = conv_flt_to_s16_neon;
+        if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLTP && channels == 2)
+            ac->simd_f = conv_fltp_to_s16_2ch_neon;
+        if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLTP && channels >  2)
+            ac->simd_f = conv_fltp_to_s16_nch_neon;
+        if(ac->simd_f)
+            ac->in_simd_align_mask = ac->out_simd_align_mask = 15;
+    }
+}
diff --git a/libswresample/aarch64/audio_convert_neon.S b/libswresample/aarch64/audio_convert_neon.S
new file mode 100644
index 0000000000..74feff448a
--- /dev/null
+++ b/libswresample/aarch64/audio_convert_neon.S
@@ -0,0 +1,363 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ * Copyright (c) 2014 Janne Grunau <janne-libav@jannau.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/aarch64/asm.S"
+
+function swri_oldapi_conv_flt_to_s16_neon, export=1
+        subs            x2,  x2,  #8
+        ld1             {v0.4s}, [x1],  #16
+        fcvtzs          v4.4s,  v0.4s,  #31
+        ld1             {v1.4s}, [x1],  #16
+        fcvtzs          v5.4s,  v1.4s,  #31
+        b.eq            3f
+        ands            x12, x2,  #~15
+        b.eq            2f
+1:      subs            x12, x12, #16
+        sqrshrn         v4.4h,  v4.4s,  #16
+        ld1             {v2.4s}, [x1],  #16
+        fcvtzs          v6.4s,  v2.4s,  #31
+        sqrshrn2        v4.8h,  v5.4s,  #16
+        ld1             {v3.4s}, [x1],  #16
+        fcvtzs          v7.4s,  v3.4s,  #31
+        sqrshrn         v6.4h,  v6.4s,  #16
+        st1             {v4.8h}, [x0],  #16
+        sqrshrn2        v6.8h,  v7.4s,  #16
+        ld1             {v0.4s}, [x1],  #16
+        fcvtzs          v4.4s,  v0.4s,  #31
+        ld1             {v1.4s}, [x1],  #16
+        fcvtzs          v5.4s,  v1.4s,  #31
+        st1             {v6.8h}, [x0],  #16
+        b.ne            1b
+        ands            x2,  x2,  #15
+        b.eq            3f
+2:      ld1             {v2.4s}, [x1],  #16
+        sqrshrn         v4.4h,  v4.4s,  #16
+        fcvtzs          v6.4s,  v2.4s,  #31
+        ld1             {v3.4s}, [x1],  #16
+        sqrshrn2        v4.8h,  v5.4s,  #16
+        fcvtzs          v7.4s,  v3.4s,  #31
+        sqrshrn         v6.4h,  v6.4s,  #16
+        st1             {v4.8h}, [x0],  #16
+        sqrshrn2        v6.8h,  v7.4s,  #16
+        st1             {v6.8h}, [x0]
+        ret
+3:      sqrshrn         v4.4h,  v4.4s,  #16
+        sqrshrn2        v4.8h,  v5.4s,  #16
+        st1             {v4.8h}, [x0]
+        ret
+endfunc
+
+function swri_oldapi_conv_fltp_to_s16_2ch_neon, export=1
+        ldp             x4,  x5,  [x1]
+        subs            x2,  x2,  #8
+        ld1             {v0.4s},  [x4], #16
+        fcvtzs          v4.4s,  v0.4s,  #31
+        ld1             {v1.4s},  [x4], #16
+        fcvtzs          v5.4s,  v1.4s,  #31
+        ld1             {v2.4s},  [x5], #16
+        fcvtzs          v6.4s,  v2.4s,  #31
+        ld1             {v3.4s},  [x5], #16
+        fcvtzs          v7.4s,  v3.4s,  #31
+        b.eq            3f
+        ands            x12, x2,  #~15
+        b.eq            2f
+1:      subs            x12, x12, #16
+        ld1             {v16.4s}, [x4], #16
+        fcvtzs          v20.4s, v16.4s, #31
+        sri             v6.4s,  v4.4s,  #16
+        ld1             {v17.4s}, [x4], #16
+        fcvtzs          v21.4s, v17.4s, #31
+        ld1             {v18.4s}, [x5], #16
+        fcvtzs          v22.4s, v18.4s, #31
+        ld1             {v19.4s}, [x5], #16
+        sri             v7.4s,  v5.4s,  #16
+        st1             {v6.4s},  [x0], #16
+        fcvtzs          v23.4s, v19.4s, #31
+        st1             {v7.4s},  [x0], #16
+        sri             v22.4s, v20.4s, #16
+        ld1             {v0.4s},  [x4], #16
+        sri             v23.4s, v21.4s, #16
+        st1             {v22.4s}, [x0], #16
+        fcvtzs          v4.4s,  v0.4s,  #31
+        ld1             {v1.4s},  [x4], #16
+        fcvtzs          v5.4s,  v1.4s,  #31
+        ld1             {v2.4s},  [x5], #16
+        fcvtzs          v6.4s,  v2.4s,  #31
+        ld1             {v3.4s},  [x5], #16
+        fcvtzs          v7.4s,  v3.4s,  #31
+        st1             {v23.4s}, [x0], #16
+        b.ne            1b
+        ands            x2,  x2,  #15
+        b.eq            3f
+2:      sri             v6.4s,  v4.4s,  #16
+        ld1             {v0.4s},  [x4], #16
+        fcvtzs          v0.4s,  v0.4s,  #31
+        ld1             {v1.4s},  [x4], #16
+        fcvtzs          v1.4s,  v1.4s,  #31
+        ld1             {v2.4s},  [x5], #16
+        fcvtzs          v2.4s,  v2.4s,  #31
+        sri             v7.4s,  v5.4s,  #16
+        ld1             {v3.4s},  [x5], #16
+        fcvtzs          v3.4s,  v3.4s,  #31
+        sri             v2.4s,  v0.4s,  #16
+        st1             {v6.4s,v7.4s},  [x0], #32
+        sri             v3.4s,  v1.4s,  #16
+        st1             {v2.4s,v3.4s},  [x0], #32
+        ret
+3:      sri             v6.4s,  v4.4s,  #16
+        sri             v7.4s,  v5.4s,  #16
+        st1             {v6.4s,v7.4s},  [x0]
+        ret
+endfunc
+
+function swri_oldapi_conv_fltp_to_s16_nch_neon, export=1
+        cmp             w3,  #2
+        b.eq            X(swri_oldapi_conv_fltp_to_s16_2ch_neon)
+        b.gt            1f
+        ldr             x1,  [x1]
+        b               X(swri_oldapi_conv_flt_to_s16_neon)
+1:
+        cmp             w3,  #4
+        lsl             x12, x3,  #1
+        b.lt            4f
+
+5:      // 4 channels
+        ldp             x4, x5, [x1], #16
+        ldp             x6, x7, [x1], #16
+        mov             w9,  w2
+        mov             x8,  x0
+        ld1             {v4.4s},        [x4], #16
+        fcvtzs          v4.4s,  v4.4s,  #31
+        ld1             {v5.4s},        [x5], #16
+        fcvtzs          v5.4s,  v5.4s,  #31
+        ld1             {v6.4s},        [x6], #16
+        fcvtzs          v6.4s, v6.4s, #31
+        ld1             {v7.4s},        [x7], #16
+        fcvtzs          v7.4s, v7.4s, #31
+6:
+        subs            w9,  w9,  #8
+        ld1             {v0.4s},        [x4], #16
+        fcvtzs          v0.4s,  v0.4s,  #31
+        sri             v5.4s,  v4.4s,  #16
+        ld1             {v1.4s},        [x5], #16
+        fcvtzs          v1.4s,  v1.4s,  #31
+        sri             v7.4s,  v6.4s,  #16
+        ld1             {v2.4s},        [x6], #16
+        fcvtzs          v2.4s,  v2.4s,  #31
+        zip1            v16.4s, v5.4s,  v7.4s
+        ld1             {v3.4s},        [x7], #16
+        fcvtzs          v3.4s,  v3.4s,  #31
+        zip2            v17.4s, v5.4s,  v7.4s
+        st1             {v16.d}[0],     [x8], x12
+        sri             v1.4s,  v0.4s,  #16
+        st1             {v16.d}[1],     [x8], x12
+        sri             v3.4s,  v2.4s,  #16
+        st1             {v17.d}[0],     [x8], x12
+        zip1            v18.4s, v1.4s,  v3.4s
+        st1             {v17.d}[1],     [x8], x12
+        zip2            v19.4s, v1.4s,  v3.4s
+        b.eq            7f
+        ld1             {v4.4s},        [x4], #16
+        fcvtzs          v4.4s,  v4.4s,  #31
+        st1             {v18.d}[0],     [x8], x12
+        ld1             {v5.4s},        [x5], #16
+        fcvtzs          v5.4s,  v5.4s,  #31
+        st1             {v18.d}[1],     [x8], x12
+        ld1             {v6.4s},    [x6], #16
+        fcvtzs          v6.4s, v6.4s, #31
+        st1             {v19.d}[0],     [x8], x12
+        ld1             {v7.4s},    [x7], #16
+        fcvtzs          v7.4s, v7.4s, #31
+        st1             {v19.d}[1],     [x8], x12
+        b               6b
+7:
+        st1             {v18.d}[0],     [x8], x12
+        st1             {v18.d}[1],     [x8], x12
+        st1             {v19.d}[0],     [x8], x12
+        st1             {v19.d}[1],     [x8], x12
+        subs            w3,  w3,  #4
+        b.eq            end
+        cmp             w3,  #4
+        add             x0,  x0,  #8
+        b.ge            5b
+
+4:      // 2 channels
+        cmp             w3,  #2
+        b.lt            4f
+        ldp             x4,  x5,  [x1], #16
+        mov             w9,  w2
+        mov             x8,  x0
+        tst             w9,  #8
+        ld1             {v4.4s},        [x4], #16
+        fcvtzs          v4.4s,  v4.4s,  #31
+        ld1             {v5.4s},        [x5], #16
+        fcvtzs          v5.4s,  v5.4s,  #31
+        ld1             {v6.4s},        [x4], #16
+        fcvtzs          v6.4s,  v6.4s,  #31
+        ld1             {v7.4s},        [x5], #16
+        fcvtzs          v7.4s,  v7.4s,  #31
+        b.eq            6f
+        subs            w9,  w9,  #8
+        b.eq            7f
+        sri             v5.4s,  v4.4s,  #16
+        ld1             {v4.4s},        [x4], #16
+        fcvtzs          v4.4s,  v4.4s,  #31
+        st1             {v5.s}[0],      [x8], x12
+        sri             v7.4s,  v6.4s,  #16
+        st1             {v5.s}[1],      [x8], x12
+        ld1             {v6.4s},        [x4], #16
+        fcvtzs          v6.4s,  v6.4s, #31
+        st1             {v5.s}[2],      [x8], x12
+        st1             {v5.s}[3],      [x8], x12
+        st1             {v7.s}[0],      [x8], x12
+        st1             {v7.s}[1],      [x8], x12
+        ld1             {v5.4s},        [x5], #16
+        fcvtzs          v5.4s,  v5.4s,  #31
+        st1             {v7.s}[2],      [x8], x12
+        st1             {v7.s}[3],      [x8], x12
+        ld1             {v7.4s},        [x5], #16
+        fcvtzs          v7.4s,  v7.4s,  #31
+6:
+        subs            w9,  w9,  #16
+        ld1             {v0.4s},        [x4], #16
+        sri             v5.4s,  v4.4s,  #16
+        fcvtzs          v0.4s,  v0.4s,  #31
+        ld1             {v1.4s},        [x5], #16
+        sri             v7.4s,  v6.4s,  #16
+        st1             {v5.s}[0],      [x8], x12
+        st1             {v5.s}[1],      [x8], x12
+        fcvtzs          v1.4s,  v1.4s,  #31
+        st1             {v5.s}[2],      [x8], x12
+        st1             {v5.s}[3],      [x8], x12
+        ld1             {v2.4s},        [x4], #16
+        st1             {v7.s}[0],      [x8], x12
+        fcvtzs          v2.4s,  v2.4s,  #31
+        st1             {v7.s}[1],      [x8], x12
+        ld1             {v3.4s},        [x5], #16
+        st1             {v7.s}[2],      [x8], x12
+        fcvtzs          v3.4s,  v3.4s,  #31
+        st1             {v7.s}[3],      [x8], x12
+        sri             v1.4s,  v0.4s,  #16
+        sri             v3.4s,  v2.4s,  #16
+        b.eq            6f
+        ld1             {v4.4s},        [x4], #16
+        st1             {v1.s}[0],      [x8], x12
+        fcvtzs          v4.4s,  v4.4s,  #31
+        st1             {v1.s}[1],      [x8], x12
+        ld1             {v5.4s},        [x5], #16
+        st1             {v1.s}[2],      [x8], x12
+        fcvtzs          v5.4s,  v5.4s,  #31
+        st1             {v1.s}[3],      [x8], x12
+        ld1             {v6.4s},        [x4], #16
+        st1             {v3.s}[0],      [x8], x12
+        fcvtzs          v6.4s,  v6.4s,  #31
+        st1             {v3.s}[1],      [x8], x12
+        ld1             {v7.4s},        [x5], #16
+        st1             {v3.s}[2],      [x8], x12
+        fcvtzs          v7.4s,  v7.4s,  #31
+        st1             {v3.s}[3],      [x8], x12
+        b.gt            6b
+6:
+        st1             {v1.s}[0],      [x8], x12
+        st1             {v1.s}[1],      [x8], x12
+        st1             {v1.s}[2],      [x8], x12
+        st1             {v1.s}[3],      [x8], x12
+        st1             {v3.s}[0],      [x8], x12
+        st1             {v3.s}[1],      [x8], x12
+        st1             {v3.s}[2],      [x8], x12
+        st1             {v3.s}[3],      [x8], x12
+        b               8f
+7:
+        sri             v5.4s,  v4.4s,  #16
+        sri             v7.4s,  v6.4s,  #16
+        st1             {v5.s}[0],      [x8], x12
+        st1             {v5.s}[1],      [x8], x12
+        st1             {v5.s}[2],      [x8], x12
+        st1             {v5.s}[3],      [x8], x12
+        st1             {v7.s}[0],      [x8], x12
+        st1             {v7.s}[1],      [x8], x12
+        st1             {v7.s}[2],      [x8], x12
+        st1             {v7.s}[3],      [x8], x12
+8:
+        subs            w3,  w3,  #2
+        add             x0,  x0,  #4
+        b.eq            end
+
+4:      // 1 channel
+        ldr             x4,  [x1]
+        tst             w2,  #8
+        mov             w9,  w2
+        mov             x5,  x0
+        ld1             {v0.4s},        [x4], #16
+        fcvtzs          v0.4s,  v0.4s,  #31
+        ld1             {v1.4s},        [x4], #16
+        fcvtzs          v1.4s,  v1.4s,  #31
+        b.ne            8f
+6:
+        subs            w9,  w9,  #16
+        ld1             {v2.4s},        [x4], #16
+        fcvtzs          v2.4s,  v2.4s,  #31
+        ld1             {v3.4s},        [x4], #16
+        fcvtzs          v3.4s,  v3.4s,  #31
+        st1             {v0.h}[1],      [x5], x12
+        st1             {v0.h}[3],      [x5], x12
+        st1             {v0.h}[5],      [x5], x12
+        st1             {v0.h}[7],      [x5], x12
+        st1             {v1.h}[1],      [x5], x12
+        st1             {v1.h}[3],      [x5], x12
+        st1             {v1.h}[5],      [x5], x12
+        st1             {v1.h}[7],      [x5], x12
+        b.eq            7f
+        ld1             {v0.4s},        [x4], #16
+        fcvtzs          v0.4s,  v0.4s,  #31
+        ld1             {v1.4s},        [x4], #16
+        fcvtzs          v1.4s,  v1.4s,  #31
+7:
+        st1             {v2.h}[1],      [x5], x12
+        st1             {v2.h}[3],      [x5], x12
+        st1             {v2.h}[5],      [x5], x12
+        st1             {v2.h}[7],      [x5], x12
+        st1             {v3.h}[1],      [x5], x12
+        st1             {v3.h}[3],      [x5], x12
+        st1             {v3.h}[5],      [x5], x12
+        st1             {v3.h}[7],      [x5], x12
+        b.gt            6b
+        ret
+8:
+        subs            w9,  w9,  #8
+        st1             {v0.h}[1],      [x5], x12
+        st1             {v0.h}[3],      [x5], x12
+        st1             {v0.h}[5],      [x5], x12
+        st1             {v0.h}[7],      [x5], x12
+        st1             {v1.h}[1],      [x5], x12
+        st1             {v1.h}[3],      [x5], x12
+        st1             {v1.h}[5],      [x5], x12
+        st1             {v1.h}[7],      [x5], x12
+        b.eq            end
+        ld1             {v0.4s},        [x4], #16
+        fcvtzs          v0.4s,  v0.4s,  #31
+        ld1             {v1.4s},        [x4], #16
+        fcvtzs          v1.4s,  v1.4s,  #31
+        b               6b
+end:
+        ret
+endfunc
diff --git a/libswresample/aarch64/neontest.c b/libswresample/aarch64/neontest.c
new file mode 100644
index 0000000000..85c71bf4c9
--- /dev/null
+++ b/libswresample/aarch64/neontest.c
@@ -0,0 +1,29 @@
+/*
+ * check NEON registers for clobbers
+ * Copyright (c) 2013 Martin Storsjo
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libswresample/swresample.h"
+#include "libavutil/aarch64/neontest.h"
+
+wrap(swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
+                 const uint8_t **in , int in_count))
+{
+    testneonclobbers(swr_convert, s, out, out_count, in, in_count);
+}
diff --git a/libswresample/arm/Makefile b/libswresample/arm/Makefile
new file mode 100644
index 0000000000..53ab4626f4
--- /dev/null
+++ b/libswresample/arm/Makefile
@@ -0,0 +1,8 @@
+OBJS      += arm/audio_convert_init.o \
+             arm/resample_init.o
+
+
+OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o
+
+NEON-OBJS += arm/audio_convert_neon.o \
+             arm/resample.o
diff --git a/libswresample/arm/audio_convert_init.c b/libswresample/arm/audio_convert_init.c
new file mode 100644
index 0000000000..ec9e62ede7
--- /dev/null
+++ b/libswresample/arm/audio_convert_init.c
@@ -0,0 +1,67 @@
+/*
+ * This file is part of libswresample.
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavutil/samplefmt.h"
+#include "libswresample/swresample_internal.h"
+#include "libswresample/audioconvert.h"
+
+void swri_oldapi_conv_flt_to_s16_neon(int16_t *dst, const float *src, int len);
+void swri_oldapi_conv_fltp_to_s16_2ch_neon(int16_t *dst, float *const *src, int len, int channels);
+void swri_oldapi_conv_fltp_to_s16_nch_neon(int16_t *dst, float *const *src, int len, int channels);
+
+static void conv_flt_to_s16_neon(uint8_t **dst, const uint8_t **src, int len){
+    swri_oldapi_conv_flt_to_s16_neon((int16_t*)*dst, (const float*)*src, len);
+}
+
+static void conv_fltp_to_s16_2ch_neon(uint8_t **dst, const uint8_t **src, int len){
+    swri_oldapi_conv_fltp_to_s16_2ch_neon((int16_t*)*dst, (float *const*)src, len, 2);
+}
+
+static void conv_fltp_to_s16_nch_neon(uint8_t **dst, const uint8_t **src, int len){
+    int channels;
+    for(channels=3; channels<SWR_CH_MAX && src[channels]; channels++)
+        ;
+    swri_oldapi_conv_fltp_to_s16_nch_neon((int16_t*)*dst, (float *const*)src, len, channels);
+}
+
+av_cold void swri_audio_convert_init_arm(struct AudioConvert *ac,
+                                       enum AVSampleFormat out_fmt,
+                                       enum AVSampleFormat in_fmt,
+                                       int channels)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    ac->simd_f= NULL;
+
+    if (have_neon(cpu_flags)) {
+        if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
+            ac->simd_f = conv_flt_to_s16_neon;
+        if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLTP && channels == 2)
+            ac->simd_f = conv_fltp_to_s16_2ch_neon;
+        if(out_fmt == AV_SAMPLE_FMT_S16 && in_fmt == AV_SAMPLE_FMT_FLTP && channels >  2)
+            ac->simd_f = conv_fltp_to_s16_nch_neon;
+        if(ac->simd_f)
+            ac->in_simd_align_mask = ac->out_simd_align_mask = 15;
+    }
+}
diff --git a/libswresample/arm/audio_convert_neon.S b/libswresample/arm/audio_convert_neon.S
new file mode 100644
index 0000000000..1f88316dde
--- /dev/null
+++ b/libswresample/arm/audio_convert_neon.S
@@ -0,0 +1,363 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of libswresample.
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/arm/asm.S"
+
+function swri_oldapi_conv_flt_to_s16_neon, export=1
+        subs            r2,  r2,  #8
+        vld1.32         {q0},     [r1,:128]!
+        vcvt.s32.f32    q8,  q0,  #31
+        vld1.32         {q1},     [r1,:128]!
+        vcvt.s32.f32    q9,  q1,  #31
+        beq             3f
+        bics            r12, r2,  #15
+        beq             2f
+1:      subs            r12, r12, #16
+        vqrshrn.s32     d4,  q8,  #16
+        vld1.32         {q0},     [r1,:128]!
+        vcvt.s32.f32    q0,  q0,  #31
+        vqrshrn.s32     d5,  q9,  #16
+        vld1.32         {q1},     [r1,:128]!
+        vcvt.s32.f32    q1,  q1,  #31
+        vqrshrn.s32     d6,  q0,  #16
+        vst1.16         {q2},     [r0,:128]!
+        vqrshrn.s32     d7,  q1,  #16
+        vld1.32         {q8},     [r1,:128]!
+        vcvt.s32.f32    q8,  q8,  #31
+        vld1.32         {q9},     [r1,:128]!
+        vcvt.s32.f32    q9,  q9,  #31
+        vst1.16         {q3},     [r0,:128]!
+        bne             1b
+        ands            r2,  r2,  #15
+        beq             3f
+2:      vld1.32         {q0},     [r1,:128]!
+        vqrshrn.s32     d4,  q8,  #16
+        vcvt.s32.f32    q0,  q0,  #31
+        vld1.32         {q1},     [r1,:128]!
+        vqrshrn.s32     d5,  q9,  #16
+        vcvt.s32.f32    q1,  q1,  #31
+        vqrshrn.s32     d6,  q0,  #16
+        vst1.16         {q2},     [r0,:128]!
+        vqrshrn.s32     d7,  q1,  #16
+        vst1.16         {q3},     [r0,:128]!
+        bx              lr
+3:      vqrshrn.s32     d4,  q8,  #16
+        vqrshrn.s32     d5,  q9,  #16
+        vst1.16         {q2},     [r0,:128]!
+        bx              lr
+endfunc
+
+function swri_oldapi_conv_fltp_to_s16_2ch_neon, export=1
+        ldm             r1,  {r1, r3}
+        subs            r2,  r2,  #8
+        vld1.32         {q0},     [r1,:128]!
+        vcvt.s32.f32    q8,  q0,  #31
+        vld1.32         {q1},     [r1,:128]!
+        vcvt.s32.f32    q9,  q1,  #31
+        vld1.32         {q10},    [r3,:128]!
+        vcvt.s32.f32    q10, q10, #31
+        vld1.32         {q11},    [r3,:128]!
+        vcvt.s32.f32    q11, q11, #31
+        beq             3f
+        bics            r12, r2,  #15
+        beq             2f
+1:      subs            r12, r12, #16
+        vld1.32         {q0},     [r1,:128]!
+        vcvt.s32.f32    q0,  q0,  #31
+        vsri.32         q10, q8,  #16
+        vld1.32         {q1},     [r1,:128]!
+        vcvt.s32.f32    q1,  q1,  #31
+        vld1.32         {q12},    [r3,:128]!
+        vcvt.s32.f32    q12, q12, #31
+        vld1.32         {q13},    [r3,:128]!
+        vsri.32         q11, q9,  #16
+        vst1.16         {q10},    [r0,:128]!
+        vcvt.s32.f32    q13, q13, #31
+        vst1.16         {q11},    [r0,:128]!
+        vsri.32         q12, q0,  #16
+        vld1.32         {q8},     [r1,:128]!
+        vsri.32         q13, q1,  #16
+        vst1.16         {q12},    [r0,:128]!
+        vcvt.s32.f32    q8,  q8,  #31
+        vld1.32         {q9},     [r1,:128]!
+        vcvt.s32.f32    q9,  q9,  #31
+        vld1.32         {q10},    [r3,:128]!
+        vcvt.s32.f32    q10, q10, #31
+        vld1.32         {q11},    [r3,:128]!
+        vcvt.s32.f32    q11, q11, #31
+        vst1.16         {q13},    [r0,:128]!
+        bne             1b
+        ands            r2,  r2,  #15
+        beq             3f
+2:      vsri.32         q10, q8,  #16
+        vld1.32         {q0},     [r1,:128]!
+        vcvt.s32.f32    q0,  q0,  #31
+        vld1.32         {q1},     [r1,:128]!
+        vcvt.s32.f32    q1,  q1,  #31
+        vld1.32         {q12},    [r3,:128]!
+        vcvt.s32.f32    q12, q12, #31
+        vsri.32         q11, q9,  #16
+        vld1.32         {q13},    [r3,:128]!
+        vcvt.s32.f32    q13, q13, #31
+        vst1.16         {q10},    [r0,:128]!
+        vsri.32         q12, q0,  #16
+        vst1.16         {q11},    [r0,:128]!
+        vsri.32         q13, q1,  #16
+        vst1.16         {q12-q13},[r0,:128]!
+        bx              lr
+3:      vsri.32         q10, q8,  #16
+        vsri.32         q11, q9,  #16
+        vst1.16         {q10-q11},[r0,:128]!
+        bx              lr
+endfunc
+
+function swri_oldapi_conv_fltp_to_s16_nch_neon, export=1
+        cmp             r3,  #2
+        itt             lt
+        ldrlt           r1,  [r1]
+        blt             X(swri_oldapi_conv_flt_to_s16_neon)
+        beq             X(swri_oldapi_conv_fltp_to_s16_2ch_neon)
+
+        push            {r4-r8, lr}
+        cmp             r3,  #4
+        lsl             r12, r3,  #1
+        blt             4f
+
+        @ 4 channels
+5:      ldm             r1!, {r4-r7}
+        mov             lr,  r2
+        mov             r8,  r0
+        vld1.32         {q8},     [r4,:128]!
+        vcvt.s32.f32    q8,  q8,  #31
+        vld1.32         {q9},     [r5,:128]!
+        vcvt.s32.f32    q9,  q9,  #31
+        vld1.32         {q10},    [r6,:128]!
+        vcvt.s32.f32    q10, q10, #31
+        vld1.32         {q11},    [r7,:128]!
+        vcvt.s32.f32    q11, q11, #31
+6:      subs            lr,  lr,  #8
+        vld1.32         {q0},     [r4,:128]!
+        vcvt.s32.f32    q0,  q0,  #31
+        vsri.32         q9,  q8,  #16
+        vld1.32         {q1},     [r5,:128]!
+        vcvt.s32.f32    q1,  q1,  #31
+        vsri.32         q11, q10, #16
+        vld1.32         {q2},     [r6,:128]!
+        vcvt.s32.f32    q2,  q2,  #31
+        vzip.32         d18, d22
+        vld1.32         {q3},     [r7,:128]!
+        vcvt.s32.f32    q3,  q3,  #31
+        vzip.32         d19, d23
+        vst1.16         {d18},    [r8], r12
+        vsri.32         q1,  q0,  #16
+        vst1.16         {d22},    [r8], r12
+        vsri.32         q3,  q2,  #16
+        vst1.16         {d19},    [r8], r12
+        vzip.32         d2,  d6
+        vst1.16         {d23},    [r8], r12
+        vzip.32         d3,  d7
+        beq             7f
+        vld1.32         {q8},     [r4,:128]!
+        vcvt.s32.f32    q8,  q8,  #31
+        vst1.16         {d2},     [r8], r12
+        vld1.32         {q9},     [r5,:128]!
+        vcvt.s32.f32    q9,  q9,  #31
+        vst1.16         {d6},     [r8], r12
+        vld1.32         {q10},    [r6,:128]!
+        vcvt.s32.f32    q10, q10, #31
+        vst1.16         {d3},     [r8], r12
+        vld1.32         {q11},    [r7,:128]!
+        vcvt.s32.f32    q11, q11, #31
+        vst1.16         {d7},     [r8], r12
+        b               6b
+7:      vst1.16         {d2},     [r8], r12
+        vst1.16         {d6},     [r8], r12
+        vst1.16         {d3},     [r8], r12
+        vst1.16         {d7},     [r8], r12
+        subs            r3,  r3,  #4
+        it              eq
+        popeq           {r4-r8, pc}
+        cmp             r3,  #4
+        add             r0,  r0,  #8
+        bge             5b
+
+        @ 2 channels
+4:      cmp             r3,  #2
+        blt             4f
+        ldm             r1!, {r4-r5}
+        mov             lr,  r2
+        mov             r8,  r0
+        tst             lr,  #8
+        vld1.32         {q8},     [r4,:128]!
+        vcvt.s32.f32    q8,  q8,  #31
+        vld1.32         {q9},     [r5,:128]!
+        vcvt.s32.f32    q9,  q9,  #31
+        vld1.32         {q10},    [r4,:128]!
+        vcvt.s32.f32    q10, q10, #31
+        vld1.32         {q11},    [r5,:128]!
+        vcvt.s32.f32    q11, q11, #31
+        beq             6f
+        subs            lr,  lr,  #8
+        beq             7f
+        vsri.32         d18, d16, #16
+        vsri.32         d19, d17, #16
+        vld1.32         {q8},     [r4,:128]!
+        vcvt.s32.f32    q8,  q8,  #31
+        vst1.32         {d18[0]}, [r8], r12
+        vsri.32         d22, d20, #16
+        vst1.32         {d18[1]}, [r8], r12
+        vsri.32         d23, d21, #16
+        vst1.32         {d19[0]}, [r8], r12
+        vst1.32         {d19[1]}, [r8], r12
+        vld1.32         {q9},     [r5,:128]!
+        vcvt.s32.f32    q9,  q9,  #31
+        vst1.32         {d22[0]}, [r8], r12
+        vst1.32         {d22[1]}, [r8], r12
+        vld1.32         {q10},    [r4,:128]!
+        vcvt.s32.f32    q10, q10, #31
+        vst1.32         {d23[0]}, [r8], r12
+        vst1.32         {d23[1]}, [r8], r12
+        vld1.32         {q11},    [r5,:128]!
+        vcvt.s32.f32    q11, q11, #31
+6:      subs            lr,  lr,  #16
+        vld1.32         {q0},     [r4,:128]!
+        vcvt.s32.f32    q0,  q0,  #31
+        vsri.32         d18, d16, #16
+        vld1.32         {q1},     [r5,:128]!
+        vcvt.s32.f32    q1,  q1,  #31
+        vsri.32         d19, d17, #16
+        vld1.32         {q2},     [r4,:128]!
+        vcvt.s32.f32    q2,  q2,  #31
+        vld1.32         {q3},     [r5,:128]!
+        vcvt.s32.f32    q3,  q3,  #31
+        vst1.32         {d18[0]}, [r8], r12
+        vsri.32         d22, d20, #16
+        vst1.32         {d18[1]}, [r8], r12
+        vsri.32         d23, d21, #16
+        vst1.32         {d19[0]}, [r8], r12
+        vsri.32         d2,  d0,  #16
+        vst1.32         {d19[1]}, [r8], r12
+        vsri.32         d3,  d1,  #16
+        vst1.32         {d22[0]}, [r8], r12
+        vsri.32         d6,  d4,  #16
+        vst1.32         {d22[1]}, [r8], r12
+        vsri.32         d7,  d5,  #16
+        vst1.32         {d23[0]}, [r8], r12
+        vst1.32         {d23[1]}, [r8], r12
+        beq             6f
+        vld1.32         {q8},     [r4,:128]!
+        vcvt.s32.f32    q8,  q8,  #31
+        vst1.32         {d2[0]},  [r8], r12
+        vst1.32         {d2[1]},  [r8], r12
+        vld1.32         {q9},     [r5,:128]!
+        vcvt.s32.f32    q9,  q9,  #31
+        vst1.32         {d3[0]},  [r8], r12
+        vst1.32         {d3[1]},  [r8], r12
+        vld1.32         {q10},    [r4,:128]!
+        vcvt.s32.f32    q10, q10, #31
+        vst1.32         {d6[0]},  [r8], r12
+        vst1.32         {d6[1]},  [r8], r12
+        vld1.32         {q11},    [r5,:128]!
+        vcvt.s32.f32    q11, q11, #31
+        vst1.32         {d7[0]},  [r8], r12
+        vst1.32         {d7[1]},  [r8], r12
+        bgt             6b
+6:      vst1.32         {d2[0]},  [r8], r12
+        vst1.32         {d2[1]},  [r8], r12
+        vst1.32         {d3[0]},  [r8], r12
+        vst1.32         {d3[1]},  [r8], r12
+        vst1.32         {d6[0]},  [r8], r12
+        vst1.32         {d6[1]},  [r8], r12
+        vst1.32         {d7[0]},  [r8], r12
+        vst1.32         {d7[1]},  [r8], r12
+        b               8f
+7:      vsri.32         d18, d16, #16
+        vsri.32         d19, d17, #16
+        vst1.32         {d18[0]}, [r8], r12
+        vsri.32         d22, d20, #16
+        vst1.32         {d18[1]}, [r8], r12
+        vsri.32         d23, d21, #16
+        vst1.32         {d19[0]}, [r8], r12
+        vst1.32         {d19[1]}, [r8], r12
+        vst1.32         {d22[0]}, [r8], r12
+        vst1.32         {d22[1]}, [r8], r12
+        vst1.32         {d23[0]}, [r8], r12
+        vst1.32         {d23[1]}, [r8], r12
+8:      subs            r3,  r3,  #2
+        add             r0,  r0,  #4
+        it              eq
+        popeq           {r4-r8, pc}
+
+        @ 1 channel
+4:      ldr             r4,  [r1]
+        tst             r2,  #8
+        mov             lr,  r2
+        mov             r5,  r0
+        vld1.32         {q0},     [r4,:128]!
+        vcvt.s32.f32    q0,  q0,  #31
+        vld1.32         {q1},     [r4,:128]!
+        vcvt.s32.f32    q1,  q1,  #31
+        bne             8f
+6:      subs            lr,  lr,  #16
+        vld1.32         {q2},     [r4,:128]!
+        vcvt.s32.f32    q2,  q2,  #31
+        vld1.32         {q3},     [r4,:128]!
+        vcvt.s32.f32    q3,  q3,  #31
+        vst1.16         {d0[1]},  [r5,:16], r12
+        vst1.16         {d0[3]},  [r5,:16], r12
+        vst1.16         {d1[1]},  [r5,:16], r12
+        vst1.16         {d1[3]},  [r5,:16], r12
+        vst1.16         {d2[1]},  [r5,:16], r12
+        vst1.16         {d2[3]},  [r5,:16], r12
+        vst1.16         {d3[1]},  [r5,:16], r12
+        vst1.16         {d3[3]},  [r5,:16], r12
+        beq             7f
+        vld1.32         {q0},     [r4,:128]!
+        vcvt.s32.f32    q0,  q0,  #31
+        vld1.32         {q1},     [r4,:128]!
+        vcvt.s32.f32    q1,  q1,  #31
+7:      vst1.16         {d4[1]},  [r5,:16], r12
+        vst1.16         {d4[3]},  [r5,:16], r12
+        vst1.16         {d5[1]},  [r5,:16], r12
+        vst1.16         {d5[3]},  [r5,:16], r12
+        vst1.16         {d6[1]},  [r5,:16], r12
+        vst1.16         {d6[3]},  [r5,:16], r12
+        vst1.16         {d7[1]},  [r5,:16], r12
+        vst1.16         {d7[3]},  [r5,:16], r12
+        bgt             6b
+        pop             {r4-r8, pc}
+8:      subs            lr,  lr,  #8
+        vst1.16         {d0[1]},  [r5,:16], r12
+        vst1.16         {d0[3]},  [r5,:16], r12
+        vst1.16         {d1[1]},  [r5,:16], r12
+        vst1.16         {d1[3]},  [r5,:16], r12
+        vst1.16         {d2[1]},  [r5,:16], r12
+        vst1.16         {d2[3]},  [r5,:16], r12
+        vst1.16         {d3[1]},  [r5,:16], r12
+        vst1.16         {d3[3]},  [r5,:16], r12
+        it              eq
+        popeq           {r4-r8, pc}
+        vld1.32         {q0},     [r4,:128]!
+        vcvt.s32.f32    q0,  q0,  #31
+        vld1.32         {q1},     [r4,:128]!
+        vcvt.s32.f32    q1,  q1,  #31
+        b               6b
+endfunc
diff --git a/libswresample/arm/neontest.c b/libswresample/arm/neontest.c
new file mode 100644
index 0000000000..2abbbc2367
--- /dev/null
+++ b/libswresample/arm/neontest.c
@@ -0,0 +1,29 @@
+/*
+ * check NEON registers for clobbers
+ * Copyright (c) 2013 Martin Storsjo
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libswresample/swresample.h"
+#include "libavutil/arm/neontest.h"
+
+wrap(swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
+                 const uint8_t **in , int in_count))
+{
+    testneonclobbers(swr_convert, s, out, out_count, in, in_count);
+}
diff --git a/libswresample/arm/resample.S b/libswresample/arm/resample.S
new file mode 100644
index 0000000000..c231301b2b
--- /dev/null
+++ b/libswresample/arm/resample.S
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016 Matthieu Bouron <matthieu.bouron stupeflix.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+function ff_resample_common_apply_filter_x4_float_neon, export=1
+    vmov.f32            q0, #0.0                                       @ accumulator
+1:  vld1.32             {q1}, [r1]!                                    @ src
+    vld1.32             {q2}, [r2]!                                    @ filter
+    vmla.f32            q0, q1, q2                                     @ accumulator += src + {0..3} * filter + {0..3}
+    subs                r3, #4                                         @ filter_length -= 4
+    bgt                 1b                                             @ loop until filter_length
+    vpadd.f32           d0, d0, d1                                     @ pair adding of the 4x32-bit accumulated values
+    vpadd.f32           d0, d0, d0                                     @ pair adding of the 4x32-bit accumulator values
+    vst1.32             {d0[0]}, [r0]                                  @ write accumulator
+    mov pc, lr
+endfunc
+
+function ff_resample_common_apply_filter_x8_float_neon, export=1
+    vmov.f32            q0, #0.0                                       @ accumulator
+1:  vld1.32             {q1}, [r1]!                                    @ src
+    vld1.32             {q2}, [r2]!                                    @ filter
+    vld1.32             {q8}, [r1]!                                    @ src
+    vld1.32             {q9}, [r2]!                                    @ filter
+    vmla.f32            q0, q1, q2                                     @ accumulator += src + {0..3} * filter + {0..3}
+    vmla.f32            q0, q8, q9                                     @ accumulator += src + {4..7} * filter + {4..7}
+    subs                r3, #8                                         @ filter_length -= 8
+    bgt                 1b                                             @ loop until filter_length
+    vpadd.f32           d0, d0, d1                                     @ pair adding of the 4x32-bit accumulated values
+    vpadd.f32           d0, d0, d0                                     @ pair adding of the 4x32-bit accumulator values
+    vst1.32             {d0[0]}, [r0]                                  @ write accumulator
+    mov pc, lr
+endfunc
+
+function ff_resample_common_apply_filter_x4_s16_neon, export=1
+    vmov.s32            q0, #0                                         @ accumulator
+1:  vld1.16             {d2}, [r1]!                                    @ src
+    vld1.16             {d4}, [r2]!                                    @ filter
+    vmlal.s16           q0, d2, d4                                     @ accumulator += src + {0..3} * filter + {0..3}
+    subs                r3, #4                                         @ filter_length -= 4
+    bgt                 1b                                             @ loop until filter_length
+    vpadd.s32           d0, d0, d1                                     @ pair adding of the 4x32-bit accumulated values
+    vpadd.s32           d0, d0, d0                                     @ pair adding of the 4x32-bit accumulator values
+    vst1.32             {d0[0]}, [r0]                                  @ write accumulator
+    mov pc, lr
+endfunc
+
+function ff_resample_common_apply_filter_x8_s16_neon, export=1
+    vmov.s32            q0, #0                                         @ accumulator
+1:  vld1.16             {q1}, [r1]!                                    @ src
+    vld1.16             {q2}, [r2]!                                    @ filter
+    vmlal.s16           q0, d2, d4                                     @ accumulator += src + {0..3} * filter + {0..3}
+    vmlal.s16           q0, d3, d5                                     @ accumulator += src + {4..7} * filter + {4..7}
+    subs                r3, #8                                         @ filter_length -= 8
+    bgt                 1b                                             @ loop until filter_length
+    vpadd.s32           d0, d0, d1                                     @ pair adding of the 4x32-bit accumulated values
+    vpadd.s32           d0, d0, d0                                     @ pair adding of the 4x32-bit accumulator values
+    vst1.32             {d0[0]}, [r0]                                  @ write accumulator
+    mov pc, lr
+endfunc
diff --git a/libswresample/arm/resample_init.c b/libswresample/arm/resample_init.c
new file mode 100644
index 0000000000..003fafd29b
--- /dev/null
+++ b/libswresample/arm/resample_init.c
@@ -0,0 +1,122 @@
+/*
+ * Audio resampling
+ *
+ * Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "libavutil/cpu.h"
+#include "libavutil/avassert.h"
+
+#include "libavutil/arm/cpu.h"
+#include "libswresample/resample.h"
+
+#define DECLARE_RESAMPLE_COMMON_TEMPLATE(TYPE, DELEM, FELEM, FELEM2, OUT)                         \
+                                                                                                  \
+void ff_resample_common_apply_filter_x4_##TYPE##_neon(FELEM2 *acc, const DELEM *src,              \
+                                                      const FELEM *filter, int length);           \
+                                                                                                  \
+void ff_resample_common_apply_filter_x8_##TYPE##_neon(FELEM2 *acc, const DELEM *src,              \
+                                                      const FELEM *filter, int length);           \
+                                                                                                  \
+static int ff_resample_common_##TYPE##_neon(ResampleContext *c, void *dest, const void *source,   \
+                                            int n, int update_ctx)                                \
+{                                                                                                 \
+    DELEM *dst = dest;                                                                            \
+    const DELEM *src = source;                                                                    \
+    int dst_index;                                                                                \
+    int index= c->index;                                                                          \
+    int frac= c->frac;                                                                            \
+    int sample_index = 0;                                                                         \
+    int x4_aligned_filter_length = c->filter_length & ~3;                                         \
+    int x8_aligned_filter_length = c->filter_length & ~7;                                         \
+                                                                                                  \
+    while (index >= c->phase_count) {                                                             \
+        sample_index++;                                                                           \
+        index -= c->phase_count;                                                                  \
+    }                                                                                             \
+                                                                                                  \
+    for (dst_index = 0; dst_index < n; dst_index++) {                                             \
+        FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;                     \
+                                                                                                  \
+        FELEM2 val=0;                                                                             \
+        int i = 0;                                                                                \
+        if (x8_aligned_filter_length >= 8) {                                                      \
+            ff_resample_common_apply_filter_x8_##TYPE##_neon(&val, &src[sample_index],            \
+                                                             filter, x8_aligned_filter_length);   \
+            i += x8_aligned_filter_length;                                                        \
+                                                                                                  \
+        } else if (x4_aligned_filter_length >= 4) {                                               \
+            ff_resample_common_apply_filter_x4_##TYPE##_neon(&val, &src[sample_index],            \
+                                                             filter, x4_aligned_filter_length);   \
+            i += x4_aligned_filter_length;                                                        \
+        }                                                                                         \
+        for (; i < c->filter_length; i++) {                                                       \
+            val += src[sample_index + i] * (FELEM2)filter[i];                                     \
+        }                                                                                         \
+        OUT(dst[dst_index], val);                                                                 \
+                                                                                                  \
+        frac  += c->dst_incr_mod;                                                                 \
+        index += c->dst_incr_div;                                                                 \
+        if (frac >= c->src_incr) {                                                                \
+            frac -= c->src_incr;                                                                  \
+            index++;                                                                              \
+        }                                                                                         \
+                                                                                                  \
+        while (index >= c->phase_count) {                                                         \
+            sample_index++;                                                                       \
+            index -= c->phase_count;                                                              \
+        }                                                                                         \
+    }                                                                                             \
+                                                                                                  \
+    if(update_ctx){                                                                               \
+        c->frac= frac;                                                                            \
+        c->index= index;                                                                          \
+    }                                                                                             \
+                                                                                                  \
+    return sample_index;                                                                          \
+}                                                                                                 \
+
+#define OUT(d, v) d = v
+DECLARE_RESAMPLE_COMMON_TEMPLATE(float, float, float, float, OUT)
+#undef OUT
+
+#define OUT(d, v) (v) = ((v) + (1<<(14)))>>15; (d) = av_clip_int16(v)
+DECLARE_RESAMPLE_COMMON_TEMPLATE(s16, int16_t, int16_t, int32_t, OUT)
+#undef OUT
+
+av_cold void swri_resample_dsp_arm_init(ResampleContext *c)
+{
+    int cpu_flags = av_get_cpu_flags();
+
+    if (!have_neon(cpu_flags))
+        return;
+
+    switch(c->format) {
+    case AV_SAMPLE_FMT_FLTP:
+        if (!c->linear)
+            c->dsp.resample = ff_resample_common_float_neon;
+        break;
+    case AV_SAMPLE_FMT_S16P:
+        if (!c->linear)
+            c->dsp.resample = ff_resample_common_s16_neon;
+        break;
+    }
+}
diff --git a/libswresample/audioconvert.c b/libswresample/audioconvert.c
new file mode 100644
index 0000000000..58b0bf33e9
--- /dev/null
+++ b/libswresample/audioconvert.c
@@ -0,0 +1,225 @@
+/*
+ * audio conversion
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * audio conversion
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/avassert.h"
+#include "libavutil/libm.h"
+#include "libavutil/samplefmt.h"
+#include "audioconvert.h"
+
+
+#define CONV_FUNC_NAME(dst_fmt, src_fmt) conv_ ## src_fmt ## _to_ ## dst_fmt
+
+//FIXME rounding ?
+#define CONV_FUNC(ofmt, otype, ifmt, expr)\
+static void CONV_FUNC_NAME(ofmt, ifmt)(uint8_t *po, const uint8_t *pi, int is, int os, uint8_t *end)\
+{\
+    uint8_t *end2 = end - 3*os;\
+    while(po < end2){\
+        *(otype*)po = expr; pi += is; po += os;\
+        *(otype*)po = expr; pi += is; po += os;\
+        *(otype*)po = expr; pi += is; po += os;\
+        *(otype*)po = expr; pi += is; po += os;\
+    }\
+    while(po < end){\
+        *(otype*)po = expr; pi += is; po += os;\
+    }\
+}
+
+//FIXME put things below under ifdefs so we do not waste space for cases no codec will need
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 ,  *(const uint8_t*)pi)
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80U)<<8)
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80U)<<24)
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0f/ (1<<7)))
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, (*(const int16_t*)pi>>8) + 0x80)
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16,  *(const int16_t*)pi)
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16,  *(const int16_t*)pi<<16)
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S16,  *(const int16_t*)pi*(1.0f/ (1<<15)))
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16,  *(const int16_t*)pi*(1.0 / (1<<15)))
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, (*(const int32_t*)pi>>24) + 0x80)
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32,  *(const int32_t*)pi>>16)
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32,  *(const int32_t*)pi)
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S32,  *(const int32_t*)pi*(1.0f/ (1U<<31)))
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32,  *(const int32_t*)pi*(1.0 / (1U<<31)))
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8(  lrintf(*(const float*)pi * (1<<7)) + 0x80))
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16(  lrintf(*(const float*)pi * (1<<15))))
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(const float*)pi * (1U<<31))))
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_FLT, *(const float*)pi)
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_FLT, *(const float*)pi)
+CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8(  lrint(*(const double*)pi * (1<<7)) + 0x80))
+CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, av_clip_int16(  lrint(*(const double*)pi * (1<<15))))
+CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(const double*)pi * (1U<<31))))
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_DBL, *(const double*)pi)
+CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_DBL, *(const double*)pi)
+
+#define FMT_PAIR_FUNC(out, in) [(out) + AV_SAMPLE_FMT_NB*(in)] = CONV_FUNC_NAME(out, in)
+
+static conv_func_type * const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAMPLE_FMT_NB] = {
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8 , AV_SAMPLE_FMT_U8 ),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_U8 ),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_U8 ),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_U8 ),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_U8 ),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8 , AV_SAMPLE_FMT_S16),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S16),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8 , AV_SAMPLE_FMT_S32),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S32),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8 , AV_SAMPLE_FMT_FLT),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLT),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_FLT),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8 , AV_SAMPLE_FMT_DBL),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_DBL),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_DBL),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_DBL),
+    FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL),
+};
+
+static void cpy1(uint8_t **dst, const uint8_t **src, int len){
+    memcpy(*dst, *src, len);
+}
+static void cpy2(uint8_t **dst, const uint8_t **src, int len){
+    memcpy(*dst, *src, 2*len);
+}
+static void cpy4(uint8_t **dst, const uint8_t **src, int len){
+    memcpy(*dst, *src, 4*len);
+}
+static void cpy8(uint8_t **dst, const uint8_t **src, int len){
+    memcpy(*dst, *src, 8*len);
+}
+
+AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt,
+                                       enum AVSampleFormat in_fmt,
+                                       int channels, const int *ch_map,
+                                       int flags)
+{
+    AudioConvert *ctx;
+    conv_func_type *f = fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt) + AV_SAMPLE_FMT_NB*av_get_packed_sample_fmt(in_fmt)];
+
+    if (!f)
+        return NULL;
+    ctx = av_mallocz(sizeof(*ctx));
+    if (!ctx)
+        return NULL;
+
+    if(channels == 1){
+         in_fmt = av_get_planar_sample_fmt( in_fmt);
+        out_fmt = av_get_planar_sample_fmt(out_fmt);
+    }
+
+    ctx->channels = channels;
+    ctx->conv_f   = f;
+    ctx->ch_map   = ch_map;
+    if (in_fmt == AV_SAMPLE_FMT_U8 || in_fmt == AV_SAMPLE_FMT_U8P)
+        memset(ctx->silence, 0x80, sizeof(ctx->silence));
+
+    if(out_fmt == in_fmt && !ch_map) {
+        switch(av_get_bytes_per_sample(in_fmt)){
+            case 1:ctx->simd_f = cpy1; break;
+            case 2:ctx->simd_f = cpy2; break;
+            case 4:ctx->simd_f = cpy4; break;
+            case 8:ctx->simd_f = cpy8; break;
+        }
+    }
+
+    if(HAVE_YASM && HAVE_MMX) swri_audio_convert_init_x86(ctx, out_fmt, in_fmt, channels);
+    if(ARCH_ARM)              swri_audio_convert_init_arm(ctx, out_fmt, in_fmt, channels);
+    if(ARCH_AARCH64)          swri_audio_convert_init_aarch64(ctx, out_fmt, in_fmt, channels);
+
+    return ctx;
+}
+
+void swri_audio_convert_free(AudioConvert **ctx)
+{
+    av_freep(ctx);
+}
+
+int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len)
+{
+    int ch;
+    int off=0;
+    const int os= (out->planar ? 1 :out->ch_count) *out->bps;
+    unsigned misaligned = 0;
+
+    av_assert0(ctx->channels == out->ch_count);
+
+    if (ctx->in_simd_align_mask) {
+        int planes = in->planar ? in->ch_count : 1;
+        unsigned m = 0;
+        for (ch = 0; ch < planes; ch++)
+            m |= (intptr_t)in->ch[ch];
+        misaligned |= m & ctx->in_simd_align_mask;
+    }
+    if (ctx->out_simd_align_mask) {
+        int planes = out->planar ? out->ch_count : 1;
+        unsigned m = 0;
+        for (ch = 0; ch < planes; ch++)
+            m |= (intptr_t)out->ch[ch];
+        misaligned |= m & ctx->out_simd_align_mask;
+    }
+
+    //FIXME optimize common cases
+
+    if(ctx->simd_f && !ctx->ch_map && !misaligned){
+        off = len&~15;
+        av_assert1(off>=0);
+        av_assert1(off<=len);
+        av_assert2(ctx->channels == SWR_CH_MAX || !in->ch[ctx->channels]);
+        if(off>0){
+            if(out->planar == in->planar){
+                int planes = out->planar ? out->ch_count : 1;
+                for(ch=0; ch<planes; ch++){
+                    ctx->simd_f(out->ch+ch, (const uint8_t **)in->ch+ch, off * (out->planar ? 1 :out->ch_count));
+                }
+            }else{
+                ctx->simd_f(out->ch, (const uint8_t **)in->ch, off);
+            }
+        }
+        if(off == len)
+            return 0;
+    }
+
+    for(ch=0; ch<ctx->channels; ch++){
+        const int ich= ctx->ch_map ? ctx->ch_map[ch] : ch;
+        const int is= ich < 0 ? 0 : (in->planar ? 1 : in->ch_count) * in->bps;
+        const uint8_t *pi= ich < 0 ? ctx->silence : in->ch[ich];
+        uint8_t       *po= out->ch[ch];
+        uint8_t *end= po + os*len;
+        if(!po)
+            continue;
+        ctx->conv_f(po+off*os, pi+off*is, is, os, end);
+    }
+    return 0;
+}
diff --git a/libswresample/audioconvert.h b/libswresample/audioconvert.h
new file mode 100644
index 0000000000..1ca30c2a65
--- /dev/null
+++ b/libswresample/audioconvert.h
@@ -0,0 +1,78 @@
+/*
+ * audio conversion
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2008 Peter Ross
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef SWRESAMPLE_AUDIOCONVERT_H
+#define SWRESAMPLE_AUDIOCONVERT_H
+
+/**
+ * @file
+ * Audio format conversion routines
+ */
+
+
+#include "swresample_internal.h"
+#include "libavutil/cpu.h"
+
+
+typedef void (conv_func_type)(uint8_t *po, const uint8_t *pi, int is, int os, uint8_t *end);
+typedef void (simd_func_type)(uint8_t **dst, const uint8_t **src, int len);
+
+typedef struct AudioConvert {
+    int channels;
+    int  in_simd_align_mask;
+    int out_simd_align_mask;
+    conv_func_type *conv_f;
+    simd_func_type *simd_f;
+    const int *ch_map;
+    uint8_t silence[8]; ///< silence input sample
+}AudioConvert;
+
+/**
+ * Create an audio sample format converter context
+ * @param out_fmt Output sample format
+ * @param in_fmt Input sample format
+ * @param channels Number of channels
+ * @param flags See AV_CPU_FLAG_xx
+ * @param ch_map list of the channels id to pick from the source stream, NULL
+ *               if all channels must be selected
+ * @return NULL on error
+ */
+AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt,
+                                       enum AVSampleFormat in_fmt,
+                                       int channels, const int *ch_map,
+                                       int flags);
+
+/**
+ * Free audio sample format converter context.
+ * and set the pointer to NULL
+ */
+void swri_audio_convert_free(AudioConvert **ctx);
+
+/**
+ * Convert between audio sample formats
+ * @param[in] out array of output buffers for each channel. set to NULL to ignore processing of the given channel.
+ * @param[in] in array of input buffers for each channel
+ * @param len length of audio frame size (measured in samples)
+ */
+int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len);
+
+#endif /* SWRESAMPLE_AUDIOCONVERT_H */
diff --git a/libswresample/dither.c b/libswresample/dither.c
new file mode 100644
index 0000000000..08c793d4cf
--- /dev/null
+++ b/libswresample/dither.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2012-2013 Michael Niedermayer (michaelni@gmx.at)
+ *
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "swresample_internal.h"
+
+#include "noise_shaping_data.c"
+
+int swri_get_dither(SwrContext *s, void *dst, int len, unsigned seed, enum AVSampleFormat noise_fmt) {
+    double scale = s->dither.noise_scale;
+#define TMP_EXTRA 2
+    double *tmp = av_malloc_array(len + TMP_EXTRA, sizeof(double));
+    int i;
+
+    if (!tmp)
+        return AVERROR(ENOMEM);
+
+    for(i=0; i<len + TMP_EXTRA; i++){
+        double v;
+        seed = seed* 1664525 + 1013904223;
+
+        switch(s->dither.method){
+            case SWR_DITHER_RECTANGULAR: v= ((double)seed) / UINT_MAX - 0.5; break;
+            default:
+                av_assert0(s->dither.method < SWR_DITHER_NB);
+                v = ((double)seed) / UINT_MAX;
+                seed = seed*1664525 + 1013904223;
+                v-= ((double)seed) / UINT_MAX;
+                break;
+        }
+        tmp[i] = v;
+    }
+
+    for(i=0; i<len; i++){
+        double v;
+
+        switch(s->dither.method){
+            default:
+                av_assert0(s->dither.method < SWR_DITHER_NB);
+                v = tmp[i];
+                break;
+            case SWR_DITHER_TRIANGULAR_HIGHPASS :
+                v = (- tmp[i] + 2*tmp[i+1] - tmp[i+2]) / sqrt(6);
+                break;
+        }
+
+        v*= scale;
+
+        switch(noise_fmt){
+            case AV_SAMPLE_FMT_S16P: ((int16_t*)dst)[i] = v; break;
+            case AV_SAMPLE_FMT_S32P: ((int32_t*)dst)[i] = v; break;
+            case AV_SAMPLE_FMT_FLTP: ((float  *)dst)[i] = v; break;
+            case AV_SAMPLE_FMT_DBLP: ((double *)dst)[i] = v; break;
+            default: av_assert0(0);
+        }
+    }
+
+    av_free(tmp);
+    return 0;
+}
+
+av_cold int swri_dither_init(SwrContext *s, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt)
+{
+    int i;
+    double scale = 0;
+
+    if (s->dither.method > SWR_DITHER_TRIANGULAR_HIGHPASS && s->dither.method <= SWR_DITHER_NS)
+        return AVERROR(EINVAL);
+
+    out_fmt = av_get_packed_sample_fmt(out_fmt);
+    in_fmt  = av_get_packed_sample_fmt( in_fmt);
+
+    if(in_fmt == AV_SAMPLE_FMT_FLT || in_fmt == AV_SAMPLE_FMT_DBL){
+        if(out_fmt == AV_SAMPLE_FMT_S32) scale = 1.0/(1LL<<31);
+        if(out_fmt == AV_SAMPLE_FMT_S16) scale = 1.0/(1LL<<15);
+        if(out_fmt == AV_SAMPLE_FMT_U8 ) scale = 1.0/(1LL<< 7);
+    }
+    if(in_fmt == AV_SAMPLE_FMT_S32 && out_fmt == AV_SAMPLE_FMT_S32 && (s->dither.output_sample_bits&31)) scale = 1;
+    if(in_fmt == AV_SAMPLE_FMT_S32 && out_fmt == AV_SAMPLE_FMT_S16) scale = 1<<16;
+    if(in_fmt == AV_SAMPLE_FMT_S32 && out_fmt == AV_SAMPLE_FMT_U8 ) scale = 1<<24;
+    if(in_fmt == AV_SAMPLE_FMT_S16 && out_fmt == AV_SAMPLE_FMT_U8 ) scale = 1<<8;
+
+    scale *= s->dither.scale;
+
+    if (out_fmt == AV_SAMPLE_FMT_S32 && s->dither.output_sample_bits)
+        scale *= 1<<(32-s->dither.output_sample_bits);
+
+    s->dither.ns_pos = 0;
+    s->dither.noise_scale=   scale;
+    s->dither.ns_scale   =   scale;
+    s->dither.ns_scale_1 = scale ? 1/scale : 0;
+    memset(s->dither.ns_errors, 0, sizeof(s->dither.ns_errors));
+    for (i=0; filters[i].coefs; i++) {
+        const filter_t *f = &filters[i];
+        if (llabs(s->out_sample_rate - f->rate)*20 <= f->rate && f->name == s->dither.method) {
+            int j;
+            s->dither.ns_taps = f->len;
+            for (j=0; j<f->len; j++)
+                s->dither.ns_coeffs[j] = f->coefs[j];
+            s->dither.ns_scale_1 *= 1 - exp(f->gain_cB * M_LN10 * 0.005) * 2 / (1<<(8*av_get_bytes_per_sample(out_fmt)));
+            break;
+        }
+    }
+    if (!filters[i].coefs && s->dither.method > SWR_DITHER_NS) {
+        av_log(s, AV_LOG_WARNING, "Requested noise shaping dither not available at this sampling rate, using triangular hp dither\n");
+        s->dither.method = SWR_DITHER_TRIANGULAR_HIGHPASS;
+    }
+
+    av_assert0(!s->preout.count);
+    s->dither.noise = s->preout;
+    s->dither.temp  = s->preout;
+    if (s->dither.method > SWR_DITHER_NS) {
+        s->dither.noise.bps = 4;
+        s->dither.noise.fmt = AV_SAMPLE_FMT_FLTP;
+        s->dither.noise_scale = 1;
+    }
+
+    return 0;
+}
+
+#define TEMPLATE_DITHER_S16
+#include "dither_template.c"
+#undef TEMPLATE_DITHER_S16
+
+#define TEMPLATE_DITHER_S32
+#include "dither_template.c"
+#undef TEMPLATE_DITHER_S32
+
+#define TEMPLATE_DITHER_FLT
+#include "dither_template.c"
+#undef TEMPLATE_DITHER_FLT
+
+#define TEMPLATE_DITHER_DBL
+#include "dither_template.c"
+#undef TEMPLATE_DITHER_DBL
diff --git a/libswresample/dither_template.c b/libswresample/dither_template.c
new file mode 100644
index 0000000000..1f535de3dc
--- /dev/null
+++ b/libswresample/dither_template.c
@@ -0,0 +1,84 @@
+/*
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#if defined(TEMPLATE_DITHER_DBL)
+#    define RENAME(N) N ## _double
+#    define DELEM  double
+#    define CLIP(v) while(0)
+
+#elif defined(TEMPLATE_DITHER_FLT)
+#    define RENAME(N) N ## _float
+#    define DELEM  float
+#    define CLIP(v) while(0)
+
+#elif defined(TEMPLATE_DITHER_S32)
+#    define RENAME(N) N ## _int32
+#    define DELEM  int32_t
+#    define CLIP(v) v = FFMAX(FFMIN(v, INT32_MAX), INT32_MIN)
+
+#elif defined(TEMPLATE_DITHER_S16)
+#    define RENAME(N) N ## _int16
+#    define DELEM  int16_t
+#    define CLIP(v) v = FFMAX(FFMIN(v, INT16_MAX), INT16_MIN)
+
+#else
+ERROR
+#endif
+
+void RENAME(swri_noise_shaping)(SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count){
+    int pos = s->dither.ns_pos;
+    int i, j, ch;
+    int taps  = s->dither.ns_taps;
+    float S   = s->dither.ns_scale;
+    float S_1 = s->dither.ns_scale_1;
+
+    av_assert2((taps&3) != 2);
+    av_assert2((taps&3) != 3 || s->dither.ns_coeffs[taps] == 0);
+
+    for (ch=0; ch<srcs->ch_count; ch++) {
+        const float *noise = ((const float *)noises->ch[ch]) + s->dither.noise_pos;
+        const DELEM *src = (const DELEM*)srcs->ch[ch];
+        DELEM *dst = (DELEM*)dsts->ch[ch];
+        float *ns_errors = s->dither.ns_errors[ch];
+        const float *ns_coeffs = s->dither.ns_coeffs;
+        pos  = s->dither.ns_pos;
+        for (i=0; i<count; i++) {
+            double d1, d = src[i]*S_1;
+            for(j=0; j<taps-2; j+=4) {
+                d -= ns_coeffs[j    ] * ns_errors[pos + j    ]
+                    +ns_coeffs[j + 1] * ns_errors[pos + j + 1]
+                    +ns_coeffs[j + 2] * ns_errors[pos + j + 2]
+                    +ns_coeffs[j + 3] * ns_errors[pos + j + 3];
+            }
+            if(j < taps)
+                d -= ns_coeffs[j] * ns_errors[pos + j];
+            pos = pos ? pos - 1 : taps - 1;
+            d1 = rint(d + noise[i]);
+            ns_errors[pos + taps] = ns_errors[pos] = d1 - d;
+            d1 *= S;
+            CLIP(d1);
+            dst[i] = d1;
+        }
+    }
+
+    s->dither.ns_pos = pos;
+}
+
+#undef RENAME
+#undef DELEM
+#undef CLIP
diff --git a/libswresample/libswresample.v b/libswresample/libswresample.v
new file mode 100644
index 0000000000..3b3508d707
--- /dev/null
+++ b/libswresample/libswresample.v
@@ -0,0 +1,7 @@
+LIBSWRESAMPLE_MAJOR {
+    global:
+        swr_*;
+        swresample_*;
+    local:
+        *;
+};
diff --git a/libswresample/log2_tab.c b/libswresample/log2_tab.c
new file mode 100644
index 0000000000..47a1df03b7
--- /dev/null
+++ b/libswresample/log2_tab.c
@@ -0,0 +1 @@
+#include "libavutil/log2_tab.c"
diff --git a/libswresample/noise_shaping_data.c b/libswresample/noise_shaping_data.c
new file mode 100644
index 0000000000..77e0f2eafc
--- /dev/null
+++ b/libswresample/noise_shaping_data.c
@@ -0,0 +1,224 @@
+/* Effect: dither/noise-shape   Copyright (c) 2008-9 robs@users.sourceforge.net
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at
+ * your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+typedef struct {
+  int  rate;
+  enum {fir, iir} type;
+  size_t len;
+  int gain_cB; /* Chosen so clips are few if any, but not guaranteed none. */
+  double const * coefs;
+  enum SwrDitherType name;
+} filter_t;
+
+static double const lip44[] = {2.033, -2.165, 1.959, -1.590, .6149};
+static double const fwe44[] = {
+  2.412, -3.370, 3.937, -4.174, 3.353, -2.205, 1.281, -.569, .0847};
+static double const mew44[] = {
+  1.662, -1.263, .4827, -.2913, .1268, -.1124, .03252, -.01265, -.03524};
+static double const iew44[] = {
+  2.847, -4.685, 6.214, -7.184, 6.639, -5.032, 3.263, -1.632, .4191};
+static double const ges44[] = {
+  2.2061, -.4706, -.2534, -.6214, 1.0587, .0676, -.6054, -.2738};
+static double const ges48[] = {
+  2.2374, -.7339, -.1251, -.6033, .903, .0116, -.5853, -.2571};
+
+static double const shi48[] = {
+  2.8720729351043701172,  -5.0413231849670410156,   6.2442994117736816406,
+  -5.8483986854553222656, 3.7067542076110839844,  -1.0495119094848632812,
+  -1.1830236911773681641,   2.1126792430877685547, -1.9094531536102294922,
+  0.99913084506988525391, -0.17090806365013122559, -0.32615602016448974609,
+  0.39127644896507263184, -0.26876461505889892578,  0.097676105797290802002,
+  -0.023473845794796943665,
+};
+static double const shi44[] = {
+  2.6773197650909423828,  -4.8308925628662109375,   6.570110321044921875,
+  -7.4572014808654785156, 6.7263274192810058594,  -4.8481650352478027344,
+  2.0412089824676513672,   0.7006359100341796875, -2.9537565708160400391,
+  4.0800385475158691406,  -4.1845216751098632812,   3.3311812877655029297,
+  -2.1179926395416259766,   0.879302978515625,      -0.031759146600961685181,
+  -0.42382788658142089844, 0.47882103919982910156, -0.35490813851356506348,
+  0.17496839165687561035, -0.060908168554306030273,
+};
+static double const shi38[] = {
+  1.6335992813110351562,  -2.2615492343902587891,   2.4077029228210449219,
+  -2.6341717243194580078, 2.1440362930297851562,  -1.8153258562088012695,
+  1.0816224813461303711,  -0.70302653312683105469, 0.15991993248462677002,
+  0.041549518704414367676, -0.29416576027870178223,  0.2518316805362701416,
+  -0.27766478061676025391,  0.15785403549671173096, -0.10165894031524658203,
+  0.016833892092108726501,
+};
+static double const shi32[] =
+{ /* dmaker 32000: bestmax=4.99659 (inverted) */
+0.82118552923202515,
+-1.0063692331314087,
+0.62341964244842529,
+-1.0447187423706055,
+0.64532512426376343,
+-0.87615132331848145,
+0.52219754457473755,
+-0.67434263229370117,
+0.44954317808151245,
+-0.52557498216629028,
+0.34567299485206604,
+-0.39618203043937683,
+0.26791760325431824,
+-0.28936097025871277,
+0.1883765310049057,
+-0.19097308814525604,
+0.10431359708309174,
+-0.10633844882249832,
+0.046832218766212463,
+-0.039653312414884567,
+};
+static double const shi22[] =
+{ /* dmaker 22050: bestmax=5.77762 (inverted) */
+0.056581053882837296,
+-0.56956905126571655,
+-0.40727734565734863,
+-0.33870288729667664,
+-0.29810553789138794,
+-0.19039161503314972,
+-0.16510021686553955,
+-0.13468159735202789,
+-0.096633769571781158,
+-0.081049129366874695,
+-0.064953058958053589,
+-0.054459091275930405,
+-0.043378707021474838,
+-0.03660014271736145,
+-0.026256965473294258,
+-0.018786206841468811,
+-0.013387725688517094,
+-0.0090983230620622635,
+-0.0026585909072309732,
+-0.00042083300650119781,
+};
+static double const shi16[] =
+{ /* dmaker 16000: bestmax=5.97128 (inverted) */
+-0.37251132726669312,
+-0.81423574686050415,
+-0.55010956525802612,
+-0.47405767440795898,
+-0.32624706625938416,
+-0.3161766529083252,
+-0.2286367267370224,
+-0.22916607558727264,
+-0.19565616548061371,
+-0.18160104751586914,
+-0.15423151850700378,
+-0.14104481041431427,
+-0.11844276636838913,
+-0.097583092749118805,
+-0.076493598520755768,
+-0.068106919527053833,
+-0.041881654411554337,
+-0.036922425031661987,
+-0.019364040344953537,
+-0.014994367957115173,
+};
+static double const shi11[] =
+{ /* dmaker 11025: bestmax=5.9406 (inverted) */
+-0.9264228343963623,
+-0.98695987462997437,
+-0.631156325340271,
+-0.51966935396194458,
+-0.39738872647285461,
+-0.35679301619529724,
+-0.29720726609230042,
+-0.26310476660728455,
+-0.21719355881214142,
+-0.18561814725399017,
+-0.15404847264289856,
+-0.12687471508979797,
+-0.10339745879173279,
+-0.083688631653785706,
+-0.05875682458281517,
+-0.046893671154975891,
+-0.027950936928391457,
+-0.020740609616041183,
+-0.009366452693939209,
+-0.0060260160826146603,
+};
+static double const shi08[] =
+{ /* dmaker 8000: bestmax=5.56234 (inverted) */
+-1.202863335609436,
+-0.94103097915649414,
+-0.67878556251525879,
+-0.57650017738342285,
+-0.50004476308822632,
+-0.44349345564842224,
+-0.37833768129348755,
+-0.34028723835945129,
+-0.29413089156150818,
+-0.24994957447052002,
+-0.21715600788593292,
+-0.18792112171649933,
+-0.15268312394618988,
+-0.12135542929172516,
+-0.099610626697540283,
+-0.075273610651493073,
+-0.048787496984004974,
+-0.042586319148540497,
+-0.028991291299462318,
+-0.011869125068187714,
+};
+static double const shl48[] = {
+  2.3925774097442626953,  -3.4350297451019287109,   3.1853709220886230469,
+  -1.8117271661758422852, -0.20124770700931549072,  1.4759907722473144531,
+  -1.7210904359817504883,   0.97746700048446655273, -0.13790138065814971924,
+  -0.38185903429985046387,  0.27421241998672485352,  0.066584214568138122559,
+  -0.35223302245140075684,  0.37672343850135803223, -0.23964276909828186035,
+  0.068674825131893157959,
+};
+static double const shl44[] = {
+  2.0833916664123535156,  -3.0418450832366943359,   3.2047898769378662109,
+  -2.7571926116943359375, 1.4978630542755126953,  -0.3427594602108001709,
+  -0.71733748912811279297,  1.0737057924270629883, -1.0225815773010253906,
+  0.56649994850158691406, -0.20968692004680633545, -0.065378531813621520996,
+  0.10322438180446624756, -0.067442022264003753662, -0.00495197344571352005,
+  0,
+};
+static double const shh44[] = {
+   3.0259189605712890625, -6.0268716812133789062,   9.195003509521484375,
+   -11.824929237365722656, 12.767142295837402344, -11.917946815490722656,
+   9.1739168167114257812,  -5.3712320327758789062, 1.1393624544143676758,
+   2.4484779834747314453,  -4.9719839096069335938,   6.0392003059387207031,
+   -5.9359521865844726562,  4.903278350830078125,   -3.5527443885803222656,
+   2.1909697055816650391, -1.1672389507293701172,  0.4903914332389831543,
+   -0.16519790887832641602,  0.023217858746647834778,
+};
+
+static const filter_t filters[] = {
+  {44100, fir,  5, 210, lip44,          SWR_DITHER_NS_LIPSHITZ},
+  {46000, fir,  9, 276, fwe44,          SWR_DITHER_NS_F_WEIGHTED},
+  {46000, fir,  9, 160, mew44,          SWR_DITHER_NS_MODIFIED_E_WEIGHTED},
+  {46000, fir,  9, 321, iew44,          SWR_DITHER_NS_IMPROVED_E_WEIGHTED},
+//   {48000, iir,  4, 220, ges48, SWR_DITHER_NS_GESEMANN},
+//   {44100, iir,  4, 230, ges44, SWR_DITHER_NS_GESEMANN},
+  {48000, fir, 16, 301, shi48,          SWR_DITHER_NS_SHIBATA},
+  {44100, fir, 20, 333, shi44,          SWR_DITHER_NS_SHIBATA},
+  {37800, fir, 16, 240, shi38,          SWR_DITHER_NS_SHIBATA},
+  {32000, fir, 20, 240/*TBD*/, shi32,   SWR_DITHER_NS_SHIBATA},
+  {22050, fir, 20, 240/*TBD*/, shi22,   SWR_DITHER_NS_SHIBATA},
+  {16000, fir, 20, 240/*TBD*/, shi16,   SWR_DITHER_NS_SHIBATA},
+  {11025, fir, 20, 240/*TBD*/, shi11,   SWR_DITHER_NS_SHIBATA},
+  { 8000, fir, 20, 240/*TBD*/, shi08,   SWR_DITHER_NS_SHIBATA},
+  {48000, fir, 16, 250, shl48,          SWR_DITHER_NS_LOW_SHIBATA},
+  {44100, fir, 15, 250, shl44,          SWR_DITHER_NS_LOW_SHIBATA},
+  {44100, fir, 20, 383, shh44,          SWR_DITHER_NS_HIGH_SHIBATA},
+  {    0, fir,  0,   0,  NULL,          SWR_DITHER_NONE},
+};
diff --git a/libswresample/options.c b/libswresample/options.c
new file mode 100644
index 0000000000..816ce47750
--- /dev/null
+++ b/libswresample/options.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2011-2013 Michael Niedermayer (michaelni@gmx.at)
+ *
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "swresample_internal.h"
+
+#include <float.h>
+
+#define  C30DB  M_SQRT2
+#define  C15DB  1.189207115
+#define C__0DB  1.0
+#define C_15DB  0.840896415
+#define C_30DB  M_SQRT1_2
+#define C_45DB  0.594603558
+#define C_60DB  0.5
+
+#define OFFSET(x) offsetof(SwrContext,x)
+#define PARAM AV_OPT_FLAG_AUDIO_PARAM
+
+static const AVOption options[]={
+{"ich"                  , "set input channel count"     , OFFSET(user_in_ch_count  ), AV_OPT_TYPE_INT, {.i64=0                    }, 0      , SWR_CH_MAX, PARAM},
+{"in_channel_count"     , "set input channel count"     , OFFSET(user_in_ch_count  ), AV_OPT_TYPE_INT, {.i64=0                    }, 0      , SWR_CH_MAX, PARAM},
+{"och"                  , "set output channel count"    , OFFSET(user_out_ch_count ), AV_OPT_TYPE_INT, {.i64=0                    }, 0      , SWR_CH_MAX, PARAM},
+{"out_channel_count"    , "set output channel count"    , OFFSET(user_out_ch_count ), AV_OPT_TYPE_INT, {.i64=0                    }, 0      , SWR_CH_MAX, PARAM},
+{"uch"                  , "set used channel count"      , OFFSET(user_used_ch_count), AV_OPT_TYPE_INT, {.i64=0                    }, 0      , SWR_CH_MAX, PARAM},
+{"used_channel_count"   , "set used channel count"      , OFFSET(user_used_ch_count), AV_OPT_TYPE_INT, {.i64=0                    }, 0      , SWR_CH_MAX, PARAM},
+{"isr"                  , "set input sample rate"       , OFFSET( in_sample_rate), AV_OPT_TYPE_INT  , {.i64=0                     }, 0      , INT_MAX   , PARAM},
+{"in_sample_rate"       , "set input sample rate"       , OFFSET( in_sample_rate), AV_OPT_TYPE_INT  , {.i64=0                     }, 0      , INT_MAX   , PARAM},
+{"osr"                  , "set output sample rate"      , OFFSET(out_sample_rate), AV_OPT_TYPE_INT  , {.i64=0                     }, 0      , INT_MAX   , PARAM},
+{"out_sample_rate"      , "set output sample rate"      , OFFSET(out_sample_rate), AV_OPT_TYPE_INT  , {.i64=0                     }, 0      , INT_MAX   , PARAM},
+{"isf"                  , "set input sample format"     , OFFSET( in_sample_fmt ), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1   , INT_MAX, PARAM},
+{"in_sample_fmt"        , "set input sample format"     , OFFSET( in_sample_fmt ), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1   , INT_MAX, PARAM},
+{"osf"                  , "set output sample format"    , OFFSET(out_sample_fmt ), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1   , INT_MAX, PARAM},
+{"out_sample_fmt"       , "set output sample format"    , OFFSET(out_sample_fmt ), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1   , INT_MAX, PARAM},
+{"tsf"                  , "set internal sample format"  , OFFSET(user_int_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1   , INT_MAX, PARAM},
+{"internal_sample_fmt"  , "set internal sample format"  , OFFSET(user_int_sample_fmt), AV_OPT_TYPE_SAMPLE_FMT , {.i64=AV_SAMPLE_FMT_NONE}, -1   , INT_MAX, PARAM},
+{"icl"                  , "set input channel layout"    , OFFSET(user_in_ch_layout ), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0           }, INT64_MIN, INT64_MAX , PARAM, "channel_layout"},
+{"in_channel_layout"    , "set input channel layout"    , OFFSET(user_in_ch_layout ), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0           }, INT64_MIN, INT64_MAX , PARAM, "channel_layout"},
+{"ocl"                  , "set output channel layout"   , OFFSET(user_out_ch_layout), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0           }, INT64_MIN, INT64_MAX , PARAM, "channel_layout"},
+{"out_channel_layout"   , "set output channel layout"   , OFFSET(user_out_ch_layout), AV_OPT_TYPE_CHANNEL_LAYOUT, {.i64=0           }, INT64_MIN, INT64_MAX , PARAM, "channel_layout"},
+{"clev"                 , "set center mix level"        , OFFSET(clev           ), AV_OPT_TYPE_FLOAT, {.dbl=C_30DB                }, -32    , 32        , PARAM},
+{"center_mix_level"     , "set center mix level"        , OFFSET(clev           ), AV_OPT_TYPE_FLOAT, {.dbl=C_30DB                }, -32    , 32        , PARAM},
+{"slev"                 , "set surround mix level"      , OFFSET(slev           ), AV_OPT_TYPE_FLOAT, {.dbl=C_30DB                }, -32    , 32        , PARAM},
+{"surround_mix_level"   , "set surround mix Level"      , OFFSET(slev           ), AV_OPT_TYPE_FLOAT, {.dbl=C_30DB                }, -32    , 32        , PARAM},
+{"lfe_mix_level"        , "set LFE mix level"           , OFFSET(lfe_mix_level  ), AV_OPT_TYPE_FLOAT, {.dbl=0                     }, -32    , 32        , PARAM},
+{"rmvol"                , "set rematrix volume"         , OFFSET(rematrix_volume), AV_OPT_TYPE_FLOAT, {.dbl=1.0                   }, -1000  , 1000      , PARAM},
+{"rematrix_volume"      , "set rematrix volume"         , OFFSET(rematrix_volume), AV_OPT_TYPE_FLOAT, {.dbl=1.0                   }, -1000  , 1000      , PARAM},
+{"rematrix_maxval"      , "set rematrix maxval"         , OFFSET(rematrix_maxval), AV_OPT_TYPE_FLOAT, {.dbl=0.0                   }, 0      , 1000      , PARAM},
+
+{"flags"                , "set flags"                   , OFFSET(flags          ), AV_OPT_TYPE_FLAGS, {.i64=0                     }, 0      , UINT_MAX  , PARAM, "flags"},
+{"swr_flags"            , "set flags"                   , OFFSET(flags          ), AV_OPT_TYPE_FLAGS, {.i64=0                     }, 0      , UINT_MAX  , PARAM, "flags"},
+{"res"                  , "force resampling"            , 0                      , AV_OPT_TYPE_CONST, {.i64=SWR_FLAG_RESAMPLE     }, INT_MIN, INT_MAX   , PARAM, "flags"},
+
+{"dither_scale"         , "set dither scale"            , OFFSET(dither.scale   ), AV_OPT_TYPE_FLOAT, {.dbl=1                     }, 0      , INT_MAX   , PARAM},
+
+{"dither_method"        , "set dither method"           , OFFSET(dither.method  ), AV_OPT_TYPE_INT  , {.i64=0                     }, 0      , SWR_DITHER_NB-1, PARAM, "dither_method"},
+{"rectangular"          , "select rectangular dither"   , 0                      , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_RECTANGULAR}, INT_MIN, INT_MAX   , PARAM, "dither_method"},
+{"triangular"           , "select triangular dither"    , 0                      , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_TRIANGULAR }, INT_MIN, INT_MAX   , PARAM, "dither_method"},
+{"triangular_hp"        , "select triangular dither with high pass" , 0          , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_TRIANGULAR_HIGHPASS }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"lipshitz"             , "select Lipshitz noise shaping dither" , 0             , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_LIPSHITZ}, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"shibata"              , "select Shibata noise shaping dither" , 0              , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"low_shibata"          , "select low Shibata noise shaping dither" , 0          , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_LOW_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"high_shibata"         , "select high Shibata noise shaping dither" , 0         , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_HIGH_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"f_weighted"           , "select f-weighted noise shaping dither" , 0           , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_F_WEIGHTED }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"modified_e_weighted"  , "select modified-e-weighted noise shaping dither" , 0  , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_MODIFIED_E_WEIGHTED }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"improved_e_weighted"  , "select improved-e-weighted noise shaping dither" , 0  , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_IMPROVED_E_WEIGHTED }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+
+{"filter_size"          , "set swr resampling filter size", OFFSET(filter_size)  , AV_OPT_TYPE_INT  , {.i64=32                    }, 0      , INT_MAX   , PARAM },
+{"phase_shift"          , "set swr resampling phase shift", OFFSET(phase_shift)  , AV_OPT_TYPE_INT  , {.i64=10                    }, 0      , 24        , PARAM },
+{"linear_interp"        , "enable linear interpolation" , OFFSET(linear_interp)  , AV_OPT_TYPE_BOOL , {.i64=0                     }, 0      , 1         , PARAM },
+{"exact_rational"       , "enable exact rational"       , OFFSET(exact_rational) , AV_OPT_TYPE_BOOL , {.i64=0                     }, 0      , 1         , PARAM },
+{"cutoff"               , "set cutoff frequency ratio"  , OFFSET(cutoff)         , AV_OPT_TYPE_DOUBLE,{.dbl=0.                    }, 0      , 1         , PARAM },
+
+/* duplicate option in order to work with avconv */
+{"resample_cutoff"      , "set cutoff frequency ratio"  , OFFSET(cutoff)         , AV_OPT_TYPE_DOUBLE,{.dbl=0.                    }, 0      , 1         , PARAM },
+
+{"resampler"            , "set resampling Engine"       , OFFSET(engine)         , AV_OPT_TYPE_INT  , {.i64=0                     }, 0      , SWR_ENGINE_NB-1, PARAM, "resampler"},
+{"swr"                  , "select SW Resampler"         , 0                      , AV_OPT_TYPE_CONST, {.i64=SWR_ENGINE_SWR        }, INT_MIN, INT_MAX   , PARAM, "resampler"},
+{"soxr"                 , "select SoX Resampler"        , 0                      , AV_OPT_TYPE_CONST, {.i64=SWR_ENGINE_SOXR       }, INT_MIN, INT_MAX   , PARAM, "resampler"},
+{"precision"            , "set soxr resampling precision (in bits)"
+                                                        , OFFSET(precision)      , AV_OPT_TYPE_DOUBLE,{.dbl=20.0                  }, 15.0   , 33.0      , PARAM },
+{"cheby"                , "enable soxr Chebyshev passband & higher-precision irrational ratio approximation"
+                                                        , OFFSET(cheby)          , AV_OPT_TYPE_BOOL , {.i64=0                     }, 0      , 1         , PARAM },
+{"min_comp"             , "set minimum difference between timestamps and audio data (in seconds) below which no timestamp compensation of either kind is applied"
+                                                        , OFFSET(min_compensation),AV_OPT_TYPE_FLOAT ,{.dbl=FLT_MAX               }, 0      , FLT_MAX   , PARAM },
+{"min_hard_comp"        , "set minimum difference between timestamps and audio data (in seconds) to trigger padding/trimming the data."
+                                                        , OFFSET(min_hard_compensation),AV_OPT_TYPE_FLOAT ,{.dbl=0.1                   }, 0      , INT_MAX   , PARAM },
+{"comp_duration"        , "set duration (in seconds) over which data is stretched/squeezed to make it match the timestamps."
+                                                        , OFFSET(soft_compensation_duration),AV_OPT_TYPE_FLOAT ,{.dbl=1                     }, 0      , INT_MAX   , PARAM },
+{"max_soft_comp"        , "set maximum factor by which data is stretched/squeezed to make it match the timestamps."
+                                                        , OFFSET(max_soft_compensation),AV_OPT_TYPE_FLOAT ,{.dbl=0                     }, INT_MIN, INT_MAX   , PARAM },
+{"async"                , "simplified 1 parameter audio timestamp matching, 0(disabled), 1(filling and trimming), >1(maximum stretch/squeeze in samples per second)"
+                                                        , OFFSET(async)          , AV_OPT_TYPE_FLOAT ,{.dbl=0                     }, INT_MIN, INT_MAX   , PARAM },
+{"first_pts"            , "Assume the first pts should be this value (in samples)."
+                                                        , OFFSET(firstpts_in_samples), AV_OPT_TYPE_INT64 ,{.i64=AV_NOPTS_VALUE    }, INT64_MIN,INT64_MAX, PARAM },
+
+{ "matrix_encoding"     , "set matrixed stereo encoding" , OFFSET(matrix_encoding), AV_OPT_TYPE_INT   ,{.i64 = AV_MATRIX_ENCODING_NONE}, AV_MATRIX_ENCODING_NONE,     AV_MATRIX_ENCODING_NB-1, PARAM, "matrix_encoding" },
+    { "none",  "select none",               0, AV_OPT_TYPE_CONST, { .i64 = AV_MATRIX_ENCODING_NONE  }, INT_MIN, INT_MAX, PARAM, "matrix_encoding" },
+    { "dolby", "select Dolby",              0, AV_OPT_TYPE_CONST, { .i64 = AV_MATRIX_ENCODING_DOLBY }, INT_MIN, INT_MAX, PARAM, "matrix_encoding" },
+    { "dplii", "select Dolby Pro Logic II", 0, AV_OPT_TYPE_CONST, { .i64 = AV_MATRIX_ENCODING_DPLII }, INT_MIN, INT_MAX, PARAM, "matrix_encoding" },
+
+{ "filter_type"         , "select swr filter type"      , OFFSET(filter_type)    , AV_OPT_TYPE_INT  , { .i64 = SWR_FILTER_TYPE_KAISER }, SWR_FILTER_TYPE_CUBIC, SWR_FILTER_TYPE_KAISER, PARAM, "filter_type" },
+    { "cubic"           , "select cubic"                , 0                      , AV_OPT_TYPE_CONST, { .i64 = SWR_FILTER_TYPE_CUBIC            }, INT_MIN, INT_MAX, PARAM, "filter_type" },
+    { "blackman_nuttall", "select Blackman Nuttall windowed sinc", 0             , AV_OPT_TYPE_CONST, { .i64 = SWR_FILTER_TYPE_BLACKMAN_NUTTALL }, INT_MIN, INT_MAX, PARAM, "filter_type" },
+    { "kaiser"          , "select Kaiser windowed sinc" , 0                      , AV_OPT_TYPE_CONST, { .i64 = SWR_FILTER_TYPE_KAISER           }, INT_MIN, INT_MAX, PARAM, "filter_type" },
+
+{ "kaiser_beta"         , "set swr Kaiser window beta"  , OFFSET(kaiser_beta)    , AV_OPT_TYPE_DOUBLE  , {.dbl=9                     }, 2      , 16        , PARAM },
+
+{ "output_sample_bits"  , "set swr number of output sample bits", OFFSET(dither.output_sample_bits), AV_OPT_TYPE_INT  , {.i64=0   }, 0      , 64        , PARAM },
+{0}
+};
+
+static const char* context_to_name(void* ptr) {
+    return "SWR";
+}
+
+static const AVClass av_class = {
+    .class_name                = "SWResampler",
+    .item_name                 = context_to_name,
+    .option                    = options,
+    .version                   = LIBAVUTIL_VERSION_INT,
+    .log_level_offset_offset   = OFFSET(log_level_offset),
+    .parent_log_context_offset = OFFSET(log_ctx),
+    .category                  = AV_CLASS_CATEGORY_SWRESAMPLER,
+};
+
+const AVClass *swr_get_class(void)
+{
+    return &av_class;
+}
+
+av_cold struct SwrContext *swr_alloc(void){
+    SwrContext *s= av_mallocz(sizeof(SwrContext));
+    if(s){
+        s->av_class= &av_class;
+        av_opt_set_defaults(s);
+    }
+    return s;
+}
diff --git a/libswresample/rematrix.c b/libswresample/rematrix.c
new file mode 100644
index 0000000000..ddba0433e8
--- /dev/null
+++ b/libswresample/rematrix.c
@@ -0,0 +1,542 @@
+/*
+ * Copyright (C) 2011-2012 Michael Niedermayer (michaelni@gmx.at)
+ *
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swresample_internal.h"
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+
+#define TEMPLATE_REMATRIX_FLT
+#include "rematrix_template.c"
+#undef TEMPLATE_REMATRIX_FLT
+
+#define TEMPLATE_REMATRIX_DBL
+#include "rematrix_template.c"
+#undef TEMPLATE_REMATRIX_DBL
+
+#define TEMPLATE_REMATRIX_S16
+#include "rematrix_template.c"
+#define TEMPLATE_CLIP
+#include "rematrix_template.c"
+#undef TEMPLATE_CLIP
+#undef TEMPLATE_REMATRIX_S16
+
+#define TEMPLATE_REMATRIX_S32
+#include "rematrix_template.c"
+#undef TEMPLATE_REMATRIX_S32
+
+#define FRONT_LEFT             0
+#define FRONT_RIGHT            1
+#define FRONT_CENTER           2
+#define LOW_FREQUENCY          3
+#define BACK_LEFT              4
+#define BACK_RIGHT             5
+#define FRONT_LEFT_OF_CENTER   6
+#define FRONT_RIGHT_OF_CENTER  7
+#define BACK_CENTER            8
+#define SIDE_LEFT              9
+#define SIDE_RIGHT             10
+#define TOP_CENTER             11
+#define TOP_FRONT_LEFT         12
+#define TOP_FRONT_CENTER       13
+#define TOP_FRONT_RIGHT        14
+#define TOP_BACK_LEFT          15
+#define TOP_BACK_CENTER        16
+#define TOP_BACK_RIGHT         17
+#define NUM_NAMED_CHANNELS     18
+
+int swr_set_matrix(struct SwrContext *s, const double *matrix, int stride)
+{
+    int nb_in, nb_out, in, out;
+
+    if (!s || s->in_convert) // s needs to be allocated but not initialized
+        return AVERROR(EINVAL);
+    memset(s->matrix, 0, sizeof(s->matrix));
+    nb_in  = av_get_channel_layout_nb_channels(s->user_in_ch_layout);
+    nb_out = av_get_channel_layout_nb_channels(s->user_out_ch_layout);
+    for (out = 0; out < nb_out; out++) {
+        for (in = 0; in < nb_in; in++)
+            s->matrix[out][in] = matrix[in];
+        matrix += stride;
+    }
+    s->rematrix_custom = 1;
+    return 0;
+}
+
+static int even(int64_t layout){
+    if(!layout) return 1;
+    if(layout&(layout-1)) return 1;
+    return 0;
+}
+
+static int clean_layout(SwrContext *s, int64_t layout){
+    if(layout && layout != AV_CH_FRONT_CENTER && !(layout&(layout-1))) {
+        char buf[128];
+        av_get_channel_layout_string(buf, sizeof(buf), -1, layout);
+        av_log(s, AV_LOG_VERBOSE, "Treating %s as mono\n", buf);
+        return AV_CH_FRONT_CENTER;
+    }
+
+    return layout;
+}
+
+static int sane_layout(int64_t layout){
+    if(!(layout & AV_CH_LAYOUT_SURROUND)) // at least 1 front speaker
+        return 0;
+    if(!even(layout & (AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT))) // no asymetric front
+        return 0;
+    if(!even(layout & (AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT)))   // no asymetric side
+        return 0;
+    if(!even(layout & (AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT)))
+        return 0;
+    if(!even(layout & (AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER)))
+        return 0;
+    if(av_get_channel_layout_nb_channels(layout) >= SWR_CH_MAX)
+        return 0;
+
+    return 1;
+}
+
+av_cold static int auto_matrix(SwrContext *s)
+{
+    int i, j, out_i;
+    double matrix[NUM_NAMED_CHANNELS][NUM_NAMED_CHANNELS]={{0}};
+    int64_t unaccounted, in_ch_layout, out_ch_layout;
+    double maxcoef=0;
+    char buf[128];
+    const int matrix_encoding = s->matrix_encoding;
+    float maxval;
+
+    in_ch_layout = clean_layout(s, s->in_ch_layout);
+    out_ch_layout = clean_layout(s, s->out_ch_layout);
+
+    if(   out_ch_layout == AV_CH_LAYOUT_STEREO_DOWNMIX
+       && (in_ch_layout & AV_CH_LAYOUT_STEREO_DOWNMIX) == 0
+    )
+        out_ch_layout = AV_CH_LAYOUT_STEREO;
+
+    if(    in_ch_layout == AV_CH_LAYOUT_STEREO_DOWNMIX
+       && (out_ch_layout & AV_CH_LAYOUT_STEREO_DOWNMIX) == 0
+    )
+        in_ch_layout = AV_CH_LAYOUT_STEREO;
+
+    if(!sane_layout(in_ch_layout)){
+        av_get_channel_layout_string(buf, sizeof(buf), -1, s->in_ch_layout);
+        av_log(s, AV_LOG_ERROR, "Input channel layout '%s' is not supported\n", buf);
+        return AVERROR(EINVAL);
+    }
+
+    if(!sane_layout(out_ch_layout)){
+        av_get_channel_layout_string(buf, sizeof(buf), -1, s->out_ch_layout);
+        av_log(s, AV_LOG_ERROR, "Output channel layout '%s' is not supported\n", buf);
+        return AVERROR(EINVAL);
+    }
+
+    memset(s->matrix, 0, sizeof(s->matrix));
+    for(i=0; i<FF_ARRAY_ELEMS(matrix); i++){
+        if(in_ch_layout & out_ch_layout & (1ULL<<i))
+            matrix[i][i]= 1.0;
+    }
+
+    unaccounted= in_ch_layout & ~out_ch_layout;
+
+//FIXME implement dolby surround
+//FIXME implement full ac3
+
+
+    if(unaccounted & AV_CH_FRONT_CENTER){
+        if((out_ch_layout & AV_CH_LAYOUT_STEREO) == AV_CH_LAYOUT_STEREO){
+            if(in_ch_layout & AV_CH_LAYOUT_STEREO) {
+                matrix[ FRONT_LEFT][FRONT_CENTER]+= s->clev;
+                matrix[FRONT_RIGHT][FRONT_CENTER]+= s->clev;
+            } else {
+                matrix[ FRONT_LEFT][FRONT_CENTER]+= M_SQRT1_2;
+                matrix[FRONT_RIGHT][FRONT_CENTER]+= M_SQRT1_2;
+            }
+        }else
+            av_assert0(0);
+    }
+    if(unaccounted & AV_CH_LAYOUT_STEREO){
+        if(out_ch_layout & AV_CH_FRONT_CENTER){
+            matrix[FRONT_CENTER][ FRONT_LEFT]+= M_SQRT1_2;
+            matrix[FRONT_CENTER][FRONT_RIGHT]+= M_SQRT1_2;
+            if(in_ch_layout & AV_CH_FRONT_CENTER)
+                matrix[FRONT_CENTER][ FRONT_CENTER] = s->clev*sqrt(2);
+        }else
+            av_assert0(0);
+    }
+
+    if(unaccounted & AV_CH_BACK_CENTER){
+        if(out_ch_layout & AV_CH_BACK_LEFT){
+            matrix[ BACK_LEFT][BACK_CENTER]+= M_SQRT1_2;
+            matrix[BACK_RIGHT][BACK_CENTER]+= M_SQRT1_2;
+        }else if(out_ch_layout & AV_CH_SIDE_LEFT){
+            matrix[ SIDE_LEFT][BACK_CENTER]+= M_SQRT1_2;
+            matrix[SIDE_RIGHT][BACK_CENTER]+= M_SQRT1_2;
+        }else if(out_ch_layout & AV_CH_FRONT_LEFT){
+            if (matrix_encoding == AV_MATRIX_ENCODING_DOLBY ||
+                matrix_encoding == AV_MATRIX_ENCODING_DPLII) {
+                if (unaccounted & (AV_CH_BACK_LEFT | AV_CH_SIDE_LEFT)) {
+                    matrix[FRONT_LEFT ][BACK_CENTER] -= s->slev * M_SQRT1_2;
+                    matrix[FRONT_RIGHT][BACK_CENTER] += s->slev * M_SQRT1_2;
+                } else {
+                    matrix[FRONT_LEFT ][BACK_CENTER] -= s->slev;
+                    matrix[FRONT_RIGHT][BACK_CENTER] += s->slev;
+                }
+            } else {
+                matrix[ FRONT_LEFT][BACK_CENTER]+= s->slev*M_SQRT1_2;
+                matrix[FRONT_RIGHT][BACK_CENTER]+= s->slev*M_SQRT1_2;
+            }
+        }else if(out_ch_layout & AV_CH_FRONT_CENTER){
+            matrix[ FRONT_CENTER][BACK_CENTER]+= s->slev*M_SQRT1_2;
+        }else
+            av_assert0(0);
+    }
+    if(unaccounted & AV_CH_BACK_LEFT){
+        if(out_ch_layout & AV_CH_BACK_CENTER){
+            matrix[BACK_CENTER][ BACK_LEFT]+= M_SQRT1_2;
+            matrix[BACK_CENTER][BACK_RIGHT]+= M_SQRT1_2;
+        }else if(out_ch_layout & AV_CH_SIDE_LEFT){
+            if(in_ch_layout & AV_CH_SIDE_LEFT){
+                matrix[ SIDE_LEFT][ BACK_LEFT]+= M_SQRT1_2;
+                matrix[SIDE_RIGHT][BACK_RIGHT]+= M_SQRT1_2;
+            }else{
+            matrix[ SIDE_LEFT][ BACK_LEFT]+= 1.0;
+            matrix[SIDE_RIGHT][BACK_RIGHT]+= 1.0;
+            }
+        }else if(out_ch_layout & AV_CH_FRONT_LEFT){
+            if (matrix_encoding == AV_MATRIX_ENCODING_DOLBY) {
+                matrix[FRONT_LEFT ][BACK_LEFT ] -= s->slev * M_SQRT1_2;
+                matrix[FRONT_LEFT ][BACK_RIGHT] -= s->slev * M_SQRT1_2;
+                matrix[FRONT_RIGHT][BACK_LEFT ] += s->slev * M_SQRT1_2;
+                matrix[FRONT_RIGHT][BACK_RIGHT] += s->slev * M_SQRT1_2;
+            } else if (matrix_encoding == AV_MATRIX_ENCODING_DPLII) {
+                matrix[FRONT_LEFT ][BACK_LEFT ] -= s->slev * SQRT3_2;
+                matrix[FRONT_LEFT ][BACK_RIGHT] -= s->slev * M_SQRT1_2;
+                matrix[FRONT_RIGHT][BACK_LEFT ] += s->slev * M_SQRT1_2;
+                matrix[FRONT_RIGHT][BACK_RIGHT] += s->slev * SQRT3_2;
+            } else {
+                matrix[ FRONT_LEFT][ BACK_LEFT] += s->slev;
+                matrix[FRONT_RIGHT][BACK_RIGHT] += s->slev;
+            }
+        }else if(out_ch_layout & AV_CH_FRONT_CENTER){
+            matrix[ FRONT_CENTER][BACK_LEFT ]+= s->slev*M_SQRT1_2;
+            matrix[ FRONT_CENTER][BACK_RIGHT]+= s->slev*M_SQRT1_2;
+        }else
+            av_assert0(0);
+    }
+
+    if(unaccounted & AV_CH_SIDE_LEFT){
+        if(out_ch_layout & AV_CH_BACK_LEFT){
+            /* if back channels do not exist in the input, just copy side
+               channels to back channels, otherwise mix side into back */
+            if (in_ch_layout & AV_CH_BACK_LEFT) {
+                matrix[BACK_LEFT ][SIDE_LEFT ] += M_SQRT1_2;
+                matrix[BACK_RIGHT][SIDE_RIGHT] += M_SQRT1_2;
+            } else {
+                matrix[BACK_LEFT ][SIDE_LEFT ] += 1.0;
+                matrix[BACK_RIGHT][SIDE_RIGHT] += 1.0;
+            }
+        }else if(out_ch_layout & AV_CH_BACK_CENTER){
+            matrix[BACK_CENTER][ SIDE_LEFT]+= M_SQRT1_2;
+            matrix[BACK_CENTER][SIDE_RIGHT]+= M_SQRT1_2;
+        }else if(out_ch_layout & AV_CH_FRONT_LEFT){
+            if (matrix_encoding == AV_MATRIX_ENCODING_DOLBY) {
+                matrix[FRONT_LEFT ][SIDE_LEFT ] -= s->slev * M_SQRT1_2;
+                matrix[FRONT_LEFT ][SIDE_RIGHT] -= s->slev * M_SQRT1_2;
+                matrix[FRONT_RIGHT][SIDE_LEFT ] += s->slev * M_SQRT1_2;
+                matrix[FRONT_RIGHT][SIDE_RIGHT] += s->slev * M_SQRT1_2;
+            } else if (matrix_encoding == AV_MATRIX_ENCODING_DPLII) {
+                matrix[FRONT_LEFT ][SIDE_LEFT ] -= s->slev * SQRT3_2;
+                matrix[FRONT_LEFT ][SIDE_RIGHT] -= s->slev * M_SQRT1_2;
+                matrix[FRONT_RIGHT][SIDE_LEFT ] += s->slev * M_SQRT1_2;
+                matrix[FRONT_RIGHT][SIDE_RIGHT] += s->slev * SQRT3_2;
+            } else {
+                matrix[ FRONT_LEFT][ SIDE_LEFT] += s->slev;
+                matrix[FRONT_RIGHT][SIDE_RIGHT] += s->slev;
+            }
+        }else if(out_ch_layout & AV_CH_FRONT_CENTER){
+            matrix[ FRONT_CENTER][SIDE_LEFT ]+= s->slev*M_SQRT1_2;
+            matrix[ FRONT_CENTER][SIDE_RIGHT]+= s->slev*M_SQRT1_2;
+        }else
+            av_assert0(0);
+    }
+
+    if(unaccounted & AV_CH_FRONT_LEFT_OF_CENTER){
+        if(out_ch_layout & AV_CH_FRONT_LEFT){
+            matrix[ FRONT_LEFT][ FRONT_LEFT_OF_CENTER]+= 1.0;
+            matrix[FRONT_RIGHT][FRONT_RIGHT_OF_CENTER]+= 1.0;
+        }else if(out_ch_layout & AV_CH_FRONT_CENTER){
+            matrix[ FRONT_CENTER][ FRONT_LEFT_OF_CENTER]+= M_SQRT1_2;
+            matrix[ FRONT_CENTER][FRONT_RIGHT_OF_CENTER]+= M_SQRT1_2;
+        }else
+            av_assert0(0);
+    }
+    /* mix LFE into front left/right or center */
+    if (unaccounted & AV_CH_LOW_FREQUENCY) {
+        if (out_ch_layout & AV_CH_FRONT_CENTER) {
+            matrix[FRONT_CENTER][LOW_FREQUENCY] += s->lfe_mix_level;
+        } else if (out_ch_layout & AV_CH_FRONT_LEFT) {
+            matrix[FRONT_LEFT ][LOW_FREQUENCY] += s->lfe_mix_level * M_SQRT1_2;
+            matrix[FRONT_RIGHT][LOW_FREQUENCY] += s->lfe_mix_level * M_SQRT1_2;
+        } else
+            av_assert0(0);
+    }
+
+    for(out_i=i=0; i<64; i++){
+        double sum=0;
+        int in_i=0;
+        if((out_ch_layout & (1ULL<<i)) == 0)
+            continue;
+        for(j=0; j<64; j++){
+            if((in_ch_layout & (1ULL<<j)) == 0)
+               continue;
+            if (i < FF_ARRAY_ELEMS(matrix) && j < FF_ARRAY_ELEMS(matrix[0]))
+                s->matrix[out_i][in_i]= matrix[i][j];
+            else
+                s->matrix[out_i][in_i]= i == j && (in_ch_layout & out_ch_layout & (1ULL<<i));
+            sum += fabs(s->matrix[out_i][in_i]);
+            in_i++;
+        }
+        maxcoef= FFMAX(maxcoef, sum);
+        out_i++;
+    }
+    if(s->rematrix_volume  < 0)
+        maxcoef = -s->rematrix_volume;
+
+    if (s->rematrix_maxval > 0) {
+        maxval = s->rematrix_maxval;
+    } else if (   av_get_packed_sample_fmt(s->out_sample_fmt) < AV_SAMPLE_FMT_FLT
+               || av_get_packed_sample_fmt(s->int_sample_fmt) < AV_SAMPLE_FMT_FLT) {
+        maxval = 1.0;
+    } else
+        maxval = INT_MAX;
+
+    if(maxcoef > maxval || s->rematrix_volume  < 0){
+        maxcoef /= maxval;
+        for(i=0; i<SWR_CH_MAX; i++)
+            for(j=0; j<SWR_CH_MAX; j++){
+                s->matrix[i][j] /= maxcoef;
+            }
+    }
+
+    if(s->rematrix_volume > 0){
+        for(i=0; i<SWR_CH_MAX; i++)
+            for(j=0; j<SWR_CH_MAX; j++){
+                s->matrix[i][j] *= s->rematrix_volume;
+            }
+    }
+
+    av_log(s, AV_LOG_DEBUG, "Matrix coefficients:\n");
+    for(i=0; i<av_get_channel_layout_nb_channels(out_ch_layout); i++){
+        const char *c =
+            av_get_channel_name(av_channel_layout_extract_channel(out_ch_layout, i));
+        av_log(s, AV_LOG_DEBUG, "%s: ", c ? c : "?");
+        for(j=0; j<av_get_channel_layout_nb_channels(in_ch_layout); j++){
+            c = av_get_channel_name(av_channel_layout_extract_channel(in_ch_layout, j));
+            av_log(s, AV_LOG_DEBUG, "%s:%f ", c ? c : "?", s->matrix[i][j]);
+        }
+        av_log(s, AV_LOG_DEBUG, "\n");
+    }
+    return 0;
+}
+
+av_cold int swri_rematrix_init(SwrContext *s){
+    int i, j;
+    int nb_in  = av_get_channel_layout_nb_channels(s->in_ch_layout);
+    int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout);
+
+    s->mix_any_f = NULL;
+
+    if (!s->rematrix_custom) {
+        int r = auto_matrix(s);
+        if (r)
+            return r;
+    }
+    if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
+        int maxsum = 0;
+        s->native_matrix = av_calloc(nb_in * nb_out, sizeof(int));
+        s->native_one    = av_mallocz(sizeof(int));
+        if (!s->native_matrix || !s->native_one)
+            return AVERROR(ENOMEM);
+        for (i = 0; i < nb_out; i++) {
+            double rem = 0;
+            int sum = 0;
+
+            for (j = 0; j < nb_in; j++) {
+                double target = s->matrix[i][j] * 32768 + rem;
+                ((int*)s->native_matrix)[i * nb_in + j] = lrintf(target);
+                rem += target - ((int*)s->native_matrix)[i * nb_in + j];
+                sum += FFABS(((int*)s->native_matrix)[i * nb_in + j]);
+            }
+            maxsum = FFMAX(maxsum, sum);
+        }
+        *((int*)s->native_one) = 32768;
+        if (maxsum <= 32768) {
+            s->mix_1_1_f = (mix_1_1_func_type*)copy_s16;
+            s->mix_2_1_f = (mix_2_1_func_type*)sum2_s16;
+            s->mix_any_f = (mix_any_func_type*)get_mix_any_func_s16(s);
+        } else {
+            s->mix_1_1_f = (mix_1_1_func_type*)copy_clip_s16;
+            s->mix_2_1_f = (mix_2_1_func_type*)sum2_clip_s16;
+            s->mix_any_f = (mix_any_func_type*)get_mix_any_func_clip_s16(s);
+        }
+    }else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
+        s->native_matrix = av_calloc(nb_in * nb_out, sizeof(float));
+        s->native_one    = av_mallocz(sizeof(float));
+        if (!s->native_matrix || !s->native_one)
+            return AVERROR(ENOMEM);
+        for (i = 0; i < nb_out; i++)
+            for (j = 0; j < nb_in; j++)
+                ((float*)s->native_matrix)[i * nb_in + j] = s->matrix[i][j];
+        *((float*)s->native_one) = 1.0;
+        s->mix_1_1_f = (mix_1_1_func_type*)copy_float;
+        s->mix_2_1_f = (mix_2_1_func_type*)sum2_float;
+        s->mix_any_f = (mix_any_func_type*)get_mix_any_func_float(s);
+    }else if(s->midbuf.fmt == AV_SAMPLE_FMT_DBLP){
+        s->native_matrix = av_calloc(nb_in * nb_out, sizeof(double));
+        s->native_one    = av_mallocz(sizeof(double));
+        if (!s->native_matrix || !s->native_one)
+            return AVERROR(ENOMEM);
+        for (i = 0; i < nb_out; i++)
+            for (j = 0; j < nb_in; j++)
+                ((double*)s->native_matrix)[i * nb_in + j] = s->matrix[i][j];
+        *((double*)s->native_one) = 1.0;
+        s->mix_1_1_f = (mix_1_1_func_type*)copy_double;
+        s->mix_2_1_f = (mix_2_1_func_type*)sum2_double;
+        s->mix_any_f = (mix_any_func_type*)get_mix_any_func_double(s);
+    }else if(s->midbuf.fmt == AV_SAMPLE_FMT_S32P){
+        // Only for dithering currently
+//         s->native_matrix = av_calloc(nb_in * nb_out, sizeof(double));
+        s->native_one    = av_mallocz(sizeof(int));
+        if (!s->native_one)
+            return AVERROR(ENOMEM);
+//         for (i = 0; i < nb_out; i++)
+//             for (j = 0; j < nb_in; j++)
+//                 ((double*)s->native_matrix)[i * nb_in + j] = s->matrix[i][j];
+        *((int*)s->native_one) = 32768;
+        s->mix_1_1_f = (mix_1_1_func_type*)copy_s32;
+        s->mix_2_1_f = (mix_2_1_func_type*)sum2_s32;
+        s->mix_any_f = (mix_any_func_type*)get_mix_any_func_s32(s);
+    }else
+        av_assert0(0);
+    //FIXME quantize for integeres
+    for (i = 0; i < SWR_CH_MAX; i++) {
+        int ch_in=0;
+        for (j = 0; j < SWR_CH_MAX; j++) {
+            s->matrix32[i][j]= lrintf(s->matrix[i][j] * 32768);
+            if(s->matrix[i][j])
+                s->matrix_ch[i][++ch_in]= j;
+        }
+        s->matrix_ch[i][0]= ch_in;
+    }
+
+    if(HAVE_YASM && HAVE_MMX)
+        return swri_rematrix_init_x86(s);
+
+    return 0;
+}
+
+av_cold void swri_rematrix_free(SwrContext *s){
+    av_freep(&s->native_matrix);
+    av_freep(&s->native_one);
+    av_freep(&s->native_simd_matrix);
+    av_freep(&s->native_simd_one);
+}
+
+int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mustcopy){
+    int out_i, in_i, i, j;
+    int len1 = 0;
+    int off = 0;
+
+    if(s->mix_any_f) {
+        s->mix_any_f(out->ch, (const uint8_t **)in->ch, s->native_matrix, len);
+        return 0;
+    }
+
+    if(s->mix_2_1_simd || s->mix_1_1_simd){
+        len1= len&~15;
+        off = len1 * out->bps;
+    }
+
+    av_assert0(!s->out_ch_layout || out->ch_count == av_get_channel_layout_nb_channels(s->out_ch_layout));
+    av_assert0(!s-> in_ch_layout || in ->ch_count == av_get_channel_layout_nb_channels(s-> in_ch_layout));
+
+    for(out_i=0; out_i<out->ch_count; out_i++){
+        switch(s->matrix_ch[out_i][0]){
+        case 0:
+            if(mustcopy)
+                memset(out->ch[out_i], 0, len * av_get_bytes_per_sample(s->int_sample_fmt));
+            break;
+        case 1:
+            in_i= s->matrix_ch[out_i][1];
+            if(s->matrix[out_i][in_i]!=1.0){
+                if(s->mix_1_1_simd && len1)
+                    s->mix_1_1_simd(out->ch[out_i]    , in->ch[in_i]    , s->native_simd_matrix, in->ch_count*out_i + in_i, len1);
+                if(len != len1)
+                    s->mix_1_1_f   (out->ch[out_i]+off, in->ch[in_i]+off, s->native_matrix, in->ch_count*out_i + in_i, len-len1);
+            }else if(mustcopy){
+                memcpy(out->ch[out_i], in->ch[in_i], len*out->bps);
+            }else{
+                out->ch[out_i]= in->ch[in_i];
+            }
+            break;
+        case 2: {
+            int in_i1 = s->matrix_ch[out_i][1];
+            int in_i2 = s->matrix_ch[out_i][2];
+            if(s->mix_2_1_simd && len1)
+                s->mix_2_1_simd(out->ch[out_i]    , in->ch[in_i1]    , in->ch[in_i2]    , s->native_simd_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len1);
+            else
+                s->mix_2_1_f   (out->ch[out_i]    , in->ch[in_i1]    , in->ch[in_i2]    , s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len1);
+            if(len != len1)
+                s->mix_2_1_f   (out->ch[out_i]+off, in->ch[in_i1]+off, in->ch[in_i2]+off, s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len-len1);
+            break;}
+        default:
+            if(s->int_sample_fmt == AV_SAMPLE_FMT_FLTP){
+                for(i=0; i<len; i++){
+                    float v=0;
+                    for(j=0; j<s->matrix_ch[out_i][0]; j++){
+                        in_i= s->matrix_ch[out_i][1+j];
+                        v+= ((float*)in->ch[in_i])[i] * s->matrix[out_i][in_i];
+                    }
+                    ((float*)out->ch[out_i])[i]= v;
+                }
+            }else if(s->int_sample_fmt == AV_SAMPLE_FMT_DBLP){
+                for(i=0; i<len; i++){
+                    double v=0;
+                    for(j=0; j<s->matrix_ch[out_i][0]; j++){
+                        in_i= s->matrix_ch[out_i][1+j];
+                        v+= ((double*)in->ch[in_i])[i] * s->matrix[out_i][in_i];
+                    }
+                    ((double*)out->ch[out_i])[i]= v;
+                }
+            }else{
+                for(i=0; i<len; i++){
+                    int v=0;
+                    for(j=0; j<s->matrix_ch[out_i][0]; j++){
+                        in_i= s->matrix_ch[out_i][1+j];
+                        v+= ((int16_t*)in->ch[in_i])[i] * s->matrix32[out_i][in_i];
+                    }
+                    ((int16_t*)out->ch[out_i])[i]= (v + 16384)>>15;
+                }
+            }
+        }
+    }
+    return 0;
+}
diff --git a/libswresample/rematrix_template.c b/libswresample/rematrix_template.c
new file mode 100644
index 0000000000..add65e3155
--- /dev/null
+++ b/libswresample/rematrix_template.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2011-2012 Michael Niedermayer (michaelni@gmx.at)
+ *
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#if defined(TEMPLATE_REMATRIX_FLT)
+#    define R(x) x
+#    define SAMPLE float
+#    define COEFF float
+#    define INTER float
+#    define RENAME(x) x ## _float
+#elif defined(TEMPLATE_REMATRIX_DBL)
+#    define R(x) x
+#    define SAMPLE double
+#    define COEFF double
+#    define INTER double
+#    define RENAME(x) x ## _double
+#elif defined(TEMPLATE_REMATRIX_S16)
+#    define SAMPLE int16_t
+#    define COEFF int
+#    define INTER int
+#  ifdef TEMPLATE_CLIP
+#    define R(x) av_clip_int16(((x) + 16384)>>15)
+#    define RENAME(x) x ## _clip_s16
+#  else
+#    define R(x) (((x) + 16384)>>15)
+#    define RENAME(x) x ## _s16
+#  endif
+#elif defined(TEMPLATE_REMATRIX_S32)
+#    define R(x) (((x) + 16384)>>15)
+#    define SAMPLE int32_t
+#    define COEFF int
+#    define INTER int64_t
+#    define RENAME(x) x ## _s32
+#endif
+
+typedef void (RENAME(mix_any_func_type))(SAMPLE **out, const SAMPLE **in1, COEFF *coeffp, integer len);
+
+static void RENAME(sum2)(SAMPLE *out, const SAMPLE *in1, const SAMPLE *in2, COEFF *coeffp, integer index1, integer index2, integer len){
+    int i;
+    INTER coeff1 = coeffp[index1];
+    INTER coeff2 = coeffp[index2];
+
+    for(i=0; i<len; i++)
+        out[i] = R(coeff1*in1[i] + coeff2*in2[i]);
+}
+
+static void RENAME(copy)(SAMPLE *out, const SAMPLE *in, COEFF *coeffp, integer index, integer len){
+    int i;
+    INTER coeff = coeffp[index];
+    for(i=0; i<len; i++)
+        out[i] = R(coeff*in[i]);
+}
+
+static void RENAME(mix6to2)(SAMPLE **out, const SAMPLE **in, COEFF *coeffp, integer len){
+    int i;
+
+    for(i=0; i<len; i++) {
+        INTER t = in[2][i]*(INTER)coeffp[0*6+2] + in[3][i]*(INTER)coeffp[0*6+3];
+        out[0][i] = R(t + in[0][i]*(INTER)coeffp[0*6+0] + in[4][i]*(INTER)coeffp[0*6+4]);
+        out[1][i] = R(t + in[1][i]*(INTER)coeffp[1*6+1] + in[5][i]*(INTER)coeffp[1*6+5]);
+    }
+}
+
+static void RENAME(mix8to2)(SAMPLE **out, const SAMPLE **in, COEFF *coeffp, integer len){
+    int i;
+
+    for(i=0; i<len; i++) {
+        INTER t = in[2][i]*(INTER)coeffp[0*8+2] + in[3][i]*(INTER)coeffp[0*8+3];
+        out[0][i] = R(t + in[0][i]*(INTER)coeffp[0*8+0] + in[4][i]*(INTER)coeffp[0*8+4] + in[6][i]*(INTER)coeffp[0*8+6]);
+        out[1][i] = R(t + in[1][i]*(INTER)coeffp[1*8+1] + in[5][i]*(INTER)coeffp[1*8+5] + in[7][i]*(INTER)coeffp[1*8+7]);
+    }
+}
+
+static RENAME(mix_any_func_type) *RENAME(get_mix_any_func)(SwrContext *s){
+    if(   s->out_ch_layout == AV_CH_LAYOUT_STEREO && (s->in_ch_layout == AV_CH_LAYOUT_5POINT1 || s->in_ch_layout == AV_CH_LAYOUT_5POINT1_BACK)
+       && s->matrix[0][2] == s->matrix[1][2] && s->matrix[0][3] == s->matrix[1][3]
+       && !s->matrix[0][1] && !s->matrix[0][5] && !s->matrix[1][0] && !s->matrix[1][4]
+    )
+        return RENAME(mix6to2);
+
+    if(   s->out_ch_layout == AV_CH_LAYOUT_STEREO && s->in_ch_layout == AV_CH_LAYOUT_7POINT1
+       && s->matrix[0][2] == s->matrix[1][2] && s->matrix[0][3] == s->matrix[1][3]
+       && !s->matrix[0][1] && !s->matrix[0][5] && !s->matrix[1][0] && !s->matrix[1][4]
+       && !s->matrix[0][7] && !s->matrix[1][6]
+    )
+        return RENAME(mix8to2);
+
+    return NULL;
+}
+
+#undef R
+#undef SAMPLE
+#undef COEFF
+#undef INTER
+#undef RENAME
diff --git a/libswresample/resample.c b/libswresample/resample.c
new file mode 100644
index 0000000000..b834248167
--- /dev/null
+++ b/libswresample/resample.c
@@ -0,0 +1,638 @@
+/*
+ * audio resampling
+ * Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at>
+ * bessel function: Copyright (c) 2006 Xiaogang Zhang
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * audio resampling
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "libavutil/avassert.h"
+#include "resample.h"
+
+static inline double eval_poly(const double *coeff, int size, double x) {
+    double sum = coeff[size-1];
+    int i;
+    for (i = size-2; i >= 0; --i) {
+        sum *= x;
+        sum += coeff[i];
+    }
+    return sum;
+}
+
+/**
+ * 0th order modified bessel function of the first kind.
+ * Algorithm taken from the Boost project, source:
+ * https://searchcode.com/codesearch/view/14918379/
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0 (see notice below).
+ * Boost Software License - Version 1.0 - August 17th, 2003
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+ */
+
+static double bessel(double x) {
+// Modified Bessel function of the first kind of order zero
+// minimax rational approximations on intervals, see
+// Blair and Edwards, Chalk River Report AECL-4928, 1974
+    static const double p1[] = {
+        -2.2335582639474375249e+15,
+        -5.5050369673018427753e+14,
+        -3.2940087627407749166e+13,
+        -8.4925101247114157499e+11,
+        -1.1912746104985237192e+10,
+        -1.0313066708737980747e+08,
+        -5.9545626019847898221e+05,
+        -2.4125195876041896775e+03,
+        -7.0935347449210549190e+00,
+        -1.5453977791786851041e-02,
+        -2.5172644670688975051e-05,
+        -3.0517226450451067446e-08,
+        -2.6843448573468483278e-11,
+        -1.5982226675653184646e-14,
+        -5.2487866627945699800e-18,
+    };
+    static const double q1[] = {
+        -2.2335582639474375245e+15,
+         7.8858692566751002988e+12,
+        -1.2207067397808979846e+10,
+         1.0377081058062166144e+07,
+        -4.8527560179962773045e+03,
+         1.0,
+    };
+    static const double p2[] = {
+        -2.2210262233306573296e-04,
+         1.3067392038106924055e-02,
+        -4.4700805721174453923e-01,
+         5.5674518371240761397e+00,
+        -2.3517945679239481621e+01,
+         3.1611322818701131207e+01,
+        -9.6090021968656180000e+00,
+    };
+    static const double q2[] = {
+        -5.5194330231005480228e-04,
+         3.2547697594819615062e-02,
+        -1.1151759188741312645e+00,
+         1.3982595353892851542e+01,
+        -6.0228002066743340583e+01,
+         8.5539563258012929600e+01,
+        -3.1446690275135491500e+01,
+        1.0,
+    };
+    double y, r, factor;
+    if (x == 0)
+        return 1.0;
+    x = fabs(x);
+    if (x <= 15) {
+        y = x * x;
+        return eval_poly(p1, FF_ARRAY_ELEMS(p1), y) / eval_poly(q1, FF_ARRAY_ELEMS(q1), y);
+    }
+    else {
+        y = 1 / x - 1.0 / 15;
+        r = eval_poly(p2, FF_ARRAY_ELEMS(p2), y) / eval_poly(q2, FF_ARRAY_ELEMS(q2), y);
+        factor = exp(x) / sqrt(x);
+        return factor * r;
+    }
+}
+
+/**
+ * builds a polyphase filterbank.
+ * @param factor resampling factor
+ * @param scale wanted sum of coefficients for each filter
+ * @param filter_type  filter type
+ * @param kaiser_beta  kaiser window beta
+ * @return 0 on success, negative on error
+ */
+static int build_filter(ResampleContext *c, void *filter, double factor, int tap_count, int alloc, int phase_count, int scale,
+                        int filter_type, double kaiser_beta){
+    int ph, i;
+    int ph_nb = phase_count % 2 ? phase_count : phase_count / 2 + 1;
+    double x, y, w, t, s;
+    double *tab = av_malloc_array(tap_count+1,  sizeof(*tab));
+    double *sin_lut = av_malloc_array(ph_nb, sizeof(*sin_lut));
+    const int center= (tap_count-1)/2;
+
+    if (!tab || !sin_lut)
+        goto fail;
+
+    /* if upsampling, only need to interpolate, no filter */
+    if (factor > 1.0)
+        factor = 1.0;
+
+    if (factor == 1.0) {
+        for (ph = 0; ph < ph_nb; ph++)
+            sin_lut[ph] = sin(M_PI * ph / phase_count);
+    }
+    for(ph = 0; ph < ph_nb; ph++) {
+        double norm = 0;
+        s = sin_lut[ph];
+        for(i=0;i<=tap_count;i++) {
+            x = M_PI * ((double)(i - center) - (double)ph / phase_count) * factor;
+            if (x == 0) y = 1.0;
+            else if (factor == 1.0)
+                y = s / x;
+            else
+                y = sin(x) / x;
+            switch(filter_type){
+            case SWR_FILTER_TYPE_CUBIC:{
+                const float d= -0.5; //first order derivative = -0.5
+                x = fabs(((double)(i - center) - (double)ph / phase_count) * factor);
+                if(x<1.0) y= 1 - 3*x*x + 2*x*x*x + d*(            -x*x + x*x*x);
+                else      y=                       d*(-4 + 8*x - 5*x*x + x*x*x);
+                break;}
+            case SWR_FILTER_TYPE_BLACKMAN_NUTTALL:
+                w = 2.0*x / (factor*tap_count);
+                t = -cos(w);
+                y *= 0.3635819 - 0.4891775 * t + 0.1365995 * (2*t*t-1) - 0.0106411 * (4*t*t*t - 3*t);
+                break;
+            case SWR_FILTER_TYPE_KAISER:
+                w = 2.0*x / (factor*tap_count*M_PI);
+                y *= bessel(kaiser_beta*sqrt(FFMAX(1-w*w, 0)));
+                break;
+            default:
+                av_assert0(0);
+            }
+
+            tab[i] = y;
+            s = -s;
+            if (i < tap_count)
+                norm += y;
+        }
+
+        /* normalize so that an uniform color remains the same */
+        switch(c->format){
+        case AV_SAMPLE_FMT_S16P:
+            for(i=0;i<tap_count;i++)
+                ((int16_t*)filter)[ph * alloc + i] = av_clip_int16(lrintf(tab[i] * scale / norm));
+            if (phase_count % 2) break;
+            if (tap_count % 2 == 0 || tap_count == 1) {
+                for (i = 0; i < tap_count; i++)
+                    ((int16_t*)filter)[(phase_count-ph) * alloc + tap_count-1-i] = ((int16_t*)filter)[ph * alloc + i];
+            }
+            else {
+                for (i = 1; i <= tap_count; i++)
+                    ((int16_t*)filter)[(phase_count-ph) * alloc + tap_count-i] =
+                        av_clip_int16(lrintf(tab[i] * scale / (norm - tab[0] + tab[tap_count])));
+            }
+            break;
+        case AV_SAMPLE_FMT_S32P:
+            for(i=0;i<tap_count;i++)
+                ((int32_t*)filter)[ph * alloc + i] = av_clipl_int32(llrint(tab[i] * scale / norm));
+            if (phase_count % 2) break;
+            if (tap_count % 2 == 0 || tap_count == 1) {
+                for (i = 0; i < tap_count; i++)
+                    ((int32_t*)filter)[(phase_count-ph) * alloc + tap_count-1-i] = ((int32_t*)filter)[ph * alloc + i];
+            }
+            else {
+                for (i = 1; i <= tap_count; i++)
+                    ((int32_t*)filter)[(phase_count-ph) * alloc + tap_count-i] =
+                        av_clipl_int32(llrint(tab[i] * scale / (norm - tab[0] + tab[tap_count])));
+            }
+            break;
+        case AV_SAMPLE_FMT_FLTP:
+            for(i=0;i<tap_count;i++)
+                ((float*)filter)[ph * alloc + i] = tab[i] * scale / norm;
+            if (phase_count % 2) break;
+            if (tap_count % 2 == 0 || tap_count == 1) {
+                for (i = 0; i < tap_count; i++)
+                    ((float*)filter)[(phase_count-ph) * alloc + tap_count-1-i] = ((float*)filter)[ph * alloc + i];
+            }
+            else {
+                for (i = 1; i <= tap_count; i++)
+                    ((float*)filter)[(phase_count-ph) * alloc + tap_count-i] = tab[i] * scale / (norm - tab[0] + tab[tap_count]);
+            }
+            break;
+        case AV_SAMPLE_FMT_DBLP:
+            for(i=0;i<tap_count;i++)
+                ((double*)filter)[ph * alloc + i] = tab[i] * scale / norm;
+            if (phase_count % 2) break;
+            if (tap_count % 2 == 0 || tap_count == 1) {
+                for (i = 0; i < tap_count; i++)
+                    ((double*)filter)[(phase_count-ph) * alloc + tap_count-1-i] = ((double*)filter)[ph * alloc + i];
+            }
+            else {
+                for (i = 1; i <= tap_count; i++)
+                    ((double*)filter)[(phase_count-ph) * alloc + tap_count-i] = tab[i] * scale / (norm - tab[0] + tab[tap_count]);
+            }
+            break;
+        }
+    }
+#if 0
+    {
+#define LEN 1024
+        int j,k;
+        double sine[LEN + tap_count];
+        double filtered[LEN];
+        double maxff=-2, minff=2, maxsf=-2, minsf=2;
+        for(i=0; i<LEN; i++){
+            double ss=0, sf=0, ff=0;
+            for(j=0; j<LEN+tap_count; j++)
+                sine[j]= cos(i*j*M_PI/LEN);
+            for(j=0; j<LEN; j++){
+                double sum=0;
+                ph=0;
+                for(k=0; k<tap_count; k++)
+                    sum += filter[ph * tap_count + k] * sine[k+j];
+                filtered[j]= sum / (1<<FILTER_SHIFT);
+                ss+= sine[j + center] * sine[j + center];
+                ff+= filtered[j] * filtered[j];
+                sf+= sine[j + center] * filtered[j];
+            }
+            ss= sqrt(2*ss/LEN);
+            ff= sqrt(2*ff/LEN);
+            sf= 2*sf/LEN;
+            maxff= FFMAX(maxff, ff);
+            minff= FFMIN(minff, ff);
+            maxsf= FFMAX(maxsf, sf);
+            minsf= FFMIN(minsf, sf);
+            if(i%11==0){
+                av_log(NULL, AV_LOG_ERROR, "i:%4d ss:%f ff:%13.6e-%13.6e sf:%13.6e-%13.6e\n", i, ss, maxff, minff, maxsf, minsf);
+                minff=minsf= 2;
+                maxff=maxsf= -2;
+            }
+        }
+    }
+#endif
+
+fail:
+    av_free(tab);
+    av_free(sin_lut);
+    return 0;
+}
+
+static ResampleContext *resample_init(ResampleContext *c, int out_rate, int in_rate, int filter_size, int phase_shift, int linear,
+                                    double cutoff0, enum AVSampleFormat format, enum SwrFilterType filter_type, double kaiser_beta,
+                                    double precision, int cheby, int exact_rational)
+{
+    double cutoff = cutoff0? cutoff0 : 0.97;
+    double factor= FFMIN(out_rate * cutoff / in_rate, 1.0);
+    int phase_count= 1<<phase_shift;
+    int phase_count_compensation = phase_count;
+
+    if (exact_rational) {
+        int phase_count_exact, phase_count_exact_den;
+
+        av_reduce(&phase_count_exact, &phase_count_exact_den, out_rate, in_rate, INT_MAX);
+        if (phase_count_exact <= phase_count) {
+            phase_count_compensation = phase_count_exact * (phase_count / phase_count_exact);
+            phase_count = phase_count_exact;
+        }
+    }
+
+    if (!c || c->phase_count != phase_count || c->linear!=linear || c->factor != factor
+           || c->filter_length != FFMAX((int)ceil(filter_size/factor), 1) || c->format != format
+           || c->filter_type != filter_type || c->kaiser_beta != kaiser_beta) {
+        c = av_mallocz(sizeof(*c));
+        if (!c)
+            return NULL;
+
+        c->format= format;
+
+        c->felem_size= av_get_bytes_per_sample(c->format);
+
+        switch(c->format){
+        case AV_SAMPLE_FMT_S16P:
+            c->filter_shift = 15;
+            break;
+        case AV_SAMPLE_FMT_S32P:
+            c->filter_shift = 30;
+            break;
+        case AV_SAMPLE_FMT_FLTP:
+        case AV_SAMPLE_FMT_DBLP:
+            c->filter_shift = 0;
+            break;
+        default:
+            av_log(NULL, AV_LOG_ERROR, "Unsupported sample format\n");
+            av_assert0(0);
+        }
+
+        if (filter_size/factor > INT32_MAX/256) {
+            av_log(NULL, AV_LOG_ERROR, "Filter length too large\n");
+            goto error;
+        }
+
+        c->phase_count   = phase_count;
+        c->linear        = linear;
+        c->factor        = factor;
+        c->filter_length = FFMAX((int)ceil(filter_size/factor), 1);
+        c->filter_alloc  = FFALIGN(c->filter_length, 8);
+        c->filter_bank   = av_calloc(c->filter_alloc, (phase_count+1)*c->felem_size);
+        c->filter_type   = filter_type;
+        c->kaiser_beta   = kaiser_beta;
+        c->phase_count_compensation = phase_count_compensation;
+        if (!c->filter_bank)
+            goto error;
+        if (build_filter(c, (void*)c->filter_bank, factor, c->filter_length, c->filter_alloc, phase_count, 1<<c->filter_shift, filter_type, kaiser_beta))
+            goto error;
+        memcpy(c->filter_bank + (c->filter_alloc*phase_count+1)*c->felem_size, c->filter_bank, (c->filter_alloc-1)*c->felem_size);
+        memcpy(c->filter_bank + (c->filter_alloc*phase_count  )*c->felem_size, c->filter_bank + (c->filter_alloc - 1)*c->felem_size, c->felem_size);
+    }
+
+    c->compensation_distance= 0;
+    if(!av_reduce(&c->src_incr, &c->dst_incr, out_rate, in_rate * (int64_t)phase_count, INT32_MAX/2))
+        goto error;
+    while (c->dst_incr < (1<<20) && c->src_incr < (1<<20)) {
+        c->dst_incr *= 2;
+        c->src_incr *= 2;
+    }
+    c->ideal_dst_incr = c->dst_incr;
+    c->dst_incr_div   = c->dst_incr / c->src_incr;
+    c->dst_incr_mod   = c->dst_incr % c->src_incr;
+
+    c->index= -phase_count*((c->filter_length-1)/2);
+    c->frac= 0;
+
+    swri_resample_dsp_init(c);
+
+    return c;
+error:
+    av_freep(&c->filter_bank);
+    av_free(c);
+    return NULL;
+}
+
+static void resample_free(ResampleContext **c){
+    if(!*c)
+        return;
+    av_freep(&(*c)->filter_bank);
+    av_freep(c);
+}
+
+static int rebuild_filter_bank_with_compensation(ResampleContext *c)
+{
+    uint8_t *new_filter_bank;
+    int new_src_incr, new_dst_incr;
+    int phase_count = c->phase_count_compensation;
+    int ret;
+
+    if (phase_count == c->phase_count)
+        return 0;
+
+    av_assert0(!c->frac && !c->dst_incr_mod && !c->compensation_distance);
+
+    new_filter_bank = av_calloc(c->filter_alloc, (phase_count + 1) * c->felem_size);
+    if (!new_filter_bank)
+        return AVERROR(ENOMEM);
+
+    ret = build_filter(c, new_filter_bank, c->factor, c->filter_length, c->filter_alloc,
+                       phase_count, 1 << c->filter_shift, c->filter_type, c->kaiser_beta);
+    if (ret < 0) {
+        av_freep(&new_filter_bank);
+        return ret;
+    }
+    memcpy(new_filter_bank + (c->filter_alloc*phase_count+1)*c->felem_size, new_filter_bank, (c->filter_alloc-1)*c->felem_size);
+    memcpy(new_filter_bank + (c->filter_alloc*phase_count  )*c->felem_size, new_filter_bank + (c->filter_alloc - 1)*c->felem_size, c->felem_size);
+
+    if (!av_reduce(&new_src_incr, &new_dst_incr, c->src_incr,
+                   c->dst_incr * (int64_t)(phase_count/c->phase_count), INT32_MAX/2))
+    {
+        av_freep(&new_filter_bank);
+        return AVERROR(EINVAL);
+    }
+
+    c->src_incr = new_src_incr;
+    c->dst_incr = new_dst_incr;
+    while (c->dst_incr < (1<<20) && c->src_incr < (1<<20)) {
+        c->dst_incr *= 2;
+        c->src_incr *= 2;
+    }
+    c->ideal_dst_incr = c->dst_incr;
+    c->dst_incr_div   = c->dst_incr / c->src_incr;
+    c->dst_incr_mod   = c->dst_incr % c->src_incr;
+    c->index         *= phase_count / c->phase_count;
+    c->phase_count    = phase_count;
+    av_freep(&c->filter_bank);
+    c->filter_bank = new_filter_bank;
+    return 0;
+}
+
+static int set_compensation(ResampleContext *c, int sample_delta, int compensation_distance){
+    int ret;
+
+    if (compensation_distance) {
+        ret = rebuild_filter_bank_with_compensation(c);
+        if (ret < 0)
+            return ret;
+    }
+
+    c->compensation_distance= compensation_distance;
+    if (compensation_distance)
+        c->dst_incr = c->ideal_dst_incr - c->ideal_dst_incr * (int64_t)sample_delta / compensation_distance;
+    else
+        c->dst_incr = c->ideal_dst_incr;
+
+    c->dst_incr_div   = c->dst_incr / c->src_incr;
+    c->dst_incr_mod   = c->dst_incr % c->src_incr;
+
+    return 0;
+}
+
+static int swri_resample(ResampleContext *c,
+                         uint8_t *dst, const uint8_t *src, int *consumed,
+                         int src_size, int dst_size, int update_ctx)
+{
+    if (c->filter_length == 1 && c->phase_count == 1) {
+        int index= c->index;
+        int frac= c->frac;
+        int64_t index2= (1LL<<32)*c->frac/c->src_incr + (1LL<<32)*index;
+        int64_t incr= (1LL<<32) * c->dst_incr / c->src_incr;
+        int new_size = (src_size * (int64_t)c->src_incr - frac + c->dst_incr - 1) / c->dst_incr;
+
+        dst_size= FFMIN(dst_size, new_size);
+        c->dsp.resample_one(dst, src, dst_size, index2, incr);
+
+        index += dst_size * c->dst_incr_div;
+        index += (frac + dst_size * (int64_t)c->dst_incr_mod) / c->src_incr;
+        av_assert2(index >= 0);
+        *consumed= index;
+        if (update_ctx) {
+            c->frac   = (frac + dst_size * (int64_t)c->dst_incr_mod) % c->src_incr;
+            c->index = 0;
+        }
+    } else {
+        int64_t end_index = (1LL + src_size - c->filter_length) * c->phase_count;
+        int64_t delta_frac = (end_index - c->index) * c->src_incr - c->frac;
+        int delta_n = (delta_frac + c->dst_incr - 1) / c->dst_incr;
+
+        dst_size = FFMIN(dst_size, delta_n);
+        if (dst_size > 0) {
+            *consumed = c->dsp.resample(c, dst, src, dst_size, update_ctx);
+        } else {
+            *consumed = 0;
+        }
+    }
+
+    return dst_size;
+}
+
+static int multiple_resample(ResampleContext *c, AudioData *dst, int dst_size, AudioData *src, int src_size, int *consumed){
+    int i, ret= -1;
+    int av_unused mm_flags = av_get_cpu_flags();
+    int need_emms = c->format == AV_SAMPLE_FMT_S16P && ARCH_X86_32 &&
+                    (mm_flags & (AV_CPU_FLAG_MMX2 | AV_CPU_FLAG_SSE2)) == AV_CPU_FLAG_MMX2;
+    int64_t max_src_size = (INT64_MAX/2 / c->phase_count) / c->src_incr;
+
+    if (c->compensation_distance)
+        dst_size = FFMIN(dst_size, c->compensation_distance);
+    src_size = FFMIN(src_size, max_src_size);
+
+    for(i=0; i<dst->ch_count; i++){
+        ret= swri_resample(c, dst->ch[i], src->ch[i],
+                           consumed, src_size, dst_size, i+1==dst->ch_count);
+    }
+    if(need_emms)
+        emms_c();
+
+    if (c->compensation_distance) {
+        c->compensation_distance -= ret;
+        if (!c->compensation_distance) {
+            c->dst_incr     = c->ideal_dst_incr;
+            c->dst_incr_div = c->dst_incr / c->src_incr;
+            c->dst_incr_mod = c->dst_incr % c->src_incr;
+        }
+    }
+
+    return ret;
+}
+
+static int64_t get_delay(struct SwrContext *s, int64_t base){
+    ResampleContext *c = s->resample;
+    int64_t num = s->in_buffer_count - (c->filter_length-1)/2;
+    num *= c->phase_count;
+    num -= c->index;
+    num *= c->src_incr;
+    num -= c->frac;
+    return av_rescale(num, base, s->in_sample_rate*(int64_t)c->src_incr * c->phase_count);
+}
+
+static int64_t get_out_samples(struct SwrContext *s, int in_samples) {
+    ResampleContext *c = s->resample;
+    // The + 2 are added to allow implementations to be slightly inaccurate, they should not be needed currently.
+    // They also make it easier to proof that changes and optimizations do not
+    // break the upper bound.
+    int64_t num = s->in_buffer_count + 2LL + in_samples;
+    num *= c->phase_count;
+    num -= c->index;
+    num = av_rescale_rnd(num, s->out_sample_rate, ((int64_t)s->in_sample_rate) * c->phase_count, AV_ROUND_UP) + 2;
+
+    if (c->compensation_distance) {
+        if (num > INT_MAX)
+            return AVERROR(EINVAL);
+
+        num = FFMAX(num, (num * c->ideal_dst_incr - 1) / c->dst_incr + 1);
+    }
+    return num;
+}
+
+static int resample_flush(struct SwrContext *s) {
+    AudioData *a= &s->in_buffer;
+    int i, j, ret;
+    if((ret = swri_realloc_audio(a, s->in_buffer_index + 2*s->in_buffer_count)) < 0)
+        return ret;
+    av_assert0(a->planar);
+    for(i=0; i<a->ch_count; i++){
+        for(j=0; j<s->in_buffer_count; j++){
+            memcpy(a->ch[i] + (s->in_buffer_index+s->in_buffer_count+j  )*a->bps,
+                a->ch[i] + (s->in_buffer_index+s->in_buffer_count-j-1)*a->bps, a->bps);
+        }
+    }
+    s->in_buffer_count += (s->in_buffer_count+1)/2;
+    return 0;
+}
+
+// in fact the whole handle multiple ridiculously small buffers might need more thinking...
+static int invert_initial_buffer(ResampleContext *c, AudioData *dst, const AudioData *src,
+                                 int in_count, int *out_idx, int *out_sz)
+{
+    int n, ch, num = FFMIN(in_count + *out_sz, c->filter_length + 1), res;
+
+    if (c->index >= 0)
+        return 0;
+
+    if ((res = swri_realloc_audio(dst, c->filter_length * 2 + 1)) < 0)
+        return res;
+
+    // copy
+    for (n = *out_sz; n < num; n++) {
+        for (ch = 0; ch < src->ch_count; ch++) {
+            memcpy(dst->ch[ch] + ((c->filter_length + n) * c->felem_size),
+                   src->ch[ch] + ((n - *out_sz) * c->felem_size), c->felem_size);
+        }
+    }
+
+    // if not enough data is in, return and wait for more
+    if (num < c->filter_length + 1) {
+        *out_sz = num;
+        *out_idx = c->filter_length;
+        return INT_MAX;
+    }
+
+    // else invert
+    for (n = 1; n <= c->filter_length; n++) {
+        for (ch = 0; ch < src->ch_count; ch++) {
+            memcpy(dst->ch[ch] + ((c->filter_length - n) * c->felem_size),
+                   dst->ch[ch] + ((c->filter_length + n) * c->felem_size),
+                   c->felem_size);
+        }
+    }
+
+    res = num - *out_sz;
+    *out_idx = c->filter_length;
+    while (c->index < 0) {
+        --*out_idx;
+        c->index += c->phase_count;
+    }
+    *out_sz = FFMAX(*out_sz + c->filter_length,
+                    1 + c->filter_length * 2) - *out_idx;
+
+    return FFMAX(res, 0);
+}
+
+struct Resampler const swri_resampler={
+  resample_init,
+  resample_free,
+  multiple_resample,
+  resample_flush,
+  set_compensation,
+  get_delay,
+  invert_initial_buffer,
+  get_out_samples,
+};
diff --git a/libswresample/resample.h b/libswresample/resample.h
new file mode 100644
index 0000000000..7fe9b97f7f
--- /dev/null
+++ b/libswresample/resample.h
@@ -0,0 +1,65 @@
+/*
+ * audio resampling
+ * Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef SWRESAMPLE_RESAMPLE_H
+#define SWRESAMPLE_RESAMPLE_H
+
+#include "libavutil/log.h"
+#include "libavutil/samplefmt.h"
+
+#include "swresample_internal.h"
+
+typedef struct ResampleContext {
+    const AVClass *av_class;
+    uint8_t *filter_bank;
+    int filter_length;
+    int filter_alloc;
+    int ideal_dst_incr;
+    int dst_incr;
+    int dst_incr_div;
+    int dst_incr_mod;
+    int index;
+    int frac;
+    int src_incr;
+    int compensation_distance;
+    int phase_count;
+    int linear;
+    enum SwrFilterType filter_type;
+    double kaiser_beta;
+    double factor;
+    enum AVSampleFormat format;
+    int felem_size;
+    int filter_shift;
+    int phase_count_compensation;      /* desired phase_count when compensation is enabled */
+
+    struct {
+        void (*resample_one)(void *dst, const void *src,
+                             int n, int64_t index, int64_t incr);
+        int (*resample)(struct ResampleContext *c, void *dst,
+                        const void *src, int n, int update_ctx);
+    } dsp;
+} ResampleContext;
+
+void swri_resample_dsp_init(ResampleContext *c);
+void swri_resample_dsp_x86_init(ResampleContext *c);
+void swri_resample_dsp_arm_init(ResampleContext *c);
+
+#endif /* SWRESAMPLE_RESAMPLE_H */
diff --git a/libswresample/resample_dsp.c b/libswresample/resample_dsp.c
new file mode 100644
index 0000000000..41369f3f8a
--- /dev/null
+++ b/libswresample/resample_dsp.c
@@ -0,0 +1,69 @@
+/*
+ * audio resampling
+ * Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * audio resampling
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "resample.h"
+
+#define TEMPLATE_RESAMPLE_S16
+#include "resample_template.c"
+#undef TEMPLATE_RESAMPLE_S16
+
+#define TEMPLATE_RESAMPLE_S32
+#include "resample_template.c"
+#undef TEMPLATE_RESAMPLE_S32
+
+#define TEMPLATE_RESAMPLE_FLT
+#include "resample_template.c"
+#undef TEMPLATE_RESAMPLE_FLT
+
+#define TEMPLATE_RESAMPLE_DBL
+#include "resample_template.c"
+#undef TEMPLATE_RESAMPLE_DBL
+
+void swri_resample_dsp_init(ResampleContext *c)
+{
+    switch(c->format){
+    case AV_SAMPLE_FMT_S16P:
+        c->dsp.resample_one = resample_one_int16;
+        c->dsp.resample     = c->linear ? resample_linear_int16 : resample_common_int16;
+        break;
+    case AV_SAMPLE_FMT_S32P:
+        c->dsp.resample_one = resample_one_int32;
+        c->dsp.resample     = c->linear ? resample_linear_int32 : resample_common_int32;
+        break;
+    case AV_SAMPLE_FMT_FLTP:
+        c->dsp.resample_one = resample_one_float;
+        c->dsp.resample     = c->linear ? resample_linear_float : resample_common_float;
+        break;
+    case AV_SAMPLE_FMT_DBLP:
+        c->dsp.resample_one = resample_one_double;
+        c->dsp.resample     = c->linear ? resample_linear_double : resample_common_double;
+        break;
+    }
+
+    if (ARCH_X86) swri_resample_dsp_x86_init(c);
+    else if (ARCH_ARM) swri_resample_dsp_arm_init(c);
+}
diff --git a/libswresample/resample_template.c b/libswresample/resample_template.c
new file mode 100644
index 0000000000..1636f4e95d
--- /dev/null
+++ b/libswresample/resample_template.c
@@ -0,0 +1,201 @@
+/*
+ * audio resampling
+ * Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * audio resampling
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#if defined(TEMPLATE_RESAMPLE_DBL)
+
+#    define RENAME(N) N ## _double
+#    define FILTER_SHIFT 0
+#    define DELEM  double
+#    define FELEM  double
+#    define FELEM2 double
+#    define OUT(d, v) d = v
+
+#elif    defined(TEMPLATE_RESAMPLE_FLT)
+
+#    define RENAME(N) N ## _float
+#    define FILTER_SHIFT 0
+#    define DELEM  float
+#    define FELEM  float
+#    define FELEM2 float
+#    define OUT(d, v) d = v
+
+#elif defined(TEMPLATE_RESAMPLE_S32)
+
+#    define RENAME(N) N ## _int32
+#    define FILTER_SHIFT 30
+#    define DELEM  int32_t
+#    define FELEM  int32_t
+#    define FELEM2 int64_t
+#    define FELEM_MAX INT32_MAX
+#    define FELEM_MIN INT32_MIN
+#    define OUT(d, v) (v) = ((v) + (1<<(FILTER_SHIFT-1)))>>FILTER_SHIFT;\
+                      (d) = av_clipl_int32(v)
+
+#elif    defined(TEMPLATE_RESAMPLE_S16)
+
+#    define RENAME(N) N ## _int16
+#    define FILTER_SHIFT 15
+#    define DELEM  int16_t
+#    define FELEM  int16_t
+#    define FELEM2 int32_t
+#    define FELEML int64_t
+#    define FELEM_MAX INT16_MAX
+#    define FELEM_MIN INT16_MIN
+#    define OUT(d, v) (v) = ((v) + (1<<(FILTER_SHIFT-1)))>>FILTER_SHIFT;\
+                      (d) = av_clip_int16(v)
+
+#endif
+
+static void RENAME(resample_one)(void *dest, const void *source,
+                                 int dst_size, int64_t index2, int64_t incr)
+{
+    DELEM *dst = dest;
+    const DELEM *src = source;
+    int dst_index;
+
+    for (dst_index = 0; dst_index < dst_size; dst_index++) {
+        dst[dst_index] = src[index2 >> 32];
+        index2 += incr;
+    }
+}
+
+static int RENAME(resample_common)(ResampleContext *c,
+                                   void *dest, const void *source,
+                                   int n, int update_ctx)
+{
+    DELEM *dst = dest;
+    const DELEM *src = source;
+    int dst_index;
+    int index= c->index;
+    int frac= c->frac;
+    int sample_index = 0;
+
+    while (index >= c->phase_count) {
+        sample_index++;
+        index -= c->phase_count;
+    }
+
+    for (dst_index = 0; dst_index < n; dst_index++) {
+        FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
+
+        FELEM2 val=0;
+        int i;
+        for (i = 0; i < c->filter_length; i++) {
+            val += src[sample_index + i] * (FELEM2)filter[i];
+        }
+        OUT(dst[dst_index], val);
+
+        frac  += c->dst_incr_mod;
+        index += c->dst_incr_div;
+        if (frac >= c->src_incr) {
+            frac -= c->src_incr;
+            index++;
+        }
+
+        while (index >= c->phase_count) {
+            sample_index++;
+            index -= c->phase_count;
+        }
+    }
+
+    if(update_ctx){
+        c->frac= frac;
+        c->index= index;
+    }
+
+    return sample_index;
+}
+
+static int RENAME(resample_linear)(ResampleContext *c,
+                                   void *dest, const void *source,
+                                   int n, int update_ctx)
+{
+    DELEM *dst = dest;
+    const DELEM *src = source;
+    int dst_index;
+    int index= c->index;
+    int frac= c->frac;
+    int sample_index = 0;
+#if FILTER_SHIFT == 0
+    double inv_src_incr = 1.0 / c->src_incr;
+#endif
+
+    while (index >= c->phase_count) {
+        sample_index++;
+        index -= c->phase_count;
+    }
+
+    for (dst_index = 0; dst_index < n; dst_index++) {
+        FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index;
+        FELEM2 val=0, v2 = 0;
+
+        int i;
+        for (i = 0; i < c->filter_length; i++) {
+            val += src[sample_index + i] * (FELEM2)filter[i];
+            v2  += src[sample_index + i] * (FELEM2)filter[i + c->filter_alloc];
+        }
+#ifdef FELEML
+        val += (v2 - val) * (FELEML) frac / c->src_incr;
+#else
+#    if FILTER_SHIFT == 0
+        val += (v2 - val) * inv_src_incr * frac;
+#    else
+        val += (v2 - val) / c->src_incr * frac;
+#    endif
+#endif
+        OUT(dst[dst_index], val);
+
+        frac += c->dst_incr_mod;
+        index += c->dst_incr_div;
+        if (frac >= c->src_incr) {
+            frac -= c->src_incr;
+            index++;
+        }
+
+        while (index >= c->phase_count) {
+            sample_index++;
+            index -= c->phase_count;
+        }
+    }
+
+    if(update_ctx){
+        c->frac= frac;
+        c->index= index;
+    }
+
+    return sample_index;
+}
+
+#undef RENAME
+#undef FILTER_SHIFT
+#undef DELEM
+#undef FELEM
+#undef FELEM2
+#undef FELEML
+#undef FELEM_MAX
+#undef FELEM_MIN
+#undef OUT
diff --git a/libswresample/soxr_resample.c b/libswresample/soxr_resample.c
new file mode 100644
index 0000000000..b9c6735028
--- /dev/null
+++ b/libswresample/soxr_resample.c
@@ -0,0 +1,130 @@
+/*
+ * audio resampling with soxr
+ * Copyright (c) 2012 Rob Sykes <robs@users.sourceforge.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * audio resampling with soxr
+ */
+
+#include "libavutil/log.h"
+#include "swresample_internal.h"
+
+#include <soxr.h>
+
+static struct ResampleContext *create(struct ResampleContext *c, int out_rate, int in_rate, int filter_size, int phase_shift, int linear,
+        double cutoff, enum AVSampleFormat format, enum SwrFilterType filter_type, double kaiser_beta, double precision, int cheby, int exact_rational){
+    soxr_error_t error;
+
+    soxr_datatype_t type =
+        format == AV_SAMPLE_FMT_S16P? SOXR_INT16_S :
+        format == AV_SAMPLE_FMT_S16 ? SOXR_INT16_I :
+        format == AV_SAMPLE_FMT_S32P? SOXR_INT32_S :
+        format == AV_SAMPLE_FMT_S32 ? SOXR_INT32_I :
+        format == AV_SAMPLE_FMT_FLTP? SOXR_FLOAT32_S :
+        format == AV_SAMPLE_FMT_FLT ? SOXR_FLOAT32_I :
+        format == AV_SAMPLE_FMT_DBLP? SOXR_FLOAT64_S :
+        format == AV_SAMPLE_FMT_DBL ? SOXR_FLOAT64_I : (soxr_datatype_t)-1;
+
+    soxr_io_spec_t io_spec = soxr_io_spec(type, type);
+
+    soxr_quality_spec_t q_spec = soxr_quality_spec((int)((precision-2)/4), (SOXR_HI_PREC_CLOCK|SOXR_ROLLOFF_NONE)*!!cheby);
+    q_spec.precision = linear? 0 : precision;
+#if !defined SOXR_VERSION /* Deprecated @ March 2013: */
+    q_spec.bw_pc = cutoff? FFMAX(FFMIN(cutoff,.995),.8)*100 : q_spec.bw_pc;
+#else
+    q_spec.passband_end = cutoff? FFMAX(FFMIN(cutoff,.995),.8) : q_spec.passband_end;
+#endif
+
+    soxr_delete((soxr_t)c);
+    c = (struct ResampleContext *)
+        soxr_create(in_rate, out_rate, 0, &error, &io_spec, &q_spec, 0);
+    if (!c)
+        av_log(NULL, AV_LOG_ERROR, "soxr_create: %s\n", error);
+    return c;
+}
+
+static void destroy(struct ResampleContext * *c){
+    soxr_delete((soxr_t)*c);
+    *c = NULL;
+}
+
+static int flush(struct SwrContext *s){
+    s->delayed_samples_fixup = soxr_delay((soxr_t)s->resample);
+
+    soxr_process((soxr_t)s->resample, NULL, 0, NULL, NULL, 0, NULL);
+
+    {
+        float f;
+        size_t idone, odone;
+        soxr_process((soxr_t)s->resample, &f, 0, &idone, &f, 0, &odone);
+        s->delayed_samples_fixup -= soxr_delay((soxr_t)s->resample);
+    }
+
+    return 0;
+}
+
+static int process(
+        struct ResampleContext * c, AudioData *dst, int dst_size,
+        AudioData *src, int src_size, int *consumed){
+    size_t idone, odone;
+    soxr_error_t error = soxr_set_error((soxr_t)c, soxr_set_num_channels((soxr_t)c, src->ch_count));
+    if (!error)
+        error = soxr_process((soxr_t)c, src->ch, (size_t)src_size,
+                             &idone, dst->ch, (size_t)dst_size, &odone);
+    else
+        idone = 0;
+
+    *consumed = (int)idone;
+    return error? -1 : odone;
+}
+
+static int64_t get_delay(struct SwrContext *s, int64_t base){
+    double delayed_samples = soxr_delay((soxr_t)s->resample);
+    double delay_s;
+
+    if (s->flushed)
+        delayed_samples += s->delayed_samples_fixup;
+
+    delay_s = delayed_samples / s->out_sample_rate;
+
+    return (int64_t)(delay_s * base + .5);
+}
+
+static int invert_initial_buffer(struct ResampleContext *c, AudioData *dst, const AudioData *src,
+                                 int in_count, int *out_idx, int *out_sz){
+    return 0;
+}
+
+static int64_t get_out_samples(struct SwrContext *s, int in_samples){
+    double out_samples = (double)s->out_sample_rate / s->in_sample_rate * in_samples;
+    double delayed_samples = soxr_delay((soxr_t)s->resample);
+
+    if (s->flushed)
+        delayed_samples += s->delayed_samples_fixup;
+
+    return (int64_t)(out_samples + delayed_samples + 1 + .5);
+}
+
+struct Resampler const swri_soxr_resampler={
+    create, destroy, process, flush, NULL /* set_compensation */, get_delay,
+    invert_initial_buffer, get_out_samples
+};
+
diff --git a/libswresample/swresample.c b/libswresample/swresample.c
new file mode 100644
index 0000000000..351623b9d6
--- /dev/null
+++ b/libswresample/swresample.c
@@ -0,0 +1,930 @@
+/*
+ * Copyright (C) 2011-2013 Michael Niedermayer (michaelni@gmx.at)
+ *
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/opt.h"
+#include "swresample_internal.h"
+#include "audioconvert.h"
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/internal.h"
+
+#include <float.h>
+
+#define ALIGN 32
+
+#include "libavutil/ffversion.h"
+const char swr_ffversion[] = "FFmpeg version " FFMPEG_VERSION;
+
+unsigned swresample_version(void)
+{
+    av_assert0(LIBSWRESAMPLE_VERSION_MICRO >= 100);
+    return LIBSWRESAMPLE_VERSION_INT;
+}
+
+const char *swresample_configuration(void)
+{
+    return FFMPEG_CONFIGURATION;
+}
+
+const char *swresample_license(void)
+{
+#define LICENSE_PREFIX "libswresample license: "
+    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
+}
+
+int swr_set_channel_mapping(struct SwrContext *s, const int *channel_map){
+    if(!s || s->in_convert) // s needs to be allocated but not initialized
+        return AVERROR(EINVAL);
+    s->channel_map = channel_map;
+    return 0;
+}
+
+struct SwrContext *swr_alloc_set_opts(struct SwrContext *s,
+                                      int64_t out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate,
+                                      int64_t  in_ch_layout, enum AVSampleFormat  in_sample_fmt, int  in_sample_rate,
+                                      int log_offset, void *log_ctx){
+    if(!s) s= swr_alloc();
+    if(!s) return NULL;
+
+    s->log_level_offset= log_offset;
+    s->log_ctx= log_ctx;
+
+    if (av_opt_set_int(s, "ocl", out_ch_layout,   0) < 0)
+        goto fail;
+
+    if (av_opt_set_int(s, "osf", out_sample_fmt,  0) < 0)
+        goto fail;
+
+    if (av_opt_set_int(s, "osr", out_sample_rate, 0) < 0)
+        goto fail;
+
+    if (av_opt_set_int(s, "icl", in_ch_layout,    0) < 0)
+        goto fail;
+
+    if (av_opt_set_int(s, "isf", in_sample_fmt,   0) < 0)
+        goto fail;
+
+    if (av_opt_set_int(s, "isr", in_sample_rate,  0) < 0)
+        goto fail;
+
+    if (av_opt_set_int(s, "tsf", AV_SAMPLE_FMT_NONE,   0) < 0)
+        goto fail;
+
+    if (av_opt_set_int(s, "ich", av_get_channel_layout_nb_channels(s-> user_in_ch_layout), 0) < 0)
+        goto fail;
+
+    if (av_opt_set_int(s, "och", av_get_channel_layout_nb_channels(s->user_out_ch_layout), 0) < 0)
+        goto fail;
+
+    av_opt_set_int(s, "uch", 0, 0);
+    return s;
+fail:
+    av_log(s, AV_LOG_ERROR, "Failed to set option\n");
+    swr_free(&s);
+    return NULL;
+}
+
+static void set_audiodata_fmt(AudioData *a, enum AVSampleFormat fmt){
+    a->fmt   = fmt;
+    a->bps   = av_get_bytes_per_sample(fmt);
+    a->planar= av_sample_fmt_is_planar(fmt);
+    if (a->ch_count == 1)
+        a->planar = 1;
+}
+
+static void free_temp(AudioData *a){
+    av_free(a->data);
+    memset(a, 0, sizeof(*a));
+}
+
+static void clear_context(SwrContext *s){
+    s->in_buffer_index= 0;
+    s->in_buffer_count= 0;
+    s->resample_in_constraint= 0;
+    memset(s->in.ch, 0, sizeof(s->in.ch));
+    memset(s->out.ch, 0, sizeof(s->out.ch));
+    free_temp(&s->postin);
+    free_temp(&s->midbuf);
+    free_temp(&s->preout);
+    free_temp(&s->in_buffer);
+    free_temp(&s->silence);
+    free_temp(&s->drop_temp);
+    free_temp(&s->dither.noise);
+    free_temp(&s->dither.temp);
+    swri_audio_convert_free(&s-> in_convert);
+    swri_audio_convert_free(&s->out_convert);
+    swri_audio_convert_free(&s->full_convert);
+    swri_rematrix_free(s);
+
+    s->delayed_samples_fixup = 0;
+    s->flushed = 0;
+}
+
+av_cold void swr_free(SwrContext **ss){
+    SwrContext *s= *ss;
+    if(s){
+        clear_context(s);
+        if (s->resampler)
+            s->resampler->free(&s->resample);
+    }
+
+    av_freep(ss);
+}
+
+av_cold void swr_close(SwrContext *s){
+    clear_context(s);
+}
+
+av_cold int swr_init(struct SwrContext *s){
+    int ret;
+    char l1[1024], l2[1024];
+
+    clear_context(s);
+
+    if(s-> in_sample_fmt >= AV_SAMPLE_FMT_NB){
+        av_log(s, AV_LOG_ERROR, "Requested input sample format %d is invalid\n", s->in_sample_fmt);
+        return AVERROR(EINVAL);
+    }
+    if(s->out_sample_fmt >= AV_SAMPLE_FMT_NB){
+        av_log(s, AV_LOG_ERROR, "Requested output sample format %d is invalid\n", s->out_sample_fmt);
+        return AVERROR(EINVAL);
+    }
+
+    s->out.ch_count  = s-> user_out_ch_count;
+    s-> in.ch_count  = s->  user_in_ch_count;
+    s->used_ch_count = s->user_used_ch_count;
+
+    s-> in_ch_layout = s-> user_in_ch_layout;
+    s->out_ch_layout = s->user_out_ch_layout;
+
+    s->int_sample_fmt= s->user_int_sample_fmt;
+
+    if(av_get_channel_layout_nb_channels(s-> in_ch_layout) > SWR_CH_MAX) {
+        av_log(s, AV_LOG_WARNING, "Input channel layout 0x%"PRIx64" is invalid or unsupported.\n", s-> in_ch_layout);
+        s->in_ch_layout = 0;
+    }
+
+    if(av_get_channel_layout_nb_channels(s->out_ch_layout) > SWR_CH_MAX) {
+        av_log(s, AV_LOG_WARNING, "Output channel layout 0x%"PRIx64" is invalid or unsupported.\n", s->out_ch_layout);
+        s->out_ch_layout = 0;
+    }
+
+    switch(s->engine){
+#if CONFIG_LIBSOXR
+        case SWR_ENGINE_SOXR: s->resampler = &swri_soxr_resampler; break;
+#endif
+        case SWR_ENGINE_SWR : s->resampler = &swri_resampler; break;
+        default:
+            av_log(s, AV_LOG_ERROR, "Requested resampling engine is unavailable\n");
+            return AVERROR(EINVAL);
+    }
+
+    if(!s->used_ch_count)
+        s->used_ch_count= s->in.ch_count;
+
+    if(s->used_ch_count && s-> in_ch_layout && s->used_ch_count != av_get_channel_layout_nb_channels(s-> in_ch_layout)){
+        av_log(s, AV_LOG_WARNING, "Input channel layout has a different number of channels than the number of used channels, ignoring layout\n");
+        s-> in_ch_layout= 0;
+    }
+
+    if(!s-> in_ch_layout)
+        s-> in_ch_layout= av_get_default_channel_layout(s->used_ch_count);
+    if(!s->out_ch_layout)
+        s->out_ch_layout= av_get_default_channel_layout(s->out.ch_count);
+
+    s->rematrix= s->out_ch_layout  !=s->in_ch_layout || s->rematrix_volume!=1.0 ||
+                 s->rematrix_custom;
+
+    if(s->int_sample_fmt == AV_SAMPLE_FMT_NONE){
+        if(   av_get_bytes_per_sample(s-> in_sample_fmt) <= 2
+           && av_get_bytes_per_sample(s->out_sample_fmt) <= 2){
+            s->int_sample_fmt= AV_SAMPLE_FMT_S16P;
+        }else if(   av_get_bytes_per_sample(s-> in_sample_fmt) <= 2
+           && !s->rematrix
+           && s->out_sample_rate==s->in_sample_rate
+           && !(s->flags & SWR_FLAG_RESAMPLE)){
+            s->int_sample_fmt= AV_SAMPLE_FMT_S16P;
+        }else if(   av_get_planar_sample_fmt(s-> in_sample_fmt) == AV_SAMPLE_FMT_S32P
+                 && av_get_planar_sample_fmt(s->out_sample_fmt) == AV_SAMPLE_FMT_S32P
+                 && !s->rematrix
+                 && s->engine != SWR_ENGINE_SOXR){
+            s->int_sample_fmt= AV_SAMPLE_FMT_S32P;
+        }else if(av_get_bytes_per_sample(s->in_sample_fmt) <= 4){
+            s->int_sample_fmt= AV_SAMPLE_FMT_FLTP;
+        }else{
+            s->int_sample_fmt= AV_SAMPLE_FMT_DBLP;
+        }
+    }
+    av_log(s, AV_LOG_DEBUG, "Using %s internally between filters\n", av_get_sample_fmt_name(s->int_sample_fmt));
+
+    if(   s->int_sample_fmt != AV_SAMPLE_FMT_S16P
+        &&s->int_sample_fmt != AV_SAMPLE_FMT_S32P
+        &&s->int_sample_fmt != AV_SAMPLE_FMT_FLTP
+        &&s->int_sample_fmt != AV_SAMPLE_FMT_DBLP){
+        av_log(s, AV_LOG_ERROR, "Requested sample format %s is not supported internally, S16/S32/FLT/DBL is supported\n", av_get_sample_fmt_name(s->int_sample_fmt));
+        return AVERROR(EINVAL);
+    }
+
+    set_audiodata_fmt(&s-> in, s-> in_sample_fmt);
+    set_audiodata_fmt(&s->out, s->out_sample_fmt);
+
+    if (s->firstpts_in_samples != AV_NOPTS_VALUE) {
+        if (!s->async && s->min_compensation >= FLT_MAX/2)
+            s->async = 1;
+        s->firstpts =
+        s->outpts   = s->firstpts_in_samples * s->out_sample_rate;
+    } else
+        s->firstpts = AV_NOPTS_VALUE;
+
+    if (s->async) {
+        if (s->min_compensation >= FLT_MAX/2)
+            s->min_compensation = 0.001;
+        if (s->async > 1.0001) {
+            s->max_soft_compensation = s->async / (double) s->in_sample_rate;
+        }
+    }
+
+    if (s->out_sample_rate!=s->in_sample_rate || (s->flags & SWR_FLAG_RESAMPLE)){
+        s->resample = s->resampler->init(s->resample, s->out_sample_rate, s->in_sample_rate, s->filter_size, s->phase_shift, s->linear_interp, s->cutoff, s->int_sample_fmt, s->filter_type, s->kaiser_beta, s->precision, s->cheby, s->exact_rational);
+        if (!s->resample) {
+            av_log(s, AV_LOG_ERROR, "Failed to initialize resampler\n");
+            return AVERROR(ENOMEM);
+        }
+    }else
+        s->resampler->free(&s->resample);
+    if(    s->int_sample_fmt != AV_SAMPLE_FMT_S16P
+        && s->int_sample_fmt != AV_SAMPLE_FMT_S32P
+        && s->int_sample_fmt != AV_SAMPLE_FMT_FLTP
+        && s->int_sample_fmt != AV_SAMPLE_FMT_DBLP
+        && s->resample){
+        av_log(s, AV_LOG_ERROR, "Resampling only supported with internal s16/s32/flt/dbl\n");
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+#define RSC 1 //FIXME finetune
+    if(!s-> in.ch_count)
+        s-> in.ch_count= av_get_channel_layout_nb_channels(s-> in_ch_layout);
+    if(!s->used_ch_count)
+        s->used_ch_count= s->in.ch_count;
+    if(!s->out.ch_count)
+        s->out.ch_count= av_get_channel_layout_nb_channels(s->out_ch_layout);
+
+    if(!s-> in.ch_count){
+        av_assert0(!s->in_ch_layout);
+        av_log(s, AV_LOG_ERROR, "Input channel count and layout are unset\n");
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    av_get_channel_layout_string(l1, sizeof(l1), s-> in.ch_count, s-> in_ch_layout);
+    av_get_channel_layout_string(l2, sizeof(l2), s->out.ch_count, s->out_ch_layout);
+    if (s->out_ch_layout && s->out.ch_count != av_get_channel_layout_nb_channels(s->out_ch_layout)) {
+        av_log(s, AV_LOG_ERROR, "Output channel layout %s mismatches specified channel count %d\n", l2, s->out.ch_count);
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+    if (s->in_ch_layout && s->used_ch_count != av_get_channel_layout_nb_channels(s->in_ch_layout)) {
+        av_log(s, AV_LOG_ERROR, "Input channel layout %s mismatches specified channel count %d\n", l1, s->used_ch_count);
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+    if ((!s->out_ch_layout || !s->in_ch_layout) && s->used_ch_count != s->out.ch_count && !s->rematrix_custom) {
+        av_log(s, AV_LOG_ERROR, "Rematrix is needed between %s and %s "
+               "but there is not enough information to do it\n", l1, l2);
+        ret = AVERROR(EINVAL);
+        goto fail;
+    }
+
+av_assert0(s->used_ch_count);
+av_assert0(s->out.ch_count);
+    s->resample_first= RSC*s->out.ch_count/s->in.ch_count - RSC < s->out_sample_rate/(float)s-> in_sample_rate - 1.0;
+
+    s->in_buffer= s->in;
+    s->silence  = s->in;
+    s->drop_temp= s->out;
+
+    if(!s->resample && !s->rematrix && !s->channel_map && !s->dither.method){
+        s->full_convert = swri_audio_convert_alloc(s->out_sample_fmt,
+                                                   s-> in_sample_fmt, s-> in.ch_count, NULL, 0);
+        return 0;
+    }
+
+    s->in_convert = swri_audio_convert_alloc(s->int_sample_fmt,
+                                             s-> in_sample_fmt, s->used_ch_count, s->channel_map, 0);
+    s->out_convert= swri_audio_convert_alloc(s->out_sample_fmt,
+                                             s->int_sample_fmt, s->out.ch_count, NULL, 0);
+
+    if (!s->in_convert || !s->out_convert) {
+        ret = AVERROR(ENOMEM);
+        goto fail;
+    }
+
+    s->postin= s->in;
+    s->preout= s->out;
+    s->midbuf= s->in;
+
+    if(s->channel_map){
+        s->postin.ch_count=
+        s->midbuf.ch_count= s->used_ch_count;
+        if(s->resample)
+            s->in_buffer.ch_count= s->used_ch_count;
+    }
+    if(!s->resample_first){
+        s->midbuf.ch_count= s->out.ch_count;
+        if(s->resample)
+            s->in_buffer.ch_count = s->out.ch_count;
+    }
+
+    set_audiodata_fmt(&s->postin, s->int_sample_fmt);
+    set_audiodata_fmt(&s->midbuf, s->int_sample_fmt);
+    set_audiodata_fmt(&s->preout, s->int_sample_fmt);
+
+    if(s->resample){
+        set_audiodata_fmt(&s->in_buffer, s->int_sample_fmt);
+    }
+
+    if ((ret = swri_dither_init(s, s->out_sample_fmt, s->int_sample_fmt)) < 0)
+        goto fail;
+
+    if(s->rematrix || s->dither.method) {
+        ret = swri_rematrix_init(s);
+        if (ret < 0)
+            goto fail;
+    }
+
+    return 0;
+fail:
+    swr_close(s);
+    return ret;
+
+}
+
+int swri_realloc_audio(AudioData *a, int count){
+    int i, countb;
+    AudioData old;
+
+    if(count < 0 || count > INT_MAX/2/a->bps/a->ch_count)
+        return AVERROR(EINVAL);
+
+    if(a->count >= count)
+        return 0;
+
+    count*=2;
+
+    countb= FFALIGN(count*a->bps, ALIGN);
+    old= *a;
+
+    av_assert0(a->bps);
+    av_assert0(a->ch_count);
+
+    a->data= av_mallocz_array(countb, a->ch_count);
+    if(!a->data)
+        return AVERROR(ENOMEM);
+    for(i=0; i<a->ch_count; i++){
+        a->ch[i]= a->data + i*(a->planar ? countb : a->bps);
+        if(a->planar) memcpy(a->ch[i], old.ch[i], a->count*a->bps);
+    }
+    if(!a->planar) memcpy(a->ch[0], old.ch[0], a->count*a->ch_count*a->bps);
+    av_freep(&old.data);
+    a->count= count;
+
+    return 1;
+}
+
+static void copy(AudioData *out, AudioData *in,
+                 int count){
+    av_assert0(out->planar == in->planar);
+    av_assert0(out->bps == in->bps);
+    av_assert0(out->ch_count == in->ch_count);
+    if(out->planar){
+        int ch;
+        for(ch=0; ch<out->ch_count; ch++)
+            memcpy(out->ch[ch], in->ch[ch], count*out->bps);
+    }else
+        memcpy(out->ch[0], in->ch[0], count*out->ch_count*out->bps);
+}
+
+static void fill_audiodata(AudioData *out, uint8_t *in_arg [SWR_CH_MAX]){
+    int i;
+    if(!in_arg){
+        memset(out->ch, 0, sizeof(out->ch));
+    }else if(out->planar){
+        for(i=0; i<out->ch_count; i++)
+            out->ch[i]= in_arg[i];
+    }else{
+        for(i=0; i<out->ch_count; i++)
+            out->ch[i]= in_arg[0] + i*out->bps;
+    }
+}
+
+static void reversefill_audiodata(AudioData *out, uint8_t *in_arg [SWR_CH_MAX]){
+    int i;
+    if(out->planar){
+        for(i=0; i<out->ch_count; i++)
+            in_arg[i]= out->ch[i];
+    }else{
+        in_arg[0]= out->ch[0];
+    }
+}
+
+/**
+ *
+ * out may be equal in.
+ */
+static void buf_set(AudioData *out, AudioData *in, int count){
+    int ch;
+    if(in->planar){
+        for(ch=0; ch<out->ch_count; ch++)
+            out->ch[ch]= in->ch[ch] + count*out->bps;
+    }else{
+        for(ch=out->ch_count-1; ch>=0; ch--)
+            out->ch[ch]= in->ch[0] + (ch + count*out->ch_count) * out->bps;
+    }
+}
+
+/**
+ *
+ * @return number of samples output per channel
+ */
+static int resample(SwrContext *s, AudioData *out_param, int out_count,
+                             const AudioData * in_param, int in_count){
+    AudioData in, out, tmp;
+    int ret_sum=0;
+    int border=0;
+    int padless = ARCH_X86 && s->engine == SWR_ENGINE_SWR ? 7 : 0;
+
+    av_assert1(s->in_buffer.ch_count == in_param->ch_count);
+    av_assert1(s->in_buffer.planar   == in_param->planar);
+    av_assert1(s->in_buffer.fmt      == in_param->fmt);
+
+    tmp=out=*out_param;
+    in =  *in_param;
+
+    border = s->resampler->invert_initial_buffer(s->resample, &s->in_buffer,
+                 &in, in_count, &s->in_buffer_index, &s->in_buffer_count);
+    if (border == INT_MAX) {
+        return 0;
+    } else if (border < 0) {
+        return border;
+    } else if (border) {
+        buf_set(&in, &in, border);
+        in_count -= border;
+        s->resample_in_constraint = 0;
+    }
+
+    do{
+        int ret, size, consumed;
+        if(!s->resample_in_constraint && s->in_buffer_count){
+            buf_set(&tmp, &s->in_buffer, s->in_buffer_index);
+            ret= s->resampler->multiple_resample(s->resample, &out, out_count, &tmp, s->in_buffer_count, &consumed);
+            out_count -= ret;
+            ret_sum += ret;
+            buf_set(&out, &out, ret);
+            s->in_buffer_count -= consumed;
+            s->in_buffer_index += consumed;
+
+            if(!in_count)
+                break;
+            if(s->in_buffer_count <= border){
+                buf_set(&in, &in, -s->in_buffer_count);
+                in_count += s->in_buffer_count;
+                s->in_buffer_count=0;
+                s->in_buffer_index=0;
+                border = 0;
+            }
+        }
+
+        if((s->flushed || in_count > padless) && !s->in_buffer_count){
+            s->in_buffer_index=0;
+            ret= s->resampler->multiple_resample(s->resample, &out, out_count, &in, FFMAX(in_count-padless, 0), &consumed);
+            out_count -= ret;
+            ret_sum += ret;
+            buf_set(&out, &out, ret);
+            in_count -= consumed;
+            buf_set(&in, &in, consumed);
+        }
+
+        //TODO is this check sane considering the advanced copy avoidance below
+        size= s->in_buffer_index + s->in_buffer_count + in_count;
+        if(   size > s->in_buffer.count
+           && s->in_buffer_count + in_count <= s->in_buffer_index){
+            buf_set(&tmp, &s->in_buffer, s->in_buffer_index);
+            copy(&s->in_buffer, &tmp, s->in_buffer_count);
+            s->in_buffer_index=0;
+        }else
+            if((ret=swri_realloc_audio(&s->in_buffer, size)) < 0)
+                return ret;
+
+        if(in_count){
+            int count= in_count;
+            if(s->in_buffer_count && s->in_buffer_count+2 < count && out_count) count= s->in_buffer_count+2;
+
+            buf_set(&tmp, &s->in_buffer, s->in_buffer_index + s->in_buffer_count);
+            copy(&tmp, &in, /*in_*/count);
+            s->in_buffer_count += count;
+            in_count -= count;
+            border += count;
+            buf_set(&in, &in, count);
+            s->resample_in_constraint= 0;
+            if(s->in_buffer_count != count || in_count)
+                continue;
+            if (padless) {
+                padless = 0;
+                continue;
+            }
+        }
+        break;
+    }while(1);
+
+    s->resample_in_constraint= !!out_count;
+
+    return ret_sum;
+}
+
+static int swr_convert_internal(struct SwrContext *s, AudioData *out, int out_count,
+                                                      AudioData *in , int  in_count){
+    AudioData *postin, *midbuf, *preout;
+    int ret/*, in_max*/;
+    AudioData preout_tmp, midbuf_tmp;
+
+    if(s->full_convert){
+        av_assert0(!s->resample);
+        swri_audio_convert(s->full_convert, out, in, in_count);
+        return out_count;
+    }
+
+//     in_max= out_count*(int64_t)s->in_sample_rate / s->out_sample_rate + resample_filter_taps;
+//     in_count= FFMIN(in_count, in_in + 2 - s->hist_buffer_count);
+
+    if((ret=swri_realloc_audio(&s->postin, in_count))<0)
+        return ret;
+    if(s->resample_first){
+        av_assert0(s->midbuf.ch_count == s->used_ch_count);
+        if((ret=swri_realloc_audio(&s->midbuf, out_count))<0)
+            return ret;
+    }else{
+        av_assert0(s->midbuf.ch_count ==  s->out.ch_count);
+        if((ret=swri_realloc_audio(&s->midbuf,  in_count))<0)
+            return ret;
+    }
+    if((ret=swri_realloc_audio(&s->preout, out_count))<0)
+        return ret;
+
+    postin= &s->postin;
+
+    midbuf_tmp= s->midbuf;
+    midbuf= &midbuf_tmp;
+    preout_tmp= s->preout;
+    preout= &preout_tmp;
+
+    if(s->int_sample_fmt == s-> in_sample_fmt && s->in.planar && !s->channel_map)
+        postin= in;
+
+    if(s->resample_first ? !s->resample : !s->rematrix)
+        midbuf= postin;
+
+    if(s->resample_first ? !s->rematrix : !s->resample)
+        preout= midbuf;
+
+    if(s->int_sample_fmt == s->out_sample_fmt && s->out.planar
+       && !(s->out_sample_fmt==AV_SAMPLE_FMT_S32P && (s->dither.output_sample_bits&31))){
+        if(preout==in){
+            out_count= FFMIN(out_count, in_count); //TODO check at the end if this is needed or redundant
+            av_assert0(s->in.planar); //we only support planar internally so it has to be, we support copying non planar though
+            copy(out, in, out_count);
+            return out_count;
+        }
+        else if(preout==postin) preout= midbuf= postin= out;
+        else if(preout==midbuf) preout= midbuf= out;
+        else                    preout= out;
+    }
+
+    if(in != postin){
+        swri_audio_convert(s->in_convert, postin, in, in_count);
+    }
+
+    if(s->resample_first){
+        if(postin != midbuf)
+            out_count= resample(s, midbuf, out_count, postin, in_count);
+        if(midbuf != preout)
+            swri_rematrix(s, preout, midbuf, out_count, preout==out);
+    }else{
+        if(postin != midbuf)
+            swri_rematrix(s, midbuf, postin, in_count, midbuf==out);
+        if(midbuf != preout)
+            out_count= resample(s, preout, out_count, midbuf, in_count);
+    }
+
+    if(preout != out && out_count){
+        AudioData *conv_src = preout;
+        if(s->dither.method){
+            int ch;
+            int dither_count= FFMAX(out_count, 1<<16);
+
+            if (preout == in) {
+                conv_src = &s->dither.temp;
+                if((ret=swri_realloc_audio(&s->dither.temp, dither_count))<0)
+                    return ret;
+            }
+
+            if((ret=swri_realloc_audio(&s->dither.noise, dither_count))<0)
+                return ret;
+            if(ret)
+                for(ch=0; ch<s->dither.noise.ch_count; ch++)
+                    if((ret=swri_get_dither(s, s->dither.noise.ch[ch], s->dither.noise.count, (12345678913579ULL*ch + 3141592) % 2718281828U, s->dither.noise.fmt))<0)
+                        return ret;
+            av_assert0(s->dither.noise.ch_count == preout->ch_count);
+
+            if(s->dither.noise_pos + out_count > s->dither.noise.count)
+                s->dither.noise_pos = 0;
+
+            if (s->dither.method < SWR_DITHER_NS){
+                if (s->mix_2_1_simd) {
+                    int len1= out_count&~15;
+                    int off = len1 * preout->bps;
+
+                    if(len1)
+                        for(ch=0; ch<preout->ch_count; ch++)
+                            s->mix_2_1_simd(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_simd_one, 0, 0, len1);
+                    if(out_count != len1)
+                        for(ch=0; ch<preout->ch_count; ch++)
+                            s->mix_2_1_f(conv_src->ch[ch] + off, preout->ch[ch] + off, s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos + off + len1, s->native_one, 0, 0, out_count - len1);
+                } else {
+                    for(ch=0; ch<preout->ch_count; ch++)
+                        s->mix_2_1_f(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_one, 0, 0, out_count);
+                }
+            } else {
+                switch(s->int_sample_fmt) {
+                case AV_SAMPLE_FMT_S16P :swri_noise_shaping_int16(s, conv_src, preout, &s->dither.noise, out_count); break;
+                case AV_SAMPLE_FMT_S32P :swri_noise_shaping_int32(s, conv_src, preout, &s->dither.noise, out_count); break;
+                case AV_SAMPLE_FMT_FLTP :swri_noise_shaping_float(s, conv_src, preout, &s->dither.noise, out_count); break;
+                case AV_SAMPLE_FMT_DBLP :swri_noise_shaping_double(s,conv_src, preout, &s->dither.noise, out_count); break;
+                }
+            }
+            s->dither.noise_pos += out_count;
+        }
+//FIXME packed doesn't need more than 1 chan here!
+        swri_audio_convert(s->out_convert, out, conv_src, out_count);
+    }
+    return out_count;
+}
+
+int swr_is_initialized(struct SwrContext *s) {
+    return !!s->in_buffer.ch_count;
+}
+
+int attribute_align_arg swr_convert(struct SwrContext *s, uint8_t *out_arg[SWR_CH_MAX], int out_count,
+                                                    const uint8_t *in_arg [SWR_CH_MAX], int  in_count){
+    AudioData * in= &s->in;
+    AudioData *out= &s->out;
+    int av_unused max_output;
+
+    if (!swr_is_initialized(s)) {
+        av_log(s, AV_LOG_ERROR, "Context has not been initialized\n");
+        return AVERROR(EINVAL);
+    }
+#if defined(ASSERT_LEVEL) && ASSERT_LEVEL >1
+    max_output = swr_get_out_samples(s, in_count);
+#endif
+
+    while(s->drop_output > 0){
+        int ret;
+        uint8_t *tmp_arg[SWR_CH_MAX];
+#define MAX_DROP_STEP 16384
+        if((ret=swri_realloc_audio(&s->drop_temp, FFMIN(s->drop_output, MAX_DROP_STEP)))<0)
+            return ret;
+
+        reversefill_audiodata(&s->drop_temp, tmp_arg);
+        s->drop_output *= -1; //FIXME find a less hackish solution
+        ret = swr_convert(s, tmp_arg, FFMIN(-s->drop_output, MAX_DROP_STEP), in_arg, in_count); //FIXME optimize but this is as good as never called so maybe it doesn't matter
+        s->drop_output *= -1;
+        in_count = 0;
+        if(ret>0) {
+            s->drop_output -= ret;
+            if (!s->drop_output && !out_arg)
+                return 0;
+            continue;
+        }
+
+        av_assert0(s->drop_output);
+        return 0;
+    }
+
+    if(!in_arg){
+        if(s->resample){
+            if (!s->flushed)
+                s->resampler->flush(s);
+            s->resample_in_constraint = 0;
+            s->flushed = 1;
+        }else if(!s->in_buffer_count){
+            return 0;
+        }
+    }else
+        fill_audiodata(in ,  (void*)in_arg);
+
+    fill_audiodata(out, out_arg);
+
+    if(s->resample){
+        int ret = swr_convert_internal(s, out, out_count, in, in_count);
+        if(ret>0 && !s->drop_output)
+            s->outpts += ret * (int64_t)s->in_sample_rate;
+
+        av_assert2(max_output < 0 || ret < 0 || ret <= max_output);
+
+        return ret;
+    }else{
+        AudioData tmp= *in;
+        int ret2=0;
+        int ret, size;
+        size = FFMIN(out_count, s->in_buffer_count);
+        if(size){
+            buf_set(&tmp, &s->in_buffer, s->in_buffer_index);
+            ret= swr_convert_internal(s, out, size, &tmp, size);
+            if(ret<0)
+                return ret;
+            ret2= ret;
+            s->in_buffer_count -= ret;
+            s->in_buffer_index += ret;
+            buf_set(out, out, ret);
+            out_count -= ret;
+            if(!s->in_buffer_count)
+                s->in_buffer_index = 0;
+        }
+
+        if(in_count){
+            size= s->in_buffer_index + s->in_buffer_count + in_count - out_count;
+
+            if(in_count > out_count) { //FIXME move after swr_convert_internal
+                if(   size > s->in_buffer.count
+                && s->in_buffer_count + in_count - out_count <= s->in_buffer_index){
+                    buf_set(&tmp, &s->in_buffer, s->in_buffer_index);
+                    copy(&s->in_buffer, &tmp, s->in_buffer_count);
+                    s->in_buffer_index=0;
+                }else
+                    if((ret=swri_realloc_audio(&s->in_buffer, size)) < 0)
+                        return ret;
+            }
+
+            if(out_count){
+                size = FFMIN(in_count, out_count);
+                ret= swr_convert_internal(s, out, size, in, size);
+                if(ret<0)
+                    return ret;
+                buf_set(in, in, ret);
+                in_count -= ret;
+                ret2 += ret;
+            }
+            if(in_count){
+                buf_set(&tmp, &s->in_buffer, s->in_buffer_index + s->in_buffer_count);
+                copy(&tmp, in, in_count);
+                s->in_buffer_count += in_count;
+            }
+        }
+        if(ret2>0 && !s->drop_output)
+            s->outpts += ret2 * (int64_t)s->in_sample_rate;
+        av_assert2(max_output < 0 || ret2 < 0 || ret2 <= max_output);
+        return ret2;
+    }
+}
+
+int swr_drop_output(struct SwrContext *s, int count){
+    const uint8_t *tmp_arg[SWR_CH_MAX];
+    s->drop_output += count;
+
+    if(s->drop_output <= 0)
+        return 0;
+
+    av_log(s, AV_LOG_VERBOSE, "discarding %d audio samples\n", count);
+    return swr_convert(s, NULL, s->drop_output, tmp_arg, 0);
+}
+
+int swr_inject_silence(struct SwrContext *s, int count){
+    int ret, i;
+    uint8_t *tmp_arg[SWR_CH_MAX];
+
+    if(count <= 0)
+        return 0;
+
+#define MAX_SILENCE_STEP 16384
+    while (count > MAX_SILENCE_STEP) {
+        if ((ret = swr_inject_silence(s, MAX_SILENCE_STEP)) < 0)
+            return ret;
+        count -= MAX_SILENCE_STEP;
+    }
+
+    if((ret=swri_realloc_audio(&s->silence, count))<0)
+        return ret;
+
+    if(s->silence.planar) for(i=0; i<s->silence.ch_count; i++) {
+        memset(s->silence.ch[i], s->silence.bps==1 ? 0x80 : 0, count*s->silence.bps);
+    } else
+        memset(s->silence.ch[0], s->silence.bps==1 ? 0x80 : 0, count*s->silence.bps*s->silence.ch_count);
+
+    reversefill_audiodata(&s->silence, tmp_arg);
+    av_log(s, AV_LOG_VERBOSE, "adding %d audio samples of silence\n", count);
+    ret = swr_convert(s, NULL, 0, (const uint8_t**)tmp_arg, count);
+    return ret;
+}
+
+int64_t swr_get_delay(struct SwrContext *s, int64_t base){
+    if (s->resampler && s->resample){
+        return s->resampler->get_delay(s, base);
+    }else{
+        return (s->in_buffer_count*base + (s->in_sample_rate>>1))/ s->in_sample_rate;
+    }
+}
+
+int swr_get_out_samples(struct SwrContext *s, int in_samples)
+{
+    int64_t out_samples;
+
+    if (in_samples < 0)
+        return AVERROR(EINVAL);
+
+    if (s->resampler && s->resample) {
+        if (!s->resampler->get_out_samples)
+            return AVERROR(ENOSYS);
+        out_samples = s->resampler->get_out_samples(s, in_samples);
+    } else {
+        out_samples = s->in_buffer_count + in_samples;
+        av_assert0(s->out_sample_rate == s->in_sample_rate);
+    }
+
+    if (out_samples > INT_MAX)
+        return AVERROR(EINVAL);
+
+    return out_samples;
+}
+
+int swr_set_compensation(struct SwrContext *s, int sample_delta, int compensation_distance){
+    int ret;
+
+    if (!s || compensation_distance < 0)
+        return AVERROR(EINVAL);
+    if (!compensation_distance && sample_delta)
+        return AVERROR(EINVAL);
+    if (!s->resample) {
+        s->flags |= SWR_FLAG_RESAMPLE;
+        ret = swr_init(s);
+        if (ret < 0)
+            return ret;
+    }
+    if (!s->resampler->set_compensation){
+        return AVERROR(EINVAL);
+    }else{
+        return s->resampler->set_compensation(s->resample, sample_delta, compensation_distance);
+    }
+}
+
+int64_t swr_next_pts(struct SwrContext *s, int64_t pts){
+    if(pts == INT64_MIN)
+        return s->outpts;
+
+    if (s->firstpts == AV_NOPTS_VALUE)
+        s->outpts = s->firstpts = pts;
+
+    if(s->min_compensation >= FLT_MAX) {
+        return (s->outpts = pts - swr_get_delay(s, s->in_sample_rate * (int64_t)s->out_sample_rate));
+    } else {
+        int64_t delta = pts - swr_get_delay(s, s->in_sample_rate * (int64_t)s->out_sample_rate) - s->outpts + s->drop_output*(int64_t)s->in_sample_rate;
+        double fdelta = delta /(double)(s->in_sample_rate * (int64_t)s->out_sample_rate);
+
+        if(fabs(fdelta) > s->min_compensation) {
+            if(s->outpts == s->firstpts || fabs(fdelta) > s->min_hard_compensation){
+                int ret;
+                if(delta > 0) ret = swr_inject_silence(s,  delta / s->out_sample_rate);
+                else          ret = swr_drop_output   (s, -delta / s-> in_sample_rate);
+                if(ret<0){
+                    av_log(s, AV_LOG_ERROR, "Failed to compensate for timestamp delta of %f\n", fdelta);
+                }
+            } else if(s->soft_compensation_duration && s->max_soft_compensation) {
+                int duration = s->out_sample_rate * s->soft_compensation_duration;
+                double max_soft_compensation = s->max_soft_compensation / (s->max_soft_compensation < 0 ? -s->in_sample_rate : 1);
+                int comp = av_clipf(fdelta, -max_soft_compensation, max_soft_compensation) * duration ;
+                av_log(s, AV_LOG_VERBOSE, "compensating audio timestamp drift:%f compensation:%d in:%d\n", fdelta, comp, duration);
+                swr_set_compensation(s, comp, duration);
+            }
+        }
+
+        return s->outpts;
+    }
+}
diff --git a/libswresample/swresample.h b/libswresample/swresample.h
new file mode 100644
index 0000000000..10eaebc439
--- /dev/null
+++ b/libswresample/swresample.h
@@ -0,0 +1,553 @@
+/*
+ * Copyright (C) 2011-2013 Michael Niedermayer (michaelni@gmx.at)
+ *
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef SWRESAMPLE_SWRESAMPLE_H
+#define SWRESAMPLE_SWRESAMPLE_H
+
+/**
+ * @file
+ * @ingroup lswr
+ * libswresample public header
+ */
+
+/**
+ * @defgroup lswr Libswresample
+ * @{
+ *
+ * Libswresample (lswr) is a library that handles audio resampling, sample
+ * format conversion and mixing.
+ *
+ * Interaction with lswr is done through SwrContext, which is
+ * allocated with swr_alloc() or swr_alloc_set_opts(). It is opaque, so all parameters
+ * must be set with the @ref avoptions API.
+ *
+ * The first thing you will need to do in order to use lswr is to allocate
+ * SwrContext. This can be done with swr_alloc() or swr_alloc_set_opts(). If you
+ * are using the former, you must set options through the @ref avoptions API.
+ * The latter function provides the same feature, but it allows you to set some
+ * common options in the same statement.
+ *
+ * For example the following code will setup conversion from planar float sample
+ * format to interleaved signed 16-bit integer, downsampling from 48kHz to
+ * 44.1kHz and downmixing from 5.1 channels to stereo (using the default mixing
+ * matrix). This is using the swr_alloc() function.
+ * @code
+ * SwrContext *swr = swr_alloc();
+ * av_opt_set_channel_layout(swr, "in_channel_layout",  AV_CH_LAYOUT_5POINT1, 0);
+ * av_opt_set_channel_layout(swr, "out_channel_layout", AV_CH_LAYOUT_STEREO,  0);
+ * av_opt_set_int(swr, "in_sample_rate",     48000,                0);
+ * av_opt_set_int(swr, "out_sample_rate",    44100,                0);
+ * av_opt_set_sample_fmt(swr, "in_sample_fmt",  AV_SAMPLE_FMT_FLTP, 0);
+ * av_opt_set_sample_fmt(swr, "out_sample_fmt", AV_SAMPLE_FMT_S16,  0);
+ * @endcode
+ *
+ * The same job can be done using swr_alloc_set_opts() as well:
+ * @code
+ * SwrContext *swr = swr_alloc_set_opts(NULL,  // we're allocating a new context
+ *                       AV_CH_LAYOUT_STEREO,  // out_ch_layout
+ *                       AV_SAMPLE_FMT_S16,    // out_sample_fmt
+ *                       44100,                // out_sample_rate
+ *                       AV_CH_LAYOUT_5POINT1, // in_ch_layout
+ *                       AV_SAMPLE_FMT_FLTP,   // in_sample_fmt
+ *                       48000,                // in_sample_rate
+ *                       0,                    // log_offset
+ *                       NULL);                // log_ctx
+ * @endcode
+ *
+ * Once all values have been set, it must be initialized with swr_init(). If
+ * you need to change the conversion parameters, you can change the parameters
+ * using @ref AVOptions, as described above in the first example; or by using
+ * swr_alloc_set_opts(), but with the first argument the allocated context.
+ * You must then call swr_init() again.
+ *
+ * The conversion itself is done by repeatedly calling swr_convert().
+ * Note that the samples may get buffered in swr if you provide insufficient
+ * output space or if sample rate conversion is done, which requires "future"
+ * samples. Samples that do not require future input can be retrieved at any
+ * time by using swr_convert() (in_count can be set to 0).
+ * At the end of conversion the resampling buffer can be flushed by calling
+ * swr_convert() with NULL in and 0 in_count.
+ *
+ * The samples used in the conversion process can be managed with the libavutil
+ * @ref lavu_sampmanip "samples manipulation" API, including av_samples_alloc()
+ * function used in the following example.
+ *
+ * The delay between input and output, can at any time be found by using
+ * swr_get_delay().
+ *
+ * The following code demonstrates the conversion loop assuming the parameters
+ * from above and caller-defined functions get_input() and handle_output():
+ * @code
+ * uint8_t **input;
+ * int in_samples;
+ *
+ * while (get_input(&input, &in_samples)) {
+ *     uint8_t *output;
+ *     int out_samples = av_rescale_rnd(swr_get_delay(swr, 48000) +
+ *                                      in_samples, 44100, 48000, AV_ROUND_UP);
+ *     av_samples_alloc(&output, NULL, 2, out_samples,
+ *                      AV_SAMPLE_FMT_S16, 0);
+ *     out_samples = swr_convert(swr, &output, out_samples,
+ *                                      input, in_samples);
+ *     handle_output(output, out_samples);
+ *     av_freep(&output);
+ * }
+ * @endcode
+ *
+ * When the conversion is finished, the conversion
+ * context and everything associated with it must be freed with swr_free().
+ * A swr_close() function is also available, but it exists mainly for
+ * compatibility with libavresample, and is not required to be called.
+ *
+ * There will be no memory leak if the data is not completely flushed before
+ * swr_free().
+ */
+
+#include <stdint.h>
+#include "libavutil/frame.h"
+#include "libavutil/samplefmt.h"
+
+#include "libswresample/version.h"
+
+#if LIBSWRESAMPLE_VERSION_MAJOR < 1
+#define SWR_CH_MAX 32   ///< Maximum number of channels
+#endif
+
+/**
+ * @name Option constants
+ * These constants are used for the @ref avoptions interface for lswr.
+ * @{
+ *
+ */
+
+#define SWR_FLAG_RESAMPLE 1 ///< Force resampling even if equal sample rate
+//TODO use int resample ?
+//long term TODO can we enable this dynamically?
+
+/** Dithering algorithms */
+enum SwrDitherType {
+    SWR_DITHER_NONE = 0,
+    SWR_DITHER_RECTANGULAR,
+    SWR_DITHER_TRIANGULAR,
+    SWR_DITHER_TRIANGULAR_HIGHPASS,
+
+    SWR_DITHER_NS = 64,         ///< not part of API/ABI
+    SWR_DITHER_NS_LIPSHITZ,
+    SWR_DITHER_NS_F_WEIGHTED,
+    SWR_DITHER_NS_MODIFIED_E_WEIGHTED,
+    SWR_DITHER_NS_IMPROVED_E_WEIGHTED,
+    SWR_DITHER_NS_SHIBATA,
+    SWR_DITHER_NS_LOW_SHIBATA,
+    SWR_DITHER_NS_HIGH_SHIBATA,
+    SWR_DITHER_NB,              ///< not part of API/ABI
+};
+
+/** Resampling Engines */
+enum SwrEngine {
+    SWR_ENGINE_SWR,             /**< SW Resampler */
+    SWR_ENGINE_SOXR,            /**< SoX Resampler */
+    SWR_ENGINE_NB,              ///< not part of API/ABI
+};
+
+/** Resampling Filter Types */
+enum SwrFilterType {
+    SWR_FILTER_TYPE_CUBIC,              /**< Cubic */
+    SWR_FILTER_TYPE_BLACKMAN_NUTTALL,   /**< Blackman Nuttall windowed sinc */
+    SWR_FILTER_TYPE_KAISER,             /**< Kaiser windowed sinc */
+};
+
+/**
+ * @}
+ */
+
+/**
+ * The libswresample context. Unlike libavcodec and libavformat, this structure
+ * is opaque. This means that if you would like to set options, you must use
+ * the @ref avoptions API and cannot directly set values to members of the
+ * structure.
+ */
+typedef struct SwrContext SwrContext;
+
+/**
+ * Get the AVClass for SwrContext. It can be used in combination with
+ * AV_OPT_SEARCH_FAKE_OBJ for examining options.
+ *
+ * @see av_opt_find().
+ * @return the AVClass of SwrContext
+ */
+const AVClass *swr_get_class(void);
+
+/**
+ * @name SwrContext constructor functions
+ * @{
+ */
+
+/**
+ * Allocate SwrContext.
+ *
+ * If you use this function you will need to set the parameters (manually or
+ * with swr_alloc_set_opts()) before calling swr_init().
+ *
+ * @see swr_alloc_set_opts(), swr_init(), swr_free()
+ * @return NULL on error, allocated context otherwise
+ */
+struct SwrContext *swr_alloc(void);
+
+/**
+ * Initialize context after user parameters have been set.
+ * @note The context must be configured using the AVOption API.
+ *
+ * @see av_opt_set_int()
+ * @see av_opt_set_dict()
+ *
+ * @param[in,out]   s Swr context to initialize
+ * @return AVERROR error code in case of failure.
+ */
+int swr_init(struct SwrContext *s);
+
+/**
+ * Check whether an swr context has been initialized or not.
+ *
+ * @param[in]       s Swr context to check
+ * @see swr_init()
+ * @return positive if it has been initialized, 0 if not initialized
+ */
+int swr_is_initialized(struct SwrContext *s);
+
+/**
+ * Allocate SwrContext if needed and set/reset common parameters.
+ *
+ * This function does not require s to be allocated with swr_alloc(). On the
+ * other hand, swr_alloc() can use swr_alloc_set_opts() to set the parameters
+ * on the allocated context.
+ *
+ * @param s               existing Swr context if available, or NULL if not
+ * @param out_ch_layout   output channel layout (AV_CH_LAYOUT_*)
+ * @param out_sample_fmt  output sample format (AV_SAMPLE_FMT_*).
+ * @param out_sample_rate output sample rate (frequency in Hz)
+ * @param in_ch_layout    input channel layout (AV_CH_LAYOUT_*)
+ * @param in_sample_fmt   input sample format (AV_SAMPLE_FMT_*).
+ * @param in_sample_rate  input sample rate (frequency in Hz)
+ * @param log_offset      logging level offset
+ * @param log_ctx         parent logging context, can be NULL
+ *
+ * @see swr_init(), swr_free()
+ * @return NULL on error, allocated context otherwise
+ */
+struct SwrContext *swr_alloc_set_opts(struct SwrContext *s,
+                                      int64_t out_ch_layout, enum AVSampleFormat out_sample_fmt, int out_sample_rate,
+                                      int64_t  in_ch_layout, enum AVSampleFormat  in_sample_fmt, int  in_sample_rate,
+                                      int log_offset, void *log_ctx);
+
+/**
+ * @}
+ *
+ * @name SwrContext destructor functions
+ * @{
+ */
+
+/**
+ * Free the given SwrContext and set the pointer to NULL.
+ *
+ * @param[in] s a pointer to a pointer to Swr context
+ */
+void swr_free(struct SwrContext **s);
+
+/**
+ * Closes the context so that swr_is_initialized() returns 0.
+ *
+ * The context can be brought back to life by running swr_init(),
+ * swr_init() can also be used without swr_close().
+ * This function is mainly provided for simplifying the usecase
+ * where one tries to support libavresample and libswresample.
+ *
+ * @param[in,out] s Swr context to be closed
+ */
+void swr_close(struct SwrContext *s);
+
+/**
+ * @}
+ *
+ * @name Core conversion functions
+ * @{
+ */
+
+/** Convert audio.
+ *
+ * in and in_count can be set to 0 to flush the last few samples out at the
+ * end.
+ *
+ * If more input is provided than output space, then the input will be buffered.
+ * You can avoid this buffering by using swr_get_out_samples() to retrieve an
+ * upper bound on the required number of output samples for the given number of
+ * input samples. Conversion will run directly without copying whenever possible.
+ *
+ * @param s         allocated Swr context, with parameters set
+ * @param out       output buffers, only the first one need be set in case of packed audio
+ * @param out_count amount of space available for output in samples per channel
+ * @param in        input buffers, only the first one need to be set in case of packed audio
+ * @param in_count  number of input samples available in one channel
+ *
+ * @return number of samples output per channel, negative value on error
+ */
+int swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
+                                const uint8_t **in , int in_count);
+
+/**
+ * Convert the next timestamp from input to output
+ * timestamps are in 1/(in_sample_rate * out_sample_rate) units.
+ *
+ * @note There are 2 slightly differently behaving modes.
+ *       @li When automatic timestamp compensation is not used, (min_compensation >= FLT_MAX)
+ *              in this case timestamps will be passed through with delays compensated
+ *       @li When automatic timestamp compensation is used, (min_compensation < FLT_MAX)
+ *              in this case the output timestamps will match output sample numbers.
+ *              See ffmpeg-resampler(1) for the two modes of compensation.
+ *
+ * @param s[in]     initialized Swr context
+ * @param pts[in]   timestamp for the next input sample, INT64_MIN if unknown
+ * @see swr_set_compensation(), swr_drop_output(), and swr_inject_silence() are
+ *      function used internally for timestamp compensation.
+ * @return the output timestamp for the next output sample
+ */
+int64_t swr_next_pts(struct SwrContext *s, int64_t pts);
+
+/**
+ * @}
+ *
+ * @name Low-level option setting functions
+ * These functons provide a means to set low-level options that is not possible
+ * with the AVOption API.
+ * @{
+ */
+
+/**
+ * Activate resampling compensation ("soft" compensation). This function is
+ * internally called when needed in swr_next_pts().
+ *
+ * @param[in,out] s             allocated Swr context. If it is not initialized,
+ *                              or SWR_FLAG_RESAMPLE is not set, swr_init() is
+ *                              called with the flag set.
+ * @param[in]     sample_delta  delta in PTS per sample
+ * @param[in]     compensation_distance number of samples to compensate for
+ * @return    >= 0 on success, AVERROR error codes if:
+ *            @li @c s is NULL,
+ *            @li @c compensation_distance is less than 0,
+ *            @li @c compensation_distance is 0 but sample_delta is not,
+ *            @li compensation unsupported by resampler, or
+ *            @li swr_init() fails when called.
+ */
+int swr_set_compensation(struct SwrContext *s, int sample_delta, int compensation_distance);
+
+/**
+ * Set a customized input channel mapping.
+ *
+ * @param[in,out] s           allocated Swr context, not yet initialized
+ * @param[in]     channel_map customized input channel mapping (array of channel
+ *                            indexes, -1 for a muted channel)
+ * @return >= 0 on success, or AVERROR error code in case of failure.
+ */
+int swr_set_channel_mapping(struct SwrContext *s, const int *channel_map);
+
+/**
+ * Set a customized remix matrix.
+ *
+ * @param s       allocated Swr context, not yet initialized
+ * @param matrix  remix coefficients; matrix[i + stride * o] is
+ *                the weight of input channel i in output channel o
+ * @param stride  offset between lines of the matrix
+ * @return  >= 0 on success, or AVERROR error code in case of failure.
+ */
+int swr_set_matrix(struct SwrContext *s, const double *matrix, int stride);
+
+/**
+ * @}
+ *
+ * @name Sample handling functions
+ * @{
+ */
+
+/**
+ * Drops the specified number of output samples.
+ *
+ * This function, along with swr_inject_silence(), is called by swr_next_pts()
+ * if needed for "hard" compensation.
+ *
+ * @param s     allocated Swr context
+ * @param count number of samples to be dropped
+ *
+ * @return >= 0 on success, or a negative AVERROR code on failure
+ */
+int swr_drop_output(struct SwrContext *s, int count);
+
+/**
+ * Injects the specified number of silence samples.
+ *
+ * This function, along with swr_drop_output(), is called by swr_next_pts()
+ * if needed for "hard" compensation.
+ *
+ * @param s     allocated Swr context
+ * @param count number of samples to be dropped
+ *
+ * @return >= 0 on success, or a negative AVERROR code on failure
+ */
+int swr_inject_silence(struct SwrContext *s, int count);
+
+/**
+ * Gets the delay the next input sample will experience relative to the next output sample.
+ *
+ * Swresample can buffer data if more input has been provided than available
+ * output space, also converting between sample rates needs a delay.
+ * This function returns the sum of all such delays.
+ * The exact delay is not necessarily an integer value in either input or
+ * output sample rate. Especially when downsampling by a large value, the
+ * output sample rate may be a poor choice to represent the delay, similarly
+ * for upsampling and the input sample rate.
+ *
+ * @param s     swr context
+ * @param base  timebase in which the returned delay will be:
+ *              @li if it's set to 1 the returned delay is in seconds
+ *              @li if it's set to 1000 the returned delay is in milliseconds
+ *              @li if it's set to the input sample rate then the returned
+ *                  delay is in input samples
+ *              @li if it's set to the output sample rate then the returned
+ *                  delay is in output samples
+ *              @li if it's the least common multiple of in_sample_rate and
+ *                  out_sample_rate then an exact rounding-free delay will be
+ *                  returned
+ * @returns     the delay in 1 / @c base units.
+ */
+int64_t swr_get_delay(struct SwrContext *s, int64_t base);
+
+/**
+ * Find an upper bound on the number of samples that the next swr_convert
+ * call will output, if called with in_samples of input samples. This
+ * depends on the internal state, and anything changing the internal state
+ * (like further swr_convert() calls) will may change the number of samples
+ * swr_get_out_samples() returns for the same number of input samples.
+ *
+ * @param in_samples    number of input samples.
+ * @note any call to swr_inject_silence(), swr_convert(), swr_next_pts()
+ *       or swr_set_compensation() invalidates this limit
+ * @note it is recommended to pass the correct available buffer size
+ *       to all functions like swr_convert() even if swr_get_out_samples()
+ *       indicates that less would be used.
+ * @returns an upper bound on the number of samples that the next swr_convert
+ *          will output or a negative value to indicate an error
+ */
+int swr_get_out_samples(struct SwrContext *s, int in_samples);
+
+/**
+ * @}
+ *
+ * @name Configuration accessors
+ * @{
+ */
+
+/**
+ * Return the @ref LIBSWRESAMPLE_VERSION_INT constant.
+ *
+ * This is useful to check if the build-time libswresample has the same version
+ * as the run-time one.
+ *
+ * @returns     the unsigned int-typed version
+ */
+unsigned swresample_version(void);
+
+/**
+ * Return the swr build-time configuration.
+ *
+ * @returns     the build-time @c ./configure flags
+ */
+const char *swresample_configuration(void);
+
+/**
+ * Return the swr license.
+ *
+ * @returns     the license of libswresample, determined at build-time
+ */
+const char *swresample_license(void);
+
+/**
+ * @}
+ *
+ * @name AVFrame based API
+ * @{
+ */
+
+/**
+ * Convert the samples in the input AVFrame and write them to the output AVFrame.
+ *
+ * Input and output AVFrames must have channel_layout, sample_rate and format set.
+ *
+ * If the output AVFrame does not have the data pointers allocated the nb_samples
+ * field will be set using av_frame_get_buffer()
+ * is called to allocate the frame.
+ *
+ * The output AVFrame can be NULL or have fewer allocated samples than required.
+ * In this case, any remaining samples not written to the output will be added
+ * to an internal FIFO buffer, to be returned at the next call to this function
+ * or to swr_convert().
+ *
+ * If converting sample rate, there may be data remaining in the internal
+ * resampling delay buffer. swr_get_delay() tells the number of
+ * remaining samples. To get this data as output, call this function or
+ * swr_convert() with NULL input.
+ *
+ * If the SwrContext configuration does not match the output and
+ * input AVFrame settings the conversion does not take place and depending on
+ * which AVFrame is not matching AVERROR_OUTPUT_CHANGED, AVERROR_INPUT_CHANGED
+ * or the result of a bitwise-OR of them is returned.
+ *
+ * @see swr_delay()
+ * @see swr_convert()
+ * @see swr_get_delay()
+ *
+ * @param swr             audio resample context
+ * @param output          output AVFrame
+ * @param input           input AVFrame
+ * @return                0 on success, AVERROR on failure or nonmatching
+ *                        configuration.
+ */
+int swr_convert_frame(SwrContext *swr,
+                      AVFrame *output, const AVFrame *input);
+
+/**
+ * Configure or reconfigure the SwrContext using the information
+ * provided by the AVFrames.
+ *
+ * The original resampling context is reset even on failure.
+ * The function calls swr_close() internally if the context is open.
+ *
+ * @see swr_close();
+ *
+ * @param swr             audio resample context
+ * @param output          output AVFrame
+ * @param input           input AVFrame
+ * @return                0 on success, AVERROR on failure.
+ */
+int swr_config_frame(SwrContext *swr, const AVFrame *out, const AVFrame *in);
+
+/**
+ * @}
+ * @}
+ */
+
+#endif /* SWRESAMPLE_SWRESAMPLE_H */
diff --git a/libswresample/swresample_frame.c b/libswresample/swresample_frame.c
new file mode 100644
index 0000000000..71d3ed711a
--- /dev/null
+++ b/libswresample/swresample_frame.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2014 Luca Barbato <lu_zero@gentoo.org>
+ * Copyright (c) 2014 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swresample_internal.h"
+#include "libavutil/frame.h"
+#include "libavutil/opt.h"
+
+int swr_config_frame(SwrContext *s, const AVFrame *out, const AVFrame *in)
+{
+    swr_close(s);
+
+    if (in) {
+        if (av_opt_set_int(s, "icl", in->channel_layout, 0) < 0)
+            goto fail;
+        if (av_opt_set_int(s, "isf", in->format, 0) < 0)
+            goto fail;
+        if (av_opt_set_int(s, "isr", in->sample_rate, 0) < 0)
+            goto fail;
+    }
+
+    if (out) {
+        if (av_opt_set_int(s, "ocl", out->channel_layout, 0) < 0)
+            goto fail;
+        if (av_opt_set_int(s, "osf", out->format,  0) < 0)
+            goto fail;
+        if (av_opt_set_int(s, "osr", out->sample_rate, 0) < 0)
+            goto fail;
+    }
+
+    return 0;
+fail:
+    av_log(s, AV_LOG_ERROR, "Failed to set option\n");
+    return AVERROR(EINVAL);
+}
+
+static int config_changed(SwrContext *s,
+                          const AVFrame *out, const AVFrame *in)
+{
+    int ret = 0;
+
+    if (in) {
+        if (s->in_ch_layout   != in->channel_layout ||
+            s->in_sample_rate != in->sample_rate ||
+            s->in_sample_fmt  != in->format) {
+            ret |= AVERROR_INPUT_CHANGED;
+        }
+    }
+
+    if (out) {
+        if (s->out_ch_layout   != out->channel_layout ||
+            s->out_sample_rate != out->sample_rate ||
+            s->out_sample_fmt  != out->format) {
+            ret |= AVERROR_OUTPUT_CHANGED;
+        }
+    }
+
+    return ret;
+}
+
+static inline int convert_frame(SwrContext *s,
+                                AVFrame *out, const AVFrame *in)
+{
+    int ret;
+    uint8_t **out_data = NULL;
+    const uint8_t **in_data = NULL;
+    int out_nb_samples = 0, in_nb_samples = 0;
+
+    if (out) {
+        out_data       = out->extended_data;
+        out_nb_samples = out->nb_samples;
+    }
+
+    if (in) {
+        in_data       = (const uint8_t **)in->extended_data;
+        in_nb_samples = in->nb_samples;
+    }
+
+    ret = swr_convert(s, out_data, out_nb_samples, in_data, in_nb_samples);
+
+    if (ret < 0) {
+        if (out)
+            out->nb_samples = 0;
+        return ret;
+    }
+
+    if (out)
+        out->nb_samples = ret;
+
+    return 0;
+}
+
+static inline int available_samples(AVFrame *out)
+{
+    int bytes_per_sample = av_get_bytes_per_sample(out->format);
+    int samples = out->linesize[0] / bytes_per_sample;
+
+    if (av_sample_fmt_is_planar(out->format)) {
+        return samples;
+    } else {
+        int channels = av_get_channel_layout_nb_channels(out->channel_layout);
+        return samples / channels;
+    }
+}
+
+int swr_convert_frame(SwrContext *s,
+                      AVFrame *out, const AVFrame *in)
+{
+    int ret, setup = 0;
+
+    if (!swr_is_initialized(s)) {
+        if ((ret = swr_config_frame(s, out, in)) < 0)
+            return ret;
+        if ((ret = swr_init(s)) < 0)
+            return ret;
+        setup = 1;
+    } else {
+        // return as is or reconfigure for input changes?
+        if ((ret = config_changed(s, out, in)))
+            return ret;
+    }
+
+    if (out) {
+        if (!out->linesize[0]) {
+            out->nb_samples =   swr_get_delay(s, s->out_sample_rate)
+                              + in->nb_samples*(int64_t)s->out_sample_rate / s->in_sample_rate
+                              + 3;
+            if ((ret = av_frame_get_buffer(out, 0)) < 0) {
+                if (setup)
+                    swr_close(s);
+                return ret;
+            }
+        } else {
+            if (!out->nb_samples)
+                out->nb_samples = available_samples(out);
+        }
+    }
+
+    return convert_frame(s, out, in);
+}
+
diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h
new file mode 100644
index 0000000000..3828b722cc
--- /dev/null
+++ b/libswresample/swresample_internal.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2011-2013 Michael Niedermayer (michaelni@gmx.at)
+ *
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef SWRESAMPLE_SWRESAMPLE_INTERNAL_H
+#define SWRESAMPLE_SWRESAMPLE_INTERNAL_H
+
+#include "swresample.h"
+#include "libavutil/channel_layout.h"
+#include "config.h"
+
+#define SWR_CH_MAX 64
+
+#define SQRT3_2      1.22474487139158904909  /* sqrt(3/2) */
+
+#define NS_TAPS 20
+
+#if ARCH_X86_64
+typedef int64_t integer;
+#else
+typedef int integer;
+#endif
+
+typedef void (mix_1_1_func_type)(void *out, const void *in, void *coeffp, integer index, integer len);
+typedef void (mix_2_1_func_type)(void *out, const void *in1, const void *in2, void *coeffp, integer index1, integer index2, integer len);
+
+typedef void (mix_any_func_type)(uint8_t **out, const uint8_t **in1, void *coeffp, integer len);
+
+typedef struct AudioData{
+    uint8_t *ch[SWR_CH_MAX];    ///< samples buffer per channel
+    uint8_t *data;              ///< samples buffer
+    int ch_count;               ///< number of channels
+    int bps;                    ///< bytes per sample
+    int count;                  ///< number of samples
+    int planar;                 ///< 1 if planar audio, 0 otherwise
+    enum AVSampleFormat fmt;    ///< sample format
+} AudioData;
+
+struct DitherContext {
+    int method;
+    int noise_pos;
+    float scale;
+    float noise_scale;                              ///< Noise scale
+    int ns_taps;                                    ///< Noise shaping dither taps
+    float ns_scale;                                 ///< Noise shaping dither scale
+    float ns_scale_1;                               ///< Noise shaping dither scale^-1
+    int ns_pos;                                     ///< Noise shaping dither position
+    float ns_coeffs[NS_TAPS];                       ///< Noise shaping filter coefficients
+    float ns_errors[SWR_CH_MAX][2*NS_TAPS];
+    AudioData noise;                                ///< noise used for dithering
+    AudioData temp;                                 ///< temporary storage when writing into the input buffer isn't possible
+    int output_sample_bits;                         ///< the number of used output bits, needed to scale dither correctly
+};
+
+typedef struct ResampleContext * (* resample_init_func)(struct ResampleContext *c, int out_rate, int in_rate, int filter_size, int phase_shift, int linear,
+                                    double cutoff, enum AVSampleFormat format, enum SwrFilterType filter_type, double kaiser_beta, double precision, int cheby, int exact_rational);
+typedef void    (* resample_free_func)(struct ResampleContext **c);
+typedef int     (* multiple_resample_func)(struct ResampleContext *c, AudioData *dst, int dst_size, AudioData *src, int src_size, int *consumed);
+typedef int     (* resample_flush_func)(struct SwrContext *c);
+typedef int     (* set_compensation_func)(struct ResampleContext *c, int sample_delta, int compensation_distance);
+typedef int64_t (* get_delay_func)(struct SwrContext *s, int64_t base);
+typedef int     (* invert_initial_buffer_func)(struct ResampleContext *c, AudioData *dst, const AudioData *src, int src_size, int *dst_idx, int *dst_count);
+typedef int64_t (* get_out_samples_func)(struct SwrContext *s, int in_samples);
+
+struct Resampler {
+  resample_init_func            init;
+  resample_free_func            free;
+  multiple_resample_func        multiple_resample;
+  resample_flush_func           flush;
+  set_compensation_func         set_compensation;
+  get_delay_func                get_delay;
+  invert_initial_buffer_func    invert_initial_buffer;
+  get_out_samples_func          get_out_samples;
+};
+
+extern struct Resampler const swri_resampler;
+extern struct Resampler const swri_soxr_resampler;
+
+struct SwrContext {
+    const AVClass *av_class;                        ///< AVClass used for AVOption and av_log()
+    int log_level_offset;                           ///< logging level offset
+    void *log_ctx;                                  ///< parent logging context
+    enum AVSampleFormat  in_sample_fmt;             ///< input sample format
+    enum AVSampleFormat int_sample_fmt;             ///< internal sample format (AV_SAMPLE_FMT_FLTP or AV_SAMPLE_FMT_S16P)
+    enum AVSampleFormat out_sample_fmt;             ///< output sample format
+    int64_t  in_ch_layout;                          ///< input channel layout
+    int64_t out_ch_layout;                          ///< output channel layout
+    int      in_sample_rate;                        ///< input sample rate
+    int     out_sample_rate;                        ///< output sample rate
+    int flags;                                      ///< miscellaneous flags such as SWR_FLAG_RESAMPLE
+    float slev;                                     ///< surround mixing level
+    float clev;                                     ///< center mixing level
+    float lfe_mix_level;                            ///< LFE mixing level
+    float rematrix_volume;                          ///< rematrixing volume coefficient
+    float rematrix_maxval;                          ///< maximum value for rematrixing output
+    int matrix_encoding;                            /**< matrixed stereo encoding */
+    const int *channel_map;                         ///< channel index (or -1 if muted channel) map
+    int used_ch_count;                              ///< number of used input channels (mapped channel count if channel_map, otherwise in.ch_count)
+    int engine;
+
+    int user_in_ch_count;                           ///< User set input channel count
+    int user_out_ch_count;                          ///< User set output channel count
+    int user_used_ch_count;                         ///< User set used channel count
+    int64_t user_in_ch_layout;                      ///< User set input channel layout
+    int64_t user_out_ch_layout;                     ///< User set output channel layout
+    enum AVSampleFormat user_int_sample_fmt;        ///< User set internal sample format
+
+    struct DitherContext dither;
+
+    int filter_size;                                /**< length of each FIR filter in the resampling filterbank relative to the cutoff frequency */
+    int phase_shift;                                /**< log2 of the number of entries in the resampling polyphase filterbank */
+    int linear_interp;                              /**< if 1 then the resampling FIR filter will be linearly interpolated */
+    int exact_rational;                             /**< if 1 then enable non power of 2 phase_count */
+    double cutoff;                                  /**< resampling cutoff frequency (swr: 6dB point; soxr: 0dB point). 1.0 corresponds to half the output sample rate */
+    int filter_type;                                /**< swr resampling filter type */
+    double kaiser_beta;                                /**< swr beta value for Kaiser window (only applicable if filter_type == AV_FILTER_TYPE_KAISER) */
+    double precision;                               /**< soxr resampling precision (in bits) */
+    int cheby;                                      /**< soxr: if 1 then passband rolloff will be none (Chebyshev) & irrational ratio approximation precision will be higher */
+
+    float min_compensation;                         ///< swr minimum below which no compensation will happen
+    float min_hard_compensation;                    ///< swr minimum below which no silence inject / sample drop will happen
+    float soft_compensation_duration;               ///< swr duration over which soft compensation is applied
+    float max_soft_compensation;                    ///< swr maximum soft compensation in seconds over soft_compensation_duration
+    float async;                                    ///< swr simple 1 parameter async, similar to ffmpegs -async
+    int64_t firstpts_in_samples;                    ///< swr first pts in samples
+
+    int resample_first;                             ///< 1 if resampling must come first, 0 if rematrixing
+    int rematrix;                                   ///< flag to indicate if rematrixing is needed (basically if input and output layouts mismatch)
+    int rematrix_custom;                            ///< flag to indicate that a custom matrix has been defined
+
+    AudioData in;                                   ///< input audio data
+    AudioData postin;                               ///< post-input audio data: used for rematrix/resample
+    AudioData midbuf;                               ///< intermediate audio data (postin/preout)
+    AudioData preout;                               ///< pre-output audio data: used for rematrix/resample
+    AudioData out;                                  ///< converted output audio data
+    AudioData in_buffer;                            ///< cached audio data (convert and resample purpose)
+    AudioData silence;                              ///< temporary with silence
+    AudioData drop_temp;                            ///< temporary used to discard output
+    int in_buffer_index;                            ///< cached buffer position
+    int in_buffer_count;                            ///< cached buffer length
+    int resample_in_constraint;                     ///< 1 if the input end was reach before the output end, 0 otherwise
+    int flushed;                                    ///< 1 if data is to be flushed and no further input is expected
+    int64_t outpts;                                 ///< output PTS
+    int64_t firstpts;                               ///< first PTS
+    int drop_output;                                ///< number of output samples to drop
+    double delayed_samples_fixup;                   ///< soxr 0.1.1: needed to fixup delayed_samples after flush has been called.
+
+    struct AudioConvert *in_convert;                ///< input conversion context
+    struct AudioConvert *out_convert;               ///< output conversion context
+    struct AudioConvert *full_convert;              ///< full conversion context (single conversion for input and output)
+    struct ResampleContext *resample;               ///< resampling context
+    struct Resampler const *resampler;              ///< resampler virtual function table
+
+    float matrix[SWR_CH_MAX][SWR_CH_MAX];           ///< floating point rematrixing coefficients
+    uint8_t *native_matrix;
+    uint8_t *native_one;
+    uint8_t *native_simd_one;
+    uint8_t *native_simd_matrix;
+    int32_t matrix32[SWR_CH_MAX][SWR_CH_MAX];       ///< 17.15 fixed point rematrixing coefficients
+    uint8_t matrix_ch[SWR_CH_MAX][SWR_CH_MAX+1];    ///< Lists of input channels per output channel that have non zero rematrixing coefficients
+    mix_1_1_func_type *mix_1_1_f;
+    mix_1_1_func_type *mix_1_1_simd;
+
+    mix_2_1_func_type *mix_2_1_f;
+    mix_2_1_func_type *mix_2_1_simd;
+
+    mix_any_func_type *mix_any_f;
+
+    /* TODO: callbacks for ASM optimizations */
+};
+
+av_warn_unused_result
+int swri_realloc_audio(AudioData *a, int count);
+
+void swri_noise_shaping_int16 (SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count);
+void swri_noise_shaping_int32 (SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count);
+void swri_noise_shaping_float (SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count);
+void swri_noise_shaping_double(SwrContext *s, AudioData *dsts, const AudioData *srcs, const AudioData *noises, int count);
+
+av_warn_unused_result
+int swri_rematrix_init(SwrContext *s);
+void swri_rematrix_free(SwrContext *s);
+int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mustcopy);
+int swri_rematrix_init_x86(struct SwrContext *s);
+
+av_warn_unused_result
+int swri_get_dither(SwrContext *s, void *dst, int len, unsigned seed, enum AVSampleFormat noise_fmt);
+av_warn_unused_result
+int swri_dither_init(SwrContext *s, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt);
+
+void swri_audio_convert_init_aarch64(struct AudioConvert *ac,
+                                 enum AVSampleFormat out_fmt,
+                                 enum AVSampleFormat in_fmt,
+                                 int channels);
+void swri_audio_convert_init_arm(struct AudioConvert *ac,
+                                 enum AVSampleFormat out_fmt,
+                                 enum AVSampleFormat in_fmt,
+                                 int channels);
+void swri_audio_convert_init_x86(struct AudioConvert *ac,
+                                 enum AVSampleFormat out_fmt,
+                                 enum AVSampleFormat in_fmt,
+                                 int channels);
+
+#endif
diff --git a/libswresample/swresampleres.rc b/libswresample/swresampleres.rc
new file mode 100644
index 0000000000..1320f78b9a
--- /dev/null
+++ b/libswresample/swresampleres.rc
@@ -0,0 +1,55 @@
+/*
+ * Windows resource file for libswresample
+ *
+ * Copyright (C) 2012 James Almer
+ * Copyright (C) 2013 Tiancheng "Timothy" Gu
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <windows.h>
+#include "libswresample/version.h"
+#include "libavutil/ffversion.h"
+#include "config.h"
+
+1 VERSIONINFO
+FILEVERSION     LIBSWRESAMPLE_VERSION_MAJOR, LIBSWRESAMPLE_VERSION_MINOR, LIBSWRESAMPLE_VERSION_MICRO, 0
+PRODUCTVERSION  LIBSWRESAMPLE_VERSION_MAJOR, LIBSWRESAMPLE_VERSION_MINOR, LIBSWRESAMPLE_VERSION_MICRO, 0
+FILEFLAGSMASK   VS_FFI_FILEFLAGSMASK
+FILEOS          VOS_NT_WINDOWS32
+FILETYPE        VFT_DLL
+{
+    BLOCK "StringFileInfo"
+    {
+        BLOCK "040904B0"
+        {
+            VALUE "CompanyName",      "FFmpeg Project"
+            VALUE "FileDescription",  "FFmpeg audio resampling library"
+            VALUE "FileVersion",      AV_STRINGIFY(LIBSWRESAMPLE_VERSION)
+            VALUE "InternalName",     "libswresample"
+            VALUE "LegalCopyright",   "Copyright (C) 2000-" AV_STRINGIFY(CONFIG_THIS_YEAR) " FFmpeg Project"
+            VALUE "OriginalFilename", "swresample" BUILDSUF "-" AV_STRINGIFY(LIBSWRESAMPLE_VERSION_MAJOR) SLIBSUF
+            VALUE "ProductName",      "FFmpeg"
+            VALUE "ProductVersion",   FFMPEG_VERSION
+        }
+    }
+
+    BLOCK "VarFileInfo"
+    {
+        VALUE "Translation", 0x0409, 0x04B0
+    }
+}
diff --git a/libswresample/tests/.gitignore b/libswresample/tests/.gitignore
new file mode 100644
index 0000000000..2dc986bd0e
--- /dev/null
+++ b/libswresample/tests/.gitignore
@@ -0,0 +1 @@
+/swresample
diff --git a/libswresample/tests/swresample.c b/libswresample/tests/swresample.c
new file mode 100644
index 0000000000..53896585e2
--- /dev/null
+++ b/libswresample/tests/swresample.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright (C) 2011-2012 Michael Niedermayer (michaelni@gmx.at)
+ * Copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/common.h"
+#include "libavutil/opt.h"
+
+#include "libswresample/swresample.h"
+
+#undef time
+#include <time.h>
+#undef fprintf
+
+#define SAMPLES 1000
+
+#define SWR_CH_MAX 32
+
+#define ASSERT_LEVEL 2
+
+static double get(uint8_t *a[], int ch, int index, int ch_count, enum AVSampleFormat f){
+    const uint8_t *p;
+    if(av_sample_fmt_is_planar(f)){
+        f= av_get_alt_sample_fmt(f, 0);
+        p= a[ch];
+    }else{
+        p= a[0];
+        index= ch + index*ch_count;
+    }
+
+    switch(f){
+    case AV_SAMPLE_FMT_U8 : return ((const uint8_t*)p)[index]/127.0-1.0;
+    case AV_SAMPLE_FMT_S16: return ((const int16_t*)p)[index]/32767.0;
+    case AV_SAMPLE_FMT_S32: return ((const int32_t*)p)[index]/2147483647.0;
+    case AV_SAMPLE_FMT_FLT: return ((const float  *)p)[index];
+    case AV_SAMPLE_FMT_DBL: return ((const double *)p)[index];
+    default: av_assert0(0);
+    }
+}
+
+static void  set(uint8_t *a[], int ch, int index, int ch_count, enum AVSampleFormat f, double v){
+    uint8_t *p;
+    if(av_sample_fmt_is_planar(f)){
+        f= av_get_alt_sample_fmt(f, 0);
+        p= a[ch];
+    }else{
+        p= a[0];
+        index= ch + index*ch_count;
+    }
+    switch(f){
+    case AV_SAMPLE_FMT_U8 : ((uint8_t*)p)[index]= av_clip_uint8 (lrint((v+1.0)*127));     break;
+    case AV_SAMPLE_FMT_S16: ((int16_t*)p)[index]= av_clip_int16 (lrint(v*32767));         break;
+    case AV_SAMPLE_FMT_S32: ((int32_t*)p)[index]= av_clipl_int32(llrint(v*2147483647));   break;
+    case AV_SAMPLE_FMT_FLT: ((float  *)p)[index]= v;                                      break;
+    case AV_SAMPLE_FMT_DBL: ((double *)p)[index]= v;                                      break;
+    default: av_assert2(0);
+    }
+}
+
+static void shift(uint8_t *a[], int index, int ch_count, enum AVSampleFormat f){
+    int ch;
+
+    if(av_sample_fmt_is_planar(f)){
+        f= av_get_alt_sample_fmt(f, 0);
+        for(ch= 0; ch<ch_count; ch++)
+            a[ch] += index*av_get_bytes_per_sample(f);
+    }else{
+        a[0] += index*ch_count*av_get_bytes_per_sample(f);
+    }
+}
+
+static const enum AVSampleFormat formats[] = {
+    AV_SAMPLE_FMT_S16,
+    AV_SAMPLE_FMT_FLTP,
+    AV_SAMPLE_FMT_S16P,
+    AV_SAMPLE_FMT_FLT,
+    AV_SAMPLE_FMT_S32P,
+    AV_SAMPLE_FMT_S32,
+    AV_SAMPLE_FMT_U8P,
+    AV_SAMPLE_FMT_U8,
+    AV_SAMPLE_FMT_DBLP,
+    AV_SAMPLE_FMT_DBL,
+};
+
+static const int rates[] = {
+    8000,
+    11025,
+    16000,
+    22050,
+    32000,
+    48000,
+};
+
+static const uint64_t layouts[]={
+    AV_CH_LAYOUT_MONO                    ,
+    AV_CH_LAYOUT_STEREO                  ,
+    AV_CH_LAYOUT_2_1                     ,
+    AV_CH_LAYOUT_SURROUND                ,
+    AV_CH_LAYOUT_4POINT0                 ,
+    AV_CH_LAYOUT_2_2                     ,
+    AV_CH_LAYOUT_QUAD                    ,
+    AV_CH_LAYOUT_5POINT0                 ,
+    AV_CH_LAYOUT_5POINT1                 ,
+    AV_CH_LAYOUT_5POINT0_BACK            ,
+    AV_CH_LAYOUT_5POINT1_BACK            ,
+    AV_CH_LAYOUT_7POINT0                 ,
+    AV_CH_LAYOUT_7POINT1                 ,
+    AV_CH_LAYOUT_7POINT1_WIDE            ,
+};
+
+static void setup_array(uint8_t *out[SWR_CH_MAX], uint8_t *in, enum AVSampleFormat format, int samples){
+    if(av_sample_fmt_is_planar(format)){
+        int i;
+        int plane_size= av_get_bytes_per_sample(format&0xFF)*samples;
+        format&=0xFF;
+        for(i=0; i<SWR_CH_MAX; i++){
+            out[i]= in + i*plane_size;
+        }
+    }else{
+        out[0]= in;
+    }
+}
+
+static int cmp(const void *a, const void *b){
+    return *(const int *)a - *(const int *)b;
+}
+
+static void audiogen(void *data, enum AVSampleFormat sample_fmt,
+                     int channels, int sample_rate, int nb_samples)
+{
+    int i, ch, k;
+    double v, f, a, ampa;
+    double tabf1[SWR_CH_MAX];
+    double tabf2[SWR_CH_MAX];
+    double taba[SWR_CH_MAX];
+    unsigned static rnd;
+
+#define PUT_SAMPLE set(data, ch, k, channels, sample_fmt, v);
+#define uint_rand(x) ((x) = (x) * 1664525 + 1013904223)
+#define dbl_rand(x) (uint_rand(x)*2.0 / (double)UINT_MAX - 1)
+    k = 0;
+
+    /* 1 second of single freq sinus at 1000 Hz */
+    a = 0;
+    for (i = 0; i < 1 * sample_rate && k < nb_samples; i++, k++) {
+        v = sin(a) * 0.30;
+        for (ch = 0; ch < channels; ch++)
+            PUT_SAMPLE
+        a += M_PI * 1000.0 * 2.0 / sample_rate;
+    }
+
+    /* 1 second of varying frequency between 100 and 10000 Hz */
+    a = 0;
+    for (i = 0; i < 1 * sample_rate && k < nb_samples; i++, k++) {
+        v = sin(a) * 0.30;
+        for (ch = 0; ch < channels; ch++)
+            PUT_SAMPLE
+        f  = 100.0 + (((10000.0 - 100.0) * i) / sample_rate);
+        a += M_PI * f * 2.0 / sample_rate;
+    }
+
+    /* 0.5 second of low amplitude white noise */
+    for (i = 0; i < sample_rate / 2 && k < nb_samples; i++, k++) {
+        v = dbl_rand(rnd) * 0.30;
+        for (ch = 0; ch < channels; ch++)
+            PUT_SAMPLE
+    }
+
+    /* 0.5 second of high amplitude white noise */
+    for (i = 0; i < sample_rate / 2 && k < nb_samples; i++, k++) {
+        v = dbl_rand(rnd);
+        for (ch = 0; ch < channels; ch++)
+            PUT_SAMPLE
+    }
+
+    /* 1 second of unrelated ramps for each channel */
+    for (ch = 0; ch < channels; ch++) {
+        taba[ch]  = 0;
+        tabf1[ch] = 100 + uint_rand(rnd) % 5000;
+        tabf2[ch] = 100 + uint_rand(rnd) % 5000;
+    }
+    for (i = 0; i < 1 * sample_rate && k < nb_samples; i++, k++) {
+        for (ch = 0; ch < channels; ch++) {
+            v = sin(taba[ch]) * 0.30;
+            PUT_SAMPLE
+            f = tabf1[ch] + (((tabf2[ch] - tabf1[ch]) * i) / sample_rate);
+            taba[ch] += M_PI * f * 2.0 / sample_rate;
+        }
+    }
+
+    /* 2 seconds of 500 Hz with varying volume */
+    a    = 0;
+    ampa = 0;
+    for (i = 0; i < 2 * sample_rate && k < nb_samples; i++, k++) {
+        for (ch = 0; ch < channels; ch++) {
+            double amp = (1.0 + sin(ampa)) * 0.15;
+            if (ch & 1)
+                amp = 0.30 - amp;
+            v = sin(a) * amp;
+            PUT_SAMPLE
+            a    += M_PI * 500.0 * 2.0 / sample_rate;
+            ampa += M_PI *  2.0 / sample_rate;
+        }
+    }
+}
+
+int main(int argc, char **argv){
+    int in_sample_rate, out_sample_rate, ch ,i, flush_count;
+    uint64_t in_ch_layout, out_ch_layout;
+    enum AVSampleFormat in_sample_fmt, out_sample_fmt;
+    uint8_t array_in[SAMPLES*8*8];
+    uint8_t array_mid[SAMPLES*8*8*3];
+    uint8_t array_out[SAMPLES*8*8+100];
+    uint8_t *ain[SWR_CH_MAX];
+    uint8_t *aout[SWR_CH_MAX];
+    uint8_t *amid[SWR_CH_MAX];
+    int flush_i=0;
+    int mode;
+    int num_tests = 10000;
+    uint32_t seed = 0;
+    uint32_t rand_seed = 0;
+    int remaining_tests[FF_ARRAY_ELEMS(rates) * FF_ARRAY_ELEMS(layouts) * FF_ARRAY_ELEMS(formats) * FF_ARRAY_ELEMS(layouts) * FF_ARRAY_ELEMS(formats)];
+    int max_tests = FF_ARRAY_ELEMS(remaining_tests);
+    int test;
+    int specific_test= -1;
+
+    struct SwrContext * forw_ctx= NULL;
+    struct SwrContext *backw_ctx= NULL;
+
+    if (argc > 1) {
+        if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
+            av_log(NULL, AV_LOG_INFO, "Usage: swresample-test [<num_tests>[ <test>]]  \n"
+                   "num_tests           Default is %d\n", num_tests);
+            return 0;
+        }
+        num_tests = strtol(argv[1], NULL, 0);
+        if(num_tests < 0) {
+            num_tests = -num_tests;
+            rand_seed = time(0);
+        }
+        if(num_tests<= 0 || num_tests>max_tests)
+            num_tests = max_tests;
+        if(argc > 2) {
+            specific_test = strtol(argv[1], NULL, 0);
+        }
+    }
+
+    for(i=0; i<max_tests; i++)
+        remaining_tests[i] = i;
+
+    for(test=0; test<num_tests; test++){
+        unsigned r;
+        uint_rand(seed);
+        r = (seed * (uint64_t)(max_tests - test)) >>32;
+        FFSWAP(int, remaining_tests[r], remaining_tests[max_tests - test - 1]);
+    }
+    qsort(remaining_tests + max_tests - num_tests, num_tests, sizeof(remaining_tests[0]), cmp);
+    in_sample_rate=16000;
+    for(test=0; test<num_tests; test++){
+        char  in_layout_string[256];
+        char out_layout_string[256];
+        unsigned vector= remaining_tests[max_tests - test - 1];
+        int in_ch_count;
+        int out_count, mid_count, out_ch_count;
+
+        in_ch_layout    = layouts[vector % FF_ARRAY_ELEMS(layouts)]; vector /= FF_ARRAY_ELEMS(layouts);
+        out_ch_layout   = layouts[vector % FF_ARRAY_ELEMS(layouts)]; vector /= FF_ARRAY_ELEMS(layouts);
+        in_sample_fmt   = formats[vector % FF_ARRAY_ELEMS(formats)]; vector /= FF_ARRAY_ELEMS(formats);
+        out_sample_fmt  = formats[vector % FF_ARRAY_ELEMS(formats)]; vector /= FF_ARRAY_ELEMS(formats);
+        out_sample_rate = rates  [vector % FF_ARRAY_ELEMS(rates  )]; vector /= FF_ARRAY_ELEMS(rates);
+        av_assert0(!vector);
+
+        if(specific_test == 0){
+            if(out_sample_rate != in_sample_rate || in_ch_layout != out_ch_layout)
+                continue;
+        }
+
+        in_ch_count= av_get_channel_layout_nb_channels(in_ch_layout);
+        out_ch_count= av_get_channel_layout_nb_channels(out_ch_layout);
+        av_get_channel_layout_string( in_layout_string, sizeof( in_layout_string),  in_ch_count,  in_ch_layout);
+        av_get_channel_layout_string(out_layout_string, sizeof(out_layout_string), out_ch_count, out_ch_layout);
+        fprintf(stderr, "TEST: %s->%s, rate:%5d->%5d, fmt:%s->%s\n",
+                in_layout_string, out_layout_string,
+                in_sample_rate, out_sample_rate,
+                av_get_sample_fmt_name(in_sample_fmt), av_get_sample_fmt_name(out_sample_fmt));
+        forw_ctx  = swr_alloc_set_opts(forw_ctx, out_ch_layout, out_sample_fmt,  out_sample_rate,
+                                                    in_ch_layout,  in_sample_fmt,  in_sample_rate,
+                                        0, 0);
+        backw_ctx = swr_alloc_set_opts(backw_ctx, in_ch_layout,  in_sample_fmt,             in_sample_rate,
+                                                    out_ch_layout, out_sample_fmt, out_sample_rate,
+                                        0, 0);
+        if(!forw_ctx) {
+            fprintf(stderr, "Failed to init forw_cts\n");
+            return 1;
+        }
+        if(!backw_ctx) {
+            fprintf(stderr, "Failed to init backw_ctx\n");
+            return 1;
+        }
+        if (uint_rand(rand_seed) % 3 == 0)
+            av_opt_set_int(forw_ctx, "ich", 0, 0);
+        if (uint_rand(rand_seed) % 3 == 0)
+            av_opt_set_int(forw_ctx, "och", 0, 0);
+
+        if(swr_init( forw_ctx) < 0)
+            fprintf(stderr, "swr_init(->) failed\n");
+        if(swr_init(backw_ctx) < 0)
+            fprintf(stderr, "swr_init(<-) failed\n");
+                //FIXME test planar
+        setup_array(ain , array_in ,  in_sample_fmt,   SAMPLES);
+        setup_array(amid, array_mid, out_sample_fmt, 3*SAMPLES);
+        setup_array(aout, array_out,  in_sample_fmt           ,   SAMPLES);
+#if 0
+        for(ch=0; ch<in_ch_count; ch++){
+            for(i=0; i<SAMPLES; i++)
+                set(ain, ch, i, in_ch_count, in_sample_fmt, sin(i*i*3/SAMPLES));
+        }
+#else
+        audiogen(ain, in_sample_fmt, in_ch_count, SAMPLES/6+1, SAMPLES);
+#endif
+        mode = uint_rand(rand_seed) % 3;
+        if(mode==0 /*|| out_sample_rate == in_sample_rate*/) {
+            mid_count= swr_convert(forw_ctx, amid, 3*SAMPLES, (const uint8_t **)ain, SAMPLES);
+        } else if(mode==1){
+            mid_count= swr_convert(forw_ctx, amid,         0, (const uint8_t **)ain, SAMPLES);
+            mid_count+=swr_convert(forw_ctx, amid, 3*SAMPLES, (const uint8_t **)ain,       0);
+        } else {
+            int tmp_count;
+            mid_count= swr_convert(forw_ctx, amid,         0, (const uint8_t **)ain,       1);
+            av_assert0(mid_count==0);
+            shift(ain,  1, in_ch_count, in_sample_fmt);
+            mid_count+=swr_convert(forw_ctx, amid, 3*SAMPLES, (const uint8_t **)ain,       0);
+            shift(amid,  mid_count, out_ch_count, out_sample_fmt); tmp_count = mid_count;
+            mid_count+=swr_convert(forw_ctx, amid,         2, (const uint8_t **)ain,       2);
+            shift(amid,  mid_count-tmp_count, out_ch_count, out_sample_fmt); tmp_count = mid_count;
+            shift(ain,  2, in_ch_count, in_sample_fmt);
+            mid_count+=swr_convert(forw_ctx, amid,         1, (const uint8_t **)ain, SAMPLES-3);
+            shift(amid,  mid_count-tmp_count, out_ch_count, out_sample_fmt); tmp_count = mid_count;
+            shift(ain, -3, in_ch_count, in_sample_fmt);
+            mid_count+=swr_convert(forw_ctx, amid, 3*SAMPLES, (const uint8_t **)ain,       0);
+            shift(amid,  -tmp_count, out_ch_count, out_sample_fmt);
+        }
+        out_count= swr_convert(backw_ctx,aout, SAMPLES, (const uint8_t **)amid, mid_count);
+
+        for(ch=0; ch<in_ch_count; ch++){
+            double sse, maxdiff=0;
+            double sum_a= 0;
+            double sum_b= 0;
+            double sum_aa= 0;
+            double sum_bb= 0;
+            double sum_ab= 0;
+            for(i=0; i<out_count; i++){
+                double a= get(ain , ch, i, in_ch_count, in_sample_fmt);
+                double b= get(aout, ch, i, in_ch_count, in_sample_fmt);
+                sum_a += a;
+                sum_b += b;
+                sum_aa+= a*a;
+                sum_bb+= b*b;
+                sum_ab+= a*b;
+                maxdiff= FFMAX(maxdiff, fabs(a-b));
+            }
+            sse= sum_aa + sum_bb - 2*sum_ab;
+            if(sse < 0 && sse > -0.00001) sse=0; //fix rounding error
+
+            fprintf(stderr, "[e:%f c:%f max:%f] len:%5d\n", out_count ? sqrt(sse/out_count) : 0, sum_ab/(sqrt(sum_aa*sum_bb)), maxdiff, out_count);
+        }
+
+        flush_i++;
+        flush_i%=21;
+        flush_count = swr_convert(backw_ctx,aout, flush_i, 0, 0);
+        shift(aout,  flush_i, in_ch_count, in_sample_fmt);
+        flush_count+= swr_convert(backw_ctx,aout, SAMPLES-flush_i, 0, 0);
+        shift(aout, -flush_i, in_ch_count, in_sample_fmt);
+        if(flush_count){
+            for(ch=0; ch<in_ch_count; ch++){
+                double sse, maxdiff=0;
+                double sum_a= 0;
+                double sum_b= 0;
+                double sum_aa= 0;
+                double sum_bb= 0;
+                double sum_ab= 0;
+                for(i=0; i<flush_count; i++){
+                    double a= get(ain , ch, i+out_count, in_ch_count, in_sample_fmt);
+                    double b= get(aout, ch, i, in_ch_count, in_sample_fmt);
+                    sum_a += a;
+                    sum_b += b;
+                    sum_aa+= a*a;
+                    sum_bb+= b*b;
+                    sum_ab+= a*b;
+                    maxdiff= FFMAX(maxdiff, fabs(a-b));
+                }
+                sse= sum_aa + sum_bb - 2*sum_ab;
+                if(sse < 0 && sse > -0.00001) sse=0; //fix rounding error
+
+                fprintf(stderr, "[e:%f c:%f max:%f] len:%5d F:%3d\n", sqrt(sse/flush_count), sum_ab/(sqrt(sum_aa*sum_bb)), maxdiff, flush_count, flush_i);
+            }
+        }
+
+
+        fprintf(stderr, "\n");
+    }
+
+    return 0;
+}
diff --git a/libswresample/version.h b/libswresample/version.h
new file mode 100644
index 0000000000..b8e32c0e41
--- /dev/null
+++ b/libswresample/version.h
@@ -0,0 +1,45 @@
+/*
+ * Version macros.
+ *
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef SWRESAMPLE_VERSION_H
+#define SWRESAMPLE_VERSION_H
+
+/**
+ * @file
+ * Libswresample version macros
+ */
+
+#include "libavutil/avutil.h"
+
+#define LIBSWRESAMPLE_VERSION_MAJOR   2
+#define LIBSWRESAMPLE_VERSION_MINOR   1
+#define LIBSWRESAMPLE_VERSION_MICRO 100
+
+#define LIBSWRESAMPLE_VERSION_INT  AV_VERSION_INT(LIBSWRESAMPLE_VERSION_MAJOR, \
+                                                  LIBSWRESAMPLE_VERSION_MINOR, \
+                                                  LIBSWRESAMPLE_VERSION_MICRO)
+#define LIBSWRESAMPLE_VERSION      AV_VERSION(LIBSWRESAMPLE_VERSION_MAJOR, \
+                                              LIBSWRESAMPLE_VERSION_MINOR, \
+                                              LIBSWRESAMPLE_VERSION_MICRO)
+#define LIBSWRESAMPLE_BUILD        LIBSWRESAMPLE_VERSION_INT
+
+#define LIBSWRESAMPLE_IDENT        "SwR" AV_STRINGIFY(LIBSWRESAMPLE_VERSION)
+
+#endif /* SWRESAMPLE_VERSION_H */
diff --git a/libswresample/x86/Makefile b/libswresample/x86/Makefile
new file mode 100644
index 0000000000..be44df56aa
--- /dev/null
+++ b/libswresample/x86/Makefile
@@ -0,0 +1,9 @@
+YASM-OBJS                       += x86/audio_convert.o\
+                                   x86/rematrix.o\
+                                   x86/resample.o\
+
+OBJS                            += x86/audio_convert_init.o\
+                                   x86/rematrix_init.o\
+                                   x86/resample_init.o\
+
+OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm
new file mode 100644
index 0000000000..d441636d3c
--- /dev/null
+++ b/libswresample/x86/audio_convert.asm
@@ -0,0 +1,739 @@
+;******************************************************************************
+;* Copyright (c) 2012 Michael Niedermayer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA 32
+flt2pm31: times 8 dd 4.6566129e-10
+flt2p31 : times 8 dd 2147483648.0
+flt2p15 : times 8 dd 32768.0
+
+word_unpack_shuf : db  0, 1, 4, 5, 8, 9,12,13, 2, 3, 6, 7,10,11,14,15
+
+SECTION .text
+
+
+;to, from, a/u, log2_outsize, log_intsize, const
+%macro PACK_2CH 5-7
+cglobal pack_2ch_%2_to_%1_%3, 3, 4, 6, dst, src, len, src2
+    mov src2q   , [srcq+gprsize]
+    mov srcq    , [srcq]
+    mov dstq    , [dstq]
+%ifidn %3, a
+    test dstq, mmsize-1
+        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
+    test srcq, mmsize-1
+        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
+    test src2q, mmsize-1
+        jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
+%else
+pack_2ch_%2_to_%1_u_int %+ SUFFIX:
+%endif
+    lea     srcq , [srcq  + (1<<%5)*lenq]
+    lea     src2q, [src2q + (1<<%5)*lenq]
+    lea     dstq , [dstq  + (2<<%4)*lenq]
+    neg     lenq
+    %7 m0,m1,m2,m3,m4,m5
+.next:
+%if %4 >= %5
+    mov%3     m0, [         srcq +(1<<%5)*lenq]
+    mova      m1, m0
+    mov%3     m2, [         src2q+(1<<%5)*lenq]
+%if %5 == 1
+    punpcklwd m0, m2
+    punpckhwd m1, m2
+%else
+    punpckldq m0, m2
+    punpckhdq m1, m2
+%endif
+    %6 m0,m1,m2,m3,m4,m5
+%else
+    mov%3     m0, [         srcq +(1<<%5)*lenq]
+    mov%3     m1, [mmsize + srcq +(1<<%5)*lenq]
+    mov%3     m2, [         src2q+(1<<%5)*lenq]
+    mov%3     m3, [mmsize + src2q+(1<<%5)*lenq]
+    %6 m0,m1,m2,m3,m4,m5
+    mova      m2, m0
+    punpcklwd m0, m1
+    punpckhwd m2, m1
+    SWAP 1,2
+%endif
+    mov%3 [           dstq+(2<<%4)*lenq], m0
+    mov%3 [  mmsize + dstq+(2<<%4)*lenq], m1
+%if %4 > %5
+    mov%3 [2*mmsize + dstq+(2<<%4)*lenq], m2
+    mov%3 [3*mmsize + dstq+(2<<%4)*lenq], m3
+    add lenq, 4*mmsize/(2<<%4)
+%else
+    add lenq, 2*mmsize/(2<<%4)
+%endif
+        jl .next
+    REP_RET
+%endmacro
+
+%macro UNPACK_2CH 5-7
+cglobal unpack_2ch_%2_to_%1_%3, 3, 4, 7, dst, src, len, dst2
+    mov dst2q   , [dstq+gprsize]
+    mov srcq    , [srcq]
+    mov dstq    , [dstq]
+%ifidn %3, a
+    test dstq, mmsize-1
+        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
+    test srcq, mmsize-1
+        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
+    test dst2q, mmsize-1
+        jne unpack_2ch_%2_to_%1_u_int %+ SUFFIX
+%else
+unpack_2ch_%2_to_%1_u_int %+ SUFFIX:
+%endif
+    lea     srcq , [srcq  + (2<<%5)*lenq]
+    lea     dstq , [dstq  + (1<<%4)*lenq]
+    lea     dst2q, [dst2q + (1<<%4)*lenq]
+    neg     lenq
+    %7 m0,m1,m2,m3,m4,m5
+    mova      m6, [word_unpack_shuf]
+.next:
+    mov%3     m0, [           srcq +(2<<%5)*lenq]
+    mov%3     m2, [  mmsize + srcq +(2<<%5)*lenq]
+%if %5 == 1
+%ifidn SUFFIX, _ssse3
+    pshufb    m0, m6
+    mova      m1, m0
+    pshufb    m2, m6
+    punpcklqdq m0,m2
+    punpckhqdq m1,m2
+%else
+    mova      m1, m0
+    punpcklwd m0,m2
+    punpckhwd m1,m2
+
+    mova      m2, m0
+    punpcklwd m0,m1
+    punpckhwd m2,m1
+
+    mova      m1, m0
+    punpcklwd m0,m2
+    punpckhwd m1,m2
+%endif
+%else
+    mova      m1, m0
+    shufps    m0, m2, 10001000b
+    shufps    m1, m2, 11011101b
+%endif
+%if %4 < %5
+    mov%3     m2, [2*mmsize + srcq +(2<<%5)*lenq]
+    mova      m3, m2
+    mov%3     m4, [3*mmsize + srcq +(2<<%5)*lenq]
+    shufps    m2, m4, 10001000b
+    shufps    m3, m4, 11011101b
+    SWAP 1,2
+%endif
+    %6 m0,m1,m2,m3,m4,m5
+    mov%3 [           dstq+(1<<%4)*lenq], m0
+%if %4 > %5
+    mov%3 [          dst2q+(1<<%4)*lenq], m2
+    mov%3 [ mmsize +  dstq+(1<<%4)*lenq], m1
+    mov%3 [ mmsize + dst2q+(1<<%4)*lenq], m3
+    add lenq, 2*mmsize/(1<<%4)
+%else
+    mov%3 [          dst2q+(1<<%4)*lenq], m1
+    add lenq, mmsize/(1<<%4)
+%endif
+        jl .next
+    REP_RET
+%endmacro
+
+%macro CONV 5-7
+cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
+    mov srcq    , [srcq]
+    mov dstq    , [dstq]
+%ifidn %3, a
+    test dstq, mmsize-1
+        jne %2_to_%1_u_int %+ SUFFIX
+    test srcq, mmsize-1
+        jne %2_to_%1_u_int %+ SUFFIX
+%else
+%2_to_%1_u_int %+ SUFFIX:
+%endif
+    lea     srcq , [srcq  + (1<<%5)*lenq]
+    lea     dstq , [dstq  + (1<<%4)*lenq]
+    neg     lenq
+    %7 m0,m1,m2,m3,m4,m5
+.next:
+    mov%3     m0, [           srcq +(1<<%5)*lenq]
+    mov%3     m1, [  mmsize + srcq +(1<<%5)*lenq]
+%if %4 < %5
+    mov%3     m2, [2*mmsize + srcq +(1<<%5)*lenq]
+    mov%3     m3, [3*mmsize + srcq +(1<<%5)*lenq]
+%endif
+    %6 m0,m1,m2,m3,m4,m5
+    mov%3 [           dstq+(1<<%4)*lenq], m0
+    mov%3 [  mmsize + dstq+(1<<%4)*lenq], m1
+%if %4 > %5
+    mov%3 [2*mmsize + dstq+(1<<%4)*lenq], m2
+    mov%3 [3*mmsize + dstq+(1<<%4)*lenq], m3
+    add lenq, 4*mmsize/(1<<%4)
+%else
+    add lenq, 2*mmsize/(1<<%4)
+%endif
+        jl .next
+%if mmsize == 8
+    emms
+    RET
+%else
+    REP_RET
+%endif
+%endmacro
+
+%macro PACK_6CH 8
+cglobal pack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, src1, src2, src3, src4, src5, len
+%if ARCH_X86_64
+    mov     lend, r2d
+%else
+    %define lend dword r2m
+%endif
+    mov    src1q, [srcq+1*gprsize]
+    mov    src2q, [srcq+2*gprsize]
+    mov    src3q, [srcq+3*gprsize]
+    mov    src4q, [srcq+4*gprsize]
+    mov    src5q, [srcq+5*gprsize]
+    mov     srcq, [srcq]
+    mov     dstq, [dstq]
+%ifidn %3, a
+    test dstq, mmsize-1
+        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+    test srcq, mmsize-1
+        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+    test src1q, mmsize-1
+        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+    test src2q, mmsize-1
+        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+    test src3q, mmsize-1
+        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+    test src4q, mmsize-1
+        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+    test src5q, mmsize-1
+        jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+%else
+pack_6ch_%2_to_%1_u_int %+ SUFFIX:
+%endif
+    sub    src1q, srcq
+    sub    src2q, srcq
+    sub    src3q, srcq
+    sub    src4q, srcq
+    sub    src5q, srcq
+    %8 x,x,x,x,m7,x
+.loop:
+    mov%3     m0, [srcq      ]
+    mov%3     m1, [srcq+src1q]
+    mov%3     m2, [srcq+src2q]
+    mov%3     m3, [srcq+src3q]
+    mov%3     m4, [srcq+src4q]
+    mov%3     m5, [srcq+src5q]
+%if cpuflag(sse)
+    SBUTTERFLYPS 0, 1, 6
+    SBUTTERFLYPS 2, 3, 6
+    SBUTTERFLYPS 4, 5, 6
+
+%if cpuflag(avx)
+    blendps   m6, m4, m0, 1100b
+%else
+    movaps    m6, m4
+    shufps    m4, m0, q3210
+    SWAP 4,6
+%endif
+    movlhps   m0, m2
+    movhlps   m4, m2
+%if cpuflag(avx)
+    blendps   m2, m5, m1, 1100b
+%else
+    movaps    m2, m5
+    shufps    m5, m1, q3210
+    SWAP 2,5
+%endif
+    movlhps   m1, m3
+    movhlps   m5, m3
+
+    %7 m0,m6,x,x,m7,m3
+    %7 m4,m1,x,x,m7,m3
+    %7 m2,m5,x,x,m7,m3
+
+    mov %+ %3 %+ ps [dstq   ], m0
+    mov %+ %3 %+ ps [dstq+16], m6
+    mov %+ %3 %+ ps [dstq+32], m4
+    mov %+ %3 %+ ps [dstq+48], m1
+    mov %+ %3 %+ ps [dstq+64], m2
+    mov %+ %3 %+ ps [dstq+80], m5
+%else ; mmx
+    SBUTTERFLY dq, 0, 1, 6
+    SBUTTERFLY dq, 2, 3, 6
+    SBUTTERFLY dq, 4, 5, 6
+
+    movq   [dstq   ], m0
+    movq   [dstq+ 8], m2
+    movq   [dstq+16], m4
+    movq   [dstq+24], m1
+    movq   [dstq+32], m3
+    movq   [dstq+40], m5
+%endif
+    add      srcq, mmsize
+    add      dstq, mmsize*6
+    sub      lend, mmsize/4
+    jg .loop
+%if mmsize == 8
+    emms
+    RET
+%else
+    REP_RET
+%endif
+%endmacro
+
+%macro UNPACK_6CH 8
+cglobal unpack_6ch_%2_to_%1_%3, 2, 8, %6, dst, src, dst1, dst2, dst3, dst4, dst5, len
+%if ARCH_X86_64
+    mov     lend, r2d
+%else
+    %define lend dword r2m
+%endif
+    mov    dst1q, [dstq+1*gprsize]
+    mov    dst2q, [dstq+2*gprsize]
+    mov    dst3q, [dstq+3*gprsize]
+    mov    dst4q, [dstq+4*gprsize]
+    mov    dst5q, [dstq+5*gprsize]
+    mov     dstq, [dstq]
+    mov     srcq, [srcq]
+%ifidn %3, a
+    test dstq, mmsize-1
+        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
+    test srcq, mmsize-1
+        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
+    test dst1q, mmsize-1
+        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
+    test dst2q, mmsize-1
+        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
+    test dst3q, mmsize-1
+        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
+    test dst4q, mmsize-1
+        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
+    test dst5q, mmsize-1
+        jne unpack_6ch_%2_to_%1_u_int %+ SUFFIX
+%else
+unpack_6ch_%2_to_%1_u_int %+ SUFFIX:
+%endif
+    sub    dst1q, dstq
+    sub    dst2q, dstq
+    sub    dst3q, dstq
+    sub    dst4q, dstq
+    sub    dst5q, dstq
+    %8 x,x,x,x,m7,x
+.loop:
+    mov%3     m0, [srcq   ]
+    mov%3     m1, [srcq+16]
+    mov%3     m2, [srcq+32]
+    mov%3     m3, [srcq+48]
+    mov%3     m4, [srcq+64]
+    mov%3     m5, [srcq+80]
+
+    SBUTTERFLYPS 0, 3, 6
+    SBUTTERFLYPS 1, 4, 6
+    SBUTTERFLYPS 2, 5, 6
+    SBUTTERFLYPS 0, 4, 6
+    SBUTTERFLYPS 3, 2, 6
+    SBUTTERFLYPS 1, 5, 6
+    SWAP 1, 4
+    SWAP 2, 3
+
+    %7 m0,m1,x,x,m7,m6
+    %7 m2,m3,x,x,m7,m6
+    %7 m4,m5,x,x,m7,m6
+
+    mov %+ %3 %+ ps [dstq      ], m0
+    mov %+ %3 %+ ps [dstq+dst1q], m1
+    mov %+ %3 %+ ps [dstq+dst2q], m2
+    mov %+ %3 %+ ps [dstq+dst3q], m3
+    mov %+ %3 %+ ps [dstq+dst4q], m4
+    mov %+ %3 %+ ps [dstq+dst5q], m5
+
+    add      srcq, mmsize*6
+    add      dstq, mmsize
+    sub      lend, mmsize/4
+    jg .loop
+    REP_RET
+%endmacro
+
+%define PACK_8CH_GPRS (10 * ARCH_X86_64) + ((6 + HAVE_ALIGNED_STACK) * ARCH_X86_32)
+
+%macro PACK_8CH 8
+cglobal pack_8ch_%2_to_%1_%3, 2, PACK_8CH_GPRS, %6, ARCH_X86_32*48, dst, src, len, src1, src2, src3, src4, src5, src6, src7
+    mov     dstq, [dstq]
+%if ARCH_X86_32
+    DEFINE_ARGS dst, src, src2, src3, src4, src5, src6
+    %define lend dword r2m
+    %define src1q r0q
+    %define src1m dword [rsp+32]
+%if HAVE_ALIGNED_STACK == 0
+    DEFINE_ARGS dst, src, src2, src3, src5, src6
+    %define src4q r0q
+    %define src4m dword [rsp+36]
+%endif
+    %define src7q r0q
+    %define src7m dword [rsp+40]
+    mov     dstm, dstq
+%endif
+    mov    src7q, [srcq+7*gprsize]
+    mov    src6q, [srcq+6*gprsize]
+%if ARCH_X86_32
+    mov    src7m, src7q
+%endif
+    mov    src5q, [srcq+5*gprsize]
+    mov    src4q, [srcq+4*gprsize]
+    mov    src3q, [srcq+3*gprsize]
+%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
+    mov    src4m, src4q
+%endif
+    mov    src2q, [srcq+2*gprsize]
+    mov    src1q, [srcq+1*gprsize]
+    mov     srcq, [srcq]
+%ifidn %3, a
+%if ARCH_X86_32
+    test dstmp, mmsize-1
+%else
+    test dstq, mmsize-1
+%endif
+        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
+    test srcq, mmsize-1
+        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
+    test src1q, mmsize-1
+        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
+    test src2q, mmsize-1
+        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
+    test src3q, mmsize-1
+        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
+%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
+    test src4m, mmsize-1
+%else
+    test src4q, mmsize-1
+%endif
+        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
+    test src5q, mmsize-1
+        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
+    test src6q, mmsize-1
+        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
+%if ARCH_X86_32
+    test src7m, mmsize-1
+%else
+    test src7q, mmsize-1
+%endif
+        jne pack_8ch_%2_to_%1_u_int %+ SUFFIX
+%else
+pack_8ch_%2_to_%1_u_int %+ SUFFIX:
+%endif
+    sub    src1q, srcq
+    sub    src2q, srcq
+    sub    src3q, srcq
+%if ARCH_X86_64 || HAVE_ALIGNED_STACK
+    sub    src4q, srcq
+%else
+    sub    src4m, srcq
+%endif
+    sub    src5q, srcq
+    sub    src6q, srcq
+%if ARCH_X86_64
+    sub    src7q, srcq
+%else
+    mov src1m, src1q
+    sub src7m, srcq
+%endif
+
+%if ARCH_X86_64
+    %8 x,x,x,x,m9,x
+%elifidn %1, int32
+    %define m9 [flt2p31]
+%else
+    %define m9 [flt2pm31]
+%endif
+
+.loop:
+    mov%3     m0, [srcq      ]
+    mov%3     m1, [srcq+src1q]
+    mov%3     m2, [srcq+src2q]
+%if ARCH_X86_32 && HAVE_ALIGNED_STACK == 0
+    mov    src4q, src4m
+%endif
+    mov%3     m3, [srcq+src3q]
+    mov%3     m4, [srcq+src4q]
+    mov%3     m5, [srcq+src5q]
+%if ARCH_X86_32
+    mov    src7q, src7m
+%endif
+    mov%3     m6, [srcq+src6q]
+    mov%3     m7, [srcq+src7q]
+
+%if ARCH_X86_64
+    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, 8
+
+    %7 m0,m1,x,x,m9,m8
+    %7 m2,m3,x,x,m9,m8
+    %7 m4,m5,x,x,m9,m8
+    %7 m6,m7,x,x,m9,m8
+
+    mov%3 [dstq], m0
+%else
+    mov     dstq, dstm
+
+    TRANSPOSE8x4D 0, 1, 2, 3, 4, 5, 6, 7, [rsp], [rsp+16], 1
+
+    %7 m0,m1,x,x,m9,m2
+    mova     m2, [rsp]
+    mov%3   [dstq], m0
+    %7 m2,m3,x,x,m9,m0
+    %7 m4,m5,x,x,m9,m0
+    %7 m6,m7,x,x,m9,m0
+
+%endif
+
+    mov%3 [dstq+16],  m1
+    mov%3 [dstq+32],  m2
+    mov%3 [dstq+48],  m3
+    mov%3 [dstq+64],  m4
+    mov%3 [dstq+80],  m5
+    mov%3 [dstq+96],  m6
+    mov%3 [dstq+112], m7
+
+    add      srcq, mmsize
+    add      dstq, mmsize*8
+%if ARCH_X86_32
+    mov      dstm, dstq
+    mov      src1q, src1m
+%endif
+    sub      lend, mmsize/4
+    jg .loop
+    REP_RET
+%endmacro
+
+%macro INT16_TO_INT32_N 6
+    pxor      m2, m2
+    pxor      m3, m3
+    punpcklwd m2, m1
+    punpckhwd m3, m1
+    SWAP 4,0
+    pxor      m0, m0
+    pxor      m1, m1
+    punpcklwd m0, m4
+    punpckhwd m1, m4
+%endmacro
+
+%macro INT32_TO_INT16_N 6
+    psrad     m0, 16
+    psrad     m1, 16
+    psrad     m2, 16
+    psrad     m3, 16
+    packssdw  m0, m1
+    packssdw  m2, m3
+    SWAP 1,2
+%endmacro
+
+%macro INT32_TO_FLOAT_INIT 6
+    mova      %5, [flt2pm31]
+%endmacro
+%macro INT32_TO_FLOAT_N 6
+    cvtdq2ps  %1, %1
+    cvtdq2ps  %2, %2
+    mulps %1, %1, %5
+    mulps %2, %2, %5
+%endmacro
+
+%macro FLOAT_TO_INT32_INIT 6
+    mova      %5, [flt2p31]
+%endmacro
+%macro FLOAT_TO_INT32_N 6
+    mulps %1, %5
+    mulps %2, %5
+    cvtps2dq  %6, %1
+    cmpps %1, %1, %5, 5
+    paddd %1, %6
+    cvtps2dq  %6, %2
+    cmpps %2, %2, %5, 5
+    paddd %2, %6
+%endmacro
+
+%macro INT16_TO_FLOAT_INIT 6
+    mova      m5, [flt2pm31]
+%endmacro
+%macro INT16_TO_FLOAT_N 6
+    INT16_TO_INT32_N %1,%2,%3,%4,%5,%6
+    cvtdq2ps  m0, m0
+    cvtdq2ps  m1, m1
+    cvtdq2ps  m2, m2
+    cvtdq2ps  m3, m3
+    mulps m0, m0, m5
+    mulps m1, m1, m5
+    mulps m2, m2, m5
+    mulps m3, m3, m5
+%endmacro
+
+%macro FLOAT_TO_INT16_INIT 6
+    mova      m5, [flt2p15]
+%endmacro
+%macro FLOAT_TO_INT16_N 6
+    mulps m0, m5
+    mulps m1, m5
+    mulps m2, m5
+    mulps m3, m5
+    cvtps2dq  m0, m0
+    cvtps2dq  m1, m1
+    packssdw  m0, m1
+    cvtps2dq  m1, m2
+    cvtps2dq  m3, m3
+    packssdw  m1, m3
+%endmacro
+
+%macro NOP_N 0-6
+%endmacro
+
+INIT_MMX mmx
+CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
+CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
+CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
+CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
+
+PACK_6CH float, float, u, 2, 2, 0, NOP_N, NOP_N
+PACK_6CH float, float, a, 2, 2, 0, NOP_N, NOP_N
+
+INIT_XMM sse
+PACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
+PACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N
+
+UNPACK_6CH float, float, u, 2, 2, 7, NOP_N, NOP_N
+UNPACK_6CH float, float, a, 2, 2, 7, NOP_N, NOP_N
+
+INIT_XMM sse2
+CONV int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
+CONV int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
+CONV int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
+CONV int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
+
+PACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
+PACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
+PACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
+PACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
+PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
+PACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
+PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
+PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
+
+UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
+UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
+UNPACK_2CH int32, int32, u, 2, 2, NOP_N, NOP_N
+UNPACK_2CH int32, int32, a, 2, 2, NOP_N, NOP_N
+UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
+UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
+UNPACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N, NOP_N
+UNPACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N, NOP_N
+
+CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+CONV float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
+CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
+CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
+CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
+
+PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+PACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+PACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
+PACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
+PACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
+PACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
+
+UNPACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+UNPACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+UNPACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+UNPACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
+UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
+UNPACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
+UNPACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
+
+PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+
+UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+
+PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
+PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N
+
+PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+
+INIT_XMM ssse3
+UNPACK_2CH int16, int16, u, 1, 1, NOP_N, NOP_N
+UNPACK_2CH int16, int16, a, 1, 1, NOP_N, NOP_N
+UNPACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N, NOP_N
+UNPACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N, NOP_N
+UNPACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
+UNPACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
+
+%if HAVE_AVX_EXTERNAL
+INIT_XMM avx
+PACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
+PACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N
+
+UNPACK_6CH float, float, u, 2, 2, 8, NOP_N, NOP_N
+UNPACK_6CH float, float, a, 2, 2, 8, NOP_N, NOP_N
+
+PACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+PACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+
+UNPACK_6CH float, int32, u, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+UNPACK_6CH float, int32, a, 2, 2, 8, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+UNPACK_6CH int32, float, u, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+UNPACK_6CH int32, float, a, 2, 2, 8, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+
+PACK_8CH float, float, u, 2, 2, 9, NOP_N, NOP_N
+PACK_8CH float, float, a, 2, 2, 9, NOP_N, NOP_N
+
+PACK_8CH float, int32, u, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_8CH float, int32, a, 2, 2, 10, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+PACK_8CH int32, float, u, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+PACK_8CH int32, float, a, 2, 2, 10, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+
+INIT_YMM avx
+CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
+%endif
+
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
+%endif
diff --git a/libswresample/x86/audio_convert_init.c b/libswresample/x86/audio_convert_init.c
new file mode 100644
index 0000000000..bb89cf604b
--- /dev/null
+++ b/libswresample/x86/audio_convert_init.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2012 Michael Niedermayer (michaelni@gmx.at)
+ *
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86/cpu.h"
+#include "libswresample/swresample_internal.h"
+#include "libswresample/audioconvert.h"
+
+#define PROTO(pre, in, out, cap) void ff ## pre ## in## _to_ ##out## _a_ ##cap(uint8_t **dst, const uint8_t **src, int len);
+#define PROTO2(pre, out, cap) PROTO(pre, int16, out, cap) PROTO(pre, int32, out, cap) PROTO(pre, float, out, cap)
+#define PROTO3(pre, cap) PROTO2(pre, int16, cap) PROTO2(pre, int32, cap) PROTO2(pre, float, cap)
+#define PROTO4(pre) PROTO3(pre, mmx) PROTO3(pre, sse) PROTO3(pre, sse2) PROTO3(pre, ssse3) PROTO3(pre, sse4) PROTO3(pre, avx) PROTO3(pre, avx2)
+PROTO4(_)
+PROTO4(_pack_2ch_)
+PROTO4(_pack_6ch_)
+PROTO4(_pack_8ch_)
+PROTO4(_unpack_2ch_)
+PROTO4(_unpack_6ch_)
+
+av_cold void swri_audio_convert_init_x86(struct AudioConvert *ac,
+                                 enum AVSampleFormat out_fmt,
+                                 enum AVSampleFormat in_fmt,
+                                 int channels){
+    int mm_flags = av_get_cpu_flags();
+
+    ac->simd_f= NULL;
+
+//FIXME add memcpy case
+
+#define MULTI_CAPS_FUNC(flag, cap) \
+    if (EXTERNAL_##flag(mm_flags)) {\
+        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16P)\
+            ac->simd_f =  ff_int16_to_int32_a_ ## cap;\
+        if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32P)\
+            ac->simd_f =  ff_int32_to_int16_a_ ## cap;\
+    }
+
+MULTI_CAPS_FUNC(MMX, mmx)
+MULTI_CAPS_FUNC(SSE2, sse2)
+
+    if(EXTERNAL_MMX(mm_flags)) {
+        if(channels == 6) {
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_6ch_float_to_float_a_mmx;
+        }
+    }
+    if(EXTERNAL_SSE(mm_flags)) {
+        if(channels == 6) {
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_6ch_float_to_float_a_sse;
+
+            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
+                ac->simd_f =  ff_unpack_6ch_float_to_float_a_sse;
+        }
+    }
+    if(EXTERNAL_SSE2(mm_flags)) {
+        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
+            ac->simd_f =  ff_int32_to_float_a_sse2;
+        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16P)
+            ac->simd_f =  ff_int16_to_float_a_sse2;
+        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
+            ac->simd_f =  ff_float_to_int32_a_sse2;
+        if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
+            ac->simd_f =  ff_float_to_int16_a_sse2;
+
+        if(channels == 2) {
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_2ch_int32_to_int32_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S16P)
+                ac->simd_f =  ff_pack_2ch_int16_to_int16_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16P)
+                ac->simd_f =  ff_pack_2ch_int16_to_int32_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_2ch_int32_to_int16_a_sse2;
+
+            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
+                ac->simd_f =  ff_unpack_2ch_int32_to_int32_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
+                ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
+                ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S32)
+                ac->simd_f =  ff_unpack_2ch_int32_to_int16_a_sse2;
+
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_2ch_int32_to_float_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
+                ac->simd_f =  ff_pack_2ch_float_to_int32_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16P)
+                ac->simd_f =  ff_pack_2ch_int16_to_float_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLTP)
+                ac->simd_f =  ff_pack_2ch_float_to_int16_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S32)
+                ac->simd_f =  ff_unpack_2ch_int32_to_float_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_FLT)
+                ac->simd_f =  ff_unpack_2ch_float_to_int32_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S16)
+                ac->simd_f =  ff_unpack_2ch_int16_to_float_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_FLT)
+                ac->simd_f =  ff_unpack_2ch_float_to_int16_a_sse2;
+        }
+        if(channels == 6) {
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_6ch_int32_to_float_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
+                ac->simd_f =  ff_pack_6ch_float_to_int32_a_sse2;
+
+            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S32)
+                ac->simd_f =  ff_unpack_6ch_int32_to_float_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_FLT)
+                ac->simd_f =  ff_unpack_6ch_float_to_int32_a_sse2;
+        }
+        if(channels == 8) {
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_8ch_float_to_float_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_8ch_int32_to_float_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
+                ac->simd_f =  ff_pack_8ch_float_to_int32_a_sse2;
+        }
+    }
+    if(EXTERNAL_SSSE3(mm_flags)) {
+        if(channels == 2) {
+            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
+                ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_ssse3;
+            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
+                ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_ssse3;
+            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S16)
+                ac->simd_f =  ff_unpack_2ch_int16_to_float_a_ssse3;
+        }
+    }
+    if(EXTERNAL_AVX_FAST(mm_flags)) {
+        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
+            ac->simd_f =  ff_int32_to_float_a_avx;
+    }
+    if(EXTERNAL_AVX(mm_flags)) {
+        if(channels == 6) {
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_6ch_float_to_float_a_avx;
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_6ch_int32_to_float_a_avx;
+            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
+                ac->simd_f =  ff_pack_6ch_float_to_int32_a_avx;
+
+            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
+                ac->simd_f =  ff_unpack_6ch_float_to_float_a_avx;
+            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S32)
+                ac->simd_f =  ff_unpack_6ch_int32_to_float_a_avx;
+            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_FLT)
+                ac->simd_f =  ff_unpack_6ch_float_to_int32_a_avx;
+        }
+        if(channels == 8) {
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_8ch_float_to_float_a_avx;
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_8ch_int32_to_float_a_avx;
+            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
+                ac->simd_f =  ff_pack_8ch_float_to_int32_a_avx;
+        }
+    }
+    if(EXTERNAL_AVX2_FAST(mm_flags)) {
+        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
+            ac->simd_f =  ff_float_to_int32_a_avx2;
+    }
+}
diff --git a/libswresample/x86/rematrix.asm b/libswresample/x86/rematrix.asm
new file mode 100644
index 0000000000..7984b9a729
--- /dev/null
+++ b/libswresample/x86/rematrix.asm
@@ -0,0 +1,250 @@
+;******************************************************************************
+;* Copyright (c) 2012 Michael Niedermayer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+
+SECTION_RODATA 32
+dw1: times 8  dd 1
+w1 : times 16 dw 1
+
+SECTION .text
+
+%macro MIX2_FLT 1
+cglobal mix_2_1_%1_float, 7, 7, 6, out, in1, in2, coeffp, index1, index2, len
+%ifidn %1, a
+    test in1q, mmsize-1
+        jne mix_2_1_float_u_int %+ SUFFIX
+    test in2q, mmsize-1
+        jne mix_2_1_float_u_int %+ SUFFIX
+    test outq, mmsize-1
+        jne mix_2_1_float_u_int %+ SUFFIX
+%else
+mix_2_1_float_u_int %+ SUFFIX:
+%endif
+    VBROADCASTSS m4, [coeffpq + 4*index1q]
+    VBROADCASTSS m5, [coeffpq + 4*index2q]
+    shl lend    , 2
+    add in1q    , lenq
+    add in2q    , lenq
+    add outq    , lenq
+    neg lenq
+.next:
+%ifidn %1, a
+    mulps        m0, m4, [in1q + lenq         ]
+    mulps        m1, m5, [in2q + lenq         ]
+    mulps        m2, m4, [in1q + lenq + mmsize]
+    mulps        m3, m5, [in2q + lenq + mmsize]
+%else
+    movu         m0, [in1q + lenq         ]
+    movu         m1, [in2q + lenq         ]
+    movu         m2, [in1q + lenq + mmsize]
+    movu         m3, [in2q + lenq + mmsize]
+    mulps        m0, m0, m4
+    mulps        m1, m1, m5
+    mulps        m2, m2, m4
+    mulps        m3, m3, m5
+%endif
+    addps        m0, m0, m1
+    addps        m2, m2, m3
+    mov%1  [outq + lenq         ], m0
+    mov%1  [outq + lenq + mmsize], m2
+    add        lenq, mmsize*2
+        jl .next
+    REP_RET
+%endmacro
+
+%macro MIX1_FLT 1
+cglobal mix_1_1_%1_float, 5, 5, 3, out, in, coeffp, index, len
+%ifidn %1, a
+    test inq, mmsize-1
+        jne mix_1_1_float_u_int %+ SUFFIX
+    test outq, mmsize-1
+        jne mix_1_1_float_u_int %+ SUFFIX
+%else
+mix_1_1_float_u_int %+ SUFFIX:
+%endif
+    VBROADCASTSS m2, [coeffpq + 4*indexq]
+    shl lenq    , 2
+    add inq     , lenq
+    add outq    , lenq
+    neg lenq
+.next:
+%ifidn %1, a
+    mulps        m0, m2, [inq + lenq         ]
+    mulps        m1, m2, [inq + lenq + mmsize]
+%else
+    movu         m0, [inq + lenq         ]
+    movu         m1, [inq + lenq + mmsize]
+    mulps        m0, m0, m2
+    mulps        m1, m1, m2
+%endif
+    mov%1  [outq + lenq         ], m0
+    mov%1  [outq + lenq + mmsize], m1
+    add        lenq, mmsize*2
+        jl .next
+    REP_RET
+%endmacro
+
+%macro MIX1_INT16 1
+cglobal mix_1_1_%1_int16, 5, 5, 6, out, in, coeffp, index, len
+%ifidn %1, a
+    test inq, mmsize-1
+        jne mix_1_1_int16_u_int %+ SUFFIX
+    test outq, mmsize-1
+        jne mix_1_1_int16_u_int %+ SUFFIX
+%else
+mix_1_1_int16_u_int %+ SUFFIX:
+%endif
+    movd   m4, [coeffpq + 4*indexq]
+    SPLATW m5, m4
+    psllq  m4, 32
+    psrlq  m4, 48
+    mova   m0, [w1]
+    psllw  m0, m4
+    psrlw  m0, 1
+    punpcklwd m5, m0
+    add lenq    , lenq
+    add inq     , lenq
+    add outq    , lenq
+    neg lenq
+.next:
+    mov%1        m0, [inq + lenq         ]
+    mov%1        m2, [inq + lenq + mmsize]
+    mova         m1, m0
+    mova         m3, m2
+    punpcklwd    m0, [w1]
+    punpckhwd    m1, [w1]
+    punpcklwd    m2, [w1]
+    punpckhwd    m3, [w1]
+    pmaddwd      m0, m5
+    pmaddwd      m1, m5
+    pmaddwd      m2, m5
+    pmaddwd      m3, m5
+    psrad        m0, m4
+    psrad        m1, m4
+    psrad        m2, m4
+    psrad        m3, m4
+    packssdw     m0, m1
+    packssdw     m2, m3
+    mov%1  [outq + lenq         ], m0
+    mov%1  [outq + lenq + mmsize], m2
+    add        lenq, mmsize*2
+        jl .next
+%if mmsize == 8
+    emms
+    RET
+%else
+    REP_RET
+%endif
+%endmacro
+
+%macro MIX2_INT16 1
+cglobal mix_2_1_%1_int16, 7, 7, 8, out, in1, in2, coeffp, index1, index2, len
+%ifidn %1, a
+    test in1q, mmsize-1
+        jne mix_2_1_int16_u_int %+ SUFFIX
+    test in2q, mmsize-1
+        jne mix_2_1_int16_u_int %+ SUFFIX
+    test outq, mmsize-1
+        jne mix_2_1_int16_u_int %+ SUFFIX
+%else
+mix_2_1_int16_u_int %+ SUFFIX:
+%endif
+    movd   m4, [coeffpq + 4*index1q]
+    movd   m6, [coeffpq + 4*index2q]
+    SPLATW m5, m4
+    SPLATW m6, m6
+    psllq  m4, 32
+    psrlq  m4, 48
+    mova   m7, [dw1]
+    pslld  m7, m4
+    psrld  m7, 1
+    punpcklwd m5, m6
+    add lend    , lend
+    add in1q    , lenq
+    add in2q    , lenq
+    add outq    , lenq
+    neg lenq
+.next:
+    mov%1        m0, [in1q + lenq         ]
+    mov%1        m2, [in2q + lenq         ]
+    mova         m1, m0
+    punpcklwd    m0, m2
+    punpckhwd    m1, m2
+
+    mov%1        m2, [in1q + lenq + mmsize]
+    mov%1        m6, [in2q + lenq + mmsize]
+    mova         m3, m2
+    punpcklwd    m2, m6
+    punpckhwd    m3, m6
+
+    pmaddwd      m0, m5
+    pmaddwd      m1, m5
+    pmaddwd      m2, m5
+    pmaddwd      m3, m5
+    paddd        m0, m7
+    paddd        m1, m7
+    paddd        m2, m7
+    paddd        m3, m7
+    psrad        m0, m4
+    psrad        m1, m4
+    psrad        m2, m4
+    psrad        m3, m4
+    packssdw     m0, m1
+    packssdw     m2, m3
+    mov%1  [outq + lenq         ], m0
+    mov%1  [outq + lenq + mmsize], m2
+    add        lenq, mmsize*2
+        jl .next
+%if mmsize == 8
+    emms
+    RET
+%else
+    REP_RET
+%endif
+%endmacro
+
+
+INIT_MMX mmx
+MIX1_INT16 u
+MIX1_INT16 a
+MIX2_INT16 u
+MIX2_INT16 a
+
+INIT_XMM sse
+MIX2_FLT u
+MIX2_FLT a
+MIX1_FLT u
+MIX1_FLT a
+
+INIT_XMM sse2
+MIX1_INT16 u
+MIX1_INT16 a
+MIX2_INT16 u
+MIX2_INT16 a
+
+%if HAVE_AVX_EXTERNAL
+INIT_YMM avx
+MIX2_FLT u
+MIX2_FLT a
+MIX1_FLT u
+MIX1_FLT a
+%endif
diff --git a/libswresample/x86/rematrix_init.c b/libswresample/x86/rematrix_init.c
new file mode 100644
index 0000000000..5f2c5fe170
--- /dev/null
+++ b/libswresample/x86/rematrix_init.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2012 Michael Niedermayer (michaelni@gmx.at)
+ *
+ * This file is part of libswresample
+ *
+ * libswresample is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libswresample is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libswresample; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86/cpu.h"
+#include "libswresample/swresample_internal.h"
+
+#define D(type, simd) \
+mix_1_1_func_type ff_mix_1_1_a_## type ## _ ## simd;\
+mix_2_1_func_type ff_mix_2_1_a_## type ## _ ## simd;
+
+D(float, sse)
+D(float, avx)
+D(int16, mmx)
+D(int16, sse2)
+
+av_cold int swri_rematrix_init_x86(struct SwrContext *s){
+#if HAVE_YASM
+    int mm_flags = av_get_cpu_flags();
+    int nb_in  = av_get_channel_layout_nb_channels(s->in_ch_layout);
+    int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout);
+    int num    = nb_in * nb_out;
+    int i,j;
+
+    s->mix_1_1_simd = NULL;
+    s->mix_2_1_simd = NULL;
+
+    if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
+        if(EXTERNAL_MMX(mm_flags)) {
+            s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx;
+            s->mix_2_1_simd = ff_mix_2_1_a_int16_mmx;
+        }
+        if(EXTERNAL_SSE2(mm_flags)) {
+            s->mix_1_1_simd = ff_mix_1_1_a_int16_sse2;
+            s->mix_2_1_simd = ff_mix_2_1_a_int16_sse2;
+        }
+        s->native_simd_matrix = av_mallocz_array(num,  2 * sizeof(int16_t));
+        s->native_simd_one    = av_mallocz(2 * sizeof(int16_t));
+        if (!s->native_simd_matrix || !s->native_simd_one)
+            return AVERROR(ENOMEM);
+
+        for(i=0; i<nb_out; i++){
+            int sh = 0;
+            for(j=0; j<nb_in; j++)
+                sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j]));
+            sh = FFMAX(av_log2(sh) - 14, 0);
+            for(j=0; j<nb_in; j++) {
+                ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh;
+                ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] =
+                    ((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh;
+            }
+        }
+        ((int16_t*)s->native_simd_one)[1] = 14;
+        ((int16_t*)s->native_simd_one)[0] = 16384;
+    } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
+        if(EXTERNAL_SSE(mm_flags)) {
+            s->mix_1_1_simd = ff_mix_1_1_a_float_sse;
+            s->mix_2_1_simd = ff_mix_2_1_a_float_sse;
+        }
+        if(EXTERNAL_AVX_FAST(mm_flags)) {
+            s->mix_1_1_simd = ff_mix_1_1_a_float_avx;
+            s->mix_2_1_simd = ff_mix_2_1_a_float_avx;
+        }
+        s->native_simd_matrix = av_mallocz_array(num, sizeof(float));
+        s->native_simd_one = av_mallocz(sizeof(float));
+        if (!s->native_simd_matrix || !s->native_simd_one)
+            return AVERROR(ENOMEM);
+        memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float));
+        memcpy(s->native_simd_one, s->native_one, sizeof(float));
+    }
+#endif
+
+    return 0;
+}
diff --git a/libswresample/x86/resample.asm b/libswresample/x86/resample.asm
new file mode 100644
index 0000000000..4163df1aa1
--- /dev/null
+++ b/libswresample/x86/resample.asm
@@ -0,0 +1,610 @@
+;******************************************************************************
+;* Copyright (c) 2012 Michael Niedermayer
+;* Copyright (c) 2014 James Almer <jamrial <at> gmail.com>
+;* Copyright (c) 2014 Ronald S. Bultje <rsbultje@gmail.com>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+%if ARCH_X86_64
+%define pointer resq
+%else
+%define pointer resd
+%endif
+
+struc ResampleContext
+    .av_class:              pointer 1
+    .filter_bank:           pointer 1
+    .filter_length:         resd 1
+    .filter_alloc:          resd 1
+    .ideal_dst_incr:        resd 1
+    .dst_incr:              resd 1
+    .dst_incr_div:          resd 1
+    .dst_incr_mod:          resd 1
+    .index:                 resd 1
+    .frac:                  resd 1
+    .src_incr:              resd 1
+    .compensation_distance: resd 1
+    .phase_count:           resd 1
+
+    ; there's a few more here but we only care about the first few
+endstruc
+
+SECTION_RODATA
+
+pf_1:      dd 1.0
+pdbl_1:    dq 1.0
+pd_0x4000: dd 0x4000
+
+SECTION .text
+
+; FIXME remove unneeded variables (index_incr, phase_mask)
+%macro RESAMPLE_FNS 3-5 ; format [float or int16], bps, log2_bps, float op suffix [s or d], 1.0 constant
+; int resample_common_$format(ResampleContext *ctx, $format *dst,
+;                             const $format *src, int size, int update_ctx)
+%if ARCH_X86_64 ; unix64 and win64
+cglobal resample_common_%1, 0, 15, 2, ctx, dst, src, phase_count, index, frac, \
+                                      dst_incr_mod, size, min_filter_count_x4, \
+                                      min_filter_len_x4, dst_incr_div, src_incr, \
+                                      phase_mask, dst_end, filter_bank
+
+    ; use red-zone for variable storage
+%define ctx_stackq            [rsp-0x8]
+%define src_stackq            [rsp-0x10]
+%if WIN64
+%define update_context_stackd r4m
+%else ; unix64
+%define update_context_stackd [rsp-0x14]
+%endif
+
+    ; load as many variables in registers as possible; for the rest, store
+    ; on stack so that we have 'ctx' available as one extra register
+    mov                        sized, r3d
+%if UNIX64
+    mov        update_context_stackd, r4d
+%endif
+    mov                       indexd, [ctxq+ResampleContext.index]
+    mov                        fracd, [ctxq+ResampleContext.frac]
+    mov                dst_incr_modd, [ctxq+ResampleContext.dst_incr_mod]
+    mov                 filter_bankq, [ctxq+ResampleContext.filter_bank]
+    mov                    src_incrd, [ctxq+ResampleContext.src_incr]
+    mov                   ctx_stackq, ctxq
+    mov           min_filter_len_x4d, [ctxq+ResampleContext.filter_length]
+    mov                dst_incr_divd, [ctxq+ResampleContext.dst_incr_div]
+    shl           min_filter_len_x4d, %3
+    lea                     dst_endq, [dstq+sizeq*%2]
+
+%if UNIX64
+    mov                          ecx, [ctxq+ResampleContext.phase_count]
+    mov                          edi, [ctxq+ResampleContext.filter_alloc]
+
+    DEFINE_ARGS filter_alloc, dst, src, phase_count, index, frac, dst_incr_mod, \
+                filter, min_filter_count_x4, min_filter_len_x4, dst_incr_div, \
+                src_incr, phase_mask, dst_end, filter_bank
+%elif WIN64
+    mov                          R9d, [ctxq+ResampleContext.filter_alloc]
+    mov                          ecx, [ctxq+ResampleContext.phase_count]
+
+    DEFINE_ARGS phase_count, dst, src, filter_alloc, index, frac, dst_incr_mod, \
+                filter, min_filter_count_x4, min_filter_len_x4, dst_incr_div, \
+                src_incr, phase_mask, dst_end, filter_bank
+%endif
+
+    neg           min_filter_len_x4q
+    sub                 filter_bankq, min_filter_len_x4q
+    sub                         srcq, min_filter_len_x4q
+    mov                   src_stackq, srcq
+%else ; x86-32
+cglobal resample_common_%1, 1, 7, 2, ctx, phase_count, dst, frac, \
+                                     index, min_filter_length_x4, filter_bank
+
+    ; push temp variables to stack
+%define ctx_stackq            r0mp
+%define src_stackq            r2mp
+%define update_context_stackd r4m
+
+    mov                         dstq, r1mp
+    mov                           r3, r3mp
+    lea                           r3, [dstq+r3*%2]
+    PUSH                              dword [ctxq+ResampleContext.dst_incr_div]
+    PUSH                              dword [ctxq+ResampleContext.dst_incr_mod]
+    PUSH                              dword [ctxq+ResampleContext.filter_alloc]
+    PUSH                              r3
+    PUSH                              dword [ctxq+ResampleContext.phase_count]  ; unneeded replacement for phase_mask
+    PUSH                              dword [ctxq+ResampleContext.src_incr]
+    mov        min_filter_length_x4d, [ctxq+ResampleContext.filter_length]
+    mov                       indexd, [ctxq+ResampleContext.index]
+    shl        min_filter_length_x4d, %3
+    mov                        fracd, [ctxq+ResampleContext.frac]
+    neg        min_filter_length_x4q
+    mov                 filter_bankq, [ctxq+ResampleContext.filter_bank]
+    sub                         r2mp, min_filter_length_x4q
+    sub                 filter_bankq, min_filter_length_x4q
+    PUSH                              min_filter_length_x4q
+    PUSH                              filter_bankq
+    mov                 phase_countd, [ctxq+ResampleContext.phase_count]
+
+    DEFINE_ARGS src, phase_count, dst, frac, index, min_filter_count_x4, filter
+
+%define filter_bankq          dword [rsp+0x0]
+%define min_filter_length_x4q dword [rsp+0x4]
+%define src_incrd             dword [rsp+0x8]
+%define phase_maskd           dword [rsp+0xc]
+%define dst_endq              dword [rsp+0x10]
+%define filter_allocd         dword [rsp+0x14]
+%define dst_incr_modd         dword [rsp+0x18]
+%define dst_incr_divd         dword [rsp+0x1c]
+
+    mov                         srcq, r2mp
+%endif
+
+.loop:
+    mov                      filterd, filter_allocd
+    imul                     filterd, indexd
+%if ARCH_X86_64
+    mov         min_filter_count_x4q, min_filter_len_x4q
+    lea                      filterq, [filter_bankq+filterq*%2]
+%else ; x86-32
+    mov         min_filter_count_x4q, filter_bankq
+    lea                      filterq, [min_filter_count_x4q+filterq*%2]
+    mov         min_filter_count_x4q, min_filter_length_x4q
+%endif
+%ifidn %1, int16
+    movd                          m0, [pd_0x4000]
+%else ; float/double
+    xorps                         m0, m0, m0
+%endif
+
+    align 16
+.inner_loop:
+    movu                          m1, [srcq+min_filter_count_x4q*1]
+%ifidn %1, int16
+%if cpuflag(xop)
+    vpmadcswd                     m0, m1, [filterq+min_filter_count_x4q*1], m0
+%else
+    pmaddwd                       m1, [filterq+min_filter_count_x4q*1]
+    paddd                         m0, m1
+%endif
+%else ; float/double
+%if cpuflag(fma4) || cpuflag(fma3)
+    fmaddp%4                      m0, m1, [filterq+min_filter_count_x4q*1], m0
+%else
+    mulp%4                        m1, m1, [filterq+min_filter_count_x4q*1]
+    addp%4                        m0, m0, m1
+%endif ; cpuflag
+%endif
+    add         min_filter_count_x4q, mmsize
+    js .inner_loop
+
+%ifidn %1, int16
+    HADDD                         m0, m1
+    psrad                         m0, 15
+    add                        fracd, dst_incr_modd
+    packssdw                      m0, m0
+    add                       indexd, dst_incr_divd
+    movd                      [dstq], m0
+%else ; float/double
+    ; horizontal sum & store
+%if mmsize == 32
+    vextractf128                 xm1, m0, 0x1
+    addps                        xm0, xm1
+%endif
+    movhlps                      xm1, xm0
+%ifidn %1, float
+    addps                        xm0, xm1
+    shufps                       xm1, xm0, xm0, q0001
+%endif
+    add                        fracd, dst_incr_modd
+    addp%4                       xm0, xm1
+    add                       indexd, dst_incr_divd
+    movs%4                    [dstq], xm0
+%endif
+    cmp                        fracd, src_incrd
+    jl .skip
+    sub                        fracd, src_incrd
+    inc                       indexd
+
+%if UNIX64
+    DEFINE_ARGS filter_alloc, dst, src, phase_count, index, frac, dst_incr_mod, \
+                index_incr, min_filter_count_x4, min_filter_len_x4, dst_incr_div, \
+                src_incr, phase_mask, dst_end, filter_bank
+%elif WIN64
+    DEFINE_ARGS phase_count, dst, src, filter_alloc, index, frac, dst_incr_mod, \
+                index_incr, min_filter_count_x4, min_filter_len_x4, dst_incr_div, \
+                src_incr, phase_mask, dst_end, filter_bank
+%else ; x86-32
+    DEFINE_ARGS src, phase_count, dst, frac, index, index_incr
+%endif
+
+.skip:
+    add                         dstq, %2
+    cmp                       indexd, phase_countd
+    jb .index_skip
+.index_while:
+    sub                       indexd, phase_countd
+    lea                         srcq, [srcq+%2]
+    cmp                       indexd, phase_countd
+    jnb .index_while
+.index_skip:
+    cmp                         dstq, dst_endq
+    jne .loop
+
+%if ARCH_X86_64
+    DEFINE_ARGS ctx, dst, src, phase_count, index, frac
+%else ; x86-32
+    DEFINE_ARGS src, ctx, update_context, frac, index
+%endif
+
+    cmp  dword update_context_stackd, 0
+    jz .skip_store
+    ; strictly speaking, the function should always return the consumed
+    ; number of bytes; however, we only use the value if update_context
+    ; is true, so let's just leave it uninitialized otherwise
+    mov                         ctxq, ctx_stackq
+    movifnidn                    rax, srcq
+    mov [ctxq+ResampleContext.frac ], fracd
+    sub                          rax, src_stackq
+    mov [ctxq+ResampleContext.index], indexd
+    shr                          rax, %3
+
+.skip_store:
+%if ARCH_X86_32
+    ADD                          rsp, 0x20
+%endif
+    RET
+
+; int resample_linear_$format(ResampleContext *ctx, float *dst,
+;                             const float *src, int size, int update_ctx)
+%if ARCH_X86_64 ; unix64 and win64
+%if UNIX64
+cglobal resample_linear_%1, 0, 15, 5, ctx, dst, phase_mask, phase_count, index, frac, \
+                                      size, dst_incr_mod, min_filter_count_x4, \
+                                      min_filter_len_x4, dst_incr_div, src_incr, \
+                                      src, dst_end, filter_bank
+
+    mov                         srcq, r2mp
+%else ; win64
+cglobal resample_linear_%1, 0, 15, 5, ctx, phase_mask, src, phase_count, index, frac, \
+                                      size, dst_incr_mod, min_filter_count_x4, \
+                                      min_filter_len_x4, dst_incr_div, src_incr, \
+                                      dst, dst_end, filter_bank
+
+    mov                         dstq, r1mp
+%endif
+
+    ; use red-zone for variable storage
+%define ctx_stackq            [rsp-0x8]
+%define src_stackq            [rsp-0x10]
+%define phase_mask_stackd     [rsp-0x14]
+%if WIN64
+%define update_context_stackd r4m
+%else ; unix64
+%define update_context_stackd [rsp-0x18]
+%endif
+
+    ; load as many variables in registers as possible; for the rest, store
+    ; on stack so that we have 'ctx' available as one extra register
+    mov                        sized, r3d
+%if UNIX64
+    mov        update_context_stackd, r4d
+%endif
+    mov                       indexd, [ctxq+ResampleContext.index]
+    mov                        fracd, [ctxq+ResampleContext.frac]
+    mov                dst_incr_modd, [ctxq+ResampleContext.dst_incr_mod]
+    mov                 filter_bankq, [ctxq+ResampleContext.filter_bank]
+    mov                    src_incrd, [ctxq+ResampleContext.src_incr]
+    mov                   ctx_stackq, ctxq
+    mov           min_filter_len_x4d, [ctxq+ResampleContext.filter_length]
+%ifidn %1, int16
+    movd                          m4, [pd_0x4000]
+%else ; float/double
+    cvtsi2s%4                    xm0, src_incrd
+    movs%4                       xm4, [%5]
+    divs%4                       xm4, xm0
+%endif
+    mov                dst_incr_divd, [ctxq+ResampleContext.dst_incr_div]
+    shl           min_filter_len_x4d, %3
+    lea                     dst_endq, [dstq+sizeq*%2]
+
+%if UNIX64
+    mov                          ecx, [ctxq+ResampleContext.phase_count]
+    mov                          edi, [ctxq+ResampleContext.filter_alloc]
+
+    DEFINE_ARGS filter_alloc, dst, filter2, phase_count, index, frac, filter1, \
+                dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \
+                dst_incr_div, src_incr, src, dst_end, filter_bank
+%elif WIN64
+    mov                          R9d, [ctxq+ResampleContext.filter_alloc]
+    mov                          ecx, [ctxq+ResampleContext.phase_count]
+
+    DEFINE_ARGS phase_count, filter2, src, filter_alloc, index, frac, filter1, \
+                dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \
+                dst_incr_div, src_incr, dst, dst_end, filter_bank
+%endif
+
+    neg           min_filter_len_x4q
+    sub                 filter_bankq, min_filter_len_x4q
+    sub                         srcq, min_filter_len_x4q
+    mov                   src_stackq, srcq
+%else ; x86-32
+cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
+                                     frac, index, dst, filter_bank
+
+    ; push temp variables to stack
+%define ctx_stackq            r0mp
+%define src_stackq            r2mp
+%define update_context_stackd r4m
+
+    mov                         dstq, r1mp
+    mov                           r3, r3mp
+    lea                           r3, [dstq+r3*%2]
+    PUSH                              dword [ctxq+ResampleContext.dst_incr_div]
+    PUSH                              r3
+    mov                           r3, dword [ctxq+ResampleContext.filter_alloc]
+    PUSH                              dword [ctxq+ResampleContext.dst_incr_mod]
+    PUSH                              r3
+    shl                           r3, %3
+    PUSH                              r3
+    mov                           r3, dword [ctxq+ResampleContext.src_incr]
+    PUSH                              dword [ctxq+ResampleContext.phase_count]  ; unneeded replacement of phase_mask
+    PUSH                              r3d
+%ifidn %1, int16
+    movd                          m4, [pd_0x4000]
+%else ; float/double
+    cvtsi2s%4                    xm0, r3d
+    movs%4                       xm4, [%5]
+    divs%4                       xm4, xm0
+%endif
+    mov        min_filter_length_x4d, [ctxq+ResampleContext.filter_length]
+    mov                       indexd, [ctxq+ResampleContext.index]
+    shl        min_filter_length_x4d, %3
+    mov                        fracd, [ctxq+ResampleContext.frac]
+    neg        min_filter_length_x4q
+    mov                 filter_bankq, [ctxq+ResampleContext.filter_bank]
+    sub                         r2mp, min_filter_length_x4q
+    sub                 filter_bankq, min_filter_length_x4q
+    PUSH                              min_filter_length_x4q
+    PUSH                              filter_bankq
+    PUSH                              dword [ctxq+ResampleContext.phase_count]
+
+    DEFINE_ARGS filter1, min_filter_count_x4, filter2, frac, index, dst, src
+
+%define phase_count_stackd    dword [rsp+0x0]
+%define filter_bankq          dword [rsp+0x4]
+%define min_filter_length_x4q dword [rsp+0x8]
+%define src_incrd             dword [rsp+0xc]
+%define phase_mask_stackd     dword [rsp+0x10]
+%define filter_alloc_x4q      dword [rsp+0x14]
+%define filter_allocd         dword [rsp+0x18]
+%define dst_incr_modd         dword [rsp+0x1c]
+%define dst_endq              dword [rsp+0x20]
+%define dst_incr_divd         dword [rsp+0x24]
+
+    mov                         srcq, r2mp
+%endif
+
+.loop:
+    mov                     filter1d, filter_allocd
+    imul                    filter1d, indexd
+%if ARCH_X86_64
+    mov         min_filter_count_x4q, min_filter_len_x4q
+    lea                     filter1q, [filter_bankq+filter1q*%2]
+    lea                     filter2q, [filter1q+filter_allocq*%2]
+%else ; x86-32
+    mov         min_filter_count_x4q, filter_bankq
+    lea                     filter1q, [min_filter_count_x4q+filter1q*%2]
+    mov         min_filter_count_x4q, min_filter_length_x4q
+    mov                     filter2q, filter1q
+    add                     filter2q, filter_alloc_x4q
+%endif
+%ifidn %1, int16
+    mova                          m0, m4
+    mova                          m2, m4
+%else ; float/double
+    xorps                         m0, m0, m0
+    xorps                         m2, m2, m2
+%endif
+
+    align 16
+.inner_loop:
+    movu                          m1, [srcq+min_filter_count_x4q*1]
+%ifidn %1, int16
+%if cpuflag(xop)
+    vpmadcswd                     m2, m1, [filter2q+min_filter_count_x4q*1], m2
+    vpmadcswd                     m0, m1, [filter1q+min_filter_count_x4q*1], m0
+%else
+    pmaddwd                       m3, m1, [filter2q+min_filter_count_x4q*1]
+    pmaddwd                       m1, [filter1q+min_filter_count_x4q*1]
+    paddd                         m2, m3
+    paddd                         m0, m1
+%endif ; cpuflag
+%else ; float/double
+%if cpuflag(fma4) || cpuflag(fma3)
+    fmaddp%4                      m2, m1, [filter2q+min_filter_count_x4q*1], m2
+    fmaddp%4                      m0, m1, [filter1q+min_filter_count_x4q*1], m0
+%else
+    mulp%4                        m3, m1, [filter2q+min_filter_count_x4q*1]
+    mulp%4                        m1, m1, [filter1q+min_filter_count_x4q*1]
+    addp%4                        m2, m2, m3
+    addp%4                        m0, m0, m1
+%endif ; cpuflag
+%endif
+    add         min_filter_count_x4q, mmsize
+    js .inner_loop
+
+%ifidn %1, int16
+%if mmsize == 16
+%if cpuflag(xop)
+    vphadddq                      m2, m2
+    vphadddq                      m0, m0
+%endif
+    pshufd                        m3, m2, q0032
+    pshufd                        m1, m0, q0032
+    paddd                         m2, m3
+    paddd                         m0, m1
+%endif
+%if notcpuflag(xop)
+    PSHUFLW                       m3, m2, q0032
+    PSHUFLW                       m1, m0, q0032
+    paddd                         m2, m3
+    paddd                         m0, m1
+%endif
+    psubd                         m2, m0
+    ; This is probably a really bad idea on atom and other machines with a
+    ; long transfer latency between GPRs and XMMs (atom). However, it does
+    ; make the clip a lot simpler...
+    movd                         eax, m2
+    add                       indexd, dst_incr_divd
+    imul                              fracd
+    idiv                              src_incrd
+    movd                          m1, eax
+    add                        fracd, dst_incr_modd
+    paddd                         m0, m1
+    psrad                         m0, 15
+    packssdw                      m0, m0
+    movd                      [dstq], m0
+
+    ; note that for imul/idiv, I need to move filter to edx/eax for each:
+    ; - 32bit: eax=r0[filter1], edx=r2[filter2]
+    ; - win64: eax=r6[filter1], edx=r1[todo]
+    ; - unix64: eax=r6[filter1], edx=r2[todo]
+%else ; float/double
+    ; val += (v2 - val) * (FELEML) frac / c->src_incr;
+%if mmsize == 32
+    vextractf128                 xm1, m0, 0x1
+    vextractf128                 xm3, m2, 0x1
+    addps                        xm0, xm1
+    addps                        xm2, xm3
+%endif
+    cvtsi2s%4                    xm1, fracd
+    subp%4                       xm2, xm0
+    mulp%4                       xm1, xm4
+    shufp%4                      xm1, xm1, q0000
+%if cpuflag(fma4) || cpuflag(fma3)
+    fmaddp%4                     xm0, xm2, xm1, xm0
+%else
+    mulp%4                       xm2, xm1
+    addp%4                       xm0, xm2
+%endif ; cpuflag
+
+    ; horizontal sum & store
+    movhlps                      xm1, xm0
+%ifidn %1, float
+    addps                        xm0, xm1
+    shufps                       xm1, xm0, xm0, q0001
+%endif
+    add                        fracd, dst_incr_modd
+    addp%4                       xm0, xm1
+    add                       indexd, dst_incr_divd
+    movs%4                    [dstq], xm0
+%endif
+    cmp                        fracd, src_incrd
+    jl .skip
+    sub                        fracd, src_incrd
+    inc                       indexd
+
+%if UNIX64
+    DEFINE_ARGS filter_alloc, dst, filter2, phase_count, index, frac, index_incr, \
+                dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \
+                dst_incr_div, src_incr, src, dst_end, filter_bank
+%elif WIN64
+    DEFINE_ARGS phase_count, filter2, src, filter_alloc, index, frac, index_incr, \
+                dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \
+                dst_incr_div, src_incr, dst, dst_end, filter_bank
+%else ; x86-32
+    DEFINE_ARGS filter1, phase_count, index_incr, frac, index, dst, src
+%endif
+
+.skip:
+%if ARCH_X86_32
+    mov                 phase_countd, phase_count_stackd
+%endif
+    add                         dstq, %2
+    cmp                       indexd, phase_countd
+    jb .index_skip
+.index_while:
+    sub                       indexd, phase_countd
+    lea                         srcq, [srcq+%2]
+    cmp                       indexd, phase_countd
+    jnb .index_while
+.index_skip:
+    cmp                         dstq, dst_endq
+    jne .loop
+
+%if UNIX64
+    DEFINE_ARGS ctx, dst, filter2, phase_count, index, frac, index_incr, \
+                dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \
+                dst_incr_div, src_incr, src, dst_end, filter_bank
+%elif WIN64
+    DEFINE_ARGS ctx, filter2, src, phase_count, index, frac, index_incr, \
+                dst_incr_mod, min_filter_count_x4, min_filter_len_x4, \
+                dst_incr_div, src_incr, dst, dst_end, filter_bank
+%else ; x86-32
+    DEFINE_ARGS filter1, ctx, update_context, frac, index, dst, src
+%endif
+
+    cmp  dword update_context_stackd, 0
+    jz .skip_store
+    ; strictly speaking, the function should always return the consumed
+    ; number of bytes; however, we only use the value if update_context
+    ; is true, so let's just leave it uninitialized otherwise
+    mov                         ctxq, ctx_stackq
+    movifnidn                    rax, srcq
+    mov [ctxq+ResampleContext.frac ], fracd
+    sub                          rax, src_stackq
+    mov [ctxq+ResampleContext.index], indexd
+    shr                          rax, %3
+
+.skip_store:
+%if ARCH_X86_32
+    ADD                          rsp, 0x28
+%endif
+    RET
+%endmacro
+
+INIT_XMM sse
+RESAMPLE_FNS float, 4, 2, s, pf_1
+
+%if HAVE_AVX_EXTERNAL
+INIT_YMM avx
+RESAMPLE_FNS float, 4, 2, s, pf_1
+%endif
+%if HAVE_FMA3_EXTERNAL
+INIT_YMM fma3
+RESAMPLE_FNS float, 4, 2, s, pf_1
+%endif
+%if HAVE_FMA4_EXTERNAL
+INIT_XMM fma4
+RESAMPLE_FNS float, 4, 2, s, pf_1
+%endif
+
+%if ARCH_X86_32
+INIT_MMX mmxext
+RESAMPLE_FNS int16, 2, 1
+%endif
+
+INIT_XMM sse2
+RESAMPLE_FNS int16, 2, 1
+%if HAVE_XOP_EXTERNAL
+INIT_XMM xop
+RESAMPLE_FNS int16, 2, 1
+%endif
+
+INIT_XMM sse2
+RESAMPLE_FNS double, 8, 3, d, pdbl_1
diff --git a/libswresample/x86/resample_init.c b/libswresample/x86/resample_init.c
new file mode 100644
index 0000000000..9d7d5cf89e
--- /dev/null
+++ b/libswresample/x86/resample_init.c
@@ -0,0 +1,90 @@
+/*
+ * audio resampling
+ * Copyright (c) 2004-2012 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * audio resampling
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "libavutil/x86/cpu.h"
+#include "libswresample/resample.h"
+
+#define RESAMPLE_FUNCS(type, opt) \
+int ff_resample_common_##type##_##opt(ResampleContext *c, void *dst, \
+                                      const void *src, int sz, int upd); \
+int ff_resample_linear_##type##_##opt(ResampleContext *c, void *dst, \
+                                      const void *src, int sz, int upd)
+
+RESAMPLE_FUNCS(int16,  mmxext);
+RESAMPLE_FUNCS(int16,  sse2);
+RESAMPLE_FUNCS(int16,  xop);
+RESAMPLE_FUNCS(float,  sse);
+RESAMPLE_FUNCS(float,  avx);
+RESAMPLE_FUNCS(float,  fma3);
+RESAMPLE_FUNCS(float,  fma4);
+RESAMPLE_FUNCS(double, sse2);
+
+av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
+{
+    int av_unused mm_flags = av_get_cpu_flags();
+
+    switch(c->format){
+    case AV_SAMPLE_FMT_S16P:
+        if (ARCH_X86_32 && EXTERNAL_MMXEXT(mm_flags)) {
+            c->dsp.resample = c->linear ? ff_resample_linear_int16_mmxext
+                                        : ff_resample_common_int16_mmxext;
+        }
+        if (EXTERNAL_SSE2(mm_flags)) {
+            c->dsp.resample = c->linear ? ff_resample_linear_int16_sse2
+                                        : ff_resample_common_int16_sse2;
+        }
+        if (EXTERNAL_XOP(mm_flags)) {
+            c->dsp.resample = c->linear ? ff_resample_linear_int16_xop
+                                        : ff_resample_common_int16_xop;
+        }
+        break;
+    case AV_SAMPLE_FMT_FLTP:
+        if (EXTERNAL_SSE(mm_flags)) {
+            c->dsp.resample = c->linear ? ff_resample_linear_float_sse
+                                        : ff_resample_common_float_sse;
+        }
+        if (EXTERNAL_AVX_FAST(mm_flags)) {
+            c->dsp.resample = c->linear ? ff_resample_linear_float_avx
+                                        : ff_resample_common_float_avx;
+        }
+        if (EXTERNAL_FMA3_FAST(mm_flags)) {
+            c->dsp.resample = c->linear ? ff_resample_linear_float_fma3
+                                        : ff_resample_common_float_fma3;
+        }
+        if (EXTERNAL_FMA4(mm_flags)) {
+            c->dsp.resample = c->linear ? ff_resample_linear_float_fma4
+                                        : ff_resample_common_float_fma4;
+        }
+        break;
+    case AV_SAMPLE_FMT_DBLP:
+        if (EXTERNAL_SSE2(mm_flags)) {
+            c->dsp.resample = c->linear ? ff_resample_linear_double_sse2
+                                        : ff_resample_common_double_sse2;
+        }
+        break;
+    }
+}
diff --git a/libswresample/x86/w64xmmtest.c b/libswresample/x86/w64xmmtest.c
new file mode 100644
index 0000000000..9cddb4a858
--- /dev/null
+++ b/libswresample/x86/w64xmmtest.c
@@ -0,0 +1,29 @@
+/*
+ * check XMM registers for clobbers on Win64
+ * Copyright (c) 2013 Martin Storsjo
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libswresample/swresample.h"
+#include "libavutil/x86/w64xmmtest.h"
+
+wrap(swr_convert(struct SwrContext *s, uint8_t **out, int out_count,
+                 const uint8_t **in , int in_count))
+{
+    testxmmclobbers(swr_convert, s, out, out_count, in, in_count);
+}