about summary refs log tree commit diff
diff options
context:
space:
mode:
author Michael Niedermayer 2012-05-13 19:20:47 +0200
committer Michael Niedermayer 2012-05-13 19:31:59 +0200
commit 24c0d1583ce6c7bb68fb40167a8b7bbaccac9e4a (patch)
tree 62934ef1da061155ed92f697b4a1ce9d56dcd7a0
parent 6f67d9833b5a7d78d27c283d7e4238e6d3e5fd98 (diff)
libswresample: unaligned AVX/SSE4 float and int32 6ch pack
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- libswresample/x86/audio_convert.asm 43
1 files changed, 31 insertions, 12 deletions
diff --git a/libswresample/x86/audio_convert.asm b/libswresample/x86/audio_convert.asm
index 1fa519aa84..cbff72c565 100644
--- a/libswresample/x86/audio_convert.asm
+++ b/libswresample/x86/audio_convert.asm
@@ -213,18 +213,34 @@ cglobal pack_6ch_%2_to_%1_%3, 2,8,7, dst, src, src1, src2, src3, src4, src5, len
mov src5q, [srcq+5*gprsize]
mov srcq, [srcq]
mov dstq, [dstq]
+%ifidn %3, a
+ test dstq, mmsize-1
+ jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+ test srcq, mmsize-1
+ jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+ test src2q, mmsize-1
+ jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+ test src3q, mmsize-1
+ jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+ test src4q, mmsize-1
+ jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+ test src5q, mmsize-1
+ jne pack_6ch_%2_to_%1_u_int %+ SUFFIX
+%else
+pack_6ch_%2_to_%1_u_int %+ SUFFIX
+%endif
sub src1q, srcq
sub src2q, srcq
sub src3q, srcq
sub src4q, srcq
sub src5q, srcq
.loop:
- mova m0, [srcq ]
- mova m1, [srcq+src1q]
- mova m2, [srcq+src2q]
- mova m3, [srcq+src3q]
- mova m4, [srcq+src4q]
- mova m5, [srcq+src5q]
+ mov%3 m0, [srcq ]
+ mov%3 m1, [srcq+src1q]
+ mov%3 m2, [srcq+src2q]
+ mov%3 m3, [srcq+src3q]
+ mov%3 m4, [srcq+src4q]
+ mov%3 m5, [srcq+src5q]
%if cpuflag(sse4)
SBUTTERFLYPS 0, 1, 6
SBUTTERFLYPS 2, 3, 6
@@ -237,12 +253,12 @@ cglobal pack_6ch_%2_to_%1_%3, 2,8,7, dst, src, src1, src2, src3, src4, src5, len
movlhps m1, m3
movhlps m5, m3
- movaps [dstq ], m0
- movaps [dstq+16], m6
- movaps [dstq+32], m4
- movaps [dstq+48], m1
- movaps [dstq+64], m2
- movaps [dstq+80], m5
+ mov %+ %3 %+ ps [dstq ], m0
+ mov %+ %3 %+ ps [dstq+16], m6
+ mov %+ %3 %+ ps [dstq+32], m4
+ mov %+ %3 %+ ps [dstq+48], m1
+ mov %+ %3 %+ ps [dstq+64], m2
+ mov %+ %3 %+ ps [dstq+80], m5
%else ; mmx
SBUTTERFLY dq, 0, 1, 6
SBUTTERFLY dq, 2, 3, 6
@@ -268,11 +284,14 @@ cglobal pack_6ch_%2_to_%1_%3, 2,8,7, dst, src, src1, src2, src3, src4, src5, len
%endmacro
INIT_MMX mmx
+CONV_FLTP_TO_FLT_6CH float,float,u
CONV_FLTP_TO_FLT_6CH float,float,a
INIT_XMM sse4
+CONV_FLTP_TO_FLT_6CH float,float,u
CONV_FLTP_TO_FLT_6CH float,float,a
%if HAVE_AVX
INIT_XMM avx
+CONV_FLTP_TO_FLT_6CH float,float,u
CONV_FLTP_TO_FLT_6CH float,float,a
%endif