about | summary | refs | log | tree | commit | diff
path: root/libavcodec
diff options
context:
space:
mode:
author Michael Niedermayer 2011-05-18 05:42:42 +0200
committer Michael Niedermayer 2011-05-18 05:42:42 +0200
commit b4bcd1e2f1d603419ea9d4fdaab400b1ad35e58c (patch)
tree 9a1cf8971e8f1247e0c99a160050be8d547328cb /libavcodec
parent 7a88617c43ce534d94591dd78d4958333492b939 (diff)
parent a26d2b4bc8af02b27168c277c5097273c05652c2 (diff)
Merge remote-tracking branch 'qatar/master'
* qatar/master:
  Fix compilation of iirfilter-test.
  libx264: handle closed GOP codec flag
  lavf: remove duplicate assignment in avformat_alloc_context.
  lavf: use designated initializers for AVClasses.
  flvdec: clenup debug code
  asfdec: fix possible overread on broken files.
  asfdec: do not fall back to binary/generic search
  asfdec: reindent after previous commit c7bd5ed
  asfdec: fallback to binary search internally
  mpegaudio: add _fixed suffix to some names
  Modify x86util.asm to ease transitioning to 10-bit H.264 assembly.
  dct: build dct32 as separate object files
  qdm2: include correct header for rdft

Conflicts:
    ffpresets/libx264-fast.ffpreset
    ffpresets/libx264-fast_firstpass.ffpreset
    ffpresets/libx264-faster.ffpreset
    ffpresets/libx264-faster_firstpass.ffpreset
    ffpresets/libx264-medium.ffpreset
    ffpresets/libx264-medium_firstpass.ffpreset
    ffpresets/libx264-placebo.ffpreset
    ffpresets/libx264-placebo_firstpass.ffpreset
    ffpresets/libx264-slow.ffpreset
    ffpresets/libx264-slow_firstpass.ffpreset
    ffpresets/libx264-slower.ffpreset
    ffpresets/libx264-slower_firstpass.ffpreset
    ffpresets/libx264-superfast.ffpreset
    ffpresets/libx264-superfast_firstpass.ffpreset
    ffpresets/libx264-ultrafast.ffpreset
    ffpresets/libx264-ultrafast_firstpass.ffpreset
    ffpresets/libx264-veryfast.ffpreset
    ffpresets/libx264-veryfast_firstpass.ffpreset
    ffpresets/libx264-veryslow.ffpreset
    ffpresets/libx264-veryslow_firstpass.ffpreset
    libavformat/flvdec.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/Makefile | 2
-rw-r--r-- libavcodec/dct.c | 6
-rw-r--r-- libavcodec/dct32.c | 13
-rw-r--r-- libavcodec/dct32.h | 25
-rw-r--r-- libavcodec/dct32_fixed.c | 20
-rw-r--r-- libavcodec/dct32_float.c | 20
-rw-r--r-- libavcodec/iirfilter.c | 2
-rw-r--r-- libavcodec/libx264.c | 2
-rw-r--r-- libavcodec/mpc.c | 6
-rw-r--r-- libavcodec/mpegaudio.h | 6
-rw-r--r-- libavcodec/mpegaudio_tablegen.c | 4
-rw-r--r-- libavcodec/mpegaudio_tablegen.h | 8
-rw-r--r-- libavcodec/mpegaudiodec.c | 13
-rw-r--r-- libavcodec/qdm2.c | 8
-rw-r--r-- libavcodec/x86/dsputilenc_yasm.asm | 12
-rw-r--r-- libavcodec/x86/h264_idct.asm | 26
-rw-r--r-- libavcodec/x86/vp8dsp.asm | 10
-rw-r--r-- libavcodec/x86/x86util.asm | 222
18 files changed, 262 insertions, 143 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 2b00575463..6723118693 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -28,7 +28,7 @@ OBJS-$(CONFIG_AANDCT) += aandcttab.o
OBJS-$(CONFIG_AC3DSP) += ac3dsp.o
OBJS-$(CONFIG_CRYSTALHD) += crystalhd.o
OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o
-OBJS-$(CONFIG_DCT) += dct.o
+OBJS-$(CONFIG_DCT) += dct.o dct32_fixed.o dct32_float.o
OBJS-$(CONFIG_DWT) += dwt.o
OBJS-$(CONFIG_DXVA2) += dxva2.o
FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o cos_fixed_tables.o
diff --git a/libavcodec/dct.c b/libavcodec/dct.c
index ef3cd50a79..c30cff664e 100644
--- a/libavcodec/dct.c
+++ b/libavcodec/dct.c
@@ -30,9 +30,7 @@
#include <math.h>
#include "libavutil/mathematics.h"
#include "dct.h"
-
-#define DCT32_FLOAT
-#include "dct32.c"
+#include "dct32.h"
/* sin((M_PI * x / (2*n)) */
#define SIN(s,n,x) (s->costab[(n) - (x)])
@@ -210,7 +208,7 @@ av_cold int ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType inverse)
}
}
- s->dct32 = dct32;
+ s->dct32 = ff_dct32_float;
if (HAVE_MMX) ff_dct_init_mmx(s);
return 0;
diff --git a/libavcodec/dct32.c b/libavcodec/dct32.c
index 4e843ee832..fb53d53ab1 100644
--- a/libavcodec/dct32.c
+++ b/libavcodec/dct32.c
@@ -19,10 +19,19 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#ifdef DCT32_FLOAT
+#include "dct32.h"
+#include "mathops.h"
+
+#if DCT32_FLOAT
+# define dct32 ff_dct32_float
# define FIXHR(x) ((float)(x))
# define MULH3(x, y, s) ((s)*(y)*(x))
# define INTFLOAT float
+#else
+# define dct32 ff_dct32_fixed
+# define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))
+# define MULH3(x, y, s) MULH((s)*(x), y)
+# define INTFLOAT int
#endif
@@ -103,7 +112,7 @@
#define ADD(a, b) val##a += val##b
/* DCT32 without 1/sqrt(2) coef zero scaling. */
-static void dct32(INTFLOAT *out, const INTFLOAT *tab)
+void dct32(INTFLOAT *out, const INTFLOAT *tab)
{
INTFLOAT tmp0, tmp1;
diff --git a/libavcodec/dct32.h b/libavcodec/dct32.h
new file mode 100644
index 0000000000..110338d25c
--- /dev/null
+++ b/libavcodec/dct32.h
@@ -0,0 +1,25 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DCT32_H
+#define AVCODEC_DCT32_H
+
+void ff_dct32_float(float *dst, const float *src);
+void ff_dct32_fixed(int *dst, const int *src);
+
+#endif
diff --git a/libavcodec/dct32_fixed.c b/libavcodec/dct32_fixed.c
new file mode 100644
index 0000000000..7eb9dc1a53
--- /dev/null
+++ b/libavcodec/dct32_fixed.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define DCT32_FLOAT 0
+#include "dct32.c"
diff --git a/libavcodec/dct32_float.c b/libavcodec/dct32_float.c
new file mode 100644
index 0000000000..727ec3caca
--- /dev/null
+++ b/libavcodec/dct32_float.c
@@ -0,0 +1,20 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define DCT32_FLOAT 1
+#include "dct32.c"
diff --git a/libavcodec/iirfilter.c b/libavcodec/iirfilter.c
index bc63c3991a..27461fb389 100644
--- a/libavcodec/iirfilter.c
+++ b/libavcodec/iirfilter.c
@@ -324,7 +324,7 @@ int main(void)
int i;
FILE* fd;
- fcoeffs = ff_iir_filter_init_coeffs(FF_FILTER_TYPE_BUTTERWORTH,
+ fcoeffs = ff_iir_filter_init_coeffs(NULL, FF_FILTER_TYPE_BUTTERWORTH,
FF_FILTER_MODE_LOWPASS, FILT_ORDER,
cutoff_coeff, 0.0, 0.0);
fstate = ff_iir_filter_init_state(FILT_ORDER);
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 838cb703e8..eae21fe2bb 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -367,6 +367,8 @@ static av_cold int X264_init(AVCodecContext *avctx)
x4->params.b_interlaced = avctx->flags & CODEC_FLAG_INTERLACED_DCT;
+ x4->params.b_open_gop = !(avctx->flags & CODEC_FLAG_CLOSED_GOP);
+
x4->params.i_slice_count = avctx->slices;
x4->params.vui.b_fullrange = avctx->pix_fmt == PIX_FMT_YUVJ420P;
diff --git a/libavcodec/mpc.c b/libavcodec/mpc.c
index d9a1fb776a..ca4c3d0dcb 100644
--- a/libavcodec/mpc.c
+++ b/libavcodec/mpc.c
@@ -36,7 +36,7 @@
void ff_mpc_init(void)
{
- ff_mpa_synth_init(ff_mpa_synth_window);
+ ff_mpa_synth_init_fixed(ff_mpa_synth_window_fixed);
}
/**
@@ -51,8 +51,8 @@ static void mpc_synth(MPCContext *c, int16_t *out, int channels)
for(ch = 0; ch < channels; ch++){
samples_ptr = samples + ch;
for(i = 0; i < SAMPLES_PER_BAND; i++) {
- ff_mpa_synth_filter(c->synth_buf[ch], &(c->synth_buf_offset[ch]),
- ff_mpa_synth_window, &dither_state,
+ ff_mpa_synth_filter_fixed(c->synth_buf[ch], &(c->synth_buf_offset[ch]),
+ ff_mpa_synth_window_fixed, &dither_state,
samples_ptr, channels,
c->sb_samples[ch][i]);
samples_ptr += 32 * channels;
diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index 138085366f..005598797d 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -158,9 +158,9 @@ typedef struct HuffTable {
int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
-extern MPA_INT ff_mpa_synth_window[];
-void ff_mpa_synth_init(MPA_INT *window);
-void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
+extern MPA_INT ff_mpa_synth_window_fixed[];
+void ff_mpa_synth_init_fixed(MPA_INT *window);
+void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
MPA_INT *window, int *dither_state,
OUT_INT *samples, int incr,
INTFLOAT sb_samples[SBLIMIT]);
diff --git a/libavcodec/mpegaudio_tablegen.c b/libavcodec/mpegaudio_tablegen.c
index 0888e78620..90c9de430a 100644
--- a/libavcodec/mpegaudio_tablegen.c
+++ b/libavcodec/mpegaudio_tablegen.c
@@ -33,9 +33,9 @@ int main(void)
WRITE_ARRAY("static const", int8_t, table_4_3_exp);
WRITE_ARRAY("static const", uint32_t, table_4_3_value);
- WRITE_ARRAY("static const", uint32_t, exp_table);
+ WRITE_ARRAY("static const", uint32_t, exp_table_fixed);
WRITE_ARRAY("static const", float, exp_table_float);
- WRITE_2D_ARRAY("static const", uint32_t, expval_table);
+ WRITE_2D_ARRAY("static const", uint32_t, expval_table_fixed);
WRITE_2D_ARRAY("static const", float, expval_table_float);
return 0;
diff --git a/libavcodec/mpegaudio_tablegen.h b/libavcodec/mpegaudio_tablegen.h
index 01c4174a60..214959348a 100644
--- a/libavcodec/mpegaudio_tablegen.h
+++ b/libavcodec/mpegaudio_tablegen.h
@@ -33,8 +33,8 @@
#else
static int8_t table_4_3_exp[TABLE_4_3_SIZE];
static uint32_t table_4_3_value[TABLE_4_3_SIZE];
-static uint32_t exp_table[512];
-static uint32_t expval_table[512][16];
+static uint32_t exp_table_fixed[512];
+static uint32_t expval_table_fixed[512][16];
static float exp_table_float[512];
static float expval_table_float[512][16];
@@ -59,10 +59,10 @@ static void mpegaudio_tableinit(void)
for (exponent = 0; exponent < 512; exponent++) {
for (value = 0; value < 16; value++) {
double f = (double)value * cbrtf(value) * pow(2, (exponent - 400) * 0.25 + FRAC_BITS + 5);
- expval_table[exponent][value] = llrint(f);
+ expval_table_fixed[exponent][value] = llrint(f);
expval_table_float[exponent][value] = f;
}
- exp_table[exponent] = expval_table[exponent][1];
+ exp_table_fixed[exponent] = expval_table_fixed[exponent][1];
exp_table_float[exponent] = expval_table_float[exponent][1];
}
}
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index bdff815d4d..c2c822223e 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -29,6 +29,7 @@
#include "get_bits.h"
#include "dsputil.h"
#include "mathops.h"
+#include "dct32.h"
/*
* TODO:
@@ -57,7 +58,7 @@
# define FIXHR(a) ((int)((a) * (1LL<<32) + 0.5))
# define MULH3(x, y, s) MULH((s)*(x), y)
# define MULLx(x, y, s) MULL(x,y,s)
-# define RENAME(a) a
+# define RENAME(a) a ## _fixed
# define OUT_FMT AV_SAMPLE_FMT_S16
#endif
@@ -68,12 +69,6 @@
#include "mpegaudiodata.h"
#include "mpegaudiodectab.h"
-#if CONFIG_FLOAT
-# include "fft.h"
-#else
-# include "dct32.c"
-#endif
-
static void compute_antialias(MPADecodeContext *s, GranuleDef *g);
static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
int *dither_state, OUT_INT *samples, int incr);
@@ -626,7 +621,7 @@ static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
32 samples. */
/* XXX: optimize by avoiding ring buffer usage */
#if !CONFIG_FLOAT
-void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
+void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
MPA_INT *window, int *dither_state,
OUT_INT *samples, int incr,
INTFLOAT sb_samples[SBLIMIT])
@@ -637,7 +632,7 @@ void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
offset = *synth_buf_offset;
synth_buf = synth_buf_ptr + offset;
- dct32(synth_buf, sb_samples);
+ ff_dct32_fixed(synth_buf, sb_samples);
apply_window_mp3_c(synth_buf, window, dither_state, samples, incr);
offset = (offset - 32) & 511;
diff --git a/libavcodec/qdm2.c b/libavcodec/qdm2.c
index a64870a3f9..e1165074f7 100644
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@@ -38,7 +38,7 @@
#include "avcodec.h"
#include "get_bits.h"
#include "dsputil.h"
-#include "fft.h"
+#include "rdft.h"
#include "mpegaudio.h"
#include "qdm2data.h"
@@ -1616,8 +1616,8 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index)
OUT_INT *samples_ptr = samples + ch;
for (i = 0; i < 8; i++) {
- ff_mpa_synth_filter(q->synth_buf[ch], &(q->synth_buf_offset[ch]),
- ff_mpa_synth_window, &dither_state,
+ ff_mpa_synth_filter_fixed(q->synth_buf[ch], &(q->synth_buf_offset[ch]),
+ ff_mpa_synth_window_fixed, &dither_state,
samples_ptr, q->nb_channels,
q->sb_samples[ch][(8 * index) + i]);
samples_ptr += 32 * q->nb_channels;
@@ -1646,7 +1646,7 @@ static av_cold void qdm2_init(QDM2Context *q) {
initialized = 1;
qdm2_init_vlc();
- ff_mpa_synth_init(ff_mpa_synth_window);
+ ff_mpa_synth_init_fixed(ff_mpa_synth_window_fixed);
softclip_table_init();
rnd_table_init();
init_noise_samples();
diff --git a/libavcodec/x86/dsputilenc_yasm.asm b/libavcodec/x86/dsputilenc_yasm.asm
index a4f2d0cf51..016b354d6c 100644
--- a/libavcodec/x86/dsputilenc_yasm.asm
+++ b/libavcodec/x86/dsputilenc_yasm.asm
@@ -59,12 +59,12 @@ SECTION .text
%endmacro
%macro HADAMARD8 0
- SUMSUB_BADC m0, m1, m2, m3
- SUMSUB_BADC m4, m5, m6, m7
- SUMSUB_BADC m0, m2, m1, m3
- SUMSUB_BADC m4, m6, m5, m7
- SUMSUB_BADC m0, m4, m1, m5
- SUMSUB_BADC m2, m6, m3, m7
+ SUMSUB_BADC w, 0, 1, 2, 3
+ SUMSUB_BADC w, 4, 5, 6, 7
+ SUMSUB_BADC w, 0, 2, 1, 3
+ SUMSUB_BADC w, 4, 6, 5, 7
+ SUMSUB_BADC w, 0, 4, 1, 5
+ SUMSUB_BADC w, 2, 6, 3, 7
%endmacro
%macro ABS1_SUM 3
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index e90b0b1186..4f6f1d7bf8 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -59,11 +59,11 @@ SECTION .text
movq m2, [%2+16]
movq m3, [%2+24]
- IDCT4_1D 0, 1, 2, 3, 4, 5
+ IDCT4_1D w, 0, 1, 2, 3, 4, 5
mova m6, [pw_32]
TRANSPOSE4x4W 0, 1, 2, 3, 4
paddw m0, m6
- IDCT4_1D 0, 1, 2, 3, 4, 5
+ IDCT4_1D w, 0, 1, 2, 3, 4, 5
pxor m7, m7
STORE_DIFFx2 m0, m1, m4, m5, m7, 6, %1, %3
@@ -118,13 +118,13 @@ cglobal h264_idct_add_mmx, 3, 3, 0
mova m2, %1
mova m5, %2
- SUMSUB_BA m5, m2
- SUMSUB_BA m6, m5
- SUMSUB_BA m4, m2
- SUMSUB_BA m7, m6
- SUMSUB_BA m0, m4
- SUMSUB_BA m3, m2
- SUMSUB_BA m1, m5
+ SUMSUB_BA w, 5, 2
+ SUMSUB_BA w, 6, 5
+ SUMSUB_BA w, 4, 2
+ SUMSUB_BA w, 7, 6
+ SUMSUB_BA w, 0, 4
+ SUMSUB_BA w, 3, 2
+ SUMSUB_BA w, 1, 5
SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
%endmacro
@@ -715,10 +715,10 @@ x264_add8x4_idct_sse2:
movhps m1, [r2+40]
movhps m2, [r2+48]
movhps m3, [r2+56]
- IDCT4_1D 0,1,2,3,4,5
+ IDCT4_1D w,0,1,2,3,4,5
TRANSPOSE2x4x4W 0,1,2,3,4
paddw m0, [pw_32]
- IDCT4_1D 0,1,2,3,4,5
+ IDCT4_1D w,0,1,2,3,4,5
pxor m7, m7
STORE_DIFFx2 m0, m1, m4, m5, m7, 6, r0, r3
lea r0, [r0+r3*2]
@@ -859,8 +859,8 @@ cglobal h264_idct_add8_sse2, 5, 7, 8
;void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul)
%macro WALSH4_1D 5
- SUMSUB_BADC m%4, m%3, m%2, m%1, m%5
- SUMSUB_BADC m%4, m%2, m%3, m%1, m%5
+ SUMSUB_BADC w, %4, %3, %2, %1, %5
+ SUMSUB_BADC w, %4, %2, %3, %1, %5
SWAP %1, %4, %3
%endmacro
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 9b175c1488..14b49705dc 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -1106,10 +1106,10 @@ cglobal vp8_idct_dc_add4uv_mmx, 3, 3
; %5/%6 are temporary registers
; we assume m6/m7 have constant words 20091/17734 loaded in them
%macro VP8_IDCT_TRANSFORM4x4_1D 6
- SUMSUB_BA m%3, m%1, m%5 ;t0, t1
+ SUMSUB_BA w, %3, %1, %5 ;t0, t1
VP8_MULTIPLY_SUMSUB m%2, m%4, m%5,m%6 ;t2, t3
- SUMSUB_BA m%4, m%3, m%5 ;tmp0, tmp3
- SUMSUB_BA m%2, m%1, m%5 ;tmp1, tmp2
+ SUMSUB_BA w, %4, %3, %5 ;tmp0, tmp3
+ SUMSUB_BA w, %2, %1, %5 ;tmp1, tmp2
SWAP %4, %1
SWAP %4, %3
%endmacro
@@ -1181,8 +1181,8 @@ VP8_IDCT_ADD sse
%endmacro
%macro HADAMARD4_1D 4
- SUMSUB_BADC m%2, m%1, m%4, m%3
- SUMSUB_BADC m%4, m%2, m%3, m%1
+ SUMSUB_BADC w, %2, %1, %4, %3
+ SUMSUB_BADC w, %4, %2, %3, %1
SWAP %1, %4, %3
%endmacro
diff --git a/libavcodec/x86/x86util.asm b/libavcodec/x86/x86util.asm
index 7e5b67419a..18ba9d1ad2 100644
--- a/libavcodec/x86/x86util.asm
+++ b/libavcodec/x86/x86util.asm
@@ -208,6 +208,17 @@
pminub %2, %4
%endmacro
+%macro ABSD2_MMX 4
+ pxor %3, %3
+ pxor %4, %4
+ pcmpgtd %3, %1
+ pcmpgtd %4, %2
+ pxor %1, %3
+ pxor %2, %4
+ psubd %1, %3
+ psubd %2, %4
+%endmacro
+
%macro ABSB_SSSE3 2
pabsb %1, %1
%endmacro
@@ -230,12 +241,7 @@
%macro SPLATB_MMX 3
movd %1, [%2-3] ;to avoid crossing a cacheline
punpcklbw %1, %1
-%if mmsize==16
- pshuflw %1, %1, 0xff
- punpcklqdq %1, %1
-%else
- pshufw %1, %1, 0xff
-%endif
+ SPLATW %1, %1, 3
%endmacro
%macro SPLATB_SSSE3 3
@@ -243,125 +249,169 @@
pshufb %1, %3
%endmacro
-%macro PALIGNR_MMX 4
- %ifnidn %4, %2
+%macro PALIGNR_MMX 4-5 ; [dst,] src1, src2, imm, tmp
+ %define %%dst %1
+%if %0==5
+%ifnidn %1, %2
+ mova %%dst, %2
+%endif
+ %rotate 1
+%endif
+%ifnidn %4, %2
mova %4, %2
- %endif
- %if mmsize == 8
- psllq %1, (8-%3)*8
+%endif
+%if mmsize==8
+ psllq %%dst, (8-%3)*8
psrlq %4, %3*8
- %else
- pslldq %1, 16-%3
+%else
+ pslldq %%dst, 16-%3
psrldq %4, %3
- %endif
- por %1, %4
+%endif
+ por %%dst, %4
%endmacro
-%macro PALIGNR_SSSE3 4
+%macro PALIGNR_SSSE3 4-5
+%if %0==5
+ palignr %1, %2, %3, %4
+%else
palignr %1, %2, %3
+%endif
%endmacro
%macro DEINTB 5 ; mask, reg1, mask, reg2, optional src to fill masks from
%ifnum %5
- mova m%1, m%5
- mova m%3, m%5
+ pand m%3, m%5, m%4 ; src .. y6 .. y4
+ pand m%1, m%5, m%2 ; dst .. y6 .. y4
%else
mova m%1, %5
- mova m%3, m%1
+ pand m%3, m%1, m%4 ; src .. y6 .. y4
+ pand m%1, m%1, m%2 ; dst .. y6 .. y4
%endif
- pand m%1, m%2 ; dst .. y6 .. y4
- pand m%3, m%4 ; src .. y6 .. y4
- psrlw m%2, 8 ; dst .. y7 .. y5
- psrlw m%4, 8 ; src .. y7 .. y5
+ psrlw m%2, 8 ; dst .. y7 .. y5
+ psrlw m%4, 8 ; src .. y7 .. y5
%endmacro
-%macro SUMSUB_BA 2-3
-%if %0==2
- paddw %1, %2
- paddw %2, %2
- psubw %2, %1
+%macro SUMSUB_BA 3-4
+%if %0==3
+ padd%1 m%2, m%3
+ padd%1 m%3, m%3
+ psub%1 m%3, m%2
+%else
+%if avx_enabled == 0
+ mova m%4, m%2
+ padd%1 m%2, m%3
+ psub%1 m%3, m%4
%else
- mova %3, %1
- paddw %1, %2
- psubw %2, %3
+ padd%1 m%4, m%2, m%3
+ psub%1 m%3, m%2
+ SWAP %2, %4
+%endif
%endif
%endmacro
-%macro SUMSUB_BADC 4-5
-%if %0==5
- SUMSUB_BA %1, %2, %5
- SUMSUB_BA %3, %4, %5
+%macro SUMSUB_BADC 5-6
+%if %0==6
+ SUMSUB_BA %1, %2, %3, %6
+ SUMSUB_BA %1, %4, %5, %6
%else
- paddw %1, %2
- paddw %3, %4
- paddw %2, %2
- paddw %4, %4
- psubw %2, %1
- psubw %4, %3
+ padd%1 m%2, m%3
+ padd%1 m%4, m%5
+ padd%1 m%3, m%3
+ padd%1 m%5, m%5
+ psub%1 m%3, m%2
+ psub%1 m%5, m%4
%endif
%endmacro
-%macro SUMSUB2_AB 3
- mova %3, %1
- paddw %1, %1
- paddw %1, %2
- psubw %3, %2
- psubw %3, %2
+%macro SUMSUB2_AB 4
+%ifnum %3
+ psub%1 m%4, m%2, m%3
+ psub%1 m%4, m%3
+ padd%1 m%2, m%2
+ padd%1 m%2, m%3
+%else
+ mova m%4, m%2
+ padd%1 m%2, m%2
+ padd%1 m%2, %3
+ psub%1 m%4, %3
+ psub%1 m%4, %3
+%endif
%endmacro
-%macro SUMSUB2_BA 3
- mova m%3, m%1
- paddw m%1, m%2
- paddw m%1, m%2
- psubw m%2, m%3
- psubw m%2, m%3
+%macro SUMSUB2_BA 4
+%if avx_enabled == 0
+ mova m%4, m%2
+ padd%1 m%2, m%3
+ padd%1 m%2, m%3
+ psub%1 m%3, m%4
+ psub%1 m%3, m%4
+%else
+ padd%1 m%4, m%2, m%3
+ padd%1 m%4, m%3
+ psub%1 m%3, m%2
+ psub%1 m%3, m%2
+ SWAP %2, %4
+%endif
%endmacro
-%macro SUMSUBD2_AB 4
- mova %4, %1
- mova %3, %2
- psraw %2, 1 ; %2: %2>>1
- psraw %1, 1 ; %1: %1>>1
- paddw %2, %4 ; %2: %2>>1+%1
- psubw %1, %3 ; %1: %1>>1-%2
+%macro SUMSUBD2_AB 5
+%ifnum %4
+ psra%1 m%5, m%2, 1 ; %3: %3>>1
+ psra%1 m%4, m%3, 1 ; %2: %2>>1
+ padd%1 m%4, m%2 ; %3: %3>>1+%2
+ psub%1 m%5, m%3 ; %2: %2>>1-%3
+ SWAP %2, %5
+ SWAP %3, %4
+%else
+ mova %5, m%2
+ mova %4, m%3
+ psra%1 m%3, 1 ; %3: %3>>1
+ psra%1 m%2, 1 ; %2: %2>>1
+ padd%1 m%3, %5 ; %3: %3>>1+%2
+ psub%1 m%2, %4 ; %2: %2>>1-%3
+%endif
%endmacro
%macro DCT4_1D 5
%ifnum %5
- SUMSUB_BADC m%4, m%1, m%3, m%2; m%5
- SUMSUB_BA m%3, m%4, m%5
- SUMSUB2_AB m%1, m%2, m%5
+ SUMSUB_BADC w, %4, %1, %3, %2, %5
+ SUMSUB_BA w, %3, %4, %5
+ SUMSUB2_AB w, %1, %2, %5
SWAP %1, %3, %4, %5, %2
%else
- SUMSUB_BADC m%4, m%1, m%3, m%2
- SUMSUB_BA m%3, m%4
- mova [%5], m%2
- SUMSUB2_AB m%1, [%5], m%2
+ SUMSUB_BADC w, %4, %1, %3, %2
+ SUMSUB_BA w, %3, %4
+ mova [%5], m%2
+ SUMSUB2_AB w, %1, [%5], %2
SWAP %1, %3, %4, %2
%endif
%endmacro
-%macro IDCT4_1D 5-6
-%ifnum %5
- SUMSUBD2_AB m%2, m%4, m%6, m%5
- ; %2: %2>>1-%4 %4: %2+%4>>1
- SUMSUB_BA m%3, m%1, m%6
- ; %3: %1+%3 %1: %1-%3
- SUMSUB_BADC m%4, m%3, m%2, m%1, m%6
- ; %4: %1+%3 + (%2+%4>>1)
- ; %3: %1+%3 - (%2+%4>>1)
- ; %2: %1-%3 + (%2>>1-%4)
- ; %1: %1-%3 - (%2>>1-%4)
+%macro IDCT4_1D 6-7
+%ifnum %6
+ SUMSUBD2_AB %1, %3, %5, %7, %6
+ ; %3: %3>>1-%5 %5: %3+%5>>1
+ SUMSUB_BA %1, %4, %2, %7
+ ; %4: %2+%4 %2: %2-%4
+ SUMSUB_BADC %1, %5, %4, %3, %2, %7
+ ; %5: %2+%4 + (%3+%5>>1)
+ ; %4: %2+%4 - (%3+%5>>1)
+ ; %3: %2-%4 + (%3>>1-%5)
+ ; %2: %2-%4 - (%3>>1-%5)
%else
- SUMSUBD2_AB m%2, m%4, [%5], [%5+16]
- SUMSUB_BA m%3, m%1
- SUMSUB_BADC m%4, m%3, m%2, m%1
+%ifidn %1, w
+ SUMSUBD2_AB %1, %3, %5, [%6], [%6+16]
+%else
+ SUMSUBD2_AB %1, %3, %5, [%6], [%6+32]
+%endif
+ SUMSUB_BA %1, %4, %2
+ SUMSUB_BADC %1, %5, %4, %3, %2
%endif
- SWAP %1, %4, %3
- ; %1: %1+%3 + (%2+%4>>1) row0
- ; %2: %1-%3 + (%2>>1-%4) row1
- ; %3: %1-%3 - (%2>>1-%4) row2
- ; %4: %1+%3 - (%2+%4>>1) row3
+ SWAP %2, %5, %4
+ ; %2: %2+%4 + (%3+%5>>1) row0
+ ; %3: %2-%4 + (%3>>1-%5) row1
+ ; %4: %2-%4 - (%3>>1-%5) row2
+ ; %5: %2+%4 - (%3+%5>>1) row3
%endmacro