aboutsummaryrefslogtreecommitdiff
path: root/libavutil
diff options
context:
space:
mode:
authorShiyou Yin2018-08-31 21:41:49 +0800
committerMichael Niedermayer2018-09-02 03:37:32 +0200
commitdf13b75aa18633f95761b34775ab5e6797d92c57 (patch)
treebdc8df903143764ab4fc756e628fd1b265720320 /libavutil
parent1124df0397372c4d1dd798dc2cfb7d4e0f2bb890 (diff)
avcodec/mips: [loongson] reoptimize simple idct with mmi.
Performance of MPEG-4 decoding improved by about 23% (from 128 fps to 158 fps, tested on a Loongson 3A3000). The following functions were reoptimized with MMI: 1. ff_simple_idct_put_8_mmi 2. ff_simple_idct_add_8_mmi 3. ff_simple_idct_8_mmi Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavutil')
-rw-r--r--libavutil/mips/mmiutils.h49
1 files changed, 49 insertions, 0 deletions
diff --git a/libavutil/mips/mmiutils.h b/libavutil/mips/mmiutils.h
index 491579ec6c..2b1a52105e 100644
--- a/libavutil/mips/mmiutils.h
+++ b/libavutil/mips/mmiutils.h
@@ -201,6 +201,55 @@
#endif /* HAVE_LOONGSON2 */
+/**
+ * backup register
+ */
+#define BACKUP_REG \
+ double temp_backup_reg[8]; \
+ if (_MIPS_SIM == _ABI64) \
+ __asm__ volatile ( \
+ "gssqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
+ "gssqc1 $f27, $f26, 0x10(%[temp]) \n\t" \
+ "gssqc1 $f29, $f28, 0x20(%[temp]) \n\t" \
+ "gssqc1 $f31, $f30, 0x30(%[temp]) \n\t" \
+ : \
+ : [temp]"r"(temp_backup_reg) \
+ : "memory" \
+ ); \
+ else \
+ __asm__ volatile ( \
+ "gssqc1 $f22, $f20, 0x00(%[temp]) \n\t" \
+ "gssqc1 $f26, $f24, 0x10(%[temp]) \n\t" \
+ "gssqc1 $f30, $f28, 0x20(%[temp]) \n\t" \
+ : \
+ : [temp]"r"(temp_backup_reg) \
+ : "memory" \
+ );
+
+/**
+ * recover register
+ */
+#define RECOVER_REG \
+ if (_MIPS_SIM == _ABI64) \
+ __asm__ volatile ( \
+ "gslqc1 $f25, $f24, 0x00(%[temp]) \n\t" \
+ "gslqc1 $f27, $f26, 0x10(%[temp]) \n\t" \
+ "gslqc1 $f29, $f28, 0x20(%[temp]) \n\t" \
+ "gslqc1 $f31, $f30, 0x30(%[temp]) \n\t" \
+ : \
+ : [temp]"r"(temp_backup_reg) \
+ : "memory" \
+ ); \
+ else \
+ __asm__ volatile ( \
+ "gslqc1 $f22, $f20, 0x00(%[temp]) \n\t" \
+ "gslqc1 $f26, $f24, 0x10(%[temp]) \n\t" \
+ "gslqc1 $f30, $f28, 0x20(%[temp]) \n\t" \
+ : \
+ : [temp]"r"(temp_backup_reg) \
+ : "memory" \
+ );
+
#define TRANSPOSE_4H(m1, m2, m3, m4, t1, t2, t3, t4, t5, r1, zero, shift) \
"li "#r1", 0x93 \n\t" \
"xor "#zero","#zero","#zero" \n\t" \