diff options
Diffstat (limited to 'libavcodec/i386/h264dsp_mmx.c')
-rw-r--r-- | libavcodec/i386/h264dsp_mmx.c | 104 |
1 file changed, 52 insertions, 52 deletions
diff --git a/libavcodec/i386/h264dsp_mmx.c b/libavcodec/i386/h264dsp_mmx.c index f94f7088cf..bb9c82d612 100644 --- a/libavcodec/i386/h264dsp_mmx.c +++ b/libavcodec/i386/h264dsp_mmx.c @@ -57,14 +57,14 @@ DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL; static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride) { /* Load dct coeffs */ - asm volatile( + __asm__ volatile( "movq (%0), %%mm0 \n\t" "movq 8(%0), %%mm1 \n\t" "movq 16(%0), %%mm2 \n\t" "movq 24(%0), %%mm3 \n\t" :: "r"(block) ); - asm volatile( + __asm__ volatile( /* mm1=s02+s13 mm2=s02-s13 mm4=d02+d13 mm0=d02-d13 */ IDCT4_1D( %%mm2, %%mm1, %%mm0, %%mm3, %%mm4 ) @@ -80,7 +80,7 @@ static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride) "pxor %%mm7, %%mm7 \n\t" :: "m"(ff_pw_32)); - asm volatile( + __asm__ volatile( STORE_DIFF_4P( %%mm0, %%mm1, %%mm7) "add %1, %0 \n\t" STORE_DIFF_4P( %%mm2, %%mm1, %%mm7) @@ -95,7 +95,7 @@ static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride) static inline void h264_idct8_1d(int16_t *block) { - asm volatile( + __asm__ volatile( "movq 112(%0), %%mm7 \n\t" "movq 80(%0), %%mm0 \n\t" "movq 48(%0), %%mm3 \n\t" @@ -166,7 +166,7 @@ static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) h264_idct8_1d(block+4*i); - asm volatile( + __asm__ volatile( "movq %%mm7, %0 \n\t" TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 ) "movq %%mm0, 8(%1) \n\t" @@ -188,7 +188,7 @@ static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) for(i=0; i<2; i++){ h264_idct8_1d(b2+4*i); - asm volatile( + __asm__ volatile( "psraw $6, %%mm7 \n\t" "psraw $6, %%mm6 \n\t" "psraw $6, %%mm5 \n\t" @@ -269,7 +269,7 @@ static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) static void ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride) { - asm volatile( + __asm__ volatile( "movdqa 0x10(%1), %%xmm1 \n" "movdqa 0x20(%1), %%xmm2 \n" "movdqa 0x30(%1), %%xmm3 \n" @@ -304,7 
+304,7 @@ static void ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride) static void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) { int dc = (block[0] + 32) >> 6; - asm volatile( + __asm__ volatile( "movd %0, %%mm0 \n\t" "pshufw $0, %%mm0, %%mm0 \n\t" "pxor %%mm1, %%mm1 \n\t" @@ -313,7 +313,7 @@ static void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) "packuswb %%mm1, %%mm1 \n\t" ::"r"(dc) ); - asm volatile( + __asm__ volatile( "movd %0, %%mm2 \n\t" "movd %1, %%mm3 \n\t" "movd %2, %%mm4 \n\t" @@ -341,7 +341,7 @@ static void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) { int dc = (block[0] + 32) >> 6; int y; - asm volatile( + __asm__ volatile( "movd %0, %%mm0 \n\t" "pshufw $0, %%mm0, %%mm0 \n\t" "pxor %%mm1, %%mm1 \n\t" @@ -351,7 +351,7 @@ static void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) ::"r"(dc) ); for(y=2; y--; dst += 4*stride){ - asm volatile( + __asm__ volatile( "movq %0, %%mm2 \n\t" "movq %1, %%mm3 \n\t" "movq %2, %%mm4 \n\t" @@ -463,7 +463,7 @@ static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alph { DECLARE_ALIGNED_8(uint64_t, tmp0[2]); - asm volatile( + __asm__ volatile( "movq (%1,%3), %%mm0 \n\t" //p1 "movq (%1,%3,2), %%mm1 \n\t" //p0 "movq (%2), %%mm2 \n\t" //q0 @@ -540,7 +540,7 @@ static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, in static inline void h264_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0) { - asm volatile( + __asm__ volatile( "movq (%0), %%mm0 \n\t" //p1 "movq (%0,%2), %%mm1 \n\t" //p0 "movq (%1), %%mm2 \n\t" //q0 @@ -586,7 +586,7 @@ static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, static inline void h264_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha1, int beta1) { - asm volatile( + __asm__ volatile( "movq (%0), %%mm0 \n\t" "movq (%0,%2), %%mm1 \n\t" "movq (%1), %%mm2 \n\t" @@ -628,7 
+628,7 @@ static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) { int dir; - asm volatile( + __asm__ volatile( "pxor %%mm7, %%mm7 \n\t" "movq %0, %%mm6 \n\t" "movq %1, %%mm5 \n\t" @@ -636,7 +636,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] ::"m"(ff_pb_1), "m"(ff_pb_3), "m"(ff_pb_7) ); if(field) - asm volatile( + __asm__ volatile( "movq %0, %%mm5 \n\t" "movq %1, %%mm4 \n\t" ::"m"(ff_pb_3_1), "m"(ff_pb_7_3) @@ -650,14 +650,14 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL; int b_idx, edge, l; for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) { - asm volatile( + __asm__ volatile( "pand %0, %%mm0 \n\t" ::"m"(mask_dir) ); if(!(mask_mv & edge)) { - asm volatile("pxor %%mm0, %%mm0 \n\t":); + __asm__ volatile("pxor %%mm0, %%mm0 \n\t":); for( l = bidir; l >= 0; l-- ) { - asm volatile( + __asm__ volatile( "movd %0, %%mm1 \n\t" "punpckldq %1, %%mm1 \n\t" "movq %%mm1, %%mm2 \n\t" @@ -688,7 +688,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] ); } } - asm volatile( + __asm__ volatile( "movd %0, %%mm1 \n\t" "por %1, %%mm1 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" @@ -696,7 +696,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] ::"m"(nnz[b_idx]), "m"(nnz[b_idx+d_idx]) ); - asm volatile( + __asm__ volatile( "pcmpeqw %%mm7, %%mm0 \n\t" "pcmpeqw %%mm7, %%mm0 \n\t" "psrlw $15, %%mm0 \n\t" // nonzero -> 1 @@ -713,7 +713,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] edges = 4; step = 1; } - asm volatile( + __asm__ volatile( "movq (%0), %%mm0 \n\t" "movq 8(%0), %%mm1 \n\t" "movq 16(%0), %%mm2 
\n\t" @@ -774,7 +774,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ int h=4;\ \ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movq %5, %%mm4 \n\t"\ "movq %6, %%mm5 \n\t"\ @@ -813,14 +813,14 @@ static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uin }\ static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ int h=4;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movq %0, %%mm4 \n\t"\ "movq %1, %%mm5 \n\t"\ :: "m"(ff_pw_5), "m"(ff_pw_16)\ );\ do{\ - asm volatile(\ + __asm__ volatile(\ "movd -1(%0), %%mm1 \n\t"\ "movd (%0), %%mm2 \n\t"\ "movd 1(%0), %%mm3 \n\t"\ @@ -857,7 +857,7 @@ static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, }\ static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ src -= 2*srcStride;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movd (%0), %%mm0 \n\t"\ "add %2, %0 \n\t"\ @@ -889,7 +889,7 @@ static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, in int w=3;\ src -= 2*srcStride+2;\ while(w--){\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movd (%0), %%mm0 \n\t"\ "add %2, %0 \n\t"\ @@ -919,7 +919,7 @@ static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, in src += 4 - 9*srcStride;\ }\ tmp -= 3*4;\ - asm volatile(\ + __asm__ volatile(\ "1: \n\t"\ "movq (%0), %%mm0 \n\t"\ "paddw 10(%0), %%mm0 \n\t"\ @@ -948,7 +948,7 @@ static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, in \ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ int h=8;\ - asm volatile(\ + __asm__ 
volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movq %5, %%mm6 \n\t"\ "1: \n\t"\ @@ -1005,13 +1005,13 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uin \ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ int h=8;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movq %0, %%mm6 \n\t"\ :: "m"(ff_pw_5)\ );\ do{\ - asm volatile(\ + __asm__ volatile(\ "movq (%0), %%mm0 \n\t"\ "movq 1(%0), %%mm2 \n\t"\ "movq %%mm0, %%mm1 \n\t"\ @@ -1071,7 +1071,7 @@ static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, src -= 2*srcStride;\ \ while(w--){\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movd (%0), %%mm0 \n\t"\ "add %2, %0 \n\t"\ @@ -1102,7 +1102,7 @@ static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, : "memory"\ );\ if(h==16){\ - asm volatile(\ + __asm__ volatile(\ QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ @@ -1125,7 +1125,7 @@ static av_always_inline void OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_ int w = (size+8)>>2;\ src -= 2*srcStride+2;\ while(w--){\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movd (%0), %%mm0 \n\t"\ "add %2, %0 \n\t"\ @@ -1155,7 +1155,7 @@ static av_always_inline void OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_ : "memory"\ );\ if(size==16){\ - asm volatile(\ + __asm__ volatile(\ QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 8*48)\ QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 9*48)\ QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\ @@ -1177,7 +1177,7 @@ static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_ int w = size>>4;\ do{\ int h = size;\ - asm volatile(\ + __asm__ volatile(\ "1: \n\t"\ "movq (%0), %%mm0 \n\t"\ "movq 
8(%0), %%mm3 \n\t"\ @@ -1261,7 +1261,7 @@ static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, \ static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ {\ - asm volatile(\ + __asm__ volatile(\ "movq (%1), %%mm0 \n\t"\ "movq 24(%1), %%mm1 \n\t"\ "psraw $5, %%mm0 \n\t"\ @@ -1291,7 +1291,7 @@ static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_ static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ {\ do{\ - asm volatile(\ + __asm__ volatile(\ "movq (%1), %%mm0 \n\t"\ "movq 8(%1), %%mm1 \n\t"\ "movq 48(%1), %%mm2 \n\t"\ @@ -1325,7 +1325,7 @@ static void OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, u #define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ int h=16;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%xmm15, %%xmm15 \n\t"\ "movdqa %6, %%xmm14 \n\t"\ "movdqa %7, %%xmm13 \n\t"\ @@ -1403,13 +1403,13 @@ static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, #define QPEL_H264_H_XMM(OPNAME, OP, MMX)\ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ int h=8;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%xmm7, %%xmm7 \n\t"\ "movdqa %0, %%xmm6 \n\t"\ :: "m"(ff_pw_5)\ );\ do{\ - asm volatile(\ + __asm__ volatile(\ "lddqu -5(%0), %%xmm1 \n\t"\ "movdqa %%xmm1, %%xmm0 \n\t"\ "punpckhbw %%xmm7, %%xmm1 \n\t"\ @@ -1450,7 +1450,7 @@ QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ \ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ int h=8;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%xmm7, %%xmm7 \n\t"\ "movdqa %5, 
%%xmm6 \n\t"\ "1: \n\t"\ @@ -1501,7 +1501,7 @@ static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ src -= 2*srcStride;\ \ - asm volatile(\ + __asm__ volatile(\ "pxor %%xmm7, %%xmm7 \n\t"\ "movq (%0), %%xmm0 \n\t"\ "add %2, %0 \n\t"\ @@ -1532,7 +1532,7 @@ static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, : "memory"\ );\ if(h==16){\ - asm volatile(\ + __asm__ volatile(\ QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\ QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\ QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\ @@ -1560,7 +1560,7 @@ static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, u int w = (size+8)>>3; src -= 2*srcStride+2; while(w--){ - asm volatile( + __asm__ volatile( "pxor %%xmm7, %%xmm7 \n\t" "movq (%0), %%xmm0 \n\t" "add %2, %0 \n\t" @@ -1590,7 +1590,7 @@ static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, u : "memory" ); if(size==16){ - asm volatile( + __asm__ volatile( QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 8*48) QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 9*48) QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 10*48) @@ -1613,7 +1613,7 @@ static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, u static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\ int h = size;\ if(size == 16){\ - asm volatile(\ + __asm__ volatile(\ "1: \n\t"\ "movdqa 32(%0), %%xmm4 \n\t"\ "movdqa 16(%0), %%xmm5 \n\t"\ @@ -1668,7 +1668,7 @@ static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_ : "memory"\ );\ }else{\ - asm volatile(\ + __asm__ volatile(\ "1: \n\t"\ "movdqa 16(%0), %%xmm1 
\n\t"\ "movdqa (%0), %%xmm0 \n\t"\ @@ -2022,7 +2022,7 @@ static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_de int x, y; offset <<= log2_denom; offset += (1 << log2_denom) >> 1; - asm volatile( + __asm__ volatile( "movd %0, %%mm4 \n\t" "movd %1, %%mm5 \n\t" "movd %2, %%mm6 \n\t" @@ -2033,7 +2033,7 @@ static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_de ); for(y=0; y<h; y+=2){ for(x=0; x<w; x+=4){ - asm volatile( + __asm__ volatile( "movd %0, %%mm0 \n\t" "movd %1, %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" @@ -2060,7 +2060,7 @@ static inline void ff_h264_biweight_WxH_mmx2(uint8_t *dst, uint8_t *src, int str { int x, y; offset = ((offset + 1) | 1) << log2_denom; - asm volatile( + __asm__ volatile( "movd %0, %%mm3 \n\t" "movd %1, %%mm4 \n\t" "movd %2, %%mm5 \n\t" @@ -2073,7 +2073,7 @@ static inline void ff_h264_biweight_WxH_mmx2(uint8_t *dst, uint8_t *src, int str ); for(y=0; y<h; y++){ for(x=0; x<w; x+=4){ - asm volatile( + __asm__ volatile( "movd %0, %%mm0 \n\t" "movd %1, %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" |