diff options
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/dsputil_mmx.c | 32 | ||||
-rw-r--r-- | libavcodec/x86/h264_intrapred_init.c | 6 | ||||
-rw-r--r-- | libavcodec/x86/h264dsp_mmx.c | 4 |
3 files changed, 40 insertions, 2 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c index f98e6ae0fa..a0cb11aa40 100644 --- a/libavcodec/x86/dsputil_mmx.c +++ b/libavcodec/x86/dsputil_mmx.c @@ -2418,6 +2418,7 @@ float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) { int mm_flags = av_get_cpu_flags(); + const int high_bit_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8; if (avctx->dsp_mask) { if (avctx->dsp_mask & AV_CPU_FLAG_FORCE) @@ -2499,6 +2500,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->put_pixels_clamped = ff_put_pixels_clamped_mmx; c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; c->add_pixels_clamped = ff_add_pixels_clamped_mmx; + if (!high_bit_depth) { c->clear_block = clear_block_mmx; c->clear_blocks = clear_blocks_mmx; if ((mm_flags & AV_CPU_FLAG_SSE) && @@ -2507,6 +2509,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->clear_block = clear_block_sse; c->clear_blocks = clear_blocks_sse; } + } #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ @@ -2514,6 +2517,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU + if (!high_bit_depth) { SET_HPEL_FUNCS(put, 0, 16, mmx); SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx); SET_HPEL_FUNCS(avg, 0, 16, mmx); @@ -2522,17 +2526,20 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx); SET_HPEL_FUNCS(avg, 1, 8, mmx); SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx); + } #if ARCH_X86_32 || !HAVE_YASM c->gmc= gmc_mmx; #endif #if ARCH_X86_32 && HAVE_YASM + if (!high_bit_depth) c->emulated_edge_mc = emulated_edge_mc_mmx; #endif c->add_bytes= add_bytes_mmx; c->add_bytes_l2= add_bytes_l2_mmx; + if (!high_bit_depth) c->draw_edges = draw_edges_mmx; if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { @@ -2541,8 +2548,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } #if HAVE_YASM + if (!high_bit_depth) { c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd; c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx; + } c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx; c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx; @@ -2551,6 +2560,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) if (mm_flags & AV_CPU_FLAG_MMX2) { c->prefetch = prefetch_mmx2; + if (!high_bit_depth) { c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; @@ -2564,14 +2574,17 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; + } if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ + if (!high_bit_depth) { c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; + } if (CONFIG_VP3_DECODER && HAVE_YASM) { c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2; @@ -2613,12 +2626,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2); SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2); + if (!high_bit_depth) { SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2); SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2); SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2); SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2); SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2); SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2); + } SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2); SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2); @@ -2629,10 +2644,12 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2; c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2; + if (!high_bit_depth) { c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2; c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2; c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2; + } c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; #endif @@ -2645,6 +2662,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } else if (mm_flags & AV_CPU_FLAG_3DNOW) { c->prefetch = prefetch_3dnow; + if (!high_bit_depth) { c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; @@ -2667,6 +2685,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; } + } if (CONFIG_VP3_DECODER && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) { @@ -2681,12 +2700,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow); SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow); + if (!high_bit_depth) { SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow); SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow); SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow); SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow); SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow); SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow); + } SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow); SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow); @@ -2694,8 +2715,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow); #if HAVE_YASM + if (!high_bit_depth) { c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow; + } c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow; c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow; @@ -2710,12 +2733,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU; if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){ // these functions are slower than mmx on AMD, but faster on Intel + if (!high_bit_depth) { c->put_pixels_tab[0][0] = put_pixels16_sse2; c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2; c->avg_pixels_tab[0][0] = avg_pixels16_sse2; H264_QPEL_FUNCS(0, 0, sse2); + } } if(mm_flags & AV_CPU_FLAG_SSE2){ + if (!high_bit_depth) { H264_QPEL_FUNCS(0, 1, sse2); H264_QPEL_FUNCS(0, 2, sse2); H264_QPEL_FUNCS(0, 3, sse2); @@ -2728,9 +2754,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) H264_QPEL_FUNCS(3, 1, sse2); H264_QPEL_FUNCS(3, 2, sse2); H264_QPEL_FUNCS(3, 3, sse2); + } } #if HAVE_SSSE3 if(mm_flags & AV_CPU_FLAG_SSSE3){ + if (!high_bit_depth) { H264_QPEL_FUNCS(1, 0, ssse3); H264_QPEL_FUNCS(1, 1, ssse3); H264_QPEL_FUNCS(1, 2, ssse3); @@ -2743,12 +2771,15 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) H264_QPEL_FUNCS(3, 1, ssse3); H264_QPEL_FUNCS(3, 2, ssse3); H264_QPEL_FUNCS(3, 3, ssse3); + } c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; #if HAVE_YASM + if (!high_bit_depth) { c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd; c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd; c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3; c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3; + } c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; @@ -2805,6 +2836,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) } } + if (!high_bit_depth) c->emulated_edge_mc = emulated_edge_mc_sse; c->gmc= gmc_sse; #endif diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c index 4142cc1edd..9eb752581b 100644 --- a/libavcodec/x86/h264_intrapred_init.c +++ b/libavcodec/x86/h264_intrapred_init.c @@ -95,9 +95,13 @@ void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int s void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride); void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride); -void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) +void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth) { int mm_flags = av_get_cpu_flags(); + const int high_depth = bit_depth > 8; + + if (high_depth) + return; #if HAVE_YASM if (mm_flags & AV_CPU_FLAG_MMX) { diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c index 9f004a5314..b4936a618e 100644 --- a/libavcodec/x86/h264dsp_mmx.c +++ b/libavcodec/x86/h264dsp_mmx.c @@ -285,10 +285,11 @@ H264_BIWEIGHT_MMX ( 4, 8) H264_BIWEIGHT_MMX ( 4, 4) H264_BIWEIGHT_MMX ( 4, 2) -void ff_h264dsp_init_x86(H264DSPContext *c) +void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) { int mm_flags = av_get_cpu_flags(); + if (bit_depth == 8) { if (mm_flags & AV_CPU_FLAG_MMX2) { c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2; } @@ -378,5 +379,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c) } } } + } #endif } |