Index: source/patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.c
===================================================================
--- source/patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.c (revision 59334)
+++ source/patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.c (working copy)
@@ -22,23 +22,18 @@
  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
  */

+#include "libavutil/cpu.h"
 #include "libavutil/x86_cpu.h"
 #include "libavcodec/dsputil.h"
 #include "libavcodec/h264dsp.h"
 #include "libavcodec/mpegvideo.h"
 #include "libavcodec/simple_idct.h"
 #include "dsputil_mmx.h"
-#include "vp3dsp_mmx.h"
-#include "vp3dsp_sse2.h"
-#include "vp6dsp_mmx.h"
-#include "vp6dsp_sse2.h"
 #include "idct_xvid.h"

 //#undef NDEBUG
 //#include <assert.h>

-int mm_flags; /* multimedia extension flags */
-
 /* pixel operations */
 DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL;
 DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
@@ -66,6 +61,7 @@
 DECLARE_ALIGNED(8,  const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
 DECLARE_ALIGNED(8,  const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;

+DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_0  ) = {0x0000000000000000ULL, 0x0000000000000000ULL};
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_1  ) = {0x0101010101010101ULL, 0x0101010101010101ULL};
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_3  ) = {0x0303030303030303ULL, 0x0303030303030303ULL};
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_4  ) = {0x0404040404040404ULL, 0x0404040404040404ULL};
@@ -74,7 +70,7 @@
 DECLARE_ALIGNED(8,  const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_80 ) = {0x8080808080808080ULL, 0x8080808080808080ULL};
 DECLARE_ALIGNED(8,  const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
-DECLARE_ALIGNED(8,  const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
+DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_A1 ) = {0xA1A1A1A1A1A1A1A1ULL, 0xA1A1A1A1A1A1A1A1ULL};
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_F8 ) = {0xF8F8F8F8F8F8F8F8ULL, 0xF8F8F8F8F8F8F8F8ULL};
 DECLARE_ALIGNED(8,  const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
 DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_FE ) = {0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL};
@@ -232,7 +228,7 @@
 /***********************************/
 /* standard MMX */

-void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
+void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
 {
     const DCTELEM *p;
     uint8_t *pix;
@@ -308,7 +304,7 @@
     "movq %%mm3, (%0, %3, 2)            \n\t"\
     "movq %%mm4, (%0, %1)               \n\t"

-void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
+void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
 {
     x86_reg line_skip = line_size;
     x86_reg line_skip3;
@@ -324,7 +320,7 @@
         :"memory");
 }

-void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
+void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
 {
     const DCTELEM *p;
     uint8_t *pix;
@@ -732,35 +728,6 @@
     }
 }

-static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
-    __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
-        "movd %4, %%mm0                 \n\t"
-        "movd %5, %%mm1                 \n\t"
-        "movd %6, %%mm2                 \n\t"
-        "movd %7, %%mm3                 \n\t"
-        "punpcklbw %%mm1, %%mm0         \n\t"
-        "punpcklbw %%mm3, %%mm2         \n\t"
-        "movq %%mm0, %%mm1              \n\t"
-        "punpcklwd %%mm2, %%mm0         \n\t"
-        "punpckhwd %%mm2, %%mm1         \n\t"
-        "movd %%mm0, %0                 \n\t"
-        "punpckhdq %%mm0, %%mm0         \n\t"
-        "movd %%mm0, %1                 \n\t"
-        "movd %%mm1, %2                 \n\t"
-        "punpckhdq %%mm1, %%mm1         \n\t"
-        "movd %%mm1, %3                 \n\t"
-
-        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
-          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
-          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
-          "=m" (*(uint32_t*)(dst + 3*dst_stride))
-        :  "m" (*(uint32_t*)(src + 0*src_stride)),
-           "m" (*(uint32_t*)(src + 1*src_stride)),
-           "m" (*(uint32_t*)(src + 2*src_stride)),
-           "m" (*(uint32_t*)(src + 3*src_stride))
-    );
-}
-
 static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
     if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
         const int strength= ff_h263_loop_filter_strength[qscale];
@@ -1829,9 +1796,60 @@
 PREFETCH(prefetch_3dnow, prefetch)
 #undef PREFETCH

-#include "h264dsp_mmx.c"
-#include "rv40dsp_mmx.c"
+#include "h264_qpel_mmx.c"

+void ff_put_h264_chroma_mc8_mmx_rnd   (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_put_vc1_chroma_mc8_mmx_nornd  (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_put_rv40_chroma_mc8_mmx       (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc8_mmx2_rnd  (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_avg_rv40_chroma_mc8_mmx2      (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc8_3dnow_rnd (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_avg_rv40_chroma_mc8_3dnow     (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+
+void ff_put_h264_chroma_mc4_mmx       (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_put_rv40_chroma_mc4_mmx       (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc4_mmx2      (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_avg_rv40_chroma_mc4_mmx2      (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc4_3dnow     (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_avg_rv40_chroma_mc4_3dnow     (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+
+void ff_put_h264_chroma_mc2_mmx2      (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc2_mmx2      (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+
+void ff_put_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_put_h264_chroma_mc4_ssse3     (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+
+void ff_avg_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc4_ssse3     (uint8_t *dst, uint8_t *src,
+                                       int stride, int h, int x, int y);
+
+
 /* CAVS specific */
 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
     put_pixels8_mmx(dst, src, stride, 8);
@@ -1860,43 +1878,43 @@
 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
 {
     ff_mmx_idct (block);
-    put_pixels_clamped_mmx(block, dest, line_size);
+    ff_put_pixels_clamped_mmx(block, dest, line_size);
 }
 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
 {
     ff_mmx_idct (block);
-    add_pixels_clamped_mmx(block, dest, line_size);
+    ff_add_pixels_clamped_mmx(block, dest, line_size);
 }
 static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
 {
     ff_mmxext_idct (block);
-    put_pixels_clamped_mmx(block, dest, line_size);
+    ff_put_pixels_clamped_mmx(block, dest, line_size);
 }
 static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
 {
     ff_mmxext_idct (block);
-    add_pixels_clamped_mmx(block, dest, line_size);
+    ff_add_pixels_clamped_mmx(block, dest, line_size);
 }
 #endif
 static void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block)
 {
     ff_idct_xvid_mmx (block);
-    put_pixels_clamped_mmx(block, dest, line_size);
+    ff_put_pixels_clamped_mmx(block, dest, line_size);
 }
 static void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block)
 {
     ff_idct_xvid_mmx (block);
-    add_pixels_clamped_mmx(block, dest, line_size);
+    ff_add_pixels_clamped_mmx(block, dest, line_size);
 }
 static void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block)
 {
     ff_idct_xvid_mmx2 (block);
-    put_pixels_clamped_mmx(block, dest, line_size);
+    ff_put_pixels_clamped_mmx(block, dest, line_size);
 }
 static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
 {
     ff_idct_xvid_mmx2 (block);
-    add_pixels_clamped_mmx(block, dest, line_size);
+    ff_add_pixels_clamped_mmx(block, dest, line_size);
 }

 static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
@@ -2385,6 +2403,19 @@
     );
 }

+void ff_vp3_idct_mmx(int16_t *input_data);
+void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
+
+void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, const DCTELEM *block);
+
+void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
+void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
+
+void ff_vp3_idct_sse2(int16_t *input_data);
+void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
+
 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
@@ -2396,20 +2427,8 @@
 void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top);
 int  ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w, int left);
 int  ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left);
-void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
-void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
-void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
-void ff_x264_deblock_v_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);
-void ff_x264_deblock_h_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);

-#if HAVE_YASM && ARCH_X86_32
-void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
-static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
-{
-    ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta);
-    ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta);
-}
-#elif !HAVE_YASM
+#if !HAVE_YASM
 #define ff_float_to_int16_interleave6_sse(a,b,c)   float_to_int16_interleave_misc_sse(a,b,c,6)
 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
 #define ff_float_to_int16_interleave6_3dn2(a,b,c)  float_to_int16_interleave_misc_3dnow(a,b,c,6)
@@ -2509,10 +2528,10 @@

 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 {
-    mm_flags = mm_support();
+    int mm_flags = av_get_cpu_flags();

     if (avctx->dsp_mask) {
-        if (avctx->dsp_mask & FF_MM_FORCE)
+        if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
             mm_flags |= (avctx->dsp_mask & 0xffff);
         else
             mm_flags &= ~(avctx->dsp_mask & 0xffff);
@@ -2520,20 +2539,20 @@

 #if 0
     av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:");
-    if (mm_flags & FF_MM_MMX)
+    if (mm_flags & AV_CPU_FLAG_MMX)
         av_log(avctx, AV_LOG_INFO, " mmx");
-    if (mm_flags & FF_MM_MMX2)
+    if (mm_flags & AV_CPU_FLAG_MMX2)
         av_log(avctx, AV_LOG_INFO, " mmx2");
-    if (mm_flags & FF_MM_3DNOW)
+    if (mm_flags & AV_CPU_FLAG_3DNOW)
         av_log(avctx, AV_LOG_INFO, " 3dnow");
-    if (mm_flags & FF_MM_SSE)
+    if (mm_flags & AV_CPU_FLAG_SSE)
         av_log(avctx, AV_LOG_INFO, " sse");
-    if (mm_flags & FF_MM_SSE2)
+    if (mm_flags & AV_CPU_FLAG_SSE2)
         av_log(avctx, AV_LOG_INFO, " sse2");
     av_log(avctx, AV_LOG_INFO, "\n");
 #endif

-    if (mm_flags & FF_MM_MMX) {
+    if (mm_flags & AV_CPU_FLAG_MMX) {
         const int idct_algo= avctx->idct_algo;

         if(avctx->lowres==0){
@@ -2544,7 +2563,7 @@
                 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
 #if CONFIG_GPL
             }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
-                if(mm_flags & FF_MM_MMX2){
+                if(mm_flags & AV_CPU_FLAG_MMX2){
                     c->idct_put= ff_libmpeg2mmx2_idct_put;
                     c->idct_add= ff_libmpeg2mmx2_idct_add;
                     c->idct    = ff_mmxext_idct;
@@ -2556,8 +2575,8 @@
                 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
 #endif
             }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
-                     idct_algo==FF_IDCT_VP3){
-                if(mm_flags & FF_MM_SSE2){
+                     idct_algo==FF_IDCT_VP3 && HAVE_YASM){
+                if(mm_flags & AV_CPU_FLAG_SSE2){
                     c->idct_put= ff_vp3_idct_put_sse2;
                     c->idct_add= ff_vp3_idct_add_sse2;
                     c->idct    = ff_vp3_idct_sse2;
@@ -2571,12 +2590,12 @@
             }else if(idct_algo==FF_IDCT_CAVS){
                 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
             }else if(idct_algo==FF_IDCT_XVIDMMX){
-                if(mm_flags & FF_MM_SSE2){
+                if(mm_flags & AV_CPU_FLAG_SSE2){
                     c->idct_put= ff_idct_xvid_sse2_put;
                     c->idct_add= ff_idct_xvid_sse2_add;
                     c->idct    = ff_idct_xvid_sse2;
                     c->idct_permutation_type= FF_SSE2_IDCT_PERM;
-                }else if(mm_flags & FF_MM_MMX2){
+                }else if(mm_flags & AV_CPU_FLAG_MMX2){
                     c->idct_put= ff_idct_xvid_mmx2_put;
                     c->idct_add= ff_idct_xvid_mmx2_add;
                     c->idct    = ff_idct_xvid_mmx2;
@@ -2588,12 +2607,12 @@
             }
         }

-        c->put_pixels_clamped = put_pixels_clamped_mmx;
-        c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
-        c->add_pixels_clamped = add_pixels_clamped_mmx;
+        c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
+        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
+        c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
         c->clear_block  = clear_block_mmx;
         c->clear_blocks = clear_blocks_mmx;
-        if ((mm_flags & FF_MM_SSE) &&
+        if ((mm_flags & AV_CPU_FLAG_SSE) &&
             !(CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)){
             /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
             c->clear_block  = clear_block_sse;
@@ -2626,18 +2645,17 @@
             c->h263_v_loop_filter= h263_v_loop_filter_mmx;
             c->h263_h_loop_filter= h263_h_loop_filter_mmx;
         }
-        c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_rnd;
-        c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx;
-        c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_mmx_nornd;

-        c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx;
-        c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx;
+#if HAVE_YASM
+        c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
+        c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
+        c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;

-        if (CONFIG_VP6_DECODER) {
-            c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
-        }
+        c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
+        c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
+#endif

-        if (mm_flags & FF_MM_MMX2) {
+        if (mm_flags & AV_CPU_FLAG_MMX2) {
             c->prefetch = prefetch_mmx2;

             c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
@@ -2662,12 +2680,12 @@
                 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
                 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;

-                if (CONFIG_VP3_DECODER) {
+                if (CONFIG_VP3_DECODER && HAVE_YASM) {
                     c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2;
                     c->vp3_h_loop_filter= ff_vp3_h_loop_filter_mmx2;
                 }
             }
-            if (CONFIG_VP3_DECODER) {
+            if (CONFIG_VP3_DECODER && HAVE_YASM) {
                 c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2;
             }

@@ -2714,21 +2732,21 @@
             SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2);
             SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2);

-            c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_mmx2;
-            c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_mmx2;
+#if HAVE_YASM
+            c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
+            c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;

-            c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_vc1_chroma_mc8_mmx2_nornd;
+            c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;

-            c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2_rnd;
-            c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2;
-            c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2;
-            c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2;
+            c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
+            c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
+            c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
+            c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;

-#if HAVE_YASM
             c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
 #endif
 #if HAVE_7REGS && HAVE_TEN_OPERANDS
-            if( mm_flags&FF_MM_3DNOW )
+            if( mm_flags&AV_CPU_FLAG_3DNOW )
                 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
 #endif

@@ -2736,7 +2754,7 @@
                 ff_vc1dsp_init_mmx(c, avctx);

             c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2;
-        } else if (mm_flags & FF_MM_3DNOW) {
+        } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
             c->prefetch = prefetch_3dnow;

             c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
@@ -2787,11 +2805,15 @@
             SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow);
             SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);

-            c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow_rnd;
-            c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
+#if HAVE_YASM
+            c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
+            c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;

-            c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_3dnow;
-            c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_3dnow;
+            c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd;
+
+            c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
+            c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
+#endif
         }


@@ -2800,13 +2822,13 @@
             c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\
             c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\
             c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
-        if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){
+        if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){
             // these functions are slower than mmx on AMD, but faster on Intel
             c->put_pixels_tab[0][0] = put_pixels16_sse2;
             c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
             H264_QPEL_FUNCS(0, 0, sse2);
         }
-        if(mm_flags & FF_MM_SSE2){
+        if(mm_flags & AV_CPU_FLAG_SSE2){
             H264_QPEL_FUNCS(0, 1, sse2);
             H264_QPEL_FUNCS(0, 2, sse2);
             H264_QPEL_FUNCS(0, 3, sse2);
@@ -2819,13 +2841,9 @@
             H264_QPEL_FUNCS(3, 1, sse2);
             H264_QPEL_FUNCS(3, 2, sse2);
             H264_QPEL_FUNCS(3, 3, sse2);
-
-            if (CONFIG_VP6_DECODER) {
-                c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
-            }
         }
 #if HAVE_SSSE3
-        if(mm_flags & FF_MM_SSSE3){
+        if(mm_flags & AV_CPU_FLAG_SSSE3){
             H264_QPEL_FUNCS(1, 0, ssse3);
             H264_QPEL_FUNCS(1, 1, ssse3);
             H264_QPEL_FUNCS(1, 2, ssse3);
@@ -2838,22 +2856,22 @@
             H264_QPEL_FUNCS(3, 1, ssse3);
             H264_QPEL_FUNCS(3, 2, ssse3);
             H264_QPEL_FUNCS(3, 3, ssse3);
-            c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_ssse3_nornd;
-            c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_vc1_chroma_mc8_ssse3_nornd;
-            c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_rnd;
-            c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_ssse3_rnd;
-            c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_ssse3;
-            c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3;
             c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
 #if HAVE_YASM
+            c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd;
+            c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd;
+            c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
+            c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
+            c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
+            c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3;
             c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
-            if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe
+            if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
                 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
 #endif
         }
 #endif

-        if(mm_flags & FF_MM_3DNOW){
+        if(mm_flags & AV_CPU_FLAG_3DNOW){
             c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
             c->vector_fmul = vector_fmul_3dnow;
             if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
@@ -2861,20 +2879,20 @@
                 c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
             }
         }
-        if(mm_flags & FF_MM_3DNOWEXT){
+        if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
             c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
             c->vector_fmul_window = vector_fmul_window_3dnow2;
             if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
                 c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
             }
         }
-        if(mm_flags & FF_MM_MMX2){
+        if(mm_flags & AV_CPU_FLAG_MMX2){
 #if HAVE_YASM
             c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2;
             c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2;
 #endif
         }
-        if(mm_flags & FF_MM_SSE){
+        if(mm_flags & AV_CPU_FLAG_SSE){
             c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
             c->ac3_downmix = ac3_downmix_sse;
             c->vector_fmul = vector_fmul_sse;
@@ -2889,9 +2907,9 @@
             c->scalarproduct_float = ff_scalarproduct_float_sse;
 #endif
         }
-        if(mm_flags & FF_MM_3DNOW)
+        if(mm_flags & AV_CPU_FLAG_3DNOW)
             c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse
-        if(mm_flags & FF_MM_SSE2){
+        if(mm_flags & AV_CPU_FLAG_SSE2){
             c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;
             c->float_to_int16 = float_to_int16_sse2;
             c->float_to_int16_interleave = float_to_int16_interleave_sse2;
@@ -2900,7 +2918,7 @@
             c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
 #endif
         }
-        if((mm_flags & FF_MM_SSSE3) && !(mm_flags & (FF_MM_SSE42|FF_MM_3DNOW)) && HAVE_YASM) // cachesplit
+        if((mm_flags & AV_CPU_FLAG_SSSE3) && !(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW)) && HAVE_YASM) // cachesplit
             c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
     }

@@ -2942,89 +2960,3 @@
     //ff_idct = just_return;
 #endif
 }
-
-#if CONFIG_H264DSP
-void ff_h264dsp_init_x86(H264DSPContext *c)
-{
-    mm_flags = mm_support();
-
-    if (mm_flags & FF_MM_MMX) {
-        c->h264_idct_dc_add=
-        c->h264_idct_add= ff_h264_idct_add_mmx;
-        c->h264_idct8_dc_add=
-        c->h264_idct8_add= ff_h264_idct8_add_mmx;
-
-        c->h264_idct_add16     = ff_h264_idct_add16_mmx;
-        c->h264_idct8_add4     = ff_h264_idct8_add4_mmx;
-        c->h264_idct_add8      = ff_h264_idct_add8_mmx;
-        c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
-
-        if (mm_flags & FF_MM_MMX2) {
-            c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
-            c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
-            c->h264_idct_add16 = ff_h264_idct_add16_mmx2;
-            c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2;
-            c->h264_idct_add8  = ff_h264_idct_add8_mmx2;
-            c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
-
-            c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
-            c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
-            c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
-            c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2;
-            c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2;
-            c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2;
-            c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
-
-            c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
-            c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
-            c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
-            c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
-            c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
-            c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
-            c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
-            c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
-
-            c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
-            c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
-            c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
-            c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
-            c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
-            c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
-            c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
-            c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
-        }
-        if(mm_flags & FF_MM_SSE2){
-            c->h264_idct8_add = ff_h264_idct8_add_sse2;
-            c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
-        }
-
-#if HAVE_YASM
-        if (mm_flags & FF_MM_MMX2){
-#if ARCH_X86_32
-            c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
-            c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
-#endif
-            if( mm_flags&FF_MM_SSE2 ){
-                c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2;
-                c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2;
-#if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
-                c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
-                c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
-                c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
-                c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
-#endif
-#if CONFIG_GPL
-                c->h264_idct_add16 = ff_h264_idct_add16_sse2;
-                c->h264_idct_add8  = ff_h264_idct_add8_sse2;
-                c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
-#endif
-            }
-            if ( mm_flags&FF_MM_SSSE3 ){
-                c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3;
-                c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3;
-            }
-        }
-#endif
-    }
-}
-#endif /* CONFIG_H264DSP */