patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.c - Issue 789004: ffmpeg roll of source to mar 9 version...

Side by Side Diff: patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.c

Issue 789004: ffmpeg roll of source to mar 9 version... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/

Patch Set: '' Created 10 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« patched-ffmpeg-mt/libavcodec/mpeg4video_es_bsf.c ('K') | « patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.h ('k') | patched-ffmpeg-mt/libavcodec/x86/dsputilenc_mmx.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 /*	1 /*

2 * MMX optimized DSP utils	2 * MMX optimized DSP utils

3 * Copyright (c) 2000, 2001 Fabrice Bellard	3 * Copyright (c) 2000, 2001 Fabrice Bellard

4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>	4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>

5 *	5 *

6 * This file is part of FFmpeg.	6 * This file is part of FFmpeg.

7 *	7 *

8 * FFmpeg is free software; you can redistribute it and/or	8 * FFmpeg is free software; you can redistribute it and/or

9 * modify it under the terms of the GNU Lesser General Public	9 * modify it under the terms of the GNU Lesser General Public

10 * License as published by the Free Software Foundation; either	10 * License as published by the Free Software Foundation; either

(...skipping 21 matching lines...) Expand all Loading...
32 #include "vp6dsp_mmx.h"	32 #include "vp6dsp_mmx.h"

33 #include "vp6dsp_sse2.h"	33 #include "vp6dsp_sse2.h"

34 #include "idct_xvid.h"	34 #include "idct_xvid.h"

35	35

36 //#undef NDEBUG	36 //#undef NDEBUG

37 //#include <assert.h>	37 //#include <assert.h>

38	38

39 int mm_flags; /* multimedia extension flags */	39 int mm_flags; /* multimedia extension flags */

40	40

41 /* pixel operations */	41 /* pixel operations */

42 DECLARE_ALIGNED_8 (const uint64_t, ff_bone) = 0x0101010101010101ULL;	42 DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL;

43 DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo) = 0x0002000200020002ULL;	43 DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;

44	44

45 DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000)[2] =	45 DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =

46 {0x8000000080000000ULL, 0x8000000080000000ULL};	46 {0x8000000080000000ULL, 0x8000000080000000ULL};

47	47

48 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;	48 DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;

49 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL;	49 DECLARE_ALIGNED(8, const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL;

50 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};	50 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};

51 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};	51 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};

52 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;	52 DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;

53 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};	53 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};

54 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;	54 DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;

55 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};	55 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};

56 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};	56 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};

57 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;	57 DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;

58 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};	58 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};

59 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;	59 DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;

60 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_128) = 0x0080008000800080ULL;	60 DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;

61 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;	61 DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;

62	62

63 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;	63 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;

64 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;	64 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;

65 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;	65 DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;

66 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;	66 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;

67 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;	67 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;

68 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;	68 DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;

69 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;	69 DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;

70 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;	70 DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;

71	71

72 DECLARE_ALIGNED_16(const double, ff_pd_1)[2] = { 1.0, 1.0 };	72 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };

73 DECLARE_ALIGNED_16(const double, ff_pd_2)[2] = { 2.0, 2.0 };	73 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };

74	74

75 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)	75 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)

76 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)	76 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)

77	77

78 #define MOVQ_BFE(regd) \	78 #define MOVQ_BFE(regd) \

79 __asm__ volatile ( \	79 __asm__ volatile ( \

80 "pcmpeqd %%" #regd ", %%" #regd " \n\t"\	80 "pcmpeqd %%" #regd ", %%" #regd " \n\t"\

81 "paddb %%" #regd ", %%" #regd " \n\t" ::)	81 "paddb %%" #regd ", %%" #regd " \n\t" ::)

82	82

83 #ifndef PIC	83 #ifndef PIC

(...skipping 1732 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1816 }while(--h);\	1816 }while(--h);\

1817 }	1817 }

1818 PREFETCH(prefetch_mmx2, prefetcht0)	1818 PREFETCH(prefetch_mmx2, prefetcht0)

1819 PREFETCH(prefetch_3dnow, prefetch)	1819 PREFETCH(prefetch_3dnow, prefetch)

1820 #undef PREFETCH	1820 #undef PREFETCH

1821	1821

1822 #include "h264dsp_mmx.c"	1822 #include "h264dsp_mmx.c"

1823 #include "rv40dsp_mmx.c"	1823 #include "rv40dsp_mmx.c"

1824	1824

1825 /* CAVS specific */	1825 /* CAVS specific */

1826 void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);

1827 void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx);

1828

1829 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {	1826 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {

1830 put_pixels8_mmx(dst, src, stride, 8);	1827 put_pixels8_mmx(dst, src, stride, 8);

1831 }	1828 }

1832 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {	1829 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {

1833 avg_pixels8_mmx(dst, src, stride, 8);	1830 avg_pixels8_mmx(dst, src, stride, 8);

1834 }	1831 }

1835 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {	1832 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {

1836 put_pixels16_mmx(dst, src, stride, 16);	1833 put_pixels16_mmx(dst, src, stride, 16);

1837 }	1834 }

1838 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {	1835 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {

1839 avg_pixels16_mmx(dst, src, stride, 16);	1836 avg_pixels16_mmx(dst, src, stride, 16);

1840 }	1837 }

1841	1838

1842 /* VC1 specific */	1839 /* VC1 specific */

1843 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);

1844

1845 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) {	1840 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) {

1846 put_pixels8_mmx(dst, src, stride, 8);	1841 put_pixels8_mmx(dst, src, stride, 8);

1847 }	1842 }

1848 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) {	1843 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) {

1849 avg_pixels8_mmx2(dst, src, stride, 8);	1844 avg_pixels8_mmx2(dst, src, stride, 8);

1850 }	1845 }

1851	1846

1852 /* external functions, from idct_mmx.c */

1853 void ff_mmx_idct(DCTELEM *block);

1854 void ff_mmxext_idct(DCTELEM *block);

1855

1856 /* XXX: those functions should be suppressed ASAP when all IDCTs are	1847 /* XXX: those functions should be suppressed ASAP when all IDCTs are

1857 converted */	1848 converted */

1858 #if CONFIG_GPL	1849 #if CONFIG_GPL

1859 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)	1850 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)

1860 {	1851 {

1861 ff_mmx_idct (block);	1852 ff_mmx_idct (block);

1862 put_pixels_clamped_mmx(block, dest, line_size);	1853 put_pixels_clamped_mmx(block, dest, line_size);

1863 }	1854 }

1864 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)	1855 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)

1865 {	1856 {

(...skipping 158 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2024 {	2015 {

2025 int (*matrix_cmp)[2] = (int(*)[2])matrix;	2016 int (*matrix_cmp)[2] = (int(*)[2])matrix;

2026 intptr_t i,j,k;	2017 intptr_t i,j,k;

2027	2018

2028 i = -len*sizeof(float);	2019 i = -len*sizeof(float);

2029 if(in_ch == 5 && out_ch == 2 && !(matrix_cmp[0][1]|matrix_cmp[2][0]|matrix_cmp[3][1]|matrix_cmp[4][0]|(matrix_cmp[1][0]^matrix_cmp[1][1])|(matrix_cmp[0][0]^matrix_cmp[2][1]))) {	2020 if(in_ch == 5 && out_ch == 2 && !(matrix_cmp[0][1]|matrix_cmp[2][0]|matrix_cmp[3][1]|matrix_cmp[4][0]|(matrix_cmp[1][0]^matrix_cmp[1][1])|(matrix_cmp[0][0]^matrix_cmp[2][1]))) {

2030 MIX5(IF0,IF1);	2021 MIX5(IF0,IF1);

2031 } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) {	2022 } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) {

2032 MIX5(IF1,IF0);	2023 MIX5(IF1,IF0);

2033 } else {	2024 } else {

2034 DECLARE_ALIGNED_16(float, matrix_simd)[in_ch][2][4];	2025 DECLARE_ALIGNED(16, float, matrix_simd)[in_ch][2][4];

2035 j = 2*in_ch*sizeof(float);	2026 j = 2*in_ch*sizeof(float);

2036 __asm__ volatile(	2027 __asm__ volatile(

2037 "1: \n"	2028 "1: \n"

2038 "sub $8, %0 \n"	2029 "sub $8, %0 \n"

2039 "movss (%2,%0), %%xmm6 \n"	2030 "movss (%2,%0), %%xmm6 \n"

2040 "movss 4(%2,%0), %%xmm7 \n"	2031 "movss 4(%2,%0), %%xmm7 \n"

2041 "shufps $0, %%xmm6, %%xmm6 \n"	2032 "shufps $0, %%xmm6, %%xmm6 \n"

2042 "shufps $0, %%xmm7, %%xmm7 \n"	2033 "shufps $0, %%xmm7, %%xmm7 \n"

2043 "movaps %%xmm6, (%1,%0,4) \n"	2034 "movaps %%xmm6, (%1,%0,4) \n"

2044 "movaps %%xmm7, 16(%1,%0,4) \n"	2035 "movaps %%xmm7, 16(%1,%0,4) \n"

(...skipping 366 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2411 #elif !HAVE_YASM	2402 #elif !HAVE_YASM

2412 #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)	2403 #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)

2413 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)	2404 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)

2414 #define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)	2405 #define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)

2415 #endif	2406 #endif

2416 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse	2407 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse

2417	2408

2418 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \	2409 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \

2419 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\	2410 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\

2420 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\	2411 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\

2421 DECLARE_ALIGNED_16(int16_t, tmp)[len];\	2412 DECLARE_ALIGNED(16, int16_t, tmp)[len];\

2422 int i,j,c;\	2413 int i,j,c;\

2423 for(c=0; c<channels; c++){\	2414 for(c=0; c<channels; c++){\

2424 float_to_int16_##cpu(tmp, src[c], len);\	2415 float_to_int16_##cpu(tmp, src[c], len);\

2425 for(i=0, j=c; i<len; i++, j+=channels)\	2416 for(i=0, j=c; i<len; i++, j+=channels)\

2426 dst[j] = tmp[i];\	2417 dst[j] = tmp[i];\

2427 }\	2418 }\

2428 }\	2419 }\

2429 \	2420 \

2430 static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\	2421 static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\

2431 if(channels==1)\	2422 if(channels==1)\

(...skipping 65 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2497 "js 1b \n"	2488 "js 1b \n"

2498 )	2489 )

2499	2490

2500 static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){	2491 static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){

2501 if(channels==6)	2492 if(channels==6)

2502 ff_float_to_int16_interleave6_3dn2(dst, src, len);	2493 ff_float_to_int16_interleave6_3dn2(dst, src, len);

2503 else	2494 else

2504 float_to_int16_interleave_3dnow(dst, src, len, channels);	2495 float_to_int16_interleave_3dnow(dst, src, len, channels);

2505 }	2496 }

2506	2497

2507

2508 void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width);

2509 void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width);

2510 void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);

2511 void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);

2512 void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,

2513 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);

2514 void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,

2515 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);

2516

2517

2518 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);	2498 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);

2519	2499

2520 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)	2500 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)

2521 {	2501 {

2522 mm_flags = mm_support();	2502 mm_flags = mm_support();

2523	2503

2524 if (avctx->dsp_mask) {	2504 if (avctx->dsp_mask) {

2525 if (avctx->dsp_mask & FF_MM_FORCE)	2505 if (avctx->dsp_mask & FF_MM_FORCE)

2526 mm_flags |= (avctx->dsp_mask & 0xffff);	2506 mm_flags |= (avctx->dsp_mask & 0xffff);

2527 else	2507 else

(...skipping 310 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2838 }	2818 }

2839	2819

2840	2820

2841 #define H264_QPEL_FUNCS(x, y, CPU)\	2821 #define H264_QPEL_FUNCS(x, y, CPU)\

2842 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\	2822 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\

2843 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\	2823 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\

2844 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\	2824 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\

2845 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;	2825 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;

2846 if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){	2826 if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){

2847 // these functions are slower than mmx on AMD, but faster on Intel	2827 // these functions are slower than mmx on AMD, but faster on Intel

2848 /* FIXME works in most codecs, but crashes svq1 due to unaligned chroma

2849 c->put_pixels_tab[0][0] = put_pixels16_sse2;	2828 c->put_pixels_tab[0][0] = put_pixels16_sse2;

2850 c->avg_pixels_tab[0][0] = avg_pixels16_sse2;	2829 c->avg_pixels_tab[0][0] = avg_pixels16_sse2;

2851 */

2852 H264_QPEL_FUNCS(0, 0, sse2);	2830 H264_QPEL_FUNCS(0, 0, sse2);

2853 }	2831 }

2854 if(mm_flags & FF_MM_SSE2){	2832 if(mm_flags & FF_MM_SSE2){

2855 c->h264_idct8_add = ff_h264_idct8_add_sse2;	2833 c->h264_idct8_add = ff_h264_idct8_add_sse2;

2856 c->h264_idct8_add4= ff_h264_idct8_add4_sse2;	2834 c->h264_idct8_add4= ff_h264_idct8_add4_sse2;

2857	2835

2858 H264_QPEL_FUNCS(0, 1, sse2);	2836 H264_QPEL_FUNCS(0, 1, sse2);

2859 H264_QPEL_FUNCS(0, 2, sse2);	2837 H264_QPEL_FUNCS(0, 2, sse2);

2860 H264_QPEL_FUNCS(0, 3, sse2);	2838 H264_QPEL_FUNCS(0, 3, sse2);

2861 H264_QPEL_FUNCS(1, 1, sse2);	2839 H264_QPEL_FUNCS(1, 1, sse2);

(...skipping 160 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3022	3000

3023 avg_no_rnd_pixels_tab[0] = just_return;	3001 avg_no_rnd_pixels_tab[0] = just_return;

3024 avg_no_rnd_pixels_tab[1] = just_return;	3002 avg_no_rnd_pixels_tab[1] = just_return;

3025 avg_no_rnd_pixels_tab[2] = just_return;	3003 avg_no_rnd_pixels_tab[2] = just_return;

3026 avg_no_rnd_pixels_tab[3] = just_return;	3004 avg_no_rnd_pixels_tab[3] = just_return;

3027	3005

3028 //av_fdct = just_return;	3006 //av_fdct = just_return;

3029 //ff_idct = just_return;	3007 //ff_idct = just_return;

3030 #endif	3008 #endif

3031 }	3009 }

OLD	NEW