Chromium Code Reviews

Side by Side Diff: source/patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.c

Issue 3384002: ffmpeg source update for sep 09 (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/
Patch Set: Created 10 years, 3 months ago
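The change that recurs through the whole file below is FFmpeg's CPU-detection API rename: the global mm_flags filled by mm_support() is dropped in favor of a local value from av_get_cpu_flags() (new header libavutil/cpu.h), and every FF_MM_* flag becomes the corresponding AV_CPU_FLAG_* name. A minimal sketch of the new convention; the dsp_mask force/clear logic is copied from the patched dsputil_init_mmx() below, while the wrapper function name is illustrative only:

    #include "libavutil/cpu.h"

    /* Sketch only: cpu_flags_after_mask() is a hypothetical helper; the
     * force/clear semantics come from the patched dsputil_init_mmx(). */
    static int cpu_flags_after_mask(int dsp_mask)
    {
        int mm_flags = av_get_cpu_flags();        /* replaces mm_support()  */

        if (dsp_mask) {
            if (dsp_mask & AV_CPU_FLAG_FORCE)     /* was FF_MM_FORCE        */
                mm_flags |= (dsp_mask & 0xffff);  /* force given flags on   */
            else
                mm_flags &= ~(dsp_mask & 0xffff); /* otherwise mask them off */
        }
        return mm_flags;
    }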
OLD | NEW
1 /* 1 /*
2 * MMX optimized DSP utils 2 * MMX optimized DSP utils
3 * Copyright (c) 2000, 2001 Fabrice Bellard 3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 * 5 *
6 * This file is part of FFmpeg. 6 * This file is part of FFmpeg.
7 * 7 *
8 * FFmpeg is free software; you can redistribute it and/or 8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public 9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either 10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version. 11 * version 2.1 of the License, or (at your option) any later version.
12 * 12 *
13 * FFmpeg is distributed in the hope that it will be useful, 13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details. 16 * Lesser General Public License for more details.
17 * 17 *
18 * You should have received a copy of the GNU Lesser General Public 18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software 19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 * 21 *
22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> 22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
23 */ 23 */
24 24
25 #include "libavutil/cpu.h"
25 #include "libavutil/x86_cpu.h" 26 #include "libavutil/x86_cpu.h"
26 #include "libavcodec/dsputil.h" 27 #include "libavcodec/dsputil.h"
27 #include "libavcodec/h264dsp.h" 28 #include "libavcodec/h264dsp.h"
28 #include "libavcodec/mpegvideo.h" 29 #include "libavcodec/mpegvideo.h"
29 #include "libavcodec/simple_idct.h" 30 #include "libavcodec/simple_idct.h"
30 #include "dsputil_mmx.h" 31 #include "dsputil_mmx.h"
31 #include "vp3dsp_mmx.h"
32 #include "vp3dsp_sse2.h"
33 #include "vp6dsp_mmx.h"
34 #include "vp6dsp_sse2.h"
35 #include "idct_xvid.h" 32 #include "idct_xvid.h"
36 33
37 //#undef NDEBUG 34 //#undef NDEBUG
38 //#include <assert.h> 35 //#include <assert.h>
39 36
40 int mm_flags; /* multimedia extension flags */
41
42 /* pixel operations */ 37 /* pixel operations */
43 DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL; 38 DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL;
44 DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL; 39 DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
45 40
46 DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] = 41 DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =
47 {0x8000000080000000ULL, 0x8000000080000000ULL}; 42 {0x8000000080000000ULL, 0x8000000080000000ULL};
48 43
49 DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL; 44 DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
50 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4 ) = {0x0004000400040004ULL, 0x0004000400040004ULL}; 45 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4 ) = {0x0004000400040004ULL, 0x0004000400040004ULL};
51 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL}; 46 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
52 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL}; 47 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
53 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_9 ) = {0x0009000900090009ULL, 0x0009000900090009ULL}; 48 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_9 ) = {0x0009000900090009ULL, 0x0009000900090009ULL};
54 DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL; 49 DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
55 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL}; 50 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
56 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18 ) = {0x0012001200120012ULL, 0x0012001200120012ULL}; 51 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18 ) = {0x0012001200120012ULL, 0x0012001200120012ULL};
57 DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL; 52 DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
58 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_27 ) = {0x001B001B001B001BULL, 0x001B001B001B001BULL}; 53 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_27 ) = {0x001B001B001B001BULL, 0x001B001B001B001BULL};
59 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL}; 54 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
60 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL}; 55 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
61 DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL; 56 DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
62 DECLARE_ALIGNED(8, const uint64_t, ff_pw_53 ) = 0x0035003500350035ULL; 57 DECLARE_ALIGNED(8, const uint64_t, ff_pw_53 ) = 0x0035003500350035ULL;
63 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_63 ) = {0x003F003F003F003FULL, 0x003F003F003F003FULL}; 58 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_63 ) = {0x003F003F003F003FULL, 0x003F003F003F003FULL};
64 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL}; 59 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};
65 DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL; 60 DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
66 DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL; 61 DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
67 DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL; 62 DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
68 63
64 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0 ) = {0x0000000000000000ULL, 0x0000000000000000ULL};
69 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1 ) = {0x0101010101010101ULL, 0x0101010101010101ULL}; 65 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1 ) = {0x0101010101010101ULL, 0x0101010101010101ULL};
70 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_3 ) = {0x0303030303030303ULL, 0x0303030303030303ULL}; 66 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_3 ) = {0x0303030303030303ULL, 0x0303030303030303ULL};
71 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_4 ) = {0x0404040404040404ULL, 0x0404040404040404ULL}; 67 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_4 ) = {0x0404040404040404ULL, 0x0404040404040404ULL};
72 DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL; 68 DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
73 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL; 69 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
74 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL; 70 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
75 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80 ) = {0x8080808080808080ULL, 0x8080808080808080ULL}; 71 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80 ) = {0x8080808080808080ULL, 0x8080808080808080ULL};
76 DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL; 72 DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
77 DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL; 73 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_A1 ) = {0xA1A1A1A1A1A1A1A1ULL, 0xA1A1A1A1A1A1A1A1ULL};
78 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_F8 ) = {0xF8F8F8F8F8F8F8F8ULL, 0xF8F8F8F8F8F8F8F8ULL}; 74 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_F8 ) = {0xF8F8F8F8F8F8F8F8ULL, 0xF8F8F8F8F8F8F8F8ULL};
79 DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL; 75 DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
80 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE ) = {0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL}; 76 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE ) = {0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL};
81 77
82 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 }; 78 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
83 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; 79 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
84 80
85 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::) 81 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
86 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::) 82 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
87 83
(...skipping 137 matching lines...)
225 #define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx 221 #define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx
226 #define put_pixels16_3dnow put_pixels16_mmx 222 #define put_pixels16_3dnow put_pixels16_mmx
227 #define put_pixels8_3dnow put_pixels8_mmx 223 #define put_pixels8_3dnow put_pixels8_mmx
228 #define put_pixels4_3dnow put_pixels4_mmx 224 #define put_pixels4_3dnow put_pixels4_mmx
229 #define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx 225 #define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx
230 #define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx 226 #define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx
231 227
232 /***********************************/ 228 /***********************************/
233 /* standard MMX */ 229 /* standard MMX */
234 230
235 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) 231 void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
236 { 232 {
237 const DCTELEM *p; 233 const DCTELEM *p;
238 uint8_t *pix; 234 uint8_t *pix;
239 235
240 /* read the pixels */ 236 /* read the pixels */
241 p = block; 237 p = block;
242 pix = pixels; 238 pix = pixels;
243 /* unrolled loop */ 239 /* unrolled loop */
244 __asm__ volatile( 240 __asm__ volatile(
245 "movq %3, %%mm0 \n\t" 241 "movq %3, %%mm0 \n\t"
(...skipping 55 matching lines...)
301 "packsswb 56+"#off"(%2), %%mm4 \n\t"\ 297 "packsswb 56+"#off"(%2), %%mm4 \n\t"\
302 "paddb %%mm0, %%mm1 \n\t"\ 298 "paddb %%mm0, %%mm1 \n\t"\
303 "paddb %%mm0, %%mm2 \n\t"\ 299 "paddb %%mm0, %%mm2 \n\t"\
304 "paddb %%mm0, %%mm3 \n\t"\ 300 "paddb %%mm0, %%mm3 \n\t"\
305 "paddb %%mm0, %%mm4 \n\t"\ 301 "paddb %%mm0, %%mm4 \n\t"\
306 "movq %%mm1, (%0) \n\t"\ 302 "movq %%mm1, (%0) \n\t"\
307 "movq %%mm2, (%0, %3) \n\t"\ 303 "movq %%mm2, (%0, %3) \n\t"\
308 "movq %%mm3, (%0, %3, 2) \n\t"\ 304 "movq %%mm3, (%0, %3, 2) \n\t"\
309 "movq %%mm4, (%0, %1) \n\t" 305 "movq %%mm4, (%0, %1) \n\t"
310 306
311 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) 307 void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
312 { 308 {
313 x86_reg line_skip = line_size; 309 x86_reg line_skip = line_size;
314 x86_reg line_skip3; 310 x86_reg line_skip3;
315 311
316 __asm__ volatile ( 312 __asm__ volatile (
317 "movq "MANGLE(ff_vector128)", %%mm0 \n\t" 313 "movq "MANGLE(ff_vector128)", %%mm0 \n\t"
318 "lea (%3, %3, 2), %1 \n\t" 314 "lea (%3, %3, 2), %1 \n\t"
319 put_signed_pixels_clamped_mmx_half(0) 315 put_signed_pixels_clamped_mmx_half(0)
320 "lea (%0, %3, 4), %0 \n\t" 316 "lea (%0, %3, 4), %0 \n\t"
321 put_signed_pixels_clamped_mmx_half(64) 317 put_signed_pixels_clamped_mmx_half(64)
322 :"+&r" (pixels), "=&r" (line_skip3) 318 :"+&r" (pixels), "=&r" (line_skip3)
323 :"r" (block), "r"(line_skip) 319 :"r" (block), "r"(line_skip)
324 :"memory"); 320 :"memory");
325 } 321 }
326 322
327 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) 323 void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
328 { 324 {
329 const DCTELEM *p; 325 const DCTELEM *p;
330 uint8_t *pix; 326 uint8_t *pix;
331 int i; 327 int i;
332 328
333 /* read the pixels */ 329 /* read the pixels */
334 p = block; 330 p = block;
335 pix = pixels; 331 pix = pixels;
336 MOVQ_ZERO(mm7); 332 MOVQ_ZERO(mm7);
337 i = 4; 333 i = 4;
(...skipping 387 matching lines...)
725 "movq %%mm6, %3 \n\t" 721 "movq %%mm6, %3 \n\t"
726 : "+m" (*(uint64_t*)(src - 2*stride)), 722 : "+m" (*(uint64_t*)(src - 2*stride)),
727 "+m" (*(uint64_t*)(src - 1*stride)), 723 "+m" (*(uint64_t*)(src - 1*stride)),
728 "+m" (*(uint64_t*)(src + 0*stride)), 724 "+m" (*(uint64_t*)(src + 0*stride)),
729 "+m" (*(uint64_t*)(src + 1*stride)) 725 "+m" (*(uint64_t*)(src + 1*stride))
730 : "g" (2*strength), "m"(ff_pb_FC) 726 : "g" (2*strength), "m"(ff_pb_FC)
731 ); 727 );
732 } 728 }
733 } 729 }
734 730
735 static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
736 __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
737 "movd %4, %%mm0 \n\t"
738 "movd %5, %%mm1 \n\t"
739 "movd %6, %%mm2 \n\t"
740 "movd %7, %%mm3 \n\t"
741 "punpcklbw %%mm1, %%mm0 \n\t"
742 "punpcklbw %%mm3, %%mm2 \n\t"
743 "movq %%mm0, %%mm1 \n\t"
744 "punpcklwd %%mm2, %%mm0 \n\t"
745 "punpckhwd %%mm2, %%mm1 \n\t"
746 "movd %%mm0, %0 \n\t"
747 "punpckhdq %%mm0, %%mm0 \n\t"
748 "movd %%mm0, %1 \n\t"
749 "movd %%mm1, %2 \n\t"
750 "punpckhdq %%mm1, %%mm1 \n\t"
751 "movd %%mm1, %3 \n\t"
752
753 : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
754 "=m" (*(uint32_t*)(dst + 1*dst_stride)),
755 "=m" (*(uint32_t*)(dst + 2*dst_stride)),
756 "=m" (*(uint32_t*)(dst + 3*dst_stride))
757 : "m" (*(uint32_t*)(src + 0*src_stride)),
758 "m" (*(uint32_t*)(src + 1*src_stride)),
759 "m" (*(uint32_t*)(src + 2*src_stride)),
760 "m" (*(uint32_t*)(src + 3*src_stride))
761 );
762 }
763
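/* Reference sketch, not part of either file revision: transpose4x4() above
 * is a 4x4 byte-matrix transpose. The MMX version interleaves row bytes
 * with punpcklbw, then words with punpcklwd/punpckhwd, then splits dwords
 * with punpckhdq; the plain-C equivalent of what it computes is simply: */
#if 0
static void transpose4x4_ref(uint8_t *dst, const uint8_t *src,
                             int dst_stride, int src_stride)
{
    for (int i = 0; i < 4; i++)
        for (int j = 0; j < 4; j++)
            dst[i*dst_stride + j] = src[j*src_stride + i]; /* dst = src^T */
}
#endif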
764 static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ 731 static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
765 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { 732 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
766 const int strength= ff_h263_loop_filter_strength[qscale]; 733 const int strength= ff_h263_loop_filter_strength[qscale];
767 DECLARE_ALIGNED(8, uint64_t, temp)[4]; 734 DECLARE_ALIGNED(8, uint64_t, temp)[4];
768 uint8_t *btemp= (uint8_t*)temp; 735 uint8_t *btemp= (uint8_t*)temp;
769 736
770 src -= 2; 737 src -= 2;
771 738
772 transpose4x4(btemp , src , 8, stride); 739 transpose4x4(btemp , src , 8, stride);
773 transpose4x4(btemp+4, src + 4*stride, 8, stride); 740 transpose4x4(btemp+4, src + 4*stride, 8, stride);
(...skipping 1048 matching lines...)
1822 const uint8_t *p= mem;\ 1789 const uint8_t *p= mem;\
1823 do{\ 1790 do{\
1824 __asm__ volatile(#op" %0" :: "m"(*p));\ 1791 __asm__ volatile(#op" %0" :: "m"(*p));\
1825 p+= stride;\ 1792 p+= stride;\
1826 }while(--h);\ 1793 }while(--h);\
1827 } 1794 }
1828 PREFETCH(prefetch_mmx2, prefetcht0) 1795 PREFETCH(prefetch_mmx2, prefetcht0)
1829 PREFETCH(prefetch_3dnow, prefetch) 1796 PREFETCH(prefetch_3dnow, prefetch)
1830 #undef PREFETCH 1797 #undef PREFETCH
1831 1798
1832 #include "h264dsp_mmx.c" 1799 #include "h264_qpel_mmx.c"
1833 #include "rv40dsp_mmx.c" 1800
1801 void ff_put_h264_chroma_mc8_mmx_rnd (uint8_t *dst, uint8_t *src,
1802 int stride, int h, int x, int y);
1803 void ff_put_vc1_chroma_mc8_mmx_nornd (uint8_t *dst, uint8_t *src,
1804 int stride, int h, int x, int y);
1805 void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src,
1806 int stride, int h, int x, int y);
1807 void ff_avg_h264_chroma_mc8_mmx2_rnd (uint8_t *dst, uint8_t *src,
1808 int stride, int h, int x, int y);
1809 void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src,
1810 int stride, int h, int x, int y);
1811 void ff_avg_rv40_chroma_mc8_mmx2 (uint8_t *dst, uint8_t *src,
1812 int stride, int h, int x, int y);
1813 void ff_avg_h264_chroma_mc8_3dnow_rnd (uint8_t *dst, uint8_t *src,
1814 int stride, int h, int x, int y);
1815 void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src,
1816 int stride, int h, int x, int y);
1817 void ff_avg_rv40_chroma_mc8_3dnow (uint8_t *dst, uint8_t *src,
1818 int stride, int h, int x, int y);
1819
1820 void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
1821 int stride, int h, int x, int y);
1822 void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
1823 int stride, int h, int x, int y);
1824 void ff_avg_h264_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src,
1825 int stride, int h, int x, int y);
1826 void ff_avg_rv40_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src,
1827 int stride, int h, int x, int y);
1828 void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
1829 int stride, int h, int x, int y);
1830 void ff_avg_rv40_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
1831 int stride, int h, int x, int y);
1832
1833 void ff_put_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src,
1834 int stride, int h, int x, int y);
1835 void ff_avg_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src,
1836 int stride, int h, int x, int y);
1837
1838 void ff_put_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
1839 int stride, int h, int x, int y);
1840 void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
1841 int stride, int h, int x, int y);
1842 void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
1843 int stride, int h, int x, int y);
1844
1845 void ff_avg_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
1846 int stride, int h, int x, int y);
1847 void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
1848 int stride, int h, int x, int y);
1849 void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
1850 int stride, int h, int x, int y);
1851
1834 1852
1835 /* CAVS specific */ 1853 /* CAVS specific */
1836 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { 1854 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
1837 put_pixels8_mmx(dst, src, stride, 8); 1855 put_pixels8_mmx(dst, src, stride, 8);
1838 } 1856 }
1839 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { 1857 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
1840 avg_pixels8_mmx(dst, src, stride, 8); 1858 avg_pixels8_mmx(dst, src, stride, 8);
1841 } 1859 }
1842 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { 1860 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
1843 put_pixels16_mmx(dst, src, stride, 16); 1861 put_pixels16_mmx(dst, src, stride, 16);
1844 } 1862 }
1845 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { 1863 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
1846 avg_pixels16_mmx(dst, src, stride, 16); 1864 avg_pixels16_mmx(dst, src, stride, 16);
1847 } 1865 }
1848 1866
1849 /* VC1 specific */ 1867 /* VC1 specific */
1850 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { 1868 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
1851 put_pixels8_mmx(dst, src, stride, 8); 1869 put_pixels8_mmx(dst, src, stride, 8);
1852 } 1870 }
1853 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { 1871 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
1854 avg_pixels8_mmx2(dst, src, stride, 8); 1872 avg_pixels8_mmx2(dst, src, stride, 8);
1855 } 1873 }
1856 1874
1857 /* XXX: those functions should be suppressed ASAP when all IDCTs are 1875 /* XXX: those functions should be suppressed ASAP when all IDCTs are
1858 converted */ 1876 converted */
1859 #if CONFIG_GPL 1877 #if CONFIG_GPL
1860 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) 1878 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
1861 { 1879 {
1862 ff_mmx_idct (block); 1880 ff_mmx_idct (block);
1863 put_pixels_clamped_mmx(block, dest, line_size); 1881 ff_put_pixels_clamped_mmx(block, dest, line_size);
1864 } 1882 }
1865 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block) 1883 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
1866 { 1884 {
1867 ff_mmx_idct (block); 1885 ff_mmx_idct (block);
1868 add_pixels_clamped_mmx(block, dest, line_size); 1886 ff_add_pixels_clamped_mmx(block, dest, line_size);
1869 } 1887 }
1870 static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block) 1888 static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
1871 { 1889 {
1872 ff_mmxext_idct (block); 1890 ff_mmxext_idct (block);
1873 put_pixels_clamped_mmx(block, dest, line_size); 1891 ff_put_pixels_clamped_mmx(block, dest, line_size);
1874 } 1892 }
1875 static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block) 1893 static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
1876 { 1894 {
1877 ff_mmxext_idct (block); 1895 ff_mmxext_idct (block);
1878 add_pixels_clamped_mmx(block, dest, line_size); 1896 ff_add_pixels_clamped_mmx(block, dest, line_size);
1879 } 1897 }
1880 #endif 1898 #endif
1881 static void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block) 1899 static void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block)
1882 { 1900 {
1883 ff_idct_xvid_mmx (block); 1901 ff_idct_xvid_mmx (block);
1884 put_pixels_clamped_mmx(block, dest, line_size); 1902 ff_put_pixels_clamped_mmx(block, dest, line_size);
1885 } 1903 }
1886 static void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block) 1904 static void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block)
1887 { 1905 {
1888 ff_idct_xvid_mmx (block); 1906 ff_idct_xvid_mmx (block);
1889 add_pixels_clamped_mmx(block, dest, line_size); 1907 ff_add_pixels_clamped_mmx(block, dest, line_size);
1890 } 1908 }
1891 static void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block) 1909 static void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block)
1892 { 1910 {
1893 ff_idct_xvid_mmx2 (block); 1911 ff_idct_xvid_mmx2 (block);
1894 put_pixels_clamped_mmx(block, dest, line_size); 1912 ff_put_pixels_clamped_mmx(block, dest, line_size);
1895 } 1913 }
1896 static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block) 1914 static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
1897 { 1915 {
1898 ff_idct_xvid_mmx2 (block); 1916 ff_idct_xvid_mmx2 (block);
1899 add_pixels_clamped_mmx(block, dest, line_size); 1917 ff_add_pixels_clamped_mmx(block, dest, line_size);
1900 } 1918 }
1901 1919
1902 static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize) 1920 static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
1903 { 1921 {
1904 int i; 1922 int i;
1905 __asm__ volatile("pxor %%mm7, %%mm7":); 1923 __asm__ volatile("pxor %%mm7, %%mm7":);
1906 for(i=0; i<blocksize; i+=2) { 1924 for(i=0; i<blocksize; i+=2) {
1907 __asm__ volatile( 1925 __asm__ volatile(
1908 "movq %0, %%mm0 \n\t" 1926 "movq %0, %%mm0 \n\t"
1909 "movq %1, %%mm1 \n\t" 1927 "movq %1, %%mm1 \n\t"
(...skipping 468 matching lines...)
2378 "cvtps2dq (%2,%0,2) , %%xmm0 \n\t" 2396 "cvtps2dq (%2,%0,2) , %%xmm0 \n\t"
2379 "cvtps2dq 16(%2,%0,2) , %%xmm1 \n\t" 2397 "cvtps2dq 16(%2,%0,2) , %%xmm1 \n\t"
2380 "packssdw %%xmm1 , %%xmm0 \n\t" 2398 "packssdw %%xmm1 , %%xmm0 \n\t"
2381 "movdqa %%xmm0 , (%1,%0) \n\t" 2399 "movdqa %%xmm0 , (%1,%0) \n\t"
2382 "add $16 , %0 \n\t" 2400 "add $16 , %0 \n\t"
2383 " js 1b \n\t" 2401 " js 1b \n\t"
2384 :"+r"(reglen), "+r"(dst), "+r"(src) 2402 :"+r"(reglen), "+r"(dst), "+r"(src)
2385 ); 2403 );
2386 } 2404 }
2387 2405
2406 void ff_vp3_idct_mmx(int16_t *input_data);
2407 void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
2408 void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
2409
2410 void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, const DCTELEM *block);
2411
2412 void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
2413 void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
2414
2415 void ff_vp3_idct_sse2(int16_t *input_data);
2416 void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
2417 void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
2418
2388 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); 2419 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
2389 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); 2420 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
2390 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); 2421 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
2391 int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2, int order, int shift); 2422 int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2, int order, int shift);
2392 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int order, int shift); 2423 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int order, int shift);
2393 int32_t ff_scalarproduct_and_madd_int16_mmx2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); 2424 int32_t ff_scalarproduct_and_madd_int16_mmx2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul);
2394 int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); 2425 int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul);
2395 int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); 2426 int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul);
2396 void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top); 2427 void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top);
2397 int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w, int left); 2428 int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w, int left);
2398 int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left); 2429 int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left);
2399 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
2400 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
2401 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
2402 void ff_x264_deblock_v_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);
2403 void ff_x264_deblock_h_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);
2404 2430
2405 #if HAVE_YASM && ARCH_X86_32 2431 #if !HAVE_YASM
2406 void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
2407 static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
2408 {
2409 ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta);
2410 ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta);
2411 }
2412 #elif !HAVE_YASM
2413 #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6) 2432 #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
2414 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) 2433 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
2415 #define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) 2434 #define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
2416 #endif 2435 #endif
2417 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse 2436 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
2418 2437
2419 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \ 2438 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
2420 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\ 2439 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
2421 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\ 2440 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
2422 DECLARE_ALIGNED(16, int16_t, tmp)[len];\ 2441 DECLARE_ALIGNED(16, int16_t, tmp)[len];\
(...skipping 79 matching lines...)
2502 if(channels==6) 2521 if(channels==6)
2503 ff_float_to_int16_interleave6_3dn2(dst, src, len); 2522 ff_float_to_int16_interleave6_3dn2(dst, src, len);
2504 else 2523 else
2505 float_to_int16_interleave_3dnow(dst, src, len, channels); 2524 float_to_int16_interleave_3dnow(dst, src, len, channels);
2506 } 2525 }
2507 2526
2508 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); 2527 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
2509 2528
2510 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) 2529 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2511 { 2530 {
2512 mm_flags = mm_support(); 2531 int mm_flags = av_get_cpu_flags();
2513 2532
2514 if (avctx->dsp_mask) { 2533 if (avctx->dsp_mask) {
2515 if (avctx->dsp_mask & FF_MM_FORCE) 2534 if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
2516 mm_flags |= (avctx->dsp_mask & 0xffff); 2535 mm_flags |= (avctx->dsp_mask & 0xffff);
2517 else 2536 else
2518 mm_flags &= ~(avctx->dsp_mask & 0xffff); 2537 mm_flags &= ~(avctx->dsp_mask & 0xffff);
2519 } 2538 }
2520 2539
2521 #if 0 2540 #if 0
2522 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:"); 2541 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:");
2523 if (mm_flags & FF_MM_MMX) 2542 if (mm_flags & AV_CPU_FLAG_MMX)
2524 av_log(avctx, AV_LOG_INFO, " mmx"); 2543 av_log(avctx, AV_LOG_INFO, " mmx");
2525 if (mm_flags & FF_MM_MMX2) 2544 if (mm_flags & AV_CPU_FLAG_MMX2)
2526 av_log(avctx, AV_LOG_INFO, " mmx2"); 2545 av_log(avctx, AV_LOG_INFO, " mmx2");
2527 if (mm_flags & FF_MM_3DNOW) 2546 if (mm_flags & AV_CPU_FLAG_3DNOW)
2528 av_log(avctx, AV_LOG_INFO, " 3dnow"); 2547 av_log(avctx, AV_LOG_INFO, " 3dnow");
2529 if (mm_flags & FF_MM_SSE) 2548 if (mm_flags & AV_CPU_FLAG_SSE)
2530 av_log(avctx, AV_LOG_INFO, " sse"); 2549 av_log(avctx, AV_LOG_INFO, " sse");
2531 if (mm_flags & FF_MM_SSE2) 2550 if (mm_flags & AV_CPU_FLAG_SSE2)
2532 av_log(avctx, AV_LOG_INFO, " sse2"); 2551 av_log(avctx, AV_LOG_INFO, " sse2");
2533 av_log(avctx, AV_LOG_INFO, "\n"); 2552 av_log(avctx, AV_LOG_INFO, "\n");
2534 #endif 2553 #endif
2535 2554
2536 if (mm_flags & FF_MM_MMX) { 2555 if (mm_flags & AV_CPU_FLAG_MMX) {
2537 const int idct_algo= avctx->idct_algo; 2556 const int idct_algo= avctx->idct_algo;
2538 2557
2539 if(avctx->lowres==0){ 2558 if(avctx->lowres==0){
2540 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ 2559 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){
2541 c->idct_put= ff_simple_idct_put_mmx; 2560 c->idct_put= ff_simple_idct_put_mmx;
2542 c->idct_add= ff_simple_idct_add_mmx; 2561 c->idct_add= ff_simple_idct_add_mmx;
2543 c->idct = ff_simple_idct_mmx; 2562 c->idct = ff_simple_idct_mmx;
2544 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; 2563 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
2545 #if CONFIG_GPL 2564 #if CONFIG_GPL
2546 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ 2565 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
2547 if(mm_flags & FF_MM_MMX2){ 2566 if(mm_flags & AV_CPU_FLAG_MMX2){
2548 c->idct_put= ff_libmpeg2mmx2_idct_put; 2567 c->idct_put= ff_libmpeg2mmx2_idct_put;
2549 c->idct_add= ff_libmpeg2mmx2_idct_add; 2568 c->idct_add= ff_libmpeg2mmx2_idct_add;
2550 c->idct = ff_mmxext_idct; 2569 c->idct = ff_mmxext_idct;
2551 }else{ 2570 }else{
2552 c->idct_put= ff_libmpeg2mmx_idct_put; 2571 c->idct_put= ff_libmpeg2mmx_idct_put;
2553 c->idct_add= ff_libmpeg2mmx_idct_add; 2572 c->idct_add= ff_libmpeg2mmx_idct_add;
2554 c->idct = ff_mmx_idct; 2573 c->idct = ff_mmx_idct;
2555 } 2574 }
2556 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; 2575 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2557 #endif 2576 #endif
2558 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) && 2577 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
2559 idct_algo==FF_IDCT_VP3){ 2578 idct_algo==FF_IDCT_VP3 && HAVE_YASM){
2560 if(mm_flags & FF_MM_SSE2){ 2579 if(mm_flags & AV_CPU_FLAG_SSE2){
2561 c->idct_put= ff_vp3_idct_put_sse2; 2580 c->idct_put= ff_vp3_idct_put_sse2;
2562 c->idct_add= ff_vp3_idct_add_sse2; 2581 c->idct_add= ff_vp3_idct_add_sse2;
2563 c->idct = ff_vp3_idct_sse2; 2582 c->idct = ff_vp3_idct_sse2;
2564 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; 2583 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
2565 }else{ 2584 }else{
2566 c->idct_put= ff_vp3_idct_put_mmx; 2585 c->idct_put= ff_vp3_idct_put_mmx;
2567 c->idct_add= ff_vp3_idct_add_mmx; 2586 c->idct_add= ff_vp3_idct_add_mmx;
2568 c->idct = ff_vp3_idct_mmx; 2587 c->idct = ff_vp3_idct_mmx;
2569 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM; 2588 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM;
2570 } 2589 }
2571 }else if(idct_algo==FF_IDCT_CAVS){ 2590 }else if(idct_algo==FF_IDCT_CAVS){
2572 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; 2591 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
2573 }else if(idct_algo==FF_IDCT_XVIDMMX){ 2592 }else if(idct_algo==FF_IDCT_XVIDMMX){
2574 if(mm_flags & FF_MM_SSE2){ 2593 if(mm_flags & AV_CPU_FLAG_SSE2){
2575 c->idct_put= ff_idct_xvid_sse2_put; 2594 c->idct_put= ff_idct_xvid_sse2_put;
2576 c->idct_add= ff_idct_xvid_sse2_add; 2595 c->idct_add= ff_idct_xvid_sse2_add;
2577 c->idct = ff_idct_xvid_sse2; 2596 c->idct = ff_idct_xvid_sse2;
2578 c->idct_permutation_type= FF_SSE2_IDCT_PERM; 2597 c->idct_permutation_type= FF_SSE2_IDCT_PERM;
2579 }else if(mm_flags & FF_MM_MMX2){ 2598 }else if(mm_flags & AV_CPU_FLAG_MMX2){
2580 c->idct_put= ff_idct_xvid_mmx2_put; 2599 c->idct_put= ff_idct_xvid_mmx2_put;
2581 c->idct_add= ff_idct_xvid_mmx2_add; 2600 c->idct_add= ff_idct_xvid_mmx2_add;
2582 c->idct = ff_idct_xvid_mmx2; 2601 c->idct = ff_idct_xvid_mmx2;
2583 }else{ 2602 }else{
2584 c->idct_put= ff_idct_xvid_mmx_put; 2603 c->idct_put= ff_idct_xvid_mmx_put;
2585 c->idct_add= ff_idct_xvid_mmx_add; 2604 c->idct_add= ff_idct_xvid_mmx_add;
2586 c->idct = ff_idct_xvid_mmx; 2605 c->idct = ff_idct_xvid_mmx;
2587 } 2606 }
2588 } 2607 }
2589 } 2608 }
2590 2609
2591 c->put_pixels_clamped = put_pixels_clamped_mmx; 2610 c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
2592 c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; 2611 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
2593 c->add_pixels_clamped = add_pixels_clamped_mmx; 2612 c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
2594 c->clear_block = clear_block_mmx; 2613 c->clear_block = clear_block_mmx;
2595 c->clear_blocks = clear_blocks_mmx; 2614 c->clear_blocks = clear_blocks_mmx;
2596 if ((mm_flags & FF_MM_SSE) && 2615 if ((mm_flags & AV_CPU_FLAG_SSE) &&
2597 !(CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)){ 2616 !(CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)){
2598 /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */ 2617 /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
2599 c->clear_block = clear_block_sse; 2618 c->clear_block = clear_block_sse;
2600 c->clear_blocks = clear_blocks_sse; 2619 c->clear_blocks = clear_blocks_sse;
2601 } 2620 }
2602 2621
2603 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ 2622 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
2604 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ 2623 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
2605 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ 2624 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \
2606 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ 2625 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
(...skipping 12 matching lines...)
2619 2638
2620 c->add_bytes= add_bytes_mmx; 2639 c->add_bytes= add_bytes_mmx;
2621 c->add_bytes_l2= add_bytes_l2_mmx; 2640 c->add_bytes_l2= add_bytes_l2_mmx;
2622 2641
2623 c->draw_edges = draw_edges_mmx; 2642 c->draw_edges = draw_edges_mmx;
2624 2643
2625 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { 2644 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
2626 c->h263_v_loop_filter= h263_v_loop_filter_mmx; 2645 c->h263_v_loop_filter= h263_v_loop_filter_mmx;
2627 c->h263_h_loop_filter= h263_h_loop_filter_mmx; 2646 c->h263_h_loop_filter= h263_h_loop_filter_mmx;
2628 } 2647 }
2629 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_rnd;
2630 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx;
2631 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_mmx_nornd;
2632 2648
2633 c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx; 2649 #if HAVE_YASM
2634 c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx; 2650 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
2651 c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
2652 c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
2635 2653
2636 if (CONFIG_VP6_DECODER) { 2654 c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
2637 c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; 2655 c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
2638 } 2656 #endif
2639 2657
2640 if (mm_flags & FF_MM_MMX2) { 2658 if (mm_flags & AV_CPU_FLAG_MMX2) {
2641 c->prefetch = prefetch_mmx2; 2659 c->prefetch = prefetch_mmx2;
2642 2660
2643 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; 2661 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
2644 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; 2662 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
2645 2663
2646 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; 2664 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
2647 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; 2665 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
2648 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; 2666 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
2649 2667
2650 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; 2668 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
2651 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; 2669 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
2652 2670
2653 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; 2671 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
2654 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; 2672 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
2655 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; 2673 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
2656 2674
2657 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ 2675 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2658 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; 2676 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
2659 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; 2677 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
2660 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; 2678 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
2661 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; 2679 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
2662 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; 2680 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
2663 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; 2681 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
2664 2682
2665 if (CONFIG_VP3_DECODER) { 2683 if (CONFIG_VP3_DECODER && HAVE_YASM) {
2666 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2; 2684 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2;
2667 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_mmx2; 2685 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_mmx2;
2668 } 2686 }
2669 } 2687 }
2670 if (CONFIG_VP3_DECODER) { 2688 if (CONFIG_VP3_DECODER && HAVE_YASM) {
2671 c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2; 2689 c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2;
2672 } 2690 }
2673 2691
2674 if (CONFIG_VP3_DECODER 2692 if (CONFIG_VP3_DECODER
2675 && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) { 2693 && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
2676 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2; 2694 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2;
2677 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2; 2695 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
2678 } 2696 }
2679 2697
2680 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \ 2698 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
(...skipping 26 matching lines...)
2707 SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2); 2725 SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
2708 SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2); 2726 SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
2709 SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2); 2727 SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
2710 SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2); 2728 SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
2711 2729
2712 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2); 2730 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
2713 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2); 2731 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
2714 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2); 2732 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2);
2715 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2); 2733 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2);
2716 2734
2717 c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_mmx2; 2735 #if HAVE_YASM
2718 c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_mmx2; 2736 c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
2737 c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
2719 2738
2720 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_vc1_chroma_mc8_mmx2_nornd; 2739 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
2721 2740
2722 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2_rnd; 2741 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
2723 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2; 2742 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
2724 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2; 2743 c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
2725 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2; 2744 c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;
2726 2745
2727 #if HAVE_YASM
2728 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; 2746 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
2729 #endif 2747 #endif
2730 #if HAVE_7REGS && HAVE_TEN_OPERANDS 2748 #if HAVE_7REGS && HAVE_TEN_OPERANDS
2731 if( mm_flags&FF_MM_3DNOW ) 2749 if( mm_flags&AV_CPU_FLAG_3DNOW )
2732 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; 2750 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
2733 #endif 2751 #endif
2734 2752
2735 if (CONFIG_VC1_DECODER) 2753 if (CONFIG_VC1_DECODER)
2736 ff_vc1dsp_init_mmx(c, avctx); 2754 ff_vc1dsp_init_mmx(c, avctx);
2737 2755
2738 c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2; 2756 c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2;
2739 } else if (mm_flags & FF_MM_3DNOW) { 2757 } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
2740 c->prefetch = prefetch_3dnow; 2758 c->prefetch = prefetch_3dnow;
2741 2759
2742 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; 2760 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
2743 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; 2761 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
2744 2762
2745 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; 2763 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
2746 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; 2764 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
2747 c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; 2765 c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
2748 2766
2749 c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; 2767 c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
(...skipping 30 matching lines...)
2780 SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow); 2798 SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
2781 SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow); 2799 SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
2782 SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow); 2800 SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
2783 SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow); 2801 SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
2784 2802
2785 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow); 2803 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
2786 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow); 2804 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
2787 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow); 2805 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow);
2788 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow); 2806 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
2789 2807
2790 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow_rnd; 2808 #if HAVE_YASM
2791 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; 2809 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
2810 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
2792 2811
2793 c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_3dnow; 2812 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd;
2794 c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_3dnow; 2813
2814 c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
2815 c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
2816 #endif
2795 } 2817 }
2796 2818
2797 2819
2798 #define H264_QPEL_FUNCS(x, y, CPU)\ 2820 #define H264_QPEL_FUNCS(x, y, CPU)\
2799 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\ 2821 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\
2800 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\ 2822 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\
2801 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\ 2823 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\
2802 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU; 2824 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
2803 if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){ 2825 if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){
2804 // these functions are slower than mmx on AMD, but faster on Intel 2826 // these functions are slower than mmx on AMD, but faster on Intel
2805 c->put_pixels_tab[0][0] = put_pixels16_sse2; 2827 c->put_pixels_tab[0][0] = put_pixels16_sse2;
2806 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; 2828 c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
2807 H264_QPEL_FUNCS(0, 0, sse2); 2829 H264_QPEL_FUNCS(0, 0, sse2);
2808 } 2830 }
2809 if(mm_flags & FF_MM_SSE2){ 2831 if(mm_flags & AV_CPU_FLAG_SSE2){
2810 H264_QPEL_FUNCS(0, 1, sse2); 2832 H264_QPEL_FUNCS(0, 1, sse2);
2811 H264_QPEL_FUNCS(0, 2, sse2); 2833 H264_QPEL_FUNCS(0, 2, sse2);
2812 H264_QPEL_FUNCS(0, 3, sse2); 2834 H264_QPEL_FUNCS(0, 3, sse2);
2813 H264_QPEL_FUNCS(1, 1, sse2); 2835 H264_QPEL_FUNCS(1, 1, sse2);
2814 H264_QPEL_FUNCS(1, 2, sse2); 2836 H264_QPEL_FUNCS(1, 2, sse2);
2815 H264_QPEL_FUNCS(1, 3, sse2); 2837 H264_QPEL_FUNCS(1, 3, sse2);
2816 H264_QPEL_FUNCS(2, 1, sse2); 2838 H264_QPEL_FUNCS(2, 1, sse2);
2817 H264_QPEL_FUNCS(2, 2, sse2); 2839 H264_QPEL_FUNCS(2, 2, sse2);
2818 H264_QPEL_FUNCS(2, 3, sse2); 2840 H264_QPEL_FUNCS(2, 3, sse2);
2819 H264_QPEL_FUNCS(3, 1, sse2); 2841 H264_QPEL_FUNCS(3, 1, sse2);
2820 H264_QPEL_FUNCS(3, 2, sse2); 2842 H264_QPEL_FUNCS(3, 2, sse2);
2821 H264_QPEL_FUNCS(3, 3, sse2); 2843 H264_QPEL_FUNCS(3, 3, sse2);
2822
2823 if (CONFIG_VP6_DECODER) {
2824 c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
2825 }
2826 } 2844 }
2827 #if HAVE_SSSE3 2845 #if HAVE_SSSE3
2828 if(mm_flags & FF_MM_SSSE3){ 2846 if(mm_flags & AV_CPU_FLAG_SSSE3){
2829 H264_QPEL_FUNCS(1, 0, ssse3); 2847 H264_QPEL_FUNCS(1, 0, ssse3);
2830 H264_QPEL_FUNCS(1, 1, ssse3); 2848 H264_QPEL_FUNCS(1, 1, ssse3);
2831 H264_QPEL_FUNCS(1, 2, ssse3); 2849 H264_QPEL_FUNCS(1, 2, ssse3);
2832 H264_QPEL_FUNCS(1, 3, ssse3); 2850 H264_QPEL_FUNCS(1, 3, ssse3);
2833 H264_QPEL_FUNCS(2, 0, ssse3); 2851 H264_QPEL_FUNCS(2, 0, ssse3);
2834 H264_QPEL_FUNCS(2, 1, ssse3); 2852 H264_QPEL_FUNCS(2, 1, ssse3);
2835 H264_QPEL_FUNCS(2, 2, ssse3); 2853 H264_QPEL_FUNCS(2, 2, ssse3);
2836 H264_QPEL_FUNCS(2, 3, ssse3); 2854 H264_QPEL_FUNCS(2, 3, ssse3);
2837 H264_QPEL_FUNCS(3, 0, ssse3); 2855 H264_QPEL_FUNCS(3, 0, ssse3);
2838 H264_QPEL_FUNCS(3, 1, ssse3); 2856 H264_QPEL_FUNCS(3, 1, ssse3);
2839 H264_QPEL_FUNCS(3, 2, ssse3); 2857 H264_QPEL_FUNCS(3, 2, ssse3);
2840 H264_QPEL_FUNCS(3, 3, ssse3); 2858 H264_QPEL_FUNCS(3, 3, ssse3);
2841 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_ssse3_nornd;
2842 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_vc1_chroma_mc8_ssse3_nornd;
2843 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_rnd;
2844 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_ssse3_rnd;
2845 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_ssse3;
2846 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3;
2847 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; 2859 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
2848 #if HAVE_YASM 2860 #if HAVE_YASM
2861 c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd;
2862 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd;
2863 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
2864 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
2865 c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
2866 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3;
2849 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; 2867 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
2850 if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe 2868 if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
2851 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; 2869 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
2852 #endif 2870 #endif
2853 } 2871 }
2854 #endif 2872 #endif
2855 2873
2856 if(mm_flags & FF_MM_3DNOW){ 2874 if(mm_flags & AV_CPU_FLAG_3DNOW){
2857 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; 2875 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
2858 c->vector_fmul = vector_fmul_3dnow; 2876 c->vector_fmul = vector_fmul_3dnow;
2859 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ 2877 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2860 c->float_to_int16 = float_to_int16_3dnow; 2878 c->float_to_int16 = float_to_int16_3dnow;
2861 c->float_to_int16_interleave = float_to_int16_interleave_3dnow; 2879 c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
2862 } 2880 }
2863 } 2881 }
2864 if(mm_flags & FF_MM_3DNOWEXT){ 2882 if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
2865 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; 2883 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
2866 c->vector_fmul_window = vector_fmul_window_3dnow2; 2884 c->vector_fmul_window = vector_fmul_window_3dnow2;
2867 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ 2885 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2868 c->float_to_int16_interleave = float_to_int16_interleave_3dn2; 2886 c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
2869 } 2887 }
2870 } 2888 }
2871 if(mm_flags & FF_MM_MMX2){ 2889 if(mm_flags & AV_CPU_FLAG_MMX2){
2872 #if HAVE_YASM 2890 #if HAVE_YASM
2873 c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2; 2891 c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2;
2874 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2; 2892 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2;
2875 #endif 2893 #endif
2876 } 2894 }
2877 if(mm_flags & FF_MM_SSE){ 2895 if(mm_flags & AV_CPU_FLAG_SSE){
2878 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; 2896 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
2879 c->ac3_downmix = ac3_downmix_sse; 2897 c->ac3_downmix = ac3_downmix_sse;
2880 c->vector_fmul = vector_fmul_sse; 2898 c->vector_fmul = vector_fmul_sse;
2881 c->vector_fmul_reverse = vector_fmul_reverse_sse; 2899 c->vector_fmul_reverse = vector_fmul_reverse_sse;
2882 c->vector_fmul_add = vector_fmul_add_sse; 2900 c->vector_fmul_add = vector_fmul_add_sse;
2883 c->vector_fmul_window = vector_fmul_window_sse; 2901 c->vector_fmul_window = vector_fmul_window_sse;
2884 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse; 2902 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
2885 c->vector_clipf = vector_clipf_sse; 2903 c->vector_clipf = vector_clipf_sse;
2886 c->float_to_int16 = float_to_int16_sse; 2904 c->float_to_int16 = float_to_int16_sse;
2887 c->float_to_int16_interleave = float_to_int16_interleave_sse; 2905 c->float_to_int16_interleave = float_to_int16_interleave_sse;
2888 #if HAVE_YASM 2906 #if HAVE_YASM
2889 c->scalarproduct_float = ff_scalarproduct_float_sse; 2907 c->scalarproduct_float = ff_scalarproduct_float_sse;
2890 #endif 2908 #endif
2891 } 2909 }
2892 if(mm_flags & FF_MM_3DNOW) 2910 if(mm_flags & AV_CPU_FLAG_3DNOW)
2893 c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse 2911 c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse
2894 if(mm_flags & FF_MM_SSE2){ 2912 if(mm_flags & AV_CPU_FLAG_SSE2){
2895 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2; 2913 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;
2896 c->float_to_int16 = float_to_int16_sse2; 2914 c->float_to_int16 = float_to_int16_sse2;
2897 c->float_to_int16_interleave = float_to_int16_interleave_sse2; 2915 c->float_to_int16_interleave = float_to_int16_interleave_sse2;
2898 #if HAVE_YASM 2916 #if HAVE_YASM
2899 c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; 2917 c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
2900 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; 2918 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
2901 #endif 2919 #endif
2902 } 2920 }
2903 if((mm_flags & FF_MM_SSSE3) && !(mm_flags & (FF_MM_SSE42|FF_MM_3DNOW)) && HAVE_YASM) // cachesplit 2921 if((mm_flags & AV_CPU_FLAG_SSSE3) && !(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW)) && HAVE_YASM) // cachesplit
2904 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; 2922 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
2905 } 2923 }
2906 2924
2907 if (CONFIG_ENCODERS) 2925 if (CONFIG_ENCODERS)
2908 dsputilenc_init_mmx(c, avctx); 2926 dsputilenc_init_mmx(c, avctx);
2909 2927
2910 #if 0 2928 #if 0
2911 // for speed testing 2929 // for speed testing
2912 get_pixels = just_return; 2930 get_pixels = just_return;
2913 put_pixels_clamped = just_return; 2931 put_pixels_clamped = just_return;
(...skipping 21 matching lines...)
2935 2953
2936 avg_no_rnd_pixels_tab[0] = just_return; 2954 avg_no_rnd_pixels_tab[0] = just_return;
2937 avg_no_rnd_pixels_tab[1] = just_return; 2955 avg_no_rnd_pixels_tab[1] = just_return;
2938 avg_no_rnd_pixels_tab[2] = just_return; 2956 avg_no_rnd_pixels_tab[2] = just_return;
2939 avg_no_rnd_pixels_tab[3] = just_return; 2957 avg_no_rnd_pixels_tab[3] = just_return;
2940 2958
2941 //av_fdct = just_return; 2959 //av_fdct = just_return;
2942 //ff_idct = just_return; 2960 //ff_idct = just_return;
2943 #endif 2961 #endif
2944 } 2962 }
2945
2946 #if CONFIG_H264DSP
2947 void ff_h264dsp_init_x86(H264DSPContext *c)
2948 {
2949 mm_flags = mm_support();
2950
2951 if (mm_flags & FF_MM_MMX) {
2952 c->h264_idct_dc_add=
2953 c->h264_idct_add= ff_h264_idct_add_mmx;
2954 c->h264_idct8_dc_add=
2955 c->h264_idct8_add= ff_h264_idct8_add_mmx;
2956
2957 c->h264_idct_add16 = ff_h264_idct_add16_mmx;
2958 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx;
2959 c->h264_idct_add8 = ff_h264_idct_add8_mmx;
2960 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
2961
2962 if (mm_flags & FF_MM_MMX2) {
2963 c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
2964 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
2965 c->h264_idct_add16 = ff_h264_idct_add16_mmx2;
2966 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2;
2967 c->h264_idct_add8 = ff_h264_idct_add8_mmx2;
2968 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
2969
2970 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
2971 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
2972 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
2973 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2;
2974 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2;
2975 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2;
2976 c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
2977
2978 c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
2979 c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
2980 c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
2981 c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
2982 c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
2983 c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
2984 c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
2985 c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
2986
2987 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
2988 c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
2989 c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
2990 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
2991 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
2992 c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
2993 c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
2994 c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
2995 }
2996 if(mm_flags & FF_MM_SSE2){
2997 c->h264_idct8_add = ff_h264_idct8_add_sse2;
2998 c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
2999 }
3000
3001 #if HAVE_YASM
3002 if (mm_flags & FF_MM_MMX2){
3003 #if ARCH_X86_32
3004 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
3005 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
3006 #endif
3007 if( mm_flags&FF_MM_SSE2 ){
3008 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2;
3009 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2;
3010 #if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
3011 c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
3012 c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
3013 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
3014 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
3015 #endif
3016 #if CONFIG_GPL
3017 c->h264_idct_add16 = ff_h264_idct_add16_sse2;
3018 c->h264_idct_add8 = ff_h264_idct_add8_sse2;
3019 c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
3020 #endif
3021 }
3022 if ( mm_flags&FF_MM_SSSE3 ){
3023 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3;
3024 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3;
3025 }
3026 }
3027 #endif
3028 }
3029 }
3030 #endif /* CONFIG_H264DSP */
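One idiom the diff uses but never spells out: the DECLARE_ALIGNED() constants at the top of the file (ff_pw_*, ff_pb_*, and ff_vector128 in put_signed_pixels_clamped) are not read from C at all; they are referenced by symbol name from inline-asm templates through MANGLE(), as in the "movq "MANGLE(ff_vector128)", %%mm0" line above. A self-contained sketch of that idiom, with simplified stand-ins for the two macros; it assumes GCC on x86 and non-PIE linking (e.g. gcc -m32 -O2 -fno-pie), and pw_32 is a local stand-in for ff_pw_32:

    #include <stdint.h>
    #include <stdio.h>

    /* simplified stand-ins for FFmpeg's DECLARE_ALIGNED()/MANGLE() macros */
    #define DECLARE_ALIGNED(n, t, v) t __attribute__((aligned(n))) v
    #define MANGLE(a) #a         /* ELF; Mach-O/COFF would prepend "_" */

    /* 8-byte alignment matches the movq load width; the 16-byte xmm_reg
     * constants in the file need 16 because movdqa faults when unaligned. */
    DECLARE_ALIGNED(8, const uint64_t, pw_32) = 0x0020002000200020ULL;

    int main(void)
    {
        uint64_t v = 0x0001000200030004ULL;
        __asm__ volatile(
            "movq   %0, %%mm0              \n\t"
            "paddw  "MANGLE(pw_32)", %%mm0 \n\t" /* +32 on each 16-bit word */
            "movq   %%mm0, %0              \n\t"
            "emms                          \n\t"
            : "+m"(v));
        printf("%016llx\n", (unsigned long long)v); /* 0021002200230024 */
        return 0;
    }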