Chromium Code Reviews

Side by Side Diff: source/patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.c

Issue 3384002: ffmpeg source update for sep 09 (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/
Patch Set: Created 10 years, 3 months ago
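The change that recurs through the whole file below is FFmpeg's CPU-detection API rename: the global mm_flags filled by mm_support() is dropped in favor of a local value from av_get_cpu_flags() (new header libavutil/cpu.h), and every FF_MM_* flag becomes the corresponding AV_CPU_FLAG_* name. A minimal sketch of the new convention; the dsp_mask force/clear logic is copied from the patched dsputil_init_mmx() below, while the wrapper function name is illustrative only:

    #include "libavutil/cpu.h"

    /* Sketch only: cpu_flags_after_mask() is a hypothetical helper; the
     * force/clear semantics come from the patched dsputil_init_mmx(). */
    static int cpu_flags_after_mask(int dsp_mask)
    {
        int mm_flags = av_get_cpu_flags();        /* replaces mm_support()  */

        if (dsp_mask) {
            if (dsp_mask & AV_CPU_FLAG_FORCE)     /* was FF_MM_FORCE        */
                mm_flags |= (dsp_mask & 0xffff);  /* force given flags on   */
            else
                mm_flags &= ~(dsp_mask & 0xffff); /* otherwise mask them off */
        }
        return mm_flags;
    }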
OLD | NEW
1 /* 1 /*
2 * MMX optimized DSP utils 2 * MMX optimized DSP utils
3 * Copyright (c) 2000, 2001 Fabrice Bellard 3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 * 5 *
6 * This file is part of FFmpeg. 6 * This file is part of FFmpeg.
7 * 7 *
8 * FFmpeg is free software; you can redistribute it and/or 8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public 9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either 10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version. 11 * version 2.1 of the License, or (at your option) any later version.
12 * 12 *
13 * FFmpeg is distributed in the hope that it will be useful, 13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details. 16 * Lesser General Public License for more details.
17 * 17 *
18 * You should have received a copy of the GNU Lesser General Public 18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software 19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 * 21 *
22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> 22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
23 */ 23 */
24 24
25 #include "libavutil/cpu.h"
25 #include "libavutil/x86_cpu.h" 26 #include "libavutil/x86_cpu.h"
26 #include "libavcodec/dsputil.h" 27 #include "libavcodec/dsputil.h"
27 #include "libavcodec/h264dsp.h" 28 #include "libavcodec/h264dsp.h"
28 #include "libavcodec/mpegvideo.h" 29 #include "libavcodec/mpegvideo.h"
29 #include "libavcodec/simple_idct.h" 30 #include "libavcodec/simple_idct.h"
30 #include "dsputil_mmx.h" 31 #include "dsputil_mmx.h"
31 #include "vp3dsp_mmx.h"
32 #include "vp3dsp_sse2.h"
33 #include "vp6dsp_mmx.h"
34 #include "vp6dsp_sse2.h"
35 #include "idct_xvid.h" 32 #include "idct_xvid.h"
36 33
37 //#undef NDEBUG 34 //#undef NDEBUG
38 //#include <assert.h> 35 //#include <assert.h>
39 36
40 int mm_flags; /* multimedia extension flags */
41
42 /* pixel operations */ 37 /* pixel operations */
43 DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL; 38 DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL;
44 DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL; 39 DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
45 40
46 DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] = 41 DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =
47 {0x8000000080000000ULL, 0x8000000080000000ULL}; 42 {0x8000000080000000ULL, 0x8000000080000000ULL};
48 43
49 DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL; 44 DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
50 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4 ) = {0x0004000400040004ULL, 0x0004000400040004ULL}; 45 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4 ) = {0x0004000400040004ULL, 0x0004000400040004ULL};
51 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL}; 46 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
52 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL}; 47 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
53 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_9 ) = {0x0009000900090009ULL, 0x0009000900090009ULL}; 48 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_9 ) = {0x0009000900090009ULL, 0x0009000900090009ULL};
54 DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL; 49 DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
55 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL}; 50 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
56 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18 ) = {0x0012001200120012ULL, 0x0012001200120012ULL}; 51 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18 ) = {0x0012001200120012ULL, 0x0012001200120012ULL};
57 DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL; 52 DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
58 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_27 ) = {0x001B001B001B001BULL, 0x001B001B001B001BULL}; 53 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_27 ) = {0x001B001B001B001BULL, 0x001B001B001B001BULL};
59 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL}; 54 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
60 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL}; 55 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
61 DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL; 56 DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
62 DECLARE_ALIGNED(8, const uint64_t, ff_pw_53 ) = 0x0035003500350035ULL; 57 DECLARE_ALIGNED(8, const uint64_t, ff_pw_53 ) = 0x0035003500350035ULL;
63 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_63 ) = {0x003F003F003F003FULL, 0x003F003F003F003FULL}; 58 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_63 ) = {0x003F003F003F003FULL, 0x003F003F003F003FULL};
64 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL}; 59 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};
65 DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL; 60 DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
66 DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL; 61 DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
67 DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL; 62 DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
68 63
64 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0 ) = {0x0000000000000000ULL, 0x0000000000000000ULL};
69 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1 ) = {0x0101010101010101ULL, 0x0101010101010101ULL}; 65 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1 ) = {0x0101010101010101ULL, 0x0101010101010101ULL};
70 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_3 ) = {0x0303030303030303ULL, 0x0303030303030303ULL}; 66 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_3 ) = {0x0303030303030303ULL, 0x0303030303030303ULL};
71 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_4 ) = {0x0404040404040404ULL, 0x0404040404040404ULL}; 67 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_4 ) = {0x0404040404040404ULL, 0x0404040404040404ULL};
72 DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL; 68 DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
73 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL; 69 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
74 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL; 70 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
75 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80 ) = {0x8080808080808080ULL, 0x8080808080808080ULL}; 71 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80 ) = {0x8080808080808080ULL, 0x8080808080808080ULL};
76 DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL; 72 DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
77 DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL; 73 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_A1 ) = {0xA1A1A1A1A1A1A1A1ULL, 0xA1A1A1A1A1A1A1A1ULL};
78 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_F8 ) = {0xF8F8F8F8F8F8F8F8ULL, 0xF8F8F8F8F8F8F8F8ULL}; 74 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_F8 ) = {0xF8F8F8F8F8F8F8F8ULL, 0xF8F8F8F8F8F8F8F8ULL};
79 DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL; 75 DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
80 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE ) = {0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL}; 76 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE ) = {0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL};
81 77
82 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 }; 78 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
83 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; 79 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
84 80
85 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::) 81 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
86 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::) 82 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
87 83
(...skipping 137 matching lines...)
225 #define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx 221 #define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx
226 #define put_pixels16_3dnow put_pixels16_mmx 222 #define put_pixels16_3dnow put_pixels16_mmx
227 #define put_pixels8_3dnow put_pixels8_mmx 223 #define put_pixels8_3dnow put_pixels8_mmx
228 #define put_pixels4_3dnow put_pixels4_mmx 224 #define put_pixels4_3dnow put_pixels4_mmx
229 #define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx 225 #define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx
230 #define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx 226 #define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx
231 227
232 /***********************************/ 228 /***********************************/
233 /* standard MMX */ 229 /* standard MMX */
234 230
235 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) 231 void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
236 { 232 {
237 const DCTELEM *p; 233 const DCTELEM *p;
238 uint8_t *pix; 234 uint8_t *pix;
239 235
240 /* read the pixels */ 236 /* read the pixels */
241 p = block; 237 p = block;
242 pix = pixels; 238 pix = pixels;
243 /* unrolled loop */ 239 /* unrolled loop */
244 __asm__ volatile( 240 __asm__ volatile(
245 "movq %3, %%mm0 \n\t" 241 "movq %3, %%mm0 \n\t"
(...skipping 55 matching lines...)
301 "packsswb 56+"#off"(%2), %%mm4 \n\t"\ 297 "packsswb 56+"#off"(%2), %%mm4 \n\t"\
302 "paddb %%mm0, %%mm1 \n\t"\ 298 "paddb %%mm0, %%mm1 \n\t"\
303 "paddb %%mm0, %%mm2 \n\t"\ 299 "paddb %%mm0, %%mm2 \n\t"\
304 "paddb %%mm0, %%mm3 \n\t"\ 300 "paddb %%mm0, %%mm3 \n\t"\
305 "paddb %%mm0, %%mm4 \n\t"\ 301 "paddb %%mm0, %%mm4 \n\t"\
306 "movq %%mm1, (%0) \n\t"\ 302 "movq %%mm1, (%0) \n\t"\
307 "movq %%mm2, (%0, %3) \n\t"\ 303 "movq %%mm2, (%0, %3) \n\t"\
308 "movq %%mm3, (%0, %3, 2) \n\t"\ 304 "movq %%mm3, (%0, %3, 2) \n\t"\
309 "movq %%mm4, (%0, %1) \n\t" 305 "movq %%mm4, (%0, %1) \n\t"
310 306
311 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) 307 void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
312 { 308 {
313 x86_reg line_skip = line_size; 309 x86_reg line_skip = line_size;
314 x86_reg line_skip3; 310 x86_reg line_skip3;
315 311
316 __asm__ volatile ( 312 __asm__ volatile (
317 "movq "MANGLE(ff_vector128)", %%mm0 \n\t" 313 "movq "MANGLE(ff_vector128)", %%mm0 \n\t"
318 "lea (%3, %3, 2), %1 \n\t" 314 "lea (%3, %3, 2), %1 \n\t"
319 put_signed_pixels_clamped_mmx_half(0) 315 put_signed_pixels_clamped_mmx_half(0)
320 "lea (%0, %3, 4), %0 \n\t" 316 "lea (%0, %3, 4), %0 \n\t"
321 put_signed_pixels_clamped_mmx_half(64) 317 put_signed_pixels_clamped_mmx_half(64)
322 :"+&r" (pixels), "=&r" (line_skip3) 318 :"+&r" (pixels), "=&r" (line_skip3)
323 :"r" (block), "r"(line_skip) 319 :"r" (block), "r"(line_skip)
324 :"memory"); 320 :"memory");
325 } 321 }
326 322
327 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) 323 void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
328 { 324 {
329 const DCTELEM *p; 325 const DCTELEM *p;
330 uint8_t *pix; 326 uint8_t *pix;
331 int i; 327 int i;
332 328
333 /* read the pixels */ 329 /* read the pixels */
334 p = block; 330 p = block;
335 pix = pixels; 331 pix = pixels;
336 MOVQ_ZERO(mm7); 332 MOVQ_ZERO(mm7);
337 i = 4; 333 i = 4;
(...skipping 387 matching lines...)
725 "movq %%mm6, %3 \n\t" 721 "movq %%mm6, %3 \n\t"
726 : "+m" (*(uint64_t*)(src - 2*stride)), 722 : "+m" (*(uint64_t*)(src - 2*stride)),
727 "+m" (*(uint64_t*)(src - 1*stride)), 723 "+m" (*(uint64_t*)(src - 1*stride)),
728 "+m" (*(uint64_t*)(src + 0*stride)), 724 "+m" (*(uint64_t*)(src + 0*stride)),
729 "+m" (*(uint64_t*)(src + 1*stride)) 725 "+m" (*(uint64_t*)(src + 1*stride))
730 : "g" (2*strength), "m"(ff_pb_FC) 726 : "g" (2*strength), "m"(ff_pb_FC)
731 ); 727 );
732 } 728 }
733 } 729 }
734 730
735 static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
736 __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
737 "movd %4, %%mm0 \n\t"
738 "movd %5, %%mm1 \n\t"
739 "movd %6, %%mm2 \n\t"
740 "movd %7, %%mm3 \n\t"
741 "punpcklbw %%mm1, %%mm0 \n\t"
742 "punpcklbw %%mm3, %%mm2 \n\t"
743 "movq %%mm0, %%mm1 \n\t"
744 "punpcklwd %%mm2, %%mm0 \n\t"
745 "punpckhwd %%mm2, %%mm1 \n\t"
746 "movd %%mm0, %0 \n\t"
747 "punpckhdq %%mm0, %%mm0 \n\t"
748 "movd %%mm0, %1 \n\t"
749 "movd %%mm1, %2 \n\t"
750 "punpckhdq %%mm1, %%mm1 \n\t"
751 "movd %%mm1, %3 \n\t"
752
753 : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
754 "=m" (*(uint32_t*)(dst + 1*dst_stride)),
755 "=m" (*(uint32_t*)(dst + 2*dst_stride)),
756 "=m" (*(uint32_t*)(dst + 3*dst_stride))
757 : "m" (*(uint32_t*)(src + 0*src_stride)),
758 "m" (*(uint32_t*)(src + 1*src_stride)),
759 "m" (*(uint32_t*)(src + 2*src_stride)),
760 "m" (*(uint32_t*)(src + 3*src_stride))
761 );
762 }
763
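/* Reference sketch, not part of either file revision: transpose4x4() above
 * is a 4x4 byte-matrix transpose. The MMX version interleaves row bytes
 * with punpcklbw, then words with punpcklwd/punpckhwd, then splits dwords
 * with punpckhdq; the plain-C equivalent of what it computes is simply: */
#if 0
static void transpose4x4_ref(uint8_t *dst, const uint8_t *src,
                             int dst_stride, int src_stride)
{
    for (int i = 0; i < 4; i++)
        for (int j = 0; j < 4; j++)
            dst[i*dst_stride + j] = src[j*src_stride + i]; /* dst = src^T */
}
#endif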
764 static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ 731 static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
765 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { 732 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
766 const int strength= ff_h263_loop_filter_strength[qscale]; 733 const int strength= ff_h263_loop_filter_strength[qscale];
767 DECLARE_ALIGNED(8, uint64_t, temp)[4]; 734 DECLARE_ALIGNED(8, uint64_t, temp)[4];
768 uint8_t *btemp= (uint8_t*)temp; 735 uint8_t *btemp= (uint8_t*)temp;
769 736
770 src -= 2; 737 src -= 2;
771 738
772 transpose4x4(btemp , src , 8, stride); 739 transpose4x4(btemp , src , 8, stride);
773 transpose4x4(btemp+4, src + 4*stride, 8, stride); 740 transpose4x4(btemp+4, src + 4*stride, 8, stride);
(...skipping 1048 matching lines...)
1822 const uint8_t *p= mem;\ 1789 const uint8_t *p= mem;\
1823 do{\ 1790 do{\
1824 __asm__ volatile(#op" %0" :: "m"(*p));\ 1791 __asm__ volatile(#op" %0" :: "m"(*p));\
1825 p+= stride;\ 1792 p+= stride;\
1826 }while(--h);\ 1793 }while(--h);\
1827 } 1794 }
1828 PREFETCH(prefetch_mmx2, prefetcht0) 1795 PREFETCH(prefetch_mmx2, prefetcht0)
1829 PREFETCH(prefetch_3dnow, prefetch) 1796 PREFETCH(prefetch_3dnow, prefetch)
1830 #undef PREFETCH 1797 #undef PREFETCH
1831 1798
1832 #include "h264dsp_mmx.c" 1799 #include "h264_qpel_mmx.c"
1833 #include "rv40dsp_mmx.c" 1800
1801 void ff_put_h264_chroma_mc8_mmx_rnd (uint8_t *dst, uint8_t *src,
1802 int stride, int h, int x, int y);
1803 void ff_put_vc1_chroma_mc8_mmx_nornd (uint8_t *dst, uint8_t *src,
1804 int stride, int h, int x, int y);
1805 void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src,
1806 int stride, int h, int x, int y);
1807 void ff_avg_h264_chroma_mc8_mmx2_rnd (uint8_t *dst, uint8_t *src,
1808 int stride, int h, int x, int y);
1809 void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src,
1810 int stride, int h, int x, int y);
1811 void ff_avg_rv40_chroma_mc8_mmx2 (uint8_t *dst, uint8_t *src,
1812 int stride, int h, int x, int y);
1813 void ff_avg_h264_chroma_mc8_3dnow_rnd (uint8_t *dst, uint8_t *src,
1814 int stride, int h, int x, int y);
1815 void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src,
1816 int stride, int h, int x, int y);
1817 void ff_avg_rv40_chroma_mc8_3dnow (uint8_t *dst, uint8_t *src,
1818 int stride, int h, int x, int y);
1819
1820 void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
1821 int stride, int h, int x, int y);
1822 void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
1823 int stride, int h, int x, int y);
1824 void ff_avg_h264_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src,
1825 int stride, int h, int x, int y);
1826 void ff_avg_rv40_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src,
1827 int stride, int h, int x, int y);
1828 void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
1829 int stride, int h, int x, int y);
1830 void ff_avg_rv40_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
1831 int stride, int h, int x, int y);
1832
1833 void ff_put_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src,
1834 int stride, int h, int x, int y);
1835 void ff_avg_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src,
1836 int stride, int h, int x, int y);
1837
1838 void ff_put_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
1839 int stride, int h, int x, int y);
1840 void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
1841 int stride, int h, int x, int y);
1842 void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
1843 int stride, int h, int x, int y);
1844
1845 void ff_avg_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
1846 int stride, int h, int x, int y);
1847 void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
1848 int stride, int h, int x, int y);
1849 void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
1850 int stride, int h, int x, int y);
1851
1834 1852
1835 /* CAVS specific */ 1853 /* CAVS specific */
1836 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { 1854 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
1837 put_pixels8_mmx(dst, src, stride, 8); 1855 put_pixels8_mmx(dst, src, stride, 8);
1838 } 1856 }
1839 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { 1857 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
1840 avg_pixels8_mmx(dst, src, stride, 8); 1858 avg_pixels8_mmx(dst, src, stride, 8);
1841 } 1859 }
1842 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { 1860 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
1843 put_pixels16_mmx(dst, src, stride, 16); 1861 put_pixels16_mmx(dst, src, stride, 16);
1844 } 1862 }
1845 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { 1863 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
1846 avg_pixels16_mmx(dst, src, stride, 16); 1864 avg_pixels16_mmx(dst, src, stride, 16);
1847 } 1865 }
1848 1866
1849 /* VC1 specific */ 1867 /* VC1 specific */
1850 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { 1868 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
1851 put_pixels8_mmx(dst, src, stride, 8); 1869 put_pixels8_mmx(dst, src, stride, 8);
1852 } 1870 }
1853 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { 1871 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
1854 avg_pixels8_mmx2(dst, src, stride, 8); 1872 avg_pixels8_mmx2(dst, src, stride, 8);
1855 } 1873 }
1856 1874
1857 /* XXX: those functions should be suppressed ASAP when all IDCTs are 1875 /* XXX: those functions should be suppressed ASAP when all IDCTs are
1858 converted */ 1876 converted */
1859 #if CONFIG_GPL 1877 #if CONFIG_GPL
1860 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) 1878 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
1861 { 1879 {
1862 ff_mmx_idct (block); 1880 ff_mmx_idct (block);
1863 put_pixels_clamped_mmx(block, dest, line_size); 1881 ff_put_pixels_clamped_mmx(block, dest, line_size);
1864 } 1882 }
1865 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block) 1883 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
1866 { 1884 {
1867 ff_mmx_idct (block); 1885 ff_mmx_idct (block);
1868 add_pixels_clamped_mmx(block, dest, line_size); 1886 ff_add_pixels_clamped_mmx(block, dest, line_size);
1869 } 1887 }
1870 static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block) 1888 static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
1871 { 1889 {
1872 ff_mmxext_idct (block); 1890 ff_mmxext_idct (block);
1873 put_pixels_clamped_mmx(block, dest, line_size); 1891 ff_put_pixels_clamped_mmx(block, dest, line_size);
1874 } 1892 }
1875 static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block) 1893 static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
1876 { 1894 {
1877 ff_mmxext_idct (block); 1895 ff_mmxext_idct (block);
1878 add_pixels_clamped_mmx(block, dest, line_size); 1896 ff_add_pixels_clamped_mmx(block, dest, line_size);
1879 } 1897 }
1880 #endif 1898 #endif
1881 static void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block) 1899 static void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block)
1882 { 1900 {
1883 ff_idct_xvid_mmx (block); 1901 ff_idct_xvid_mmx (block);
1884 put_pixels_clamped_mmx(block, dest, line_size); 1902 ff_put_pixels_clamped_mmx(block, dest, line_size);
1885 } 1903 }
1886 static void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block) 1904 static void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block)
1887 { 1905 {
1888 ff_idct_xvid_mmx (block); 1906 ff_idct_xvid_mmx (block);
1889 add_pixels_clamped_mmx(block, dest, line_size); 1907 ff_add_pixels_clamped_mmx(block, dest, line_size);
1890 } 1908 }
1891 static void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block) 1909 static void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block)
1892 { 1910 {
1893 ff_idct_xvid_mmx2 (block); 1911 ff_idct_xvid_mmx2 (block);
1894 put_pixels_clamped_mmx(block, dest, line_size); 1912 ff_put_pixels_clamped_mmx(block, dest, line_size);
1895 } 1913 }
1896 static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block) 1914 static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
1897 { 1915 {
1898 ff_idct_xvid_mmx2 (block); 1916 ff_idct_xvid_mmx2 (block);
1899 add_pixels_clamped_mmx(block, dest, line_size); 1917 ff_add_pixels_clamped_mmx(block, dest, line_size);
1900 } 1918 }
1901 1919
1902 static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize) 1920 static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
1903 { 1921 {
1904 int i; 1922 int i;
1905 __asm__ volatile("pxor %%mm7, %%mm7":); 1923 __asm__ volatile("pxor %%mm7, %%mm7":);
1906 for(i=0; i<blocksize; i+=2) { 1924 for(i=0; i<blocksize; i+=2) {
1907 __asm__ volatile( 1925 __asm__ volatile(
1908 "movq %0, %%mm0 \n\t" 1926 "movq %0, %%mm0 \n\t"
1909 "movq %1, %%mm1 \n\t" 1927 "movq %1, %%mm1 \n\t"
(...skipping 468 matching lines...)
2378 "cvtps2dq (%2,%0,2) , %%xmm0 \n\t" 2396 "cvtps2dq (%2,%0,2) , %%xmm0 \n\t"
2379 "cvtps2dq 16(%2,%0,2) , %%xmm1 \n\t" 2397 "cvtps2dq 16(%2,%0,2) , %%xmm1 \n\t"
2380 "packssdw %%xmm1 , %%xmm0 \n\t" 2398 "packssdw %%xmm1 , %%xmm0 \n\t"
2381 "movdqa %%xmm0 , (%1,%0) \n\t" 2399 "movdqa %%xmm0 , (%1,%0) \n\t"
2382 "add $16 , %0 \n\t" 2400 "add $16 , %0 \n\t"
2383 " js 1b \n\t" 2401 " js 1b \n\t"
2384 :"+r"(reglen), "+r"(dst), "+r"(src) 2402 :"+r"(reglen), "+r"(dst), "+r"(src)
2385 ); 2403 );
2386 } 2404 }
2387 2405
2406 void ff_vp3_idct_mmx(int16_t *input_data);
2407 void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
2408 void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
2409
2410 void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, const DCTELEM *block);
2411
2412 void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
2413 void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
2414
2415 void ff_vp3_idct_sse2(int16_t *input_data);
2416 void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
2417 void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
2418
2388 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); 2419 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
2389 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); 2420 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
2390 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); 2421 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
2391 int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2, int order, int shift); 2422 int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2, int order, int shift);
2392 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int order, int shift); 2423 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int order, int shift);
2393 int32_t ff_scalarproduct_and_madd_int16_mmx2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); 2424 int32_t ff_scalarproduct_and_madd_int16_mmx2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul);
2394 int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); 2425 int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul);
2395 int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul); 2426 int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul);
2396 void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top); 2427 void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top);
2397 int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w, int left); 2428 int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w, int left);
2398 int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left); 2429 int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, int left);
2399 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
2400 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
2401 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
2402 void ff_x264_deblock_v_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);
2403 void ff_x264_deblock_h_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);
2404 2430
2405 #if HAVE_YASM && ARCH_X86_32 2431 #if !HAVE_YASM
2406 void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
2407 static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
2408 {
2409 ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta);
2410 ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta);
2411 }
2412 #elif !HAVE_YASM
2413 #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6) 2432 #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
2414 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) 2433 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
2415 #define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) 2434 #define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
2416 #endif 2435 #endif
2417 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse 2436 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
2418 2437
2419 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \ 2438 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
2420 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\ 2439 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
2421 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\ 2440 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
2422 DECLARE_ALIGNED(16, int16_t, tmp)[len];\ 2441 DECLARE_ALIGNED(16, int16_t, tmp)[len];\
(...skipping 79 matching lines...)
2502 if(channels==6) 2521 if(channels==6)
2503 ff_float_to_int16_interleave6_3dn2(dst, src, len); 2522 ff_float_to_int16_interleave6_3dn2(dst, src, len);
2504 else 2523 else
2505 float_to_int16_interleave_3dnow(dst, src, len, channels); 2524 float_to_int16_interleave_3dnow(dst, src, len, channels);
2506 } 2525 }
2507 2526
2508 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); 2527 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
2509 2528
2510 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) 2529 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2511 { 2530 {
2512 mm_flags = mm_support(); 2531 int mm_flags = av_get_cpu_flags();
2513 2532
2514 if (avctx->dsp_mask) { 2533 if (avctx->dsp_mask) {
2515 if (avctx->dsp_mask & FF_MM_FORCE) 2534 if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
2516 mm_flags |= (avctx->dsp_mask & 0xffff); 2535 mm_flags |= (avctx->dsp_mask & 0xffff);
2517 else 2536 else
2518 mm_flags &= ~(avctx->dsp_mask & 0xffff); 2537 mm_flags &= ~(avctx->dsp_mask & 0xffff);
2519 } 2538 }
2520 2539
2521 #if 0 2540 #if 0
2522 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:"); 2541 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:");
2523 if (mm_flags & FF_MM_MMX) 2542 if (mm_flags & AV_CPU_FLAG_MMX)
2524 av_log(avctx, AV_LOG_INFO, " mmx"); 2543 av_log(avctx, AV_LOG_INFO, " mmx");
2525 if (mm_flags & FF_MM_MMX2) 2544 if (mm_flags & AV_CPU_FLAG_MMX2)
2526 av_log(avctx, AV_LOG_INFO, " mmx2"); 2545 av_log(avctx, AV_LOG_INFO, " mmx2");
2527 if (mm_flags & FF_MM_3DNOW) 2546 if (mm_flags & AV_CPU_FLAG_3DNOW)
2528 av_log(avctx, AV_LOG_INFO, " 3dnow"); 2547 av_log(avctx, AV_LOG_INFO, " 3dnow");
2529 if (mm_flags & FF_MM_SSE) 2548 if (mm_flags & AV_CPU_FLAG_SSE)
2530 av_log(avctx, AV_LOG_INFO, " sse"); 2549 av_log(avctx, AV_LOG_INFO, " sse");
2531 if (mm_flags & FF_MM_SSE2) 2550 if (mm_flags & AV_CPU_FLAG_SSE2)
2532 av_log(avctx, AV_LOG_INFO, " sse2"); 2551 av_log(avctx, AV_LOG_INFO, " sse2");
2533 av_log(avctx, AV_LOG_INFO, "\n"); 2552 av_log(avctx, AV_LOG_INFO, "\n");
2534 #endif 2553 #endif
2535 2554
2536 if (mm_flags & FF_MM_MMX) { 2555 if (mm_flags & AV_CPU_FLAG_MMX) {
2537 const int idct_algo= avctx->idct_algo; 2556 const int idct_algo= avctx->idct_algo;
2538 2557
2539 if(avctx->lowres==0){ 2558 if(avctx->lowres==0){
2540 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ 2559 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){
2541 c->idct_put= ff_simple_idct_put_mmx; 2560 c->idct_put= ff_simple_idct_put_mmx;
2542 c->idct_add= ff_simple_idct_add_mmx; 2561 c->idct_add= ff_simple_idct_add_mmx;
2543 c->idct = ff_simple_idct_mmx; 2562 c->idct = ff_simple_idct_mmx;
2544 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; 2563 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
2545 #if CONFIG_GPL 2564 #if CONFIG_GPL
2546 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ 2565 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
2547 if(mm_flags & FF_MM_MMX2){ 2566 if(mm_flags & AV_CPU_FLAG_MMX2){
2548 c->idct_put= ff_libmpeg2mmx2_idct_put; 2567 c->idct_put= ff_libmpeg2mmx2_idct_put;
2549 c->idct_add= ff_libmpeg2mmx2_idct_add; 2568 c->idct_add= ff_libmpeg2mmx2_idct_add;
2550 c->idct = ff_mmxext_idct; 2569 c->idct = ff_mmxext_idct;
2551 }else{ 2570 }else{
2552 c->idct_put= ff_libmpeg2mmx_idct_put; 2571 c->idct_put= ff_libmpeg2mmx_idct_put;
2553 c->idct_add= ff_libmpeg2mmx_idct_add; 2572 c->idct_add= ff_libmpeg2mmx_idct_add;
2554 c->idct = ff_mmx_idct; 2573 c->idct = ff_mmx_idct;
2555 } 2574 }
2556 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; 2575 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
2557 #endif 2576 #endif
2558 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) && 2577 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
2559 idct_algo==FF_IDCT_VP3){ 2578 idct_algo==FF_IDCT_VP3 && HAVE_YASM){
2560 if(mm_flags & FF_MM_SSE2){ 2579 if(mm_flags & AV_CPU_FLAG_SSE2){
2561 c->idct_put= ff_vp3_idct_put_sse2; 2580 c->idct_put= ff_vp3_idct_put_sse2;
2562 c->idct_add= ff_vp3_idct_add_sse2; 2581 c->idct_add= ff_vp3_idct_add_sse2;
2563 c->idct = ff_vp3_idct_sse2; 2582 c->idct = ff_vp3_idct_sse2;
2564 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; 2583 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
2565 }else{ 2584 }else{
2566 c->idct_put= ff_vp3_idct_put_mmx; 2585 c->idct_put= ff_vp3_idct_put_mmx;
2567 c->idct_add= ff_vp3_idct_add_mmx; 2586 c->idct_add= ff_vp3_idct_add_mmx;
2568 c->idct = ff_vp3_idct_mmx; 2587 c->idct = ff_vp3_idct_mmx;
2569 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM; 2588 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM;
2570 } 2589 }
2571 }else if(idct_algo==FF_IDCT_CAVS){ 2590 }else if(idct_algo==FF_IDCT_CAVS){
2572 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; 2591 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
2573 }else if(idct_algo==FF_IDCT_XVIDMMX){ 2592 }else if(idct_algo==FF_IDCT_XVIDMMX){
2574 if(mm_flags & FF_MM_SSE2){ 2593 if(mm_flags & AV_CPU_FLAG_SSE2){
2575 c->idct_put= ff_idct_xvid_sse2_put; 2594 c->idct_put= ff_idct_xvid_sse2_put;
2576 c->idct_add= ff_idct_xvid_sse2_add; 2595 c->idct_add= ff_idct_xvid_sse2_add;
2577 c->idct = ff_idct_xvid_sse2; 2596 c->idct = ff_idct_xvid_sse2;
2578 c->idct_permutation_type= FF_SSE2_IDCT_PERM; 2597 c->idct_permutation_type= FF_SSE2_IDCT_PERM;
2579 }else if(mm_flags & FF_MM_MMX2){ 2598 }else if(mm_flags & AV_CPU_FLAG_MMX2){
2580 c->idct_put= ff_idct_xvid_mmx2_put; 2599 c->idct_put= ff_idct_xvid_mmx2_put;
2581 c->idct_add= ff_idct_xvid_mmx2_add; 2600 c->idct_add= ff_idct_xvid_mmx2_add;
2582 c->idct = ff_idct_xvid_mmx2; 2601 c->idct = ff_idct_xvid_mmx2;
2583 }else{ 2602 }else{
2584 c->idct_put= ff_idct_xvid_mmx_put; 2603 c->idct_put= ff_idct_xvid_mmx_put;
2585 c->idct_add= ff_idct_xvid_mmx_add; 2604 c->idct_add= ff_idct_xvid_mmx_add;
2586 c->idct = ff_idct_xvid_mmx; 2605 c->idct = ff_idct_xvid_mmx;
2587 } 2606 }
2588 } 2607 }
2589 } 2608 }
2590 2609
2591 c->put_pixels_clamped = put_pixels_clamped_mmx; 2610 c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
2592 c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; 2611 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
2593 c->add_pixels_clamped = add_pixels_clamped_mmx; 2612 c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
2594 c->clear_block = clear_block_mmx; 2613 c->clear_block = clear_block_mmx;
2595 c->clear_blocks = clear_blocks_mmx; 2614 c->clear_blocks = clear_blocks_mmx;
2596 if ((mm_flags & FF_MM_SSE) && 2615 if ((mm_flags & AV_CPU_FLAG_SSE) &&
2597 !(CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)){ 2616 !(CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)){
2598 /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */ 2617 /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
2599 c->clear_block = clear_block_sse; 2618 c->clear_block = clear_block_sse;
2600 c->clear_blocks = clear_blocks_sse; 2619 c->clear_blocks = clear_blocks_sse;
2601 } 2620 }
2602 2621
2603 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ 2622 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
2604 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ 2623 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
2605 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ 2624 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \
2606 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ 2625 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
(...skipping 12 matching lines...)
2619 2638
2620 c->add_bytes= add_bytes_mmx; 2639 c->add_bytes= add_bytes_mmx;
2621 c->add_bytes_l2= add_bytes_l2_mmx; 2640 c->add_bytes_l2= add_bytes_l2_mmx;
2622 2641
2623 c->draw_edges = draw_edges_mmx; 2642 c->draw_edges = draw_edges_mmx;
2624 2643
2625 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { 2644 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
2626 c->h263_v_loop_filter= h263_v_loop_filter_mmx; 2645 c->h263_v_loop_filter= h263_v_loop_filter_mmx;
2627 c->h263_h_loop_filter= h263_h_loop_filter_mmx; 2646 c->h263_h_loop_filter= h263_h_loop_filter_mmx;
2628 } 2647 }
2629 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_rnd;
2630 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx;
2631 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_mmx_nornd;
2632 2648
2633 c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx; 2649 #if HAVE_YASM
2634 c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx; 2650 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
2651 c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
2652 c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
2635 2653
2636 if (CONFIG_VP6_DECODER) { 2654 c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
2637 c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; 2655 c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
2638 } 2656 #endif
2639 2657
2640 if (mm_flags & FF_MM_MMX2) { 2658 if (mm_flags & AV_CPU_FLAG_MMX2) {
2641 c->prefetch = prefetch_mmx2; 2659 c->prefetch = prefetch_mmx2;
2642 2660
2643 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; 2661 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
2644 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; 2662 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
2645 2663
2646 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; 2664 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
2647 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; 2665 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
2648 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; 2666 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
2649 2667
2650 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; 2668 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
2651 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; 2669 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
2652 2670
2653 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; 2671 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
2654 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; 2672 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
2655 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; 2673 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
2656 2674
2657 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ 2675 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2658 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; 2676 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
2659 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; 2677 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
2660 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; 2678 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
2661 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; 2679 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
2662 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; 2680 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
2663 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; 2681 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
2664 2682
2665 if (CONFIG_VP3_DECODER) { 2683 if (CONFIG_VP3_DECODER && HAVE_YASM) {
2666 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2; 2684 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2;
2667 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_mmx2; 2685 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_mmx2;
2668 } 2686 }
2669 } 2687 }
2670 if (CONFIG_VP3_DECODER) { 2688 if (CONFIG_VP3_DECODER && HAVE_YASM) {
2671 c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2; 2689 c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2;
2672 } 2690 }
2673 2691
2674 if (CONFIG_VP3_DECODER 2692 if (CONFIG_VP3_DECODER
2675 && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) { 2693 && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
2676 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2; 2694 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2;
2677 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2; 2695 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
2678 } 2696 }
2679 2697
2680 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \ 2698 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
(...skipping 26 matching lines...)
2707 SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2); 2725 SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
2708 SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2); 2726 SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
2709 SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2); 2727 SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
2710 SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2); 2728 SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
2711 2729
2712 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2); 2730 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
2713 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2); 2731 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
2714 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2); 2732 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2);
2715 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2); 2733 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2);
2716 2734
2717 c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_mmx2; 2735 #if HAVE_YASM
2718 c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_mmx2; 2736 c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
2737 c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
2719 2738
2720 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_vc1_chroma_mc8_mmx2_nornd; 2739 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
2721 2740
2722 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2_rnd; 2741 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
2723 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2; 2742 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
2724 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2; 2743 c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
2725 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2; 2744 c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;
2726 2745
2727 #if HAVE_YASM
2728 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; 2746 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
2729 #endif 2747 #endif
2730 #if HAVE_7REGS && HAVE_TEN_OPERANDS 2748 #if HAVE_7REGS && HAVE_TEN_OPERANDS
2731 if( mm_flags&FF_MM_3DNOW ) 2749 if( mm_flags&AV_CPU_FLAG_3DNOW )
2732 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; 2750 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
2733 #endif 2751 #endif
2734 2752
2735 if (CONFIG_VC1_DECODER) 2753 if (CONFIG_VC1_DECODER)
2736 ff_vc1dsp_init_mmx(c, avctx); 2754 ff_vc1dsp_init_mmx(c, avctx);
2737 2755
2738 c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2; 2756 c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2;
2739 } else if (mm_flags & FF_MM_3DNOW) { 2757 } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
2740 c->prefetch = prefetch_3dnow; 2758 c->prefetch = prefetch_3dnow;
2741 2759
2742 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; 2760 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
2743 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; 2761 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
2744 2762
2745 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; 2763 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
2746 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; 2764 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
2747 c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; 2765 c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
2748 2766
2749 c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; 2767 c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
(...skipping 30 matching lines...)
2780 SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow); 2798 SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
2781 SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow); 2799 SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
2782 SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow); 2800 SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
2783 SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow); 2801 SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
2784 2802
2785 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow); 2803 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
2786 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow); 2804 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
2787 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow); 2805 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow);
2788 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow); 2806 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
2789 2807
2790 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow_rnd; 2808 #if HAVE_YASM
2791 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; 2809 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
2810 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
2792 2811
2793 c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_3dnow; 2812 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd;
2794 c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_3dnow; 2813
2814 c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
2815 c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
2816 #endif
2795 } 2817 }
2796 2818
2797 2819
2798 #define H264_QPEL_FUNCS(x, y, CPU)\ 2820 #define H264_QPEL_FUNCS(x, y, CPU)\
2799 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\ 2821 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\
2800 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\ 2822 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\
2801 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\ 2823 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\
2802 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU; 2824 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
2803 if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){ 2825 if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){
2804 // these functions are slower than mmx on AMD, but faster on Intel 2826 // these functions are slower than mmx on AMD, but faster on Intel
2805 c->put_pixels_tab[0][0] = put_pixels16_sse2; 2827 c->put_pixels_tab[0][0] = put_pixels16_sse2;
2806 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; 2828 c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
2807 H264_QPEL_FUNCS(0, 0, sse2); 2829 H264_QPEL_FUNCS(0, 0, sse2);
2808 } 2830 }
2809 if(mm_flags & FF_MM_SSE2){ 2831 if(mm_flags & AV_CPU_FLAG_SSE2){
2810 H264_QPEL_FUNCS(0, 1, sse2); 2832 H264_QPEL_FUNCS(0, 1, sse2);
2811 H264_QPEL_FUNCS(0, 2, sse2); 2833 H264_QPEL_FUNCS(0, 2, sse2);
2812 H264_QPEL_FUNCS(0, 3, sse2); 2834 H264_QPEL_FUNCS(0, 3, sse2);
2813 H264_QPEL_FUNCS(1, 1, sse2); 2835 H264_QPEL_FUNCS(1, 1, sse2);
2814 H264_QPEL_FUNCS(1, 2, sse2); 2836 H264_QPEL_FUNCS(1, 2, sse2);
2815 H264_QPEL_FUNCS(1, 3, sse2); 2837 H264_QPEL_FUNCS(1, 3, sse2);
2816 H264_QPEL_FUNCS(2, 1, sse2); 2838 H264_QPEL_FUNCS(2, 1, sse2);
2817 H264_QPEL_FUNCS(2, 2, sse2); 2839 H264_QPEL_FUNCS(2, 2, sse2);
2818 H264_QPEL_FUNCS(2, 3, sse2); 2840 H264_QPEL_FUNCS(2, 3, sse2);
2819 H264_QPEL_FUNCS(3, 1, sse2); 2841 H264_QPEL_FUNCS(3, 1, sse2);
2820 H264_QPEL_FUNCS(3, 2, sse2); 2842 H264_QPEL_FUNCS(3, 2, sse2);
2821 H264_QPEL_FUNCS(3, 3, sse2); 2843 H264_QPEL_FUNCS(3, 3, sse2);
2822
2823 if (CONFIG_VP6_DECODER) {
2824 c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
2825 }
2826 } 2844 }
2827 #if HAVE_SSSE3 2845 #if HAVE_SSSE3
2828 if(mm_flags & FF_MM_SSSE3){ 2846 if(mm_flags & AV_CPU_FLAG_SSSE3){
2829 H264_QPEL_FUNCS(1, 0, ssse3); 2847 H264_QPEL_FUNCS(1, 0, ssse3);
2830 H264_QPEL_FUNCS(1, 1, ssse3); 2848 H264_QPEL_FUNCS(1, 1, ssse3);
2831 H264_QPEL_FUNCS(1, 2, ssse3); 2849 H264_QPEL_FUNCS(1, 2, ssse3);
2832 H264_QPEL_FUNCS(1, 3, ssse3); 2850 H264_QPEL_FUNCS(1, 3, ssse3);
2833 H264_QPEL_FUNCS(2, 0, ssse3); 2851 H264_QPEL_FUNCS(2, 0, ssse3);
2834 H264_QPEL_FUNCS(2, 1, ssse3); 2852 H264_QPEL_FUNCS(2, 1, ssse3);
2835 H264_QPEL_FUNCS(2, 2, ssse3); 2853 H264_QPEL_FUNCS(2, 2, ssse3);
2836 H264_QPEL_FUNCS(2, 3, ssse3); 2854 H264_QPEL_FUNCS(2, 3, ssse3);
2837 H264_QPEL_FUNCS(3, 0, ssse3); 2855 H264_QPEL_FUNCS(3, 0, ssse3);
2838 H264_QPEL_FUNCS(3, 1, ssse3); 2856 H264_QPEL_FUNCS(3, 1, ssse3);
2839 H264_QPEL_FUNCS(3, 2, ssse3); 2857 H264_QPEL_FUNCS(3, 2, ssse3);
2840 H264_QPEL_FUNCS(3, 3, ssse3); 2858 H264_QPEL_FUNCS(3, 3, ssse3);
2841 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_ssse3_nornd;
2842 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_vc1_chroma_mc8_ssse3_nornd;
2843 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_rnd;
2844 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_ssse3_rnd;
2845 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_ssse3;
2846 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3;
2847 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; 2859 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
2848 #if HAVE_YASM 2860 #if HAVE_YASM
2861 c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd;
2862 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd;
2863 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
2864 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
2865 c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
2866 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3;
2849 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; 2867 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
2850 if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe 2868 if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
2851 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; 2869 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
2852 #endif 2870 #endif
2853 } 2871 }
2854 #endif 2872 #endif
2855 2873
2856 if(mm_flags & FF_MM_3DNOW){ 2874 if(mm_flags & AV_CPU_FLAG_3DNOW){
2857 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; 2875 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
2858 c->vector_fmul = vector_fmul_3dnow; 2876 c->vector_fmul = vector_fmul_3dnow;
2859 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ 2877 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2860 c->float_to_int16 = float_to_int16_3dnow; 2878 c->float_to_int16 = float_to_int16_3dnow;
2861 c->float_to_int16_interleave = float_to_int16_interleave_3dnow; 2879 c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
2862 } 2880 }
2863 } 2881 }
2864 if(mm_flags & FF_MM_3DNOWEXT){ 2882 if(mm_flags & AV_CPU_FLAG_3DNOWEXT){
2865 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; 2883 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
2866 c->vector_fmul_window = vector_fmul_window_3dnow2; 2884 c->vector_fmul_window = vector_fmul_window_3dnow2;
2867 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ 2885 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2868 c->float_to_int16_interleave = float_to_int16_interleave_3dn2; 2886 c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
2869 } 2887 }
2870 } 2888 }
2871 if(mm_flags & FF_MM_MMX2){ 2889 if(mm_flags & AV_CPU_FLAG_MMX2){
2872 #if HAVE_YASM 2890 #if HAVE_YASM
2873 c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2; 2891 c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2;
2874 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2; 2892 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2;
2875 #endif 2893 #endif
2876 } 2894 }
2877 if(mm_flags & FF_MM_SSE){ 2895 if(mm_flags & AV_CPU_FLAG_SSE){
2878 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; 2896 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
2879 c->ac3_downmix = ac3_downmix_sse; 2897 c->ac3_downmix = ac3_downmix_sse;
2880 c->vector_fmul = vector_fmul_sse; 2898 c->vector_fmul = vector_fmul_sse;
2881 c->vector_fmul_reverse = vector_fmul_reverse_sse; 2899 c->vector_fmul_reverse = vector_fmul_reverse_sse;
2882 c->vector_fmul_add = vector_fmul_add_sse; 2900 c->vector_fmul_add = vector_fmul_add_sse;
2883 c->vector_fmul_window = vector_fmul_window_sse; 2901 c->vector_fmul_window = vector_fmul_window_sse;
2884 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse; 2902 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
2885 c->vector_clipf = vector_clipf_sse; 2903 c->vector_clipf = vector_clipf_sse;
2886 c->float_to_int16 = float_to_int16_sse; 2904 c->float_to_int16 = float_to_int16_sse;
2887 c->float_to_int16_interleave = float_to_int16_interleave_sse; 2905 c->float_to_int16_interleave = float_to_int16_interleave_sse;
2888 #if HAVE_YASM 2906 #if HAVE_YASM
2889 c->scalarproduct_float = ff_scalarproduct_float_sse; 2907 c->scalarproduct_float = ff_scalarproduct_float_sse;
2890 #endif 2908 #endif
2891 } 2909 }
2892 if(mm_flags & FF_MM_3DNOW) 2910 if(mm_flags & AV_CPU_FLAG_3DNOW)
2893 c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse 2911 c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse
2894 if(mm_flags & FF_MM_SSE2){ 2912 if(mm_flags & AV_CPU_FLAG_SSE2){
2895 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2; 2913 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;
2896 c->float_to_int16 = float_to_int16_sse2; 2914 c->float_to_int16 = float_to_int16_sse2;
2897 c->float_to_int16_interleave = float_to_int16_interleave_sse2; 2915 c->float_to_int16_interleave = float_to_int16_interleave_sse2;
2898 #if HAVE_YASM 2916 #if HAVE_YASM
2899 c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; 2917 c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
2900 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; 2918 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
2901 #endif 2919 #endif
2902 } 2920 }
2903 if((mm_flags & FF_MM_SSSE3) && !(mm_flags & (FF_MM_SSE42|FF_MM_3DNOW)) && HAVE_YASM) // cachesplit 2921 if((mm_flags & AV_CPU_FLAG_SSSE3) && !(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW)) && HAVE_YASM) // cachesplit
2904 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; 2922 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3;
2905 } 2923 }
2906 2924
2907 if (CONFIG_ENCODERS) 2925 if (CONFIG_ENCODERS)
2908 dsputilenc_init_mmx(c, avctx); 2926 dsputilenc_init_mmx(c, avctx);
2909 2927
2910 #if 0 2928 #if 0
2911 // for speed testing 2929 // for speed testing
2912 get_pixels = just_return; 2930 get_pixels = just_return;
2913 put_pixels_clamped = just_return; 2931 put_pixels_clamped = just_return;
(...skipping 21 matching lines...)
2935 2953
2936 avg_no_rnd_pixels_tab[0] = just_return; 2954 avg_no_rnd_pixels_tab[0] = just_return;
2937 avg_no_rnd_pixels_tab[1] = just_return; 2955 avg_no_rnd_pixels_tab[1] = just_return;
2938 avg_no_rnd_pixels_tab[2] = just_return; 2956 avg_no_rnd_pixels_tab[2] = just_return;
2939 avg_no_rnd_pixels_tab[3] = just_return; 2957 avg_no_rnd_pixels_tab[3] = just_return;
2940 2958
2941 //av_fdct = just_return; 2959 //av_fdct = just_return;
2942 //ff_idct = just_return; 2960 //ff_idct = just_return;
2943 #endif 2961 #endif
2944 } 2962 }
2945
2946 #if CONFIG_H264DSP
2947 void ff_h264dsp_init_x86(H264DSPContext *c)
2948 {
2949 mm_flags = mm_support();
2950
2951 if (mm_flags & FF_MM_MMX) {
2952 c->h264_idct_dc_add=
2953 c->h264_idct_add= ff_h264_idct_add_mmx;
2954 c->h264_idct8_dc_add=
2955 c->h264_idct8_add= ff_h264_idct8_add_mmx;
2956
2957 c->h264_idct_add16 = ff_h264_idct_add16_mmx;
2958 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx;
2959 c->h264_idct_add8 = ff_h264_idct_add8_mmx;
2960 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
2961
2962 if (mm_flags & FF_MM_MMX2) {
2963 c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
2964 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
2965 c->h264_idct_add16 = ff_h264_idct_add16_mmx2;
2966 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2;
2967 c->h264_idct_add8 = ff_h264_idct_add8_mmx2;
2968 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
2969
2970 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
2971 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
2972 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
2973 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2;
2974 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2;
2975 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2;
2976 c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
2977
2978 c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
2979 c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
2980 c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
2981 c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
2982 c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
2983 c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
2984 c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
2985 c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
2986
2987 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
2988 c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
2989 c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
2990 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
2991 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
2992 c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
2993 c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
2994 c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
2995 }
2996 if(mm_flags & FF_MM_SSE2){
2997 c->h264_idct8_add = ff_h264_idct8_add_sse2;
2998 c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
2999 }
3000
3001 #if HAVE_YASM
3002 if (mm_flags & FF_MM_MMX2){
3003 #if ARCH_X86_32
3004 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
3005 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
3006 #endif
3007 if( mm_flags&FF_MM_SSE2 ){
3008 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2;
3009 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2;
3010 #if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110
3011 c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
3012 c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
3013 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
3014 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
3015 #endif
3016 #if CONFIG_GPL
3017 c->h264_idct_add16 = ff_h264_idct_add16_sse2;
3018 c->h264_idct_add8 = ff_h264_idct_add8_sse2;
3019 c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
3020 #endif
3021 }
3022 if ( mm_flags&FF_MM_SSSE3 ){
3023 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3;
3024 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3;
3025 }
3026 }
3027 #endif
3028 }
3029 }
3030 #endif /* CONFIG_H264DSP */
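One idiom the diff uses but never spells out: the DECLARE_ALIGNED() constants at the top of the file (ff_pw_*, ff_pb_*, and ff_vector128 in put_signed_pixels_clamped) are not read from C at all; they are referenced by symbol name from inline-asm templates through MANGLE(), as in the "movq "MANGLE(ff_vector128)", %%mm0" line above. A self-contained sketch of that idiom, with simplified stand-ins for the two macros; it assumes GCC on x86 and non-PIE linking (e.g. gcc -m32 -O2 -fno-pie), and pw_32 is a local stand-in for ff_pw_32:

    #include <stdint.h>
    #include <stdio.h>

    /* simplified stand-ins for FFmpeg's DECLARE_ALIGNED()/MANGLE() macros */
    #define DECLARE_ALIGNED(n, t, v) t __attribute__((aligned(n))) v
    #define MANGLE(a) #a         /* ELF; Mach-O/COFF would prepend "_" */

    /* 8-byte alignment matches the movq load width; the 16-byte xmm_reg
     * constants in the file need 16 because movdqa faults when unaligned. */
    DECLARE_ALIGNED(8, const uint64_t, pw_32) = 0x0020002000200020ULL;

    int main(void)
    {
        uint64_t v = 0x0001000200030004ULL;
        __asm__ volatile(
            "movq   %0, %%mm0              \n\t"
            "paddw  "MANGLE(pw_32)", %%mm0 \n\t" /* +32 on each 16-bit word */
            "movq   %%mm0, %0              \n\t"
            "emms                          \n\t"
            : "+m"(v));
        printf("%016llx\n", (unsigned long long)v); /* 0021002200230024 */
        return 0;
    }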