| OLD | NEW |
| 1 /* | 1 /* |
| 2 * MMX optimized DSP utils | 2 * MMX optimized DSP utils |
| 3 * Copyright (c) 2000, 2001 Fabrice Bellard | 3 * Copyright (c) 2000, 2001 Fabrice Bellard |
| 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
| 5 * | 5 * |
| 6 * This file is part of FFmpeg. | 6 * This file is part of FFmpeg. |
| 7 * | 7 * |
| 8 * FFmpeg is free software; you can redistribute it and/or | 8 * FFmpeg is free software; you can redistribute it and/or |
| 9 * modify it under the terms of the GNU Lesser General Public | 9 * modify it under the terms of the GNU Lesser General Public |
| 10 * License as published by the Free Software Foundation; either | 10 * License as published by the Free Software Foundation; either |
| 11 * version 2.1 of the License, or (at your option) any later version. | 11 * version 2.1 of the License, or (at your option) any later version. |
| 12 * | 12 * |
| 13 * FFmpeg is distributed in the hope that it will be useful, | 13 * FFmpeg is distributed in the hope that it will be useful, |
| 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 16 * Lesser General Public License for more details. | 16 * Lesser General Public License for more details. |
| 17 * | 17 * |
| 18 * You should have received a copy of the GNU Lesser General Public | 18 * You should have received a copy of the GNU Lesser General Public |
| 19 * License along with FFmpeg; if not, write to the Free Software | 19 * License along with FFmpeg; if not, write to the Free Software |
| 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 21 * | 21 * |
| 22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | 22 * MMX optimization by Nick Kurshev <nickols_k@mail.ru> |
| 23 */ | 23 */ |
| 24 | 24 |
| 25 #include "libavutil/cpu.h" |
| 25 #include "libavutil/x86_cpu.h" | 26 #include "libavutil/x86_cpu.h" |
| 26 #include "libavcodec/dsputil.h" | 27 #include "libavcodec/dsputil.h" |
| 27 #include "libavcodec/h264dsp.h" | 28 #include "libavcodec/h264dsp.h" |
| 28 #include "libavcodec/mpegvideo.h" | 29 #include "libavcodec/mpegvideo.h" |
| 29 #include "libavcodec/simple_idct.h" | 30 #include "libavcodec/simple_idct.h" |
| 30 #include "dsputil_mmx.h" | 31 #include "dsputil_mmx.h" |
| 31 #include "vp3dsp_mmx.h" | |
| 32 #include "vp3dsp_sse2.h" | |
| 33 #include "vp6dsp_mmx.h" | |
| 34 #include "vp6dsp_sse2.h" | |
| 35 #include "idct_xvid.h" | 32 #include "idct_xvid.h" |
| 36 | 33 |
| 37 //#undef NDEBUG | 34 //#undef NDEBUG |
| 38 //#include <assert.h> | 35 //#include <assert.h> |
| 39 | 36 |
| 40 int mm_flags; /* multimedia extension flags */ | |
| 41 | |
| 42 /* pixel operations */ | 37 /* pixel operations */ |
| 43 DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL; | 38 DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL; |
| 44 DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL; | 39 DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL; |
| 45 | 40 |
| 46 DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] = | 41 DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] = |
| 47 {0x8000000080000000ULL, 0x8000000080000000ULL}; | 42 {0x8000000080000000ULL, 0x8000000080000000ULL}; |
| 48 | 43 |
| 49 DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL; | 44 DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL; |
| 50 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4 ) = {0x0004000400040004ULL, 0x0004000400040004ULL}; | 45 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_4 ) = {0x0004000400040004ULL, 0x0004000400040004ULL}; |
| 51 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL}; | 46 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL}; |
| 52 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL}; | 47 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL}; |
| 53 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_9 ) = {0x0009000900090009ULL, 0x0009000900090009ULL}; | 48 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_9 ) = {0x0009000900090009ULL, 0x0009000900090009ULL}; |
| 54 DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL; | 49 DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL; |
| 55 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL}; | 50 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL}; |
| 56 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18 ) = {0x0012001200120012ULL, 0x0012001200120012ULL}; | 51 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18 ) = {0x0012001200120012ULL, 0x0012001200120012ULL}; |
| 57 DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL; | 52 DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL; |
| 58 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_27 ) = {0x001B001B001B001BULL, 0x001B001B001B001BULL}; | 53 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_27 ) = {0x001B001B001B001BULL, 0x001B001B001B001BULL}; |
| 59 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL}; | 54 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL}; |
| 60 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL}; | 55 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL}; |
| 61 DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL; | 56 DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL; |
| 62 DECLARE_ALIGNED(8, const uint64_t, ff_pw_53 ) = 0x0035003500350035ULL; | 57 DECLARE_ALIGNED(8, const uint64_t, ff_pw_53 ) = 0x0035003500350035ULL; |
| 63 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_63 ) = {0x003F003F003F003FULL, 0x003F003F003F003FULL}; | 58 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_63 ) = {0x003F003F003F003FULL, 0x003F003F003F003FULL}; |
| 64 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL}; | 59 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL}; |
| 65 DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL; | 60 DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL; |
| 66 DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL; | 61 DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL; |
| 67 DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL; | 62 DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL; |
| 68 | 63 |
| 64 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0 ) = {0x0000000000000000ULL, 0x0000000000000000ULL}; |
| 69 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1 ) = {0x0101010101010101ULL, 0x0101010101010101ULL}; | 65 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_1 ) = {0x0101010101010101ULL, 0x0101010101010101ULL}; |
| 70 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_3 ) = {0x0303030303030303ULL, 0x0303030303030303ULL}; | 66 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_3 ) = {0x0303030303030303ULL, 0x0303030303030303ULL}; |
| 71 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_4 ) = {0x0404040404040404ULL, 0x0404040404040404ULL}; | 67 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_4 ) = {0x0404040404040404ULL, 0x0404040404040404ULL}; |
| 72 DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL; | 68 DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL; |
| 73 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL; | 69 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL; |
| 74 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL; | 70 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL; |
| 75 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80 ) = {0x8080808080808080ULL, 0x8080808080808080ULL}; | 71 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80 ) = {0x8080808080808080ULL, 0x8080808080808080ULL}; |
| 76 DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL; | 72 DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL; |
| 77 DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL; | 73 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_A1 ) = {0xA1A1A1A1A1A1A1A1ULL, 0xA1A1A1A1A1A1A1A1ULL}; |
| 78 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_F8 ) = {0xF8F8F8F8F8F8F8F8ULL, 0xF8F8F8F8F8F8F8F8ULL}; | 74 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_F8 ) = {0xF8F8F8F8F8F8F8F8ULL, 0xF8F8F8F8F8F8F8F8ULL}; |
| 79 DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL; | 75 DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL; |
| 80 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE ) = {0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL}; | 76 DECLARE_ALIGNED(16, const xmm_reg, ff_pb_FE ) = {0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL}; |
| 81 | 77 |
| 82 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 }; | 78 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 }; |
| 83 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; | 79 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; |
| 84 | 80 |
| 85 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::) | 81 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::) |
| 86 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::) | 82 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::) |
| 87 | 83 |
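A brief aside on the constant tables above (not part of the patch): each ff_pw_*/ff_pb_* value simply replicates one 16-bit or 8-bit constant across every SIMD lane, so a single packed instruction applies it to a whole register. The minimal, hypothetical sketch below shows the idiom with a local copy of ff_pw_32 used as a rounding bias; it assumes an x86 build with MMX and GNU inline assembly.

    #include <stdint.h>

    /* Add a rounding bias of 32 to four 16-bit values at once. */
    static void add_bias_4words(int16_t *v /* 4 values */)
    {
        static const uint64_t pw_32 = 0x0020002000200020ULL; /* local stand-in for ff_pw_32 */
        __asm__ volatile(
            "movq   %0, %%mm0 \n\t"  /* load four packed words     */
            "paddw  %1, %%mm0 \n\t"  /* add the bias to every lane */
            "movq   %%mm0, %0 \n\t"  /* store the result           */
            "emms             \n\t"  /* leave MMX state            */
            : "+m"(*(uint64_t *)v)
            : "m"(pw_32));
    }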
| (...skipping 137 matching lines...) | |
| 225 #define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx | 221 #define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx |
| 226 #define put_pixels16_3dnow put_pixels16_mmx | 222 #define put_pixels16_3dnow put_pixels16_mmx |
| 227 #define put_pixels8_3dnow put_pixels8_mmx | 223 #define put_pixels8_3dnow put_pixels8_mmx |
| 228 #define put_pixels4_3dnow put_pixels4_mmx | 224 #define put_pixels4_3dnow put_pixels4_mmx |
| 229 #define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx | 225 #define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx |
| 230 #define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx | 226 #define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx |
| 231 | 227 |
| 232 /***********************************/ | 228 /***********************************/ |
| 233 /* standard MMX */ | 229 /* standard MMX */ |
| 234 | 230 |
| 235 void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | 231 void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
| 236 { | 232 { |
| 237 const DCTELEM *p; | 233 const DCTELEM *p; |
| 238 uint8_t *pix; | 234 uint8_t *pix; |
| 239 | 235 |
| 240 /* read the pixels */ | 236 /* read the pixels */ |
| 241 p = block; | 237 p = block; |
| 242 pix = pixels; | 238 pix = pixels; |
| 243 /* unrolled loop */ | 239 /* unrolled loop */ |
| 244 __asm__ volatile( | 240 __asm__ volatile( |
| 245 "movq %3, %%mm0 \n\t" | 241 "movq %3, %%mm0 \n\t" |
| (...skipping 55 matching lines...) | |
| 301 "packsswb 56+"#off"(%2), %%mm4 \n\t"\ | 297 "packsswb 56+"#off"(%2), %%mm4 \n\t"\ |
| 302 "paddb %%mm0, %%mm1 \n\t"\ | 298 "paddb %%mm0, %%mm1 \n\t"\ |
| 303 "paddb %%mm0, %%mm2 \n\t"\ | 299 "paddb %%mm0, %%mm2 \n\t"\ |
| 304 "paddb %%mm0, %%mm3 \n\t"\ | 300 "paddb %%mm0, %%mm3 \n\t"\ |
| 305 "paddb %%mm0, %%mm4 \n\t"\ | 301 "paddb %%mm0, %%mm4 \n\t"\ |
| 306 "movq %%mm1, (%0) \n\t"\ | 302 "movq %%mm1, (%0) \n\t"\ |
| 307 "movq %%mm2, (%0, %3) \n\t"\ | 303 "movq %%mm2, (%0, %3) \n\t"\ |
| 308 "movq %%mm3, (%0, %3, 2) \n\t"\ | 304 "movq %%mm3, (%0, %3, 2) \n\t"\ |
| 309 "movq %%mm4, (%0, %1) \n\t" | 305 "movq %%mm4, (%0, %1) \n\t" |
| 310 | 306 |
| 311 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | 307 void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
| 312 { | 308 { |
| 313 x86_reg line_skip = line_size; | 309 x86_reg line_skip = line_size; |
| 314 x86_reg line_skip3; | 310 x86_reg line_skip3; |
| 315 | 311 |
| 316 __asm__ volatile ( | 312 __asm__ volatile ( |
| 317 "movq "MANGLE(ff_vector128)", %%mm0 \n\t" | 313 "movq "MANGLE(ff_vector128)", %%mm0 \n\t" |
| 318 "lea (%3, %3, 2), %1 \n\t" | 314 "lea (%3, %3, 2), %1 \n\t" |
| 319 put_signed_pixels_clamped_mmx_half(0) | 315 put_signed_pixels_clamped_mmx_half(0) |
| 320 "lea (%0, %3, 4), %0 \n\t" | 316 "lea (%0, %3, 4), %0 \n\t" |
| 321 put_signed_pixels_clamped_mmx_half(64) | 317 put_signed_pixels_clamped_mmx_half(64) |
| 322 :"+&r" (pixels), "=&r" (line_skip3) | 318 :"+&r" (pixels), "=&r" (line_skip3) |
| 323 :"r" (block), "r"(line_skip) | 319 :"r" (block), "r"(line_skip) |
| 324 :"memory"); | 320 :"memory"); |
| 325 } | 321 } |
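For readers following the rename, a scalar reference of what ff_put_signed_pixels_clamped_mmx computes may help: packsswb clamps each coefficient to the signed byte range and the paddb with ff_vector128 re-biases it to an unsigned pixel. This C sketch is illustrative only (DCTELEM is taken as int16_t) and is not part of the patch.

    #include <stdint.h>

    static void put_signed_pixels_clamped_c(const int16_t *block,
                                            uint8_t *pixels, int line_size)
    {
        int i, j;
        for (i = 0; i < 8; i++) {
            for (j = 0; j < 8; j++) {
                int v = block[i * 8 + j];
                if (v < -128) v = -128;        /* packsswb saturation */
                if (v >  127) v =  127;
                pixels[j] = (uint8_t)(v + 128); /* paddb 0x80 bias */
            }
            pixels += line_size;
        }
    }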
| 326 | 322 |
| 327 void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) | 323 void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size) |
| 328 { | 324 { |
| 329 const DCTELEM *p; | 325 const DCTELEM *p; |
| 330 uint8_t *pix; | 326 uint8_t *pix; |
| 331 int i; | 327 int i; |
| 332 | 328 |
| 333 /* read the pixels */ | 329 /* read the pixels */ |
| 334 p = block; | 330 p = block; |
| 335 pix = pixels; | 331 pix = pixels; |
| 336 MOVQ_ZERO(mm7); | 332 MOVQ_ZERO(mm7); |
| 337 i = 4; | 333 i = 4; |
| (...skipping 387 matching lines...) | |
| 725 "movq %%mm6, %3 \n\t" | 721 "movq %%mm6, %3 \n\t" |
| 726 : "+m" (*(uint64_t*)(src - 2*stride)), | 722 : "+m" (*(uint64_t*)(src - 2*stride)), |
| 727 "+m" (*(uint64_t*)(src - 1*stride)), | 723 "+m" (*(uint64_t*)(src - 1*stride)), |
| 728 "+m" (*(uint64_t*)(src + 0*stride)), | 724 "+m" (*(uint64_t*)(src + 0*stride)), |
| 729 "+m" (*(uint64_t*)(src + 1*stride)) | 725 "+m" (*(uint64_t*)(src + 1*stride)) |
| 730 : "g" (2*strength), "m"(ff_pb_FC) | 726 : "g" (2*strength), "m"(ff_pb_FC) |
| 731 ); | 727 ); |
| 732 } | 728 } |
| 733 } | 729 } |
| 734 | 730 |
| 735 static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ | |
| 736 __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ... | |
| 737 "movd %4, %%mm0 \n\t" | |
| 738 "movd %5, %%mm1 \n\t" | |
| 739 "movd %6, %%mm2 \n\t" | |
| 740 "movd %7, %%mm3 \n\t" | |
| 741 "punpcklbw %%mm1, %%mm0 \n\t" | |
| 742 "punpcklbw %%mm3, %%mm2 \n\t" | |
| 743 "movq %%mm0, %%mm1 \n\t" | |
| 744 "punpcklwd %%mm2, %%mm0 \n\t" | |
| 745 "punpckhwd %%mm2, %%mm1 \n\t" | |
| 746 "movd %%mm0, %0 \n\t" | |
| 747 "punpckhdq %%mm0, %%mm0 \n\t" | |
| 748 "movd %%mm0, %1 \n\t" | |
| 749 "movd %%mm1, %2 \n\t" | |
| 750 "punpckhdq %%mm1, %%mm1 \n\t" | |
| 751 "movd %%mm1, %3 \n\t" | |
| 752 | |
| 753 : "=m" (*(uint32_t*)(dst + 0*dst_stride)), | |
| 754 "=m" (*(uint32_t*)(dst + 1*dst_stride)), | |
| 755 "=m" (*(uint32_t*)(dst + 2*dst_stride)), | |
| 756 "=m" (*(uint32_t*)(dst + 3*dst_stride)) | |
| 757 : "m" (*(uint32_t*)(src + 0*src_stride)), | |
| 758 "m" (*(uint32_t*)(src + 1*src_stride)), | |
| 759 "m" (*(uint32_t*)(src + 2*src_stride)), | |
| 760 "m" (*(uint32_t*)(src + 3*src_stride)) | |
| 761 ); | |
| 762 } | |
| 763 | |
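The MMX transpose4x4() above is dropped from this file in the new revision; the horizontal H.263 loop filter below still calls it, so it is presumably provided elsewhere (e.g. a shared header). As a hedged reference, the operation it performs is a plain 4x4 byte transpose:

    #include <stdint.h>

    /* dst[j][i] = src[i][j] for a 4x4 block of bytes. */
    static void transpose4x4_c(uint8_t *dst, const uint8_t *src,
                               int dst_stride, int src_stride)
    {
        int i, j;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                dst[j * dst_stride + i] = src[i * src_stride + j];
    }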
| 764 static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ | 731 static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ |
| 765 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { | 732 if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { |
| 766 const int strength= ff_h263_loop_filter_strength[qscale]; | 733 const int strength= ff_h263_loop_filter_strength[qscale]; |
| 767 DECLARE_ALIGNED(8, uint64_t, temp)[4]; | 734 DECLARE_ALIGNED(8, uint64_t, temp)[4]; |
| 768 uint8_t *btemp= (uint8_t*)temp; | 735 uint8_t *btemp= (uint8_t*)temp; |
| 769 | 736 |
| 770 src -= 2; | 737 src -= 2; |
| 771 | 738 |
| 772 transpose4x4(btemp , src , 8, stride); | 739 transpose4x4(btemp , src , 8, stride); |
| 773 transpose4x4(btemp+4, src + 4*stride, 8, stride); | 740 transpose4x4(btemp+4, src + 4*stride, 8, stride); |
| (...skipping 1048 matching lines...) | |
| 1822 const uint8_t *p= mem;\ | 1789 const uint8_t *p= mem;\ |
| 1823 do{\ | 1790 do{\ |
| 1824 __asm__ volatile(#op" %0" :: "m"(*p));\ | 1791 __asm__ volatile(#op" %0" :: "m"(*p));\ |
| 1825 p+= stride;\ | 1792 p+= stride;\ |
| 1826 }while(--h);\ | 1793 }while(--h);\ |
| 1827 } | 1794 } |
| 1828 PREFETCH(prefetch_mmx2, prefetcht0) | 1795 PREFETCH(prefetch_mmx2, prefetcht0) |
| 1829 PREFETCH(prefetch_3dnow, prefetch) | 1796 PREFETCH(prefetch_3dnow, prefetch) |
| 1830 #undef PREFETCH | 1797 #undef PREFETCH |
| 1831 | 1798 |
| 1832 #include "h264dsp_mmx.c" | 1799 #include "h264_qpel_mmx.c" |
| 1833 #include "rv40dsp_mmx.c" | 1800 |
| 1801 void ff_put_h264_chroma_mc8_mmx_rnd (uint8_t *dst, uint8_t *src, |
| 1802 int stride, int h, int x, int y); |
| 1803 void ff_put_vc1_chroma_mc8_mmx_nornd (uint8_t *dst, uint8_t *src, |
| 1804 int stride, int h, int x, int y); |
| 1805 void ff_put_rv40_chroma_mc8_mmx (uint8_t *dst, uint8_t *src, |
| 1806 int stride, int h, int x, int y); |
| 1807 void ff_avg_h264_chroma_mc8_mmx2_rnd (uint8_t *dst, uint8_t *src, |
| 1808 int stride, int h, int x, int y); |
| 1809 void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src, |
| 1810 int stride, int h, int x, int y); |
| 1811 void ff_avg_rv40_chroma_mc8_mmx2 (uint8_t *dst, uint8_t *src, |
| 1812 int stride, int h, int x, int y); |
| 1813 void ff_avg_h264_chroma_mc8_3dnow_rnd (uint8_t *dst, uint8_t *src, |
| 1814 int stride, int h, int x, int y); |
| 1815 void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src, |
| 1816 int stride, int h, int x, int y); |
| 1817 void ff_avg_rv40_chroma_mc8_3dnow (uint8_t *dst, uint8_t *src, |
| 1818 int stride, int h, int x, int y); |
| 1819 |
| 1820 void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, |
| 1821 int stride, int h, int x, int y); |
| 1822 void ff_put_rv40_chroma_mc4_mmx (uint8_t *dst, uint8_t *src, |
| 1823 int stride, int h, int x, int y); |
| 1824 void ff_avg_h264_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src, |
| 1825 int stride, int h, int x, int y); |
| 1826 void ff_avg_rv40_chroma_mc4_mmx2 (uint8_t *dst, uint8_t *src, |
| 1827 int stride, int h, int x, int y); |
| 1828 void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src, |
| 1829 int stride, int h, int x, int y); |
| 1830 void ff_avg_rv40_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src, |
| 1831 int stride, int h, int x, int y); |
| 1832 |
| 1833 void ff_put_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src, |
| 1834 int stride, int h, int x, int y); |
| 1835 void ff_avg_h264_chroma_mc2_mmx2 (uint8_t *dst, uint8_t *src, |
| 1836 int stride, int h, int x, int y); |
| 1837 |
| 1838 void ff_put_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src, |
| 1839 int stride, int h, int x, int y); |
| 1840 void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src, |
| 1841 int stride, int h, int x, int y); |
| 1842 void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, |
| 1843 int stride, int h, int x, int y); |
| 1844 |
| 1845 void ff_avg_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src, |
| 1846 int stride, int h, int x, int y); |
| 1847 void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src, |
| 1848 int stride, int h, int x, int y); |
| 1849 void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src, |
| 1850 int stride, int h, int x, int y); |
| 1851 |
| 1834 | 1852 |
| 1835 /* CAVS specific */ | 1853 /* CAVS specific */ |
| 1836 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { | 1854 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { |
| 1837 put_pixels8_mmx(dst, src, stride, 8); | 1855 put_pixels8_mmx(dst, src, stride, 8); |
| 1838 } | 1856 } |
| 1839 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { | 1857 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { |
| 1840 avg_pixels8_mmx(dst, src, stride, 8); | 1858 avg_pixels8_mmx(dst, src, stride, 8); |
| 1841 } | 1859 } |
| 1842 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { | 1860 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { |
| 1843 put_pixels16_mmx(dst, src, stride, 16); | 1861 put_pixels16_mmx(dst, src, stride, 16); |
| 1844 } | 1862 } |
| 1845 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { | 1863 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { |
| 1846 avg_pixels16_mmx(dst, src, stride, 16); | 1864 avg_pixels16_mmx(dst, src, stride, 16); |
| 1847 } | 1865 } |
| 1848 | 1866 |
| 1849 /* VC1 specific */ | 1867 /* VC1 specific */ |
| 1850 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { | 1868 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { |
| 1851 put_pixels8_mmx(dst, src, stride, 8); | 1869 put_pixels8_mmx(dst, src, stride, 8); |
| 1852 } | 1870 } |
| 1853 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { | 1871 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { |
| 1854 avg_pixels8_mmx2(dst, src, stride, 8); | 1872 avg_pixels8_mmx2(dst, src, stride, 8); |
| 1855 } | 1873 } |
| 1856 | 1874 |
| 1857 /* XXX: those functions should be suppressed ASAP when all IDCTs are | 1875 /* XXX: those functions should be suppressed ASAP when all IDCTs are |
| 1858 converted */ | 1876 converted */ |
| 1859 #if CONFIG_GPL | 1877 #if CONFIG_GPL |
| 1860 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | 1878 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |
| 1861 { | 1879 { |
| 1862 ff_mmx_idct (block); | 1880 ff_mmx_idct (block); |
| 1863 put_pixels_clamped_mmx(block, dest, line_size); | 1881 ff_put_pixels_clamped_mmx(block, dest, line_size); |
| 1864 } | 1882 } |
| 1865 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | 1883 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block) |
| 1866 { | 1884 { |
| 1867 ff_mmx_idct (block); | 1885 ff_mmx_idct (block); |
| 1868 add_pixels_clamped_mmx(block, dest, line_size); | 1886 ff_add_pixels_clamped_mmx(block, dest, line_size); |
| 1869 } | 1887 } |
| 1870 static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | 1888 static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |
| 1871 { | 1889 { |
| 1872 ff_mmxext_idct (block); | 1890 ff_mmxext_idct (block); |
| 1873 put_pixels_clamped_mmx(block, dest, line_size); | 1891 ff_put_pixels_clamped_mmx(block, dest, line_size); |
| 1874 } | 1892 } |
| 1875 static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | 1893 static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block) |
| 1876 { | 1894 { |
| 1877 ff_mmxext_idct (block); | 1895 ff_mmxext_idct (block); |
| 1878 add_pixels_clamped_mmx(block, dest, line_size); | 1896 ff_add_pixels_clamped_mmx(block, dest, line_size); |
| 1879 } | 1897 } |
| 1880 #endif | 1898 #endif |
| 1881 static void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block) | 1899 static void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block) |
| 1882 { | 1900 { |
| 1883 ff_idct_xvid_mmx (block); | 1901 ff_idct_xvid_mmx (block); |
| 1884 put_pixels_clamped_mmx(block, dest, line_size); | 1902 ff_put_pixels_clamped_mmx(block, dest, line_size); |
| 1885 } | 1903 } |
| 1886 static void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block) | 1904 static void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block) |
| 1887 { | 1905 { |
| 1888 ff_idct_xvid_mmx (block); | 1906 ff_idct_xvid_mmx (block); |
| 1889 add_pixels_clamped_mmx(block, dest, line_size); | 1907 ff_add_pixels_clamped_mmx(block, dest, line_size); |
| 1890 } | 1908 } |
| 1891 static void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block) | 1909 static void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block) |
| 1892 { | 1910 { |
| 1893 ff_idct_xvid_mmx2 (block); | 1911 ff_idct_xvid_mmx2 (block); |
| 1894 put_pixels_clamped_mmx(block, dest, line_size); | 1912 ff_put_pixels_clamped_mmx(block, dest, line_size); |
| 1895 } | 1913 } |
| 1896 static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block) | 1914 static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block) |
| 1897 { | 1915 { |
| 1898 ff_idct_xvid_mmx2 (block); | 1916 ff_idct_xvid_mmx2 (block); |
| 1899 add_pixels_clamped_mmx(block, dest, line_size); | 1917 ff_add_pixels_clamped_mmx(block, dest, line_size); |
| 1900 } | 1918 } |
| 1901 | 1919 |
| 1902 static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize) | 1920 static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize) |
| 1903 { | 1921 { |
| 1904 int i; | 1922 int i; |
| 1905 __asm__ volatile("pxor %%mm7, %%mm7":); | 1923 __asm__ volatile("pxor %%mm7, %%mm7":); |
| 1906 for(i=0; i<blocksize; i+=2) { | 1924 for(i=0; i<blocksize; i+=2) { |
| 1907 __asm__ volatile( | 1925 __asm__ volatile( |
| 1908 "movq %0, %%mm0 \n\t" | 1926 "movq %0, %%mm0 \n\t" |
| 1909 "movq %1, %%mm1 \n\t" | 1927 "movq %1, %%mm1 \n\t" |
| (...skipping 468 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2378 "cvtps2dq (%2,%0,2) , %%xmm0 \n\t" | 2396 "cvtps2dq (%2,%0,2) , %%xmm0 \n\t" |
| 2379 "cvtps2dq 16(%2,%0,2) , %%xmm1 \n\t" | 2397 "cvtps2dq 16(%2,%0,2) , %%xmm1 \n\t" |
| 2380 "packssdw %%xmm1 , %%xmm0 \n\t" | 2398 "packssdw %%xmm1 , %%xmm0 \n\t" |
| 2381 "movdqa %%xmm0 , (%1,%0) \n\t" | 2399 "movdqa %%xmm0 , (%1,%0) \n\t" |
| 2382 "add $16 , %0 \n\t" | 2400 "add $16 , %0 \n\t" |
| 2383 " js 1b \n\t" | 2401 " js 1b \n\t" |
| 2384 :"+r"(reglen), "+r"(dst), "+r"(src) | 2402 :"+r"(reglen), "+r"(dst), "+r"(src) |
| 2385 ); | 2403 ); |
| 2386 } | 2404 } |
| 2387 | 2405 |
| 2406 void ff_vp3_idct_mmx(int16_t *input_data); |
| 2407 void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block); |
| 2408 void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block); |
| 2409 |
| 2410 void ff_vp3_idct_dc_add_mmx2(uint8_t *dest, int line_size, const DCTELEM *block); |
| 2411 |
| 2412 void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); |
| 2413 void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values); |
| 2414 |
| 2415 void ff_vp3_idct_sse2(int16_t *input_data); |
| 2416 void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block); |
| 2417 void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block); |
| 2418 |
| 2388 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
; | 2419 void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
; |
| 2389 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int le
n); | 2420 void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int le
n); |
| 2390 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len
); | 2421 void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len
); |
| 2391 int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2, int or
der, int shift); | 2422 int32_t ff_scalarproduct_int16_mmx2(const int16_t *v1, const int16_t *v2, int or
der, int shift); |
| 2392 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int or
der, int shift); | 2423 int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int or
der, int shift); |
| 2393 int32_t ff_scalarproduct_and_madd_int16_mmx2(int16_t *v1, const int16_t *v2, con
st int16_t *v3, int order, int mul); | 2424 int32_t ff_scalarproduct_and_madd_int16_mmx2(int16_t *v1, const int16_t *v2, con
st int16_t *v3, int order, int mul); |
| 2394 int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, con
st int16_t *v3, int order, int mul); | 2425 int32_t ff_scalarproduct_and_madd_int16_sse2(int16_t *v1, const int16_t *v2, con
st int16_t *v3, int order, int mul); |
| 2395 int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, co
nst int16_t *v3, int order, int mul); | 2426 int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, co
nst int16_t *v3, int order, int mul); |
| 2396 void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const
uint8_t *diff, int w, int *left, int *left_top); | 2427 void ff_add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const
uint8_t *diff, int w, int *left, int *left_top); |
| 2397 int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w,
int left); | 2428 int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, int w,
int left); |
| 2398 int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, i
nt left); | 2429 int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, int w, i
nt left); |
| 2399 void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta,
int8_t *tc0); | |
| 2400 void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta,
int8_t *tc0); | |
| 2401 void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, in
t beta); | |
| 2402 void ff_x264_deblock_v_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int
beta); | |
| 2403 void ff_x264_deblock_h_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int
beta); | |
| 2404 | 2430 |
| 2405 #if HAVE_YASM && ARCH_X86_32 | 2431 #if !HAVE_YASM |
| 2406 void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, i
nt beta); | |
| 2407 static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int al
pha, int beta) | |
| 2408 { | |
| 2409 ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta); | |
| 2410 ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta); | |
| 2411 } | |
| 2412 #elif !HAVE_YASM | |
| 2413 #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_mis
c_sse(a,b,c,6) | 2432 #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_mis
c_sse(a,b,c,6) |
| 2414 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_mis
c_3dnow(a,b,c,6) | 2433 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_mis
c_3dnow(a,b,c,6) |
| 2415 #define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_mis
c_3dnow(a,b,c,6) | 2434 #define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_mis
c_3dnow(a,b,c,6) |
| 2416 #endif | 2435 #endif |
| 2417 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse | 2436 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse |
| 2418 | 2437 |
| 2419 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \ | 2438 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \ |
| 2420 /* gcc pessimizes register allocation if this is in the same function as float_t
o_int16_interleave_sse2*/\ | 2439 /* gcc pessimizes register allocation if this is in the same function as float_t
o_int16_interleave_sse2*/\ |
| 2421 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const
float **src, long len, int channels){\ | 2440 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const
float **src, long len, int channels){\ |
| 2422 DECLARE_ALIGNED(16, int16_t, tmp)[len];\ | 2441 DECLARE_ALIGNED(16, int16_t, tmp)[len];\ |
| (...skipping 79 matching lines...) | |
| 2502 if(channels==6) | 2521 if(channels==6) |
| 2503 ff_float_to_int16_interleave6_3dn2(dst, src, len); | 2522 ff_float_to_int16_interleave6_3dn2(dst, src, len); |
| 2504 else | 2523 else |
| 2505 float_to_int16_interleave_3dnow(dst, src, len, channels); | 2524 float_to_int16_interleave_3dnow(dst, src, len, channels); |
| 2506 } | 2525 } |
| 2507 | 2526 |
| 2508 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); | 2527 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); |
| 2509 | 2528 |
| 2510 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | 2529 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) |
| 2511 { | 2530 { |
| 2512 mm_flags = mm_support(); | 2531 int mm_flags = av_get_cpu_flags(); |
| 2513 | 2532 |
| 2514 if (avctx->dsp_mask) { | 2533 if (avctx->dsp_mask) { |
| 2515 if (avctx->dsp_mask & FF_MM_FORCE) | 2534 if (avctx->dsp_mask & AV_CPU_FLAG_FORCE) |
| 2516 mm_flags |= (avctx->dsp_mask & 0xffff); | 2535 mm_flags |= (avctx->dsp_mask & 0xffff); |
| 2517 else | 2536 else |
| 2518 mm_flags &= ~(avctx->dsp_mask & 0xffff); | 2537 mm_flags &= ~(avctx->dsp_mask & 0xffff); |
| 2519 } | 2538 } |
| 2520 | 2539 |
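The dsp_mask handling above keeps its old semantics under the new AV_CPU_FLAG_* names: with the FORCE bit set the listed flags are force-enabled, otherwise they are cleared. A hypothetical caller-side sketch (not from the patch; API and field names as assumed for this revision):

    #include "libavutil/cpu.h"
    #include "libavcodec/avcodec.h"

    /* Disable MMX2-specific code paths for one codec context. */
    static void disable_mmx2(AVCodecContext *avctx)
    {
        /* No AV_CPU_FLAG_FORCE bit, so these flags are cleared in dsputil_init_mmx(). */
        avctx->dsp_mask = AV_CPU_FLAG_MMX2;
    }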
| 2521 #if 0 | 2540 #if 0 |
| 2522 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:"); | 2541 av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:"); |
| 2523 if (mm_flags & FF_MM_MMX) | 2542 if (mm_flags & AV_CPU_FLAG_MMX) |
| 2524 av_log(avctx, AV_LOG_INFO, " mmx"); | 2543 av_log(avctx, AV_LOG_INFO, " mmx"); |
| 2525 if (mm_flags & FF_MM_MMX2) | 2544 if (mm_flags & AV_CPU_FLAG_MMX2) |
| 2526 av_log(avctx, AV_LOG_INFO, " mmx2"); | 2545 av_log(avctx, AV_LOG_INFO, " mmx2"); |
| 2527 if (mm_flags & FF_MM_3DNOW) | 2546 if (mm_flags & AV_CPU_FLAG_3DNOW) |
| 2528 av_log(avctx, AV_LOG_INFO, " 3dnow"); | 2547 av_log(avctx, AV_LOG_INFO, " 3dnow"); |
| 2529 if (mm_flags & FF_MM_SSE) | 2548 if (mm_flags & AV_CPU_FLAG_SSE) |
| 2530 av_log(avctx, AV_LOG_INFO, " sse"); | 2549 av_log(avctx, AV_LOG_INFO, " sse"); |
| 2531 if (mm_flags & FF_MM_SSE2) | 2550 if (mm_flags & AV_CPU_FLAG_SSE2) |
| 2532 av_log(avctx, AV_LOG_INFO, " sse2"); | 2551 av_log(avctx, AV_LOG_INFO, " sse2"); |
| 2533 av_log(avctx, AV_LOG_INFO, "\n"); | 2552 av_log(avctx, AV_LOG_INFO, "\n"); |
| 2534 #endif | 2553 #endif |
| 2535 | 2554 |
| 2536 if (mm_flags & FF_MM_MMX) { | 2555 if (mm_flags & AV_CPU_FLAG_MMX) { |
| 2537 const int idct_algo= avctx->idct_algo; | 2556 const int idct_algo= avctx->idct_algo; |
| 2538 | 2557 |
| 2539 if(avctx->lowres==0){ | 2558 if(avctx->lowres==0){ |
| 2540 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ | 2559 if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ |
| 2541 c->idct_put= ff_simple_idct_put_mmx; | 2560 c->idct_put= ff_simple_idct_put_mmx; |
| 2542 c->idct_add= ff_simple_idct_add_mmx; | 2561 c->idct_add= ff_simple_idct_add_mmx; |
| 2543 c->idct = ff_simple_idct_mmx; | 2562 c->idct = ff_simple_idct_mmx; |
| 2544 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; | 2563 c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; |
| 2545 #if CONFIG_GPL | 2564 #if CONFIG_GPL |
| 2546 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ | 2565 }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ |
| 2547 if(mm_flags & FF_MM_MMX2){ | 2566 if(mm_flags & AV_CPU_FLAG_MMX2){ |
| 2548 c->idct_put= ff_libmpeg2mmx2_idct_put; | 2567 c->idct_put= ff_libmpeg2mmx2_idct_put; |
| 2549 c->idct_add= ff_libmpeg2mmx2_idct_add; | 2568 c->idct_add= ff_libmpeg2mmx2_idct_add; |
| 2550 c->idct = ff_mmxext_idct; | 2569 c->idct = ff_mmxext_idct; |
| 2551 }else{ | 2570 }else{ |
| 2552 c->idct_put= ff_libmpeg2mmx_idct_put; | 2571 c->idct_put= ff_libmpeg2mmx_idct_put; |
| 2553 c->idct_add= ff_libmpeg2mmx_idct_add; | 2572 c->idct_add= ff_libmpeg2mmx_idct_add; |
| 2554 c->idct = ff_mmx_idct; | 2573 c->idct = ff_mmx_idct; |
| 2555 } | 2574 } |
| 2556 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; | 2575 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; |
| 2557 #endif | 2576 #endif |
| 2558 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DEC
ODER) && | 2577 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DEC
ODER) && |
| 2559 idct_algo==FF_IDCT_VP3){ | 2578 idct_algo==FF_IDCT_VP3 && HAVE_YASM){ |
| 2560 if(mm_flags & FF_MM_SSE2){ | 2579 if(mm_flags & AV_CPU_FLAG_SSE2){ |
| 2561 c->idct_put= ff_vp3_idct_put_sse2; | 2580 c->idct_put= ff_vp3_idct_put_sse2; |
| 2562 c->idct_add= ff_vp3_idct_add_sse2; | 2581 c->idct_add= ff_vp3_idct_add_sse2; |
| 2563 c->idct = ff_vp3_idct_sse2; | 2582 c->idct = ff_vp3_idct_sse2; |
| 2564 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; | 2583 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; |
| 2565 }else{ | 2584 }else{ |
| 2566 c->idct_put= ff_vp3_idct_put_mmx; | 2585 c->idct_put= ff_vp3_idct_put_mmx; |
| 2567 c->idct_add= ff_vp3_idct_add_mmx; | 2586 c->idct_add= ff_vp3_idct_add_mmx; |
| 2568 c->idct = ff_vp3_idct_mmx; | 2587 c->idct = ff_vp3_idct_mmx; |
| 2569 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM; | 2588 c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM; |
| 2570 } | 2589 } |
| 2571 }else if(idct_algo==FF_IDCT_CAVS){ | 2590 }else if(idct_algo==FF_IDCT_CAVS){ |
| 2572 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; | 2591 c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM; |
| 2573 }else if(idct_algo==FF_IDCT_XVIDMMX){ | 2592 }else if(idct_algo==FF_IDCT_XVIDMMX){ |
| 2574 if(mm_flags & FF_MM_SSE2){ | 2593 if(mm_flags & AV_CPU_FLAG_SSE2){ |
| 2575 c->idct_put= ff_idct_xvid_sse2_put; | 2594 c->idct_put= ff_idct_xvid_sse2_put; |
| 2576 c->idct_add= ff_idct_xvid_sse2_add; | 2595 c->idct_add= ff_idct_xvid_sse2_add; |
| 2577 c->idct = ff_idct_xvid_sse2; | 2596 c->idct = ff_idct_xvid_sse2; |
| 2578 c->idct_permutation_type= FF_SSE2_IDCT_PERM; | 2597 c->idct_permutation_type= FF_SSE2_IDCT_PERM; |
| 2579 }else if(mm_flags & FF_MM_MMX2){ | 2598 }else if(mm_flags & AV_CPU_FLAG_MMX2){ |
| 2580 c->idct_put= ff_idct_xvid_mmx2_put; | 2599 c->idct_put= ff_idct_xvid_mmx2_put; |
| 2581 c->idct_add= ff_idct_xvid_mmx2_add; | 2600 c->idct_add= ff_idct_xvid_mmx2_add; |
| 2582 c->idct = ff_idct_xvid_mmx2; | 2601 c->idct = ff_idct_xvid_mmx2; |
| 2583 }else{ | 2602 }else{ |
| 2584 c->idct_put= ff_idct_xvid_mmx_put; | 2603 c->idct_put= ff_idct_xvid_mmx_put; |
| 2585 c->idct_add= ff_idct_xvid_mmx_add; | 2604 c->idct_add= ff_idct_xvid_mmx_add; |
| 2586 c->idct = ff_idct_xvid_mmx; | 2605 c->idct = ff_idct_xvid_mmx; |
| 2587 } | 2606 } |
| 2588 } | 2607 } |
| 2589 } | 2608 } |
| 2590 | 2609 |
| 2591 c->put_pixels_clamped = put_pixels_clamped_mmx; | 2610 c->put_pixels_clamped = ff_put_pixels_clamped_mmx; |
| 2592 c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx; | 2611 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; |
| 2593 c->add_pixels_clamped = add_pixels_clamped_mmx; | 2612 c->add_pixels_clamped = ff_add_pixels_clamped_mmx; |
| 2594 c->clear_block = clear_block_mmx; | 2613 c->clear_block = clear_block_mmx; |
| 2595 c->clear_blocks = clear_blocks_mmx; | 2614 c->clear_blocks = clear_blocks_mmx; |
| 2596 if ((mm_flags & FF_MM_SSE) && | 2615 if ((mm_flags & AV_CPU_FLAG_SSE) && |
| 2597 !(CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)){ | 2616 !(CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)){ |
| 2598 /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */ | 2617 /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */ |
| 2599 c->clear_block = clear_block_sse; | 2618 c->clear_block = clear_block_sse; |
| 2600 c->clear_blocks = clear_blocks_sse; | 2619 c->clear_blocks = clear_blocks_sse; |
| 2601 } | 2620 } |
| 2602 | 2621 |
| 2603 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ | 2622 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \ |
| 2604 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ | 2623 c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \ |
| 2605 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ | 2624 c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \ |
| 2606 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ | 2625 c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \ |
| (...skipping 12 matching lines...) | |
| 2619 | 2638 |
| 2620 c->add_bytes= add_bytes_mmx; | 2639 c->add_bytes= add_bytes_mmx; |
| 2621 c->add_bytes_l2= add_bytes_l2_mmx; | 2640 c->add_bytes_l2= add_bytes_l2_mmx; |
| 2622 | 2641 |
| 2623 c->draw_edges = draw_edges_mmx; | 2642 c->draw_edges = draw_edges_mmx; |
| 2624 | 2643 |
| 2625 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { | 2644 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) { |
| 2626 c->h263_v_loop_filter= h263_v_loop_filter_mmx; | 2645 c->h263_v_loop_filter= h263_v_loop_filter_mmx; |
| 2627 c->h263_h_loop_filter= h263_h_loop_filter_mmx; | 2646 c->h263_h_loop_filter= h263_h_loop_filter_mmx; |
| 2628 } | 2647 } |
| 2629 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_rnd; | |
| 2630 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx; | |
| 2631 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_mmx_nornd; | |
| 2632 | 2648 |
| 2633 c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx; | 2649 #if HAVE_YASM |
| 2634 c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx; | 2650 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd; |
| 2651 c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx; |
| 2652 c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd; |
| 2635 | 2653 |
| 2636 if (CONFIG_VP6_DECODER) { | 2654 c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx; |
| 2637 c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx; | 2655 c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx; |
| 2638 } | 2656 #endif |
| 2639 | 2657 |
| 2640 if (mm_flags & FF_MM_MMX2) { | 2658 if (mm_flags & AV_CPU_FLAG_MMX2) { |
| 2641 c->prefetch = prefetch_mmx2; | 2659 c->prefetch = prefetch_mmx2; |
| 2642 | 2660 |
| 2643 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; | 2661 c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; |
| 2644 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; | 2662 c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; |
| 2645 | 2663 |
| 2646 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; | 2664 c->avg_pixels_tab[0][0] = avg_pixels16_mmx2; |
| 2647 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; | 2665 c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; |
| 2648 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; | 2666 c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; |
| 2649 | 2667 |
| 2650 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; | 2668 c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; |
| 2651 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; | 2669 c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; |
| 2652 | 2670 |
| 2653 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; | 2671 c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; |
| 2654 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; | 2672 c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; |
| 2655 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; | 2673 c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; |
| 2656 | 2674 |
| 2657 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 2675 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
| 2658 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; | 2676 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; |
| 2659 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; | 2677 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; |
| 2660 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; | 2678 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; |
| 2661 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; | 2679 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; |
| 2662 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; | 2680 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; |
| 2663 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; | 2681 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; |
| 2664 | 2682 |
| 2665 if (CONFIG_VP3_DECODER) { | 2683 if (CONFIG_VP3_DECODER && HAVE_YASM) { |
| 2666 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2; | 2684 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2; |
| 2667 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_mmx2; | 2685 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_mmx2; |
| 2668 } | 2686 } |
| 2669 } | 2687 } |
| 2670 if (CONFIG_VP3_DECODER) { | 2688 if (CONFIG_VP3_DECODER && HAVE_YASM) { |
| 2671 c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2; | 2689 c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2; |
| 2672 } | 2690 } |
| 2673 | 2691 |
| 2674 if (CONFIG_VP3_DECODER | 2692 if (CONFIG_VP3_DECODER |
| 2675 && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_
ID_THEORA)) { | 2693 && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_
ID_THEORA)) { |
| 2676 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx
2; | 2694 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx
2; |
| 2677 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx
2; | 2695 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx
2; |
| 2678 } | 2696 } |
| 2679 | 2697 |
| 2680 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \ | 2698 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \ |
| (...skipping 26 matching lines...) | |
| 2707 SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2); | 2725 SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2); |
| 2708 SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2); | 2726 SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2); |
| 2709 SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2); | 2727 SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2); |
| 2710 SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2); | 2728 SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2); |
| 2711 | 2729 |
| 2712 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2); | 2730 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2); |
| 2713 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2); | 2731 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2); |
| 2714 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2); | 2732 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2); |
| 2715 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2); | 2733 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2); |
| 2716 | 2734 |
| 2717 c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_mmx2; | 2735 #if HAVE_YASM |
| 2718 c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_mmx2; | 2736 c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2; |
| 2737 c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2; |
| 2719 | 2738 |
| 2720 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_vc1_chroma_mc8_mmx2_norn
d; | 2739 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_n
ornd; |
| 2721 | 2740 |
| 2722 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2_rnd; | 2741 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd; |
| 2723 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2; | 2742 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2; |
| 2724 c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2; | 2743 c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2; |
| 2725 c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2; | 2744 c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2; |
| 2726 | 2745 |
| 2727 #if HAVE_YASM | |
| 2728 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; | 2746 c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2; |
| 2729 #endif | 2747 #endif |
| 2730 #if HAVE_7REGS && HAVE_TEN_OPERANDS | 2748 #if HAVE_7REGS && HAVE_TEN_OPERANDS |
| 2731 if( mm_flags&FF_MM_3DNOW ) | 2749 if( mm_flags&AV_CPU_FLAG_3DNOW ) |
| 2732 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; | 2750 c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov; |
| 2733 #endif | 2751 #endif |
| 2734 | 2752 |
| 2735 if (CONFIG_VC1_DECODER) | 2753 if (CONFIG_VC1_DECODER) |
| 2736 ff_vc1dsp_init_mmx(c, avctx); | 2754 ff_vc1dsp_init_mmx(c, avctx); |
| 2737 | 2755 |
| 2738 c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2; | 2756 c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2; |
| 2739 } else if (mm_flags & FF_MM_3DNOW) { | 2757 } else if (mm_flags & AV_CPU_FLAG_3DNOW) { |
| 2740 c->prefetch = prefetch_3dnow; | 2758 c->prefetch = prefetch_3dnow; |
| 2741 | 2759 |
| 2742 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; | 2760 c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; |
| 2743 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; | 2761 c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; |
| 2744 | 2762 |
| 2745 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; | 2763 c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; |
| 2746 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; | 2764 c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; |
| 2747 c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; | 2765 c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; |
| 2748 | 2766 |
| 2749 c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; | 2767 c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; |
| (...skipping 30 matching lines...) | |
| 2780 SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow); | 2798 SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow); |
| 2781 SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow); | 2799 SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow); |
| 2782 SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow); | 2800 SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow); |
| 2783 SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow); | 2801 SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow); |
| 2784 | 2802 |
| 2785 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow); | 2803 SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow); |
| 2786 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow); | 2804 SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow); |
| 2787 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow); | 2805 SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow); |
| 2788 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow); | 2806 SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow); |
| 2789 | 2807 |
| 2790 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow_rnd; | 2808 #if HAVE_YASM |
| 2791 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; | 2809 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd; |
| 2810 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow; |
| 2792 | 2811 |
| 2793 c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_3dnow; | 2812 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_
nornd; |
| 2794 c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_3dnow; | 2813 |
| 2814 c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow; |
| 2815 c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow; |
| 2816 #endif |
| 2795 } | 2817 } |
| 2796 | 2818 |
| 2797 | 2819 |
| 2798 #define H264_QPEL_FUNCS(x, y, CPU)\ | 2820 #define H264_QPEL_FUNCS(x, y, CPU)\ |
| 2799 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_#
#CPU;\ | 2821 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_#
#CPU;\ |
| 2800 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##
CPU;\ | 2822 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##
CPU;\ |
| 2801 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_#
#CPU;\ | 2823 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_#
#CPU;\ |
| 2802 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##
CPU; | 2824 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##
CPU; |
| 2803 if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){ | 2825 if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){ |
| 2804 // these functions are slower than mmx on AMD, but faster on Intel | 2826 // these functions are slower than mmx on AMD, but faster on Intel |
| 2805 c->put_pixels_tab[0][0] = put_pixels16_sse2; | 2827 c->put_pixels_tab[0][0] = put_pixels16_sse2; |
| 2806 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; | 2828 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; |
| 2807 H264_QPEL_FUNCS(0, 0, sse2); | 2829 H264_QPEL_FUNCS(0, 0, sse2); |
| 2808 } | 2830 } |
| 2809 if(mm_flags & FF_MM_SSE2){ | 2831 if(mm_flags & AV_CPU_FLAG_SSE2){ |
| 2810 H264_QPEL_FUNCS(0, 1, sse2); | 2832 H264_QPEL_FUNCS(0, 1, sse2); |
| 2811 H264_QPEL_FUNCS(0, 2, sse2); | 2833 H264_QPEL_FUNCS(0, 2, sse2); |
| 2812 H264_QPEL_FUNCS(0, 3, sse2); | 2834 H264_QPEL_FUNCS(0, 3, sse2); |
| 2813 H264_QPEL_FUNCS(1, 1, sse2); | 2835 H264_QPEL_FUNCS(1, 1, sse2); |
| 2814 H264_QPEL_FUNCS(1, 2, sse2); | 2836 H264_QPEL_FUNCS(1, 2, sse2); |
| 2815 H264_QPEL_FUNCS(1, 3, sse2); | 2837 H264_QPEL_FUNCS(1, 3, sse2); |
| 2816 H264_QPEL_FUNCS(2, 1, sse2); | 2838 H264_QPEL_FUNCS(2, 1, sse2); |
| 2817 H264_QPEL_FUNCS(2, 2, sse2); | 2839 H264_QPEL_FUNCS(2, 2, sse2); |
| 2818 H264_QPEL_FUNCS(2, 3, sse2); | 2840 H264_QPEL_FUNCS(2, 3, sse2); |
| 2819 H264_QPEL_FUNCS(3, 1, sse2); | 2841 H264_QPEL_FUNCS(3, 1, sse2); |
| 2820 H264_QPEL_FUNCS(3, 2, sse2); | 2842 H264_QPEL_FUNCS(3, 2, sse2); |
| 2821 H264_QPEL_FUNCS(3, 3, sse2); | 2843 H264_QPEL_FUNCS(3, 3, sse2); |
| 2822 | |
| 2823 if (CONFIG_VP6_DECODER) { | |
| 2824 c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2; | |
| 2825 } | |
| 2826 } | 2844 } |
| 2827 #if HAVE_SSSE3 | 2845 #if HAVE_SSSE3 |
| 2828 if(mm_flags & FF_MM_SSSE3){ | 2846 if(mm_flags & AV_CPU_FLAG_SSSE3){ |
| 2829 H264_QPEL_FUNCS(1, 0, ssse3); | 2847 H264_QPEL_FUNCS(1, 0, ssse3); |
| 2830 H264_QPEL_FUNCS(1, 1, ssse3); | 2848 H264_QPEL_FUNCS(1, 1, ssse3); |
| 2831 H264_QPEL_FUNCS(1, 2, ssse3); | 2849 H264_QPEL_FUNCS(1, 2, ssse3); |
| 2832 H264_QPEL_FUNCS(1, 3, ssse3); | 2850 H264_QPEL_FUNCS(1, 3, ssse3); |
| 2833 H264_QPEL_FUNCS(2, 0, ssse3); | 2851 H264_QPEL_FUNCS(2, 0, ssse3); |
| 2834 H264_QPEL_FUNCS(2, 1, ssse3); | 2852 H264_QPEL_FUNCS(2, 1, ssse3); |
| 2835 H264_QPEL_FUNCS(2, 2, ssse3); | 2853 H264_QPEL_FUNCS(2, 2, ssse3); |
| 2836 H264_QPEL_FUNCS(2, 3, ssse3); | 2854 H264_QPEL_FUNCS(2, 3, ssse3); |
| 2837 H264_QPEL_FUNCS(3, 0, ssse3); | 2855 H264_QPEL_FUNCS(3, 0, ssse3); |
| 2838 H264_QPEL_FUNCS(3, 1, ssse3); | 2856 H264_QPEL_FUNCS(3, 1, ssse3); |
| 2839 H264_QPEL_FUNCS(3, 2, ssse3); | 2857 H264_QPEL_FUNCS(3, 2, ssse3); |
| 2840 H264_QPEL_FUNCS(3, 3, ssse3); | 2858 H264_QPEL_FUNCS(3, 3, ssse3); |
| 2841 c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_vc1_chroma_mc8_ssse3_nor
nd; | |
| 2842 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_vc1_chroma_mc8_ssse3_nor
nd; | |
| 2843 c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_rnd; | |
| 2844 c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_ssse3_rnd; | |
| 2845 c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_ssse3; | |
| 2846 c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3; | |
| 2847 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; | 2859 c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3; |
| 2848 #if HAVE_YASM | 2860 #if HAVE_YASM |
| 2861 c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_
nornd; |
| 2862 c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_
nornd; |
| 2863 c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd; |
| 2864 c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd; |
| 2865 c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3; |
| 2866 c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3; |
| 2849 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; | 2867 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; |
| 2850 if (mm_flags & FF_MM_SSE4) // not really sse4, just slow on Conroe | 2868 if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Co
nroe |
| 2851 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; | 2869 c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; |
| 2852 #endif | 2870 #endif |
| 2853 } | 2871 } |
| 2854 #endif | 2872 #endif |
| 2855 | 2873 |
| 2856 if(mm_flags & FF_MM_3DNOW){ | 2874 if(mm_flags & AV_CPU_FLAG_3DNOW){ |
| 2857 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; | 2875 c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; |
| 2858 c->vector_fmul = vector_fmul_3dnow; | 2876 c->vector_fmul = vector_fmul_3dnow; |
| 2859 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 2877 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
| 2860 c->float_to_int16 = float_to_int16_3dnow; | 2878 c->float_to_int16 = float_to_int16_3dnow; |
| 2861 c->float_to_int16_interleave = float_to_int16_interleave_3dnow; | 2879 c->float_to_int16_interleave = float_to_int16_interleave_3dnow; |
| 2862 } | 2880 } |
| 2863 } | 2881 } |
| 2864 if(mm_flags & FF_MM_3DNOWEXT){ | 2882 if(mm_flags & AV_CPU_FLAG_3DNOWEXT){ |
| 2865 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; | 2883 c->vector_fmul_reverse = vector_fmul_reverse_3dnow2; |
| 2866 c->vector_fmul_window = vector_fmul_window_3dnow2; | 2884 c->vector_fmul_window = vector_fmul_window_3dnow2; |
| 2867 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 2885 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |
| 2868 c->float_to_int16_interleave = float_to_int16_interleave_3dn2; | 2886 c->float_to_int16_interleave = float_to_int16_interleave_3dn2; |
| 2869 } | 2887 } |
| 2870 } | 2888 } |
| 2871 if(mm_flags & FF_MM_MMX2){ | 2889 if(mm_flags & AV_CPU_FLAG_MMX2){ |
| 2872 #if HAVE_YASM | 2890 #if HAVE_YASM |
| 2873 c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2; | 2891 c->scalarproduct_int16 = ff_scalarproduct_int16_mmx2; |
| 2874 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2; | 2892 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmx2; |
| 2875 #endif | 2893 #endif |
| 2876 } | 2894 } |
| 2877 if(mm_flags & FF_MM_SSE){ | 2895 if(mm_flags & AV_CPU_FLAG_SSE){ |
| 2878 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; | 2896 c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; |
| 2879 c->ac3_downmix = ac3_downmix_sse; | 2897 c->ac3_downmix = ac3_downmix_sse; |
| 2880 c->vector_fmul = vector_fmul_sse; | 2898 c->vector_fmul = vector_fmul_sse; |
| 2881 c->vector_fmul_reverse = vector_fmul_reverse_sse; | 2899 c->vector_fmul_reverse = vector_fmul_reverse_sse; |
| 2882 c->vector_fmul_add = vector_fmul_add_sse; | 2900 c->vector_fmul_add = vector_fmul_add_sse; |
| 2883 c->vector_fmul_window = vector_fmul_window_sse; | 2901 c->vector_fmul_window = vector_fmul_window_sse; |
| 2884 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse; | 2902 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse; |
| 2885 c->vector_clipf = vector_clipf_sse; | 2903 c->vector_clipf = vector_clipf_sse; |
| 2886 c->float_to_int16 = float_to_int16_sse; | 2904 c->float_to_int16 = float_to_int16_sse; |
| 2887 c->float_to_int16_interleave = float_to_int16_interleave_sse; | 2905 c->float_to_int16_interleave = float_to_int16_interleave_sse; |
| 2888 #if HAVE_YASM | 2906 #if HAVE_YASM |
| 2889 c->scalarproduct_float = ff_scalarproduct_float_sse; | 2907 c->scalarproduct_float = ff_scalarproduct_float_sse; |
| 2890 #endif | 2908 #endif |
| 2891 } | 2909 } |
| 2892 if(mm_flags & FF_MM_3DNOW) | 2910 if(mm_flags & AV_CPU_FLAG_3DNOW) |
| 2893 c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse | 2911 c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse |
| 2894 if(mm_flags & FF_MM_SSE2){ | 2912 if(mm_flags & AV_CPU_FLAG_SSE2){ |
| 2895 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2; | 2913 c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2; |
| 2896 c->float_to_int16 = float_to_int16_sse2; | 2914 c->float_to_int16 = float_to_int16_sse2; |
| 2897 c->float_to_int16_interleave = float_to_int16_interleave_sse2; | 2915 c->float_to_int16_interleave = float_to_int16_interleave_sse2; |
| 2898 #if HAVE_YASM | 2916 #if HAVE_YASM |
| 2899 c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; | 2917 c->scalarproduct_int16 = ff_scalarproduct_int16_sse2; |
| 2900 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; | 2918 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2; |
| 2901 #endif | 2919 #endif |
| 2902 } | 2920 } |
| 2903 if((mm_flags & FF_MM_SSSE3) && !(mm_flags & (FF_MM_SSE42|FF_MM_3DNOW)) && HAVE_YASM) // cachesplit | 2921 if((mm_flags & AV_CPU_FLAG_SSSE3) && !(mm_flags & (AV_CPU_FLAG_SSE42|AV_CPU_FLAG_3DNOW)) && HAVE_YASM) // cachesplit |
| 2904 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; | 2922 c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; |
| 2905 } | 2923 } |
| 2906 | 2924 |
| 2907 if (CONFIG_ENCODERS) | 2925 if (CONFIG_ENCODERS) |
| 2908 dsputilenc_init_mmx(c, avctx); | 2926 dsputilenc_init_mmx(c, avctx); |
| 2909 | 2927 |
| 2910 #if 0 | 2928 #if 0 |
| 2911 // for speed testing | 2929 // for speed testing |
| 2912 get_pixels = just_return; | 2930 get_pixels = just_return; |
| 2913 put_pixels_clamped = just_return; | 2931 put_pixels_clamped = just_return; |
| (...skipping 21 matching lines...) |
| 2935 | 2953 |
| 2936 avg_no_rnd_pixels_tab[0] = just_return; | 2954 avg_no_rnd_pixels_tab[0] = just_return; |
| 2937 avg_no_rnd_pixels_tab[1] = just_return; | 2955 avg_no_rnd_pixels_tab[1] = just_return; |
| 2938 avg_no_rnd_pixels_tab[2] = just_return; | 2956 avg_no_rnd_pixels_tab[2] = just_return; |
| 2939 avg_no_rnd_pixels_tab[3] = just_return; | 2957 avg_no_rnd_pixels_tab[3] = just_return; |
| 2940 | 2958 |
| 2941 //av_fdct = just_return; | 2959 //av_fdct = just_return; |
| 2942 //ff_idct = just_return; | 2960 //ff_idct = just_return; |
| 2943 #endif | 2961 #endif |
| 2944 } | 2962 } |
| 2945 | |
| 2946 #if CONFIG_H264DSP | |
| 2947 void ff_h264dsp_init_x86(H264DSPContext *c) | |
| 2948 { | |
| 2949 mm_flags = mm_support(); | |
| 2950 | |
| 2951 if (mm_flags & FF_MM_MMX) { | |
| 2952 c->h264_idct_dc_add= | |
| 2953 c->h264_idct_add= ff_h264_idct_add_mmx; | |
| 2954 c->h264_idct8_dc_add= | |
| 2955 c->h264_idct8_add= ff_h264_idct8_add_mmx; | |
| 2956 | |
| 2957 c->h264_idct_add16 = ff_h264_idct_add16_mmx; | |
| 2958 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx; | |
| 2959 c->h264_idct_add8 = ff_h264_idct_add8_mmx; | |
| 2960 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx; | |
| 2961 | |
| 2962 if (mm_flags & FF_MM_MMX2) { | |
| 2963 c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2; | |
| 2964 c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2; | |
| 2965 c->h264_idct_add16 = ff_h264_idct_add16_mmx2; | |
| 2966 c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2; | |
| 2967 c->h264_idct_add8 = ff_h264_idct_add8_mmx2; | |
| 2968 c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2; | |
| 2969 | |
| 2970 c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2; | |
| 2971 c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2; | |
| 2972 c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2; | |
| 2973 c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2; | |
| 2974 c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2; | |
| 2975 c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2; | |
| 2976 c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2; | |
| 2977 | |
| 2978 c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2; | |
| 2979 c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2; | |
| 2980 c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2; | |
| 2981 c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2; | |
| 2982 c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2; | |
| 2983 c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2; | |
| 2984 c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2; | |
| 2985 c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2; | |
| 2986 | |
| 2987 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2; | |
| 2988 c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2; | |
| 2989 c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2; | |
| 2990 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2; | |
| 2991 c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2; | |
| 2992 c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2; | |
| 2993 c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2; | |
| 2994 c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; | |
| 2995 } | |
| 2996 if(mm_flags & FF_MM_SSE2){ | |
| 2997 c->h264_idct8_add = ff_h264_idct8_add_sse2; | |
| 2998 c->h264_idct8_add4= ff_h264_idct8_add4_sse2; | |
| 2999 } | |
| 3000 | |
| 3001 #if HAVE_YASM | |
| 3002 if (mm_flags & FF_MM_MMX2){ | |
| 3003 #if ARCH_X86_32 | |
| 3004 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext; | |
| 3005 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext; | |
| 3006 #endif | |
| 3007 if( mm_flags&FF_MM_SSE2 ){ | |
| 3008 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_sse2; | |
| 3009 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_sse2; | |
| 3010 #if ARCH_X86_64 || !defined(__ICC) || __ICC > 1110 | |
| 3011 c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2; | |
| 3012 c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2; | |
| 3013 c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2; | |
| 3014 c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2; | |
| 3015 #endif | |
| 3016 #if CONFIG_GPL | |
| 3017 c->h264_idct_add16 = ff_h264_idct_add16_sse2; | |
| 3018 c->h264_idct_add8 = ff_h264_idct_add8_sse2; | |
| 3019 c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2; | |
| 3020 #endif | |
| 3021 } | |
| 3022 if ( mm_flags&FF_MM_SSSE3 ){ | |
| 3023 c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3; | |
| 3024 c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_ssse3; | |
| 3025 } | |
| 3026 } | |
| 3027 #endif | |
| 3028 } | |
| 3029 } | |
| 3030 #endif /* CONFIG_H264DSP */ | |
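Note on the dispatch pattern: the new column of this hunk replaces the old FF_MM_* masks with the AV_CPU_FLAG_* names and drops the exported mm_flags global, so each init function tests the CPU capabilities it reads from libavutil and fills the context's function pointers accordingly. Below is a minimal sketch of that pattern, not code from this patch: only av_get_cpu_flags() and the AV_CPU_FLAG_* masks are assumed from libavutil; MyDSPContext, my_dsp_init and the vector_fmul_* names are hypothetical placeholders.

    /* Minimal sketch (not FFmpeg code): pick a SIMD implementation at
     * runtime from the CPU flags, mirroring the pattern in this patch. */
    #include "libavutil/cpu.h"

    typedef struct MyDSPContext {
        void (*vector_fmul)(float *dst, const float *src, int len);
    } MyDSPContext;

    static void vector_fmul_c(float *dst, const float *src, int len)
    {
        int i;
        for (i = 0; i < len; i++)
            dst[i] *= src[i];                /* portable C fallback */
    }

    void my_dsp_init(MyDSPContext *c)
    {
        int mm_flags = av_get_cpu_flags();   /* local, no exported global */

        c->vector_fmul = vector_fmul_c;      /* always start from the C version */
        if (mm_flags & AV_CPU_FLAG_SSE) {
            /* an SSE version would be assigned here, e.g.
             * c->vector_fmul = vector_fmul_sse; (defined elsewhere) */
        }
    }

The key point of the migration is that callers no longer read a shared mm_flags variable; each init path queries av_get_cpu_flags() itself and overrides the C fallbacks only for the features the host CPU reports.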