Chromium Code Reviews

Side by Side Diff: patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.c

Issue 789004: ffmpeg roll of source to mar 9 version... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/
Patch Set: '' Created 10 years, 9 months ago
1 /* 1 /*
2 * MMX optimized DSP utils 2 * MMX optimized DSP utils
3 * Copyright (c) 2000, 2001 Fabrice Bellard 3 * Copyright (c) 2000, 2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5 * 5 *
6 * This file is part of FFmpeg. 6 * This file is part of FFmpeg.
7 * 7 *
8 * FFmpeg is free software; you can redistribute it and/or 8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public 9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either 10 * License as published by the Free Software Foundation; either
(...skipping 21 matching lines...)
32 #include "vp6dsp_mmx.h" 32 #include "vp6dsp_mmx.h"
33 #include "vp6dsp_sse2.h" 33 #include "vp6dsp_sse2.h"
34 #include "idct_xvid.h" 34 #include "idct_xvid.h"
35 35
36 //#undef NDEBUG 36 //#undef NDEBUG
37 //#include <assert.h> 37 //#include <assert.h>
38 38
39 int mm_flags; /* multimedia extension flags */ 39 int mm_flags; /* multimedia extension flags */
40 40
41 /* pixel operations */ 41 /* pixel operations */
42 DECLARE_ALIGNED_8 (const uint64_t, ff_bone) = 0x0101010101010101ULL; 42 DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL;
43 DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo) = 0x0002000200020002ULL; 43 DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
44 44
45 DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000)[2] = 45 DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =
46 {0x8000000080000000ULL, 0x8000000080000000ULL}; 46 {0x8000000080000000ULL, 0x8000000080000000ULL};
47 47
48 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL; 48 DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
49 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL; 49 DECLARE_ALIGNED(8, const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL;
50 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL}; 50 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
51 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL}; 51 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
52 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL; 52 DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
53 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL}; 53 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
54 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL; 54 DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
55 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL}; 55 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
56 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL}; 56 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
57 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL; 57 DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
58 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL}; 58 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};
59 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL; 59 DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
60 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_128) = 0x0080008000800080ULL; 60 DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
61 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL; 61 DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
62 62
63 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL; 63 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;
64 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL; 64 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;
65 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL; 65 DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
66 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL; 66 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
67 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL; 67 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
68 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL; 68 DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
69 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL; 69 DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
70 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL; 70 DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
71 71
72 DECLARE_ALIGNED_16(const double, ff_pd_1)[2] = { 1.0, 1.0 }; 72 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
73 DECLARE_ALIGNED_16(const double, ff_pd_2)[2] = { 2.0, 2.0 }; 73 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
74 74
75 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::) 75 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
76 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::) 76 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
77 77
78 #define MOVQ_BFE(regd) \ 78 #define MOVQ_BFE(regd) \
79 __asm__ volatile ( \ 79 __asm__ volatile ( \
80 "pcmpeqd %%" #regd ", %%" #regd " \n\t"\ 80 "pcmpeqd %%" #regd ", %%" #regd " \n\t"\
81 "paddb %%" #regd ", %%" #regd " \n\t" ::) 81 "paddb %%" #regd ", %%" #regd " \n\t" ::)
82 82
83 #ifndef PIC 83 #ifndef PIC
(...skipping 1732 matching lines...)
1816 }while(--h);\ 1816 }while(--h);\
1817 } 1817 }
1818 PREFETCH(prefetch_mmx2, prefetcht0) 1818 PREFETCH(prefetch_mmx2, prefetcht0)
1819 PREFETCH(prefetch_3dnow, prefetch) 1819 PREFETCH(prefetch_3dnow, prefetch)
1820 #undef PREFETCH 1820 #undef PREFETCH
1821 1821
1822 #include "h264dsp_mmx.c" 1822 #include "h264dsp_mmx.c"
1823 #include "rv40dsp_mmx.c" 1823 #include "rv40dsp_mmx.c"
1824 1824
1825 /* CAVS specific */ 1825 /* CAVS specific */
1826 void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);
1827 void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx);
1828
1829 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { 1826 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
1830 put_pixels8_mmx(dst, src, stride, 8); 1827 put_pixels8_mmx(dst, src, stride, 8);
1831 } 1828 }
1832 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { 1829 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
1833 avg_pixels8_mmx(dst, src, stride, 8); 1830 avg_pixels8_mmx(dst, src, stride, 8);
1834 } 1831 }
1835 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { 1832 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
1836 put_pixels16_mmx(dst, src, stride, 16); 1833 put_pixels16_mmx(dst, src, stride, 16);
1837 } 1834 }
1838 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { 1835 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
1839 avg_pixels16_mmx(dst, src, stride, 16); 1836 avg_pixels16_mmx(dst, src, stride, 16);
1840 } 1837 }
1841 1838
1842 /* VC1 specific */ 1839 /* VC1 specific */
1843 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
1844
1845 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { 1840 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
1846 put_pixels8_mmx(dst, src, stride, 8); 1841 put_pixels8_mmx(dst, src, stride, 8);
1847 } 1842 }
1848 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { 1843 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
1849 avg_pixels8_mmx2(dst, src, stride, 8); 1844 avg_pixels8_mmx2(dst, src, stride, 8);
1850 } 1845 }
1851 1846
1852 /* external functions, from idct_mmx.c */
1853 void ff_mmx_idct(DCTELEM *block);
1854 void ff_mmxext_idct(DCTELEM *block);
1855
1856 /* XXX: those functions should be suppressed ASAP when all IDCTs are 1847 /* XXX: those functions should be suppressed ASAP when all IDCTs are
1857 converted */ 1848 converted */
1858 #if CONFIG_GPL 1849 #if CONFIG_GPL
1859 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) 1850 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
1860 { 1851 {
1861 ff_mmx_idct (block); 1852 ff_mmx_idct (block);
1862 put_pixels_clamped_mmx(block, dest, line_size); 1853 put_pixels_clamped_mmx(block, dest, line_size);
1863 } 1854 }
1864 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block) 1855 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
1865 { 1856 {
(...skipping 158 matching lines...)
2024 { 2015 {
2025 int (*matrix_cmp)[2] = (int(*)[2])matrix; 2016 int (*matrix_cmp)[2] = (int(*)[2])matrix;
2026 intptr_t i,j,k; 2017 intptr_t i,j,k;
2027 2018
2028 i = -len*sizeof(float); 2019 i = -len*sizeof(float);
2029 if(in_ch == 5 && out_ch == 2 && !(matrix_cmp[0][1]|matrix_cmp[2][0]|matrix_cmp[3][1]|matrix_cmp[4][0]|(matrix_cmp[1][0]^matrix_cmp[1][1])|(matrix_cmp[0][0]^matrix_cmp[2][1]))) { 2020 if(in_ch == 5 && out_ch == 2 && !(matrix_cmp[0][1]|matrix_cmp[2][0]|matrix_cmp[3][1]|matrix_cmp[4][0]|(matrix_cmp[1][0]^matrix_cmp[1][1])|(matrix_cmp[0][0]^matrix_cmp[2][1]))) {
2030 MIX5(IF0,IF1); 2021 MIX5(IF0,IF1);
2031 } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) { 2022 } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) {
2032 MIX5(IF1,IF0); 2023 MIX5(IF1,IF0);
2033 } else { 2024 } else {
2034 DECLARE_ALIGNED_16(float, matrix_simd)[in_ch][2][4]; 2025 DECLARE_ALIGNED(16, float, matrix_simd)[in_ch][2][4];
2035 j = 2*in_ch*sizeof(float); 2026 j = 2*in_ch*sizeof(float);
2036 __asm__ volatile( 2027 __asm__ volatile(
2037 "1: \n" 2028 "1: \n"
2038 "sub $8, %0 \n" 2029 "sub $8, %0 \n"
2039 "movss (%2,%0), %%xmm6 \n" 2030 "movss (%2,%0), %%xmm6 \n"
2040 "movss 4(%2,%0), %%xmm7 \n" 2031 "movss 4(%2,%0), %%xmm7 \n"
2041 "shufps $0, %%xmm6, %%xmm6 \n" 2032 "shufps $0, %%xmm6, %%xmm6 \n"
2042 "shufps $0, %%xmm7, %%xmm7 \n" 2033 "shufps $0, %%xmm7, %%xmm7 \n"
2043 "movaps %%xmm6, (%1,%0,4) \n" 2034 "movaps %%xmm6, (%1,%0,4) \n"
2044 "movaps %%xmm7, 16(%1,%0,4) \n" 2035 "movaps %%xmm7, 16(%1,%0,4) \n"
(...skipping 366 matching lines...)
2411 #elif !HAVE_YASM 2402 #elif !HAVE_YASM
2412 #define ff_float_to_int16_interleave6_sse(a,b,c)   float_to_int16_interleave_misc_sse(a,b,c,6) 2403 #define ff_float_to_int16_interleave6_sse(a,b,c)   float_to_int16_interleave_misc_sse(a,b,c,6)
2413 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) 2404 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
2414 #define ff_float_to_int16_interleave6_3dn2(a,b,c)  float_to_int16_interleave_misc_3dnow(a,b,c,6) 2405 #define ff_float_to_int16_interleave6_3dn2(a,b,c)  float_to_int16_interleave_misc_3dnow(a,b,c,6)
2415 #endif 2406 #endif
2416 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse 2407 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
2417 2408
2418 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \ 2409 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
2419 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\ 2410 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
2420 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\ 2411 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
2421 DECLARE_ALIGNED_16(int16_t, tmp)[len];\ 2412 DECLARE_ALIGNED(16, int16_t, tmp)[len];\
2422 int i,j,c;\ 2413 int i,j,c;\
2423 for(c=0; c<channels; c++){\ 2414 for(c=0; c<channels; c++){\
2424 float_to_int16_##cpu(tmp, src[c], len);\ 2415 float_to_int16_##cpu(tmp, src[c], len);\
2425 for(i=0, j=c; i<len; i++, j+=channels)\ 2416 for(i=0, j=c; i<len; i++, j+=channels)\
2426 dst[j] = tmp[i];\ 2417 dst[j] = tmp[i];\
2427 }\ 2418 }\
2428 }\ 2419 }\
2429 \ 2420 \
2430 static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\ 2421 static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
2431 if(channels==1)\ 2422 if(channels==1)\
(...skipping 65 matching lines...)
2497 "js 1b \n" 2488 "js 1b \n"
2498 ) 2489 )
2499 2490
2500 static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){ 2491 static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){
2501 if(channels==6) 2492 if(channels==6)
2502 ff_float_to_int16_interleave6_3dn2(dst, src, len); 2493 ff_float_to_int16_interleave6_3dn2(dst, src, len);
2503 else 2494 else
2504 float_to_int16_interleave_3dnow(dst, src, len, channels); 2495 float_to_int16_interleave_3dnow(dst, src, len, channels);
2505 } 2496 }
2506 2497
2507
2508 void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width);
2509 void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width);
2510 void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
2511 void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
2512 void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2513 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
2514 void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2515 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
2516
2517
2518 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); 2498 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
2519 2499
2520 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) 2500 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2521 { 2501 {
2522 mm_flags = mm_support(); 2502 mm_flags = mm_support();
2523 2503
2524 if (avctx->dsp_mask) { 2504 if (avctx->dsp_mask) {
2525 if (avctx->dsp_mask & FF_MM_FORCE) 2505 if (avctx->dsp_mask & FF_MM_FORCE)
2526 mm_flags |= (avctx->dsp_mask & 0xffff); 2506 mm_flags |= (avctx->dsp_mask & 0xffff);
2527 else 2507 else
(...skipping 310 matching lines...)
2838 } 2818 }
2839 2819
2840 2820
2841 #define H264_QPEL_FUNCS(x, y, CPU)\ 2821 #define H264_QPEL_FUNCS(x, y, CPU)\
2842 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\ 2822 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\
2843 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\ 2823 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\
2844 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\ 2824 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\
2845 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU; 2825 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
2846 if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){ 2826 if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){
2847 // these functions are slower than mmx on AMD, but faster on Intel 2827 // these functions are slower than mmx on AMD, but faster on Intel
2848 /* FIXME works in most codecs, but crashes svq1 due to unaligned chroma
2849 c->put_pixels_tab[0][0] = put_pixels16_sse2; 2828 c->put_pixels_tab[0][0] = put_pixels16_sse2;
2850 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; 2829 c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
2851 */
2852 H264_QPEL_FUNCS(0, 0, sse2); 2830 H264_QPEL_FUNCS(0, 0, sse2);
2853 } 2831 }
2854 if(mm_flags & FF_MM_SSE2){ 2832 if(mm_flags & FF_MM_SSE2){
2855 c->h264_idct8_add = ff_h264_idct8_add_sse2; 2833 c->h264_idct8_add = ff_h264_idct8_add_sse2;
2856 c->h264_idct8_add4= ff_h264_idct8_add4_sse2; 2834 c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
2857 2835
2858 H264_QPEL_FUNCS(0, 1, sse2); 2836 H264_QPEL_FUNCS(0, 1, sse2);
2859 H264_QPEL_FUNCS(0, 2, sse2); 2837 H264_QPEL_FUNCS(0, 2, sse2);
2860 H264_QPEL_FUNCS(0, 3, sse2); 2838 H264_QPEL_FUNCS(0, 3, sse2);
2861 H264_QPEL_FUNCS(1, 1, sse2); 2839 H264_QPEL_FUNCS(1, 1, sse2);
(...skipping 160 matching lines...)
3022 3000
3023 avg_no_rnd_pixels_tab[0] = just_return; 3001 avg_no_rnd_pixels_tab[0] = just_return;
3024 avg_no_rnd_pixels_tab[1] = just_return; 3002 avg_no_rnd_pixels_tab[1] = just_return;
3025 avg_no_rnd_pixels_tab[2] = just_return; 3003 avg_no_rnd_pixels_tab[2] = just_return;
3026 avg_no_rnd_pixels_tab[3] = just_return; 3004 avg_no_rnd_pixels_tab[3] = just_return;
3027 3005
3028 //av_fdct = just_return; 3006 //av_fdct = just_return;
3029 //ff_idct = just_return; 3007 //ff_idct = just_return;
3030 #endif 3008 #endif
3031 } 3009 }
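
Most of this hunk is a mechanical rename from the old DECLARE_ALIGNED_8 / DECLARE_ALIGNED_16 helpers to the unified DECLARE_ALIGNED(n, type, name) macro, which takes the alignment as a parameter instead of baking it into the macro name. As a rough reference, a minimal sketch of what that form typically expands to on a GCC-style toolchain is shown below; the actual definition lives in libavutil/mem.h and carries separate MSVC/ICC variants, and the constant name used here is illustrative only, not one from this file.

/* Hedged sketch, not the upstream definition: approximate GCC expansion of
 * FFmpeg's DECLARE_ALIGNED(n, t, v). libavutil/mem.h picks a per-compiler form. */
#include <stdint.h>

#define DECLARE_ALIGNED(n, t, v) t __attribute__ ((aligned (n))) v

/* Illustrative use, mirroring the packed-word constants above: an 8-byte-aligned
 * uint64_t holding four 16-bit lanes of the value 20 (hypothetical name). */
DECLARE_ALIGNED(8, const uint64_t, ff_pw_20_example) = 0x0014001400140014ULL;

Parameterizing the alignment this way is what lets the 16-byte constants (ff_pw_5, ff_pw_16, and so on) and the 8-byte ones share a single macro in the patched lines.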