| OLD | NEW |
| 1 /* | 1 /* |
| 2 * MMX optimized DSP utils | 2 * MMX optimized DSP utils |
| 3 * Copyright (c) 2000, 2001 Fabrice Bellard | 3 * Copyright (c) 2000, 2001 Fabrice Bellard |
| 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
| 5 * | 5 * |
| 6 * This file is part of FFmpeg. | 6 * This file is part of FFmpeg. |
| 7 * | 7 * |
| 8 * FFmpeg is free software; you can redistribute it and/or | 8 * FFmpeg is free software; you can redistribute it and/or |
| 9 * modify it under the terms of the GNU Lesser General Public | 9 * modify it under the terms of the GNU Lesser General Public |
| 10 * License as published by the Free Software Foundation; either | 10 * License as published by the Free Software Foundation; either |
| (...skipping 21 matching lines...) Expand all Loading... |
| 32 #include "vp6dsp_mmx.h" | 32 #include "vp6dsp_mmx.h" |
| 33 #include "vp6dsp_sse2.h" | 33 #include "vp6dsp_sse2.h" |
| 34 #include "idct_xvid.h" | 34 #include "idct_xvid.h" |
| 35 | 35 |
| 36 //#undef NDEBUG | 36 //#undef NDEBUG |
| 37 //#include <assert.h> | 37 //#include <assert.h> |
| 38 | 38 |
| 39 int mm_flags; /* multimedia extension flags */ | 39 int mm_flags; /* multimedia extension flags */ |
| 40 | 40 |
| 41 /* pixel operations */ | 41 /* pixel operations */ |
| 42 DECLARE_ALIGNED_8 (const uint64_t, ff_bone) = 0x0101010101010101ULL; | 42 DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL; |
| 43 DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo) = 0x0002000200020002ULL; | 43 DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL; |
| 44 | 44 |
| 45 DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000)[2] = | 45 DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] = |
| 46 {0x8000000080000000ULL, 0x8000000080000000ULL}; | 46 {0x8000000080000000ULL, 0x8000000080000000ULL}; |
| 47 | 47 |
| 48 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL; | 48 DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL; |
| 49 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL; | 49 DECLARE_ALIGNED(8, const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL; |
| 50 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x000500
0500050005ULL}; | 50 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x00050
00500050005ULL}; |
| 51 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x000800
0800080008ULL}; | 51 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x00080
00800080008ULL}; |
| 52 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL; | 52 DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL; |
| 53 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x001000
1000100010ULL}; | 53 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x00100
01000100010ULL}; |
| 54 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL; | 54 DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL; |
| 55 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C00
1C001C001CULL}; | 55 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C0
01C001C001CULL}; |
| 56 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x002000
2000200020ULL}; | 56 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x00200
02000200020ULL}; |
| 57 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL; | 57 DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL; |
| 58 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x004000
4000400040ULL}; | 58 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x00400
04000400040ULL}; |
| 59 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL; | 59 DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL; |
| 60 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_128) = 0x0080008000800080ULL; | 60 DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL; |
| 61 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL; | 61 DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL; |
| 62 | 62 |
| 63 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL; | 63 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL; |
| 64 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL; | 64 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL; |
| 65 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL; | 65 DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL; |
| 66 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL; | 66 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL; |
| 67 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL; | 67 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL; |
| 68 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL; | 68 DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL; |
| 69 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL; | 69 DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL; |
| 70 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL; | 70 DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL; |
| 71 | 71 |
| 72 DECLARE_ALIGNED_16(const double, ff_pd_1)[2] = { 1.0, 1.0 }; | 72 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 }; |
| 73 DECLARE_ALIGNED_16(const double, ff_pd_2)[2] = { 2.0, 2.0 }; | 73 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; |
| 74 | 74 |
| 75 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::) | 75 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::) |
| 76 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::) | 76 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::) |
| 77 | 77 |
| 78 #define MOVQ_BFE(regd) \ | 78 #define MOVQ_BFE(regd) \ |
| 79 __asm__ volatile ( \ | 79 __asm__ volatile ( \ |
| 80 "pcmpeqd %%" #regd ", %%" #regd " \n\t"\ | 80 "pcmpeqd %%" #regd ", %%" #regd " \n\t"\ |
| 81 "paddb %%" #regd ", %%" #regd " \n\t" ::) | 81 "paddb %%" #regd ", %%" #regd " \n\t" ::) |
| 82 | 82 |
| 83 #ifndef PIC | 83 #ifndef PIC |
| (...skipping 1732 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1816 }while(--h);\ | 1816 }while(--h);\ |
| 1817 } | 1817 } |
| 1818 PREFETCH(prefetch_mmx2, prefetcht0) | 1818 PREFETCH(prefetch_mmx2, prefetcht0) |
| 1819 PREFETCH(prefetch_3dnow, prefetch) | 1819 PREFETCH(prefetch_3dnow, prefetch) |
| 1820 #undef PREFETCH | 1820 #undef PREFETCH |
| 1821 | 1821 |
| 1822 #include "h264dsp_mmx.c" | 1822 #include "h264dsp_mmx.c" |
| 1823 #include "rv40dsp_mmx.c" | 1823 #include "rv40dsp_mmx.c" |
| 1824 | 1824 |
| 1825 /* CAVS specific */ | 1825 /* CAVS specific */ |
| 1826 void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx); | |
| 1827 void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx); | |
| 1828 | |
| 1829 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { | 1826 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { |
| 1830 put_pixels8_mmx(dst, src, stride, 8); | 1827 put_pixels8_mmx(dst, src, stride, 8); |
| 1831 } | 1828 } |
| 1832 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { | 1829 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { |
| 1833 avg_pixels8_mmx(dst, src, stride, 8); | 1830 avg_pixels8_mmx(dst, src, stride, 8); |
| 1834 } | 1831 } |
| 1835 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { | 1832 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { |
| 1836 put_pixels16_mmx(dst, src, stride, 16); | 1833 put_pixels16_mmx(dst, src, stride, 16); |
| 1837 } | 1834 } |
| 1838 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { | 1835 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { |
| 1839 avg_pixels16_mmx(dst, src, stride, 16); | 1836 avg_pixels16_mmx(dst, src, stride, 16); |
| 1840 } | 1837 } |
| 1841 | 1838 |
| 1842 /* VC1 specific */ | 1839 /* VC1 specific */ |
| 1843 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx); | |
| 1844 | |
| 1845 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int
rnd) { | 1840 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int
rnd) { |
| 1846 put_pixels8_mmx(dst, src, stride, 8); | 1841 put_pixels8_mmx(dst, src, stride, 8); |
| 1847 } | 1842 } |
| 1848 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, in
t rnd) { | 1843 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, in
t rnd) { |
| 1849 avg_pixels8_mmx2(dst, src, stride, 8); | 1844 avg_pixels8_mmx2(dst, src, stride, 8); |
| 1850 } | 1845 } |
| 1851 | 1846 |
| 1852 /* external functions, from idct_mmx.c */ | |
| 1853 void ff_mmx_idct(DCTELEM *block); | |
| 1854 void ff_mmxext_idct(DCTELEM *block); | |
| 1855 | |
| 1856 /* XXX: those functions should be suppressed ASAP when all IDCTs are | 1847 /* XXX: those functions should be suppressed ASAP when all IDCTs are |
| 1857 converted */ | 1848 converted */ |
| 1858 #if CONFIG_GPL | 1849 #if CONFIG_GPL |
| 1859 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block
) | 1850 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block
) |
| 1860 { | 1851 { |
| 1861 ff_mmx_idct (block); | 1852 ff_mmx_idct (block); |
| 1862 put_pixels_clamped_mmx(block, dest, line_size); | 1853 put_pixels_clamped_mmx(block, dest, line_size); |
| 1863 } | 1854 } |
| 1864 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block
) | 1855 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block
) |
| 1865 { | 1856 { |
| (...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2024 { | 2015 { |
| 2025 int (*matrix_cmp)[2] = (int(*)[2])matrix; | 2016 int (*matrix_cmp)[2] = (int(*)[2])matrix; |
| 2026 intptr_t i,j,k; | 2017 intptr_t i,j,k; |
| 2027 | 2018 |
| 2028 i = -len*sizeof(float); | 2019 i = -len*sizeof(float); |
| 2029 if(in_ch == 5 && out_ch == 2 && !(matrix_cmp[0][1]|matrix_cmp[2][0]|matrix_c
mp[3][1]|matrix_cmp[4][0]|(matrix_cmp[1][0]^matrix_cmp[1][1])|(matrix_cmp[0][0]^
matrix_cmp[2][1]))) { | 2020 if(in_ch == 5 && out_ch == 2 && !(matrix_cmp[0][1]|matrix_cmp[2][0]|matrix_c
mp[3][1]|matrix_cmp[4][0]|(matrix_cmp[1][0]^matrix_cmp[1][1])|(matrix_cmp[0][0]^
matrix_cmp[2][1]))) { |
| 2030 MIX5(IF0,IF1); | 2021 MIX5(IF0,IF1); |
| 2031 } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] &&
matrix_cmp[3][0]==matrix_cmp[4][0]) { | 2022 } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] &&
matrix_cmp[3][0]==matrix_cmp[4][0]) { |
| 2032 MIX5(IF1,IF0); | 2023 MIX5(IF1,IF0); |
| 2033 } else { | 2024 } else { |
| 2034 DECLARE_ALIGNED_16(float, matrix_simd)[in_ch][2][4]; | 2025 DECLARE_ALIGNED(16, float, matrix_simd)[in_ch][2][4]; |
| 2035 j = 2*in_ch*sizeof(float); | 2026 j = 2*in_ch*sizeof(float); |
| 2036 __asm__ volatile( | 2027 __asm__ volatile( |
| 2037 "1: \n" | 2028 "1: \n" |
| 2038 "sub $8, %0 \n" | 2029 "sub $8, %0 \n" |
| 2039 "movss (%2,%0), %%xmm6 \n" | 2030 "movss (%2,%0), %%xmm6 \n" |
| 2040 "movss 4(%2,%0), %%xmm7 \n" | 2031 "movss 4(%2,%0), %%xmm7 \n" |
| 2041 "shufps $0, %%xmm6, %%xmm6 \n" | 2032 "shufps $0, %%xmm6, %%xmm6 \n" |
| 2042 "shufps $0, %%xmm7, %%xmm7 \n" | 2033 "shufps $0, %%xmm7, %%xmm7 \n" |
| 2043 "movaps %%xmm6, (%1,%0,4) \n" | 2034 "movaps %%xmm6, (%1,%0,4) \n" |
| 2044 "movaps %%xmm7, 16(%1,%0,4) \n" | 2035 "movaps %%xmm7, 16(%1,%0,4) \n" |
| (...skipping 366 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2411 #elif !HAVE_YASM | 2402 #elif !HAVE_YASM |
| 2412 #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_mis
c_sse(a,b,c,6) | 2403 #define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_mis
c_sse(a,b,c,6) |
| 2413 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_mis
c_3dnow(a,b,c,6) | 2404 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_mis
c_3dnow(a,b,c,6) |
| 2414 #define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_mis
c_3dnow(a,b,c,6) | 2405 #define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_mis
c_3dnow(a,b,c,6) |
| 2415 #endif | 2406 #endif |
| 2416 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse | 2407 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse |
| 2417 | 2408 |
| 2418 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \ | 2409 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \ |
| 2419 /* gcc pessimizes register allocation if this is in the same function as float_t
o_int16_interleave_sse2*/\ | 2410 /* gcc pessimizes register allocation if this is in the same function as float_t
o_int16_interleave_sse2*/\ |
| 2420 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const
float **src, long len, int channels){\ | 2411 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const
float **src, long len, int channels){\ |
| 2421 DECLARE_ALIGNED_16(int16_t, tmp)[len];\ | 2412 DECLARE_ALIGNED(16, int16_t, tmp)[len];\ |
| 2422 int i,j,c;\ | 2413 int i,j,c;\ |
| 2423 for(c=0; c<channels; c++){\ | 2414 for(c=0; c<channels; c++){\ |
| 2424 float_to_int16_##cpu(tmp, src[c], len);\ | 2415 float_to_int16_##cpu(tmp, src[c], len);\ |
| 2425 for(i=0, j=c; i<len; i++, j+=channels)\ | 2416 for(i=0, j=c; i<len; i++, j+=channels)\ |
| 2426 dst[j] = tmp[i];\ | 2417 dst[j] = tmp[i];\ |
| 2427 }\ | 2418 }\ |
| 2428 }\ | 2419 }\ |
| 2429 \ | 2420 \ |
| 2430 static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, lon
g len, int channels){\ | 2421 static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, lon
g len, int channels){\ |
| 2431 if(channels==1)\ | 2422 if(channels==1)\ |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2497 "js 1b \n" | 2488 "js 1b \n" |
| 2498 ) | 2489 ) |
| 2499 | 2490 |
| 2500 static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long
len, int channels){ | 2491 static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long
len, int channels){ |
| 2501 if(channels==6) | 2492 if(channels==6) |
| 2502 ff_float_to_int16_interleave6_3dn2(dst, src, len); | 2493 ff_float_to_int16_interleave6_3dn2(dst, src, len); |
| 2503 else | 2494 else |
| 2504 float_to_int16_interleave_3dnow(dst, src, len, channels); | 2495 float_to_int16_interleave_3dnow(dst, src, len, channels); |
| 2505 } | 2496 } |
| 2506 | 2497 |
| 2507 | |
| 2508 void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width); | |
| 2509 void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width); | |
| 2510 void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width); | |
| 2511 void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I
DWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width); | |
| 2512 void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, u
int8_t * * block, int b_w, int b_h, | |
| 2513 int src_x, int src_y, int src_stride, slice_b
uffer * sb, int add, uint8_t * dst8); | |
| 2514 void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, ui
nt8_t * * block, int b_w, int b_h, | |
| 2515 int src_x, int src_y, int src_stride, slice_bu
ffer * sb, int add, uint8_t * dst8); | |
| 2516 | |
| 2517 | |
| 2518 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); | 2498 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); |
| 2519 | 2499 |
| 2520 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | 2500 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) |
| 2521 { | 2501 { |
| 2522 mm_flags = mm_support(); | 2502 mm_flags = mm_support(); |
| 2523 | 2503 |
| 2524 if (avctx->dsp_mask) { | 2504 if (avctx->dsp_mask) { |
| 2525 if (avctx->dsp_mask & FF_MM_FORCE) | 2505 if (avctx->dsp_mask & FF_MM_FORCE) |
| 2526 mm_flags |= (avctx->dsp_mask & 0xffff); | 2506 mm_flags |= (avctx->dsp_mask & 0xffff); |
| 2527 else | 2507 else |
| (...skipping 310 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2838 } | 2818 } |
| 2839 | 2819 |
| 2840 | 2820 |
| 2841 #define H264_QPEL_FUNCS(x, y, CPU)\ | 2821 #define H264_QPEL_FUNCS(x, y, CPU)\ |
| 2842 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_#
#CPU;\ | 2822 c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_#
#CPU;\ |
| 2843 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##
CPU;\ | 2823 c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##
CPU;\ |
| 2844 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_#
#CPU;\ | 2824 c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_#
#CPU;\ |
| 2845 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##
CPU; | 2825 c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##
CPU; |
| 2846 if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){ | 2826 if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){ |
| 2847 // these functions are slower than mmx on AMD, but faster on Intel | 2827 // these functions are slower than mmx on AMD, but faster on Intel |
| 2848 /* FIXME works in most codecs, but crashes svq1 due to unaligned chroma | |
| 2849 c->put_pixels_tab[0][0] = put_pixels16_sse2; | 2828 c->put_pixels_tab[0][0] = put_pixels16_sse2; |
| 2850 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; | 2829 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; |
| 2851 */ | |
| 2852 H264_QPEL_FUNCS(0, 0, sse2); | 2830 H264_QPEL_FUNCS(0, 0, sse2); |
| 2853 } | 2831 } |
| 2854 if(mm_flags & FF_MM_SSE2){ | 2832 if(mm_flags & FF_MM_SSE2){ |
| 2855 c->h264_idct8_add = ff_h264_idct8_add_sse2; | 2833 c->h264_idct8_add = ff_h264_idct8_add_sse2; |
| 2856 c->h264_idct8_add4= ff_h264_idct8_add4_sse2; | 2834 c->h264_idct8_add4= ff_h264_idct8_add4_sse2; |
| 2857 | 2835 |
| 2858 H264_QPEL_FUNCS(0, 1, sse2); | 2836 H264_QPEL_FUNCS(0, 1, sse2); |
| 2859 H264_QPEL_FUNCS(0, 2, sse2); | 2837 H264_QPEL_FUNCS(0, 2, sse2); |
| 2860 H264_QPEL_FUNCS(0, 3, sse2); | 2838 H264_QPEL_FUNCS(0, 3, sse2); |
| 2861 H264_QPEL_FUNCS(1, 1, sse2); | 2839 H264_QPEL_FUNCS(1, 1, sse2); |
| (...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3022 | 3000 |
| 3023 avg_no_rnd_pixels_tab[0] = just_return; | 3001 avg_no_rnd_pixels_tab[0] = just_return; |
| 3024 avg_no_rnd_pixels_tab[1] = just_return; | 3002 avg_no_rnd_pixels_tab[1] = just_return; |
| 3025 avg_no_rnd_pixels_tab[2] = just_return; | 3003 avg_no_rnd_pixels_tab[2] = just_return; |
| 3026 avg_no_rnd_pixels_tab[3] = just_return; | 3004 avg_no_rnd_pixels_tab[3] = just_return; |
| 3027 | 3005 |
| 3028 //av_fdct = just_return; | 3006 //av_fdct = just_return; |
| 3029 //ff_idct = just_return; | 3007 //ff_idct = just_return; |
| 3030 #endif | 3008 #endif |
| 3031 } | 3009 } |
| OLD | NEW |