OLD | NEW |
1 /* | 1 /* |
2 * MMX optimized DSP utils | 2 * MMX optimized DSP utils |
3 * Copyright (c) 2000, 2001 Fabrice Bellard | 3 * Copyright (c) 2000, 2001 Fabrice Bellard |
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | 4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
5 * | 5 * |
6 * This file is part of FFmpeg. | 6 * This file is part of FFmpeg. |
7 * | 7 * |
8 * FFmpeg is free software; you can redistribute it and/or | 8 * FFmpeg is free software; you can redistribute it and/or |
9 * modify it under the terms of the GNU Lesser General Public | 9 * modify it under the terms of the GNU Lesser General Public |
10 * License as published by the Free Software Foundation; either | 10 * License as published by the Free Software Foundation; either |
(...skipping 21 matching lines...)
32 #include "vp6dsp_mmx.h" | 32 #include "vp6dsp_mmx.h" |
33 #include "vp6dsp_sse2.h" | 33 #include "vp6dsp_sse2.h" |
34 #include "idct_xvid.h" | 34 #include "idct_xvid.h" |
35 | 35 |
36 //#undef NDEBUG | 36 //#undef NDEBUG |
37 //#include <assert.h> | 37 //#include <assert.h> |
38 | 38 |
39 int mm_flags; /* multimedia extension flags */ | 39 int mm_flags; /* multimedia extension flags */ |
40 | 40 |
41 /* pixel operations */ | 41 /* pixel operations */ |
42 DECLARE_ALIGNED_8 (const uint64_t, ff_bone) = 0x0101010101010101ULL; | 42 DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL; |
43 DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo) = 0x0002000200020002ULL; | 43 DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL; |
44 | 44 |
45 DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000)[2] = | 45 DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] = |
46 {0x8000000080000000ULL, 0x8000000080000000ULL}; | 46 {0x8000000080000000ULL, 0x8000000080000000ULL}; |
47 | 47 |
48 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL; | 48 DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL; |
49 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL; | 49 DECLARE_ALIGNED(8, const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL; |
50 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL}; | 50 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL}; |
51 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL}; | 51 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL}; |
52 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL; | 52 DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL; |
53 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL}; | 53 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL}; |
54 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL; | 54 DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL; |
55 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL}; | 55 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL}; |
56 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL}; | 56 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL}; |
57 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL; | 57 DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL; |
58 DECLARE_ALIGNED_16(const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL}; | 58 DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL}; |
59 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL; | 59 DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL; |
60 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_128) = 0x0080008000800080ULL; | 60 DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL; |
61 DECLARE_ALIGNED_8 (const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL; | 61 DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL; |
62 | 62 |
63 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL; | 63 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL; |
64 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL; | 64 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL; |
65 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL; | 65 DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL; |
66 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL; | 66 DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL; |
67 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL; | 67 DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL; |
68 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL; | 68 DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL; |
69 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL; | 69 DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL; |
70 DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL; | 70 DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL; |
71 | 71 |
72 DECLARE_ALIGNED_16(const double, ff_pd_1)[2] = { 1.0, 1.0 }; | 72 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 }; |
73 DECLARE_ALIGNED_16(const double, ff_pd_2)[2] = { 2.0, 2.0 }; | 73 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 }; |
74 | 74 |
75 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::) | 75 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::) |
76 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::) | 76 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::) |
77 | 77 |
78 #define MOVQ_BFE(regd) \ | 78 #define MOVQ_BFE(regd) \ |
79 __asm__ volatile ( \ | 79 __asm__ volatile ( \ |
80 "pcmpeqd %%" #regd ", %%" #regd " \n\t"\ | 80 "pcmpeqd %%" #regd ", %%" #regd " \n\t"\ |
81 "paddb %%" #regd ", %%" #regd " \n\t" ::) | 81 "paddb %%" #regd ", %%" #regd " \n\t" ::) |
82 | 82 |
83 #ifndef PIC | 83 #ifndef PIC |
(...skipping 1732 matching lines...)
1816 }while(--h);\ | 1816 }while(--h);\ |
1817 } | 1817 } |
1818 PREFETCH(prefetch_mmx2, prefetcht0) | 1818 PREFETCH(prefetch_mmx2, prefetcht0) |
1819 PREFETCH(prefetch_3dnow, prefetch) | 1819 PREFETCH(prefetch_3dnow, prefetch) |
1820 #undef PREFETCH | 1820 #undef PREFETCH |
1821 | 1821 |
1822 #include "h264dsp_mmx.c" | 1822 #include "h264dsp_mmx.c" |
1823 #include "rv40dsp_mmx.c" | 1823 #include "rv40dsp_mmx.c" |
1824 | 1824 |
1825 /* CAVS specific */ | 1825 /* CAVS specific */ |
1826 void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx); | |
1827 void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx); | |
1828 | |
1829 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { | 1826 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { |
1830 put_pixels8_mmx(dst, src, stride, 8); | 1827 put_pixels8_mmx(dst, src, stride, 8); |
1831 } | 1828 } |
1832 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { | 1829 void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { |
1833 avg_pixels8_mmx(dst, src, stride, 8); | 1830 avg_pixels8_mmx(dst, src, stride, 8); |
1834 } | 1831 } |
1835 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { | 1832 void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { |
1836 put_pixels16_mmx(dst, src, stride, 16); | 1833 put_pixels16_mmx(dst, src, stride, 16); |
1837 } | 1834 } |
1838 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { | 1835 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { |
1839 avg_pixels16_mmx(dst, src, stride, 16); | 1836 avg_pixels16_mmx(dst, src, stride, 16); |
1840 } | 1837 } |
1841 | 1838 |
1842 /* VC1 specific */ | 1839 /* VC1 specific */ |
1843 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx); | |
1844 | |
1845 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { | 1840 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { |
1846 put_pixels8_mmx(dst, src, stride, 8); | 1841 put_pixels8_mmx(dst, src, stride, 8); |
1847 } | 1842 } |
1848 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { | 1843 void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd) { |
1849 avg_pixels8_mmx2(dst, src, stride, 8); | 1844 avg_pixels8_mmx2(dst, src, stride, 8); |
1850 } | 1845 } |
1851 | 1846 |
1852 /* external functions, from idct_mmx.c */ | |
1853 void ff_mmx_idct(DCTELEM *block); | |
1854 void ff_mmxext_idct(DCTELEM *block); | |
1855 | |
1856 /* XXX: those functions should be suppressed ASAP when all IDCTs are | 1847 /* XXX: those functions should be suppressed ASAP when all IDCTs are |
1857 converted */ | 1848 converted */ |
1858 #if CONFIG_GPL | 1849 #if CONFIG_GPL |
1859 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) | 1850 static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) |
1860 { | 1851 { |
1861 ff_mmx_idct (block); | 1852 ff_mmx_idct (block); |
1862 put_pixels_clamped_mmx(block, dest, line_size); | 1853 put_pixels_clamped_mmx(block, dest, line_size); |
1863 } | 1854 } |
1864 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block) | 1855 static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block) |
1865 { | 1856 { |
(...skipping 158 matching lines...)
2024 { | 2015 { |
2025 int (*matrix_cmp)[2] = (int(*)[2])matrix; | 2016 int (*matrix_cmp)[2] = (int(*)[2])matrix; |
2026 intptr_t i,j,k; | 2017 intptr_t i,j,k; |
2027 | 2018 |
2028 i = -len*sizeof(float); | 2019 i = -len*sizeof(float); |
2029     if(in_ch == 5 && out_ch == 2 && !(matrix_cmp[0][1]|matrix_cmp[2][0]|matrix_cmp[3][1]|matrix_cmp[4][0]|(matrix_cmp[1][0]^matrix_cmp[1][1])|(matrix_cmp[0][0]^matrix_cmp[2][1]))) { | 2020     if(in_ch == 5 && out_ch == 2 && !(matrix_cmp[0][1]|matrix_cmp[2][0]|matrix_cmp[3][1]|matrix_cmp[4][0]|(matrix_cmp[1][0]^matrix_cmp[1][1])|(matrix_cmp[0][0]^matrix_cmp[2][1]))) { |
2030 MIX5(IF0,IF1); | 2021 MIX5(IF0,IF1); |
2031     } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) { | 2022     } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) { |
2032 MIX5(IF1,IF0); | 2023 MIX5(IF1,IF0); |
2033 } else { | 2024 } else { |
2034 DECLARE_ALIGNED_16(float, matrix_simd)[in_ch][2][4]; | 2025 DECLARE_ALIGNED(16, float, matrix_simd)[in_ch][2][4]; |
2035 j = 2*in_ch*sizeof(float); | 2026 j = 2*in_ch*sizeof(float); |
2036 __asm__ volatile( | 2027 __asm__ volatile( |
2037 "1: \n" | 2028 "1: \n" |
2038 "sub $8, %0 \n" | 2029 "sub $8, %0 \n" |
2039 "movss (%2,%0), %%xmm6 \n" | 2030 "movss (%2,%0), %%xmm6 \n" |
2040 "movss 4(%2,%0), %%xmm7 \n" | 2031 "movss 4(%2,%0), %%xmm7 \n" |
2041 "shufps $0, %%xmm6, %%xmm6 \n" | 2032 "shufps $0, %%xmm6, %%xmm6 \n" |
2042 "shufps $0, %%xmm7, %%xmm7 \n" | 2033 "shufps $0, %%xmm7, %%xmm7 \n" |
2043 "movaps %%xmm6, (%1,%0,4) \n" | 2034 "movaps %%xmm6, (%1,%0,4) \n" |
2044 "movaps %%xmm7, 16(%1,%0,4) \n" | 2035 "movaps %%xmm7, 16(%1,%0,4) \n" |
(...skipping 366 matching lines...)
2411 #elif !HAVE_YASM | 2402 #elif !HAVE_YASM |
2412 #define ff_float_to_int16_interleave6_sse(a,b,c)   float_to_int16_interleave_misc_sse(a,b,c,6) | 2403 #define ff_float_to_int16_interleave6_sse(a,b,c)   float_to_int16_interleave_misc_sse(a,b,c,6) |
2413 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) | 2404 #define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6) |
2414 #define ff_float_to_int16_interleave6_3dn2(a,b,c)  float_to_int16_interleave_misc_3dnow(a,b,c,6) | 2405 #define ff_float_to_int16_interleave6_3dn2(a,b,c)  float_to_int16_interleave_misc_3dnow(a,b,c,6) |
2415 #endif | 2406 #endif |
2416 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse | 2407 #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse |
2417 | 2408 |
2418 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \ | 2409 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \ |
2419 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\ | 2410 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\ |
2420 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\ | 2411 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\ |
2421 DECLARE_ALIGNED_16(int16_t, tmp)[len];\ | 2412 DECLARE_ALIGNED(16, int16_t, tmp)[len];\ |
2422 int i,j,c;\ | 2413 int i,j,c;\ |
2423 for(c=0; c<channels; c++){\ | 2414 for(c=0; c<channels; c++){\ |
2424 float_to_int16_##cpu(tmp, src[c], len);\ | 2415 float_to_int16_##cpu(tmp, src[c], len);\ |
2425 for(i=0, j=c; i<len; i++, j+=channels)\ | 2416 for(i=0, j=c; i<len; i++, j+=channels)\ |
2426 dst[j] = tmp[i];\ | 2417 dst[j] = tmp[i];\ |
2427 }\ | 2418 }\ |
2428 }\ | 2419 }\ |
2429 \ | 2420 \ |
2430 static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\ | 2421 static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\ |
2431 if(channels==1)\ | 2422 if(channels==1)\ |
(...skipping 65 matching lines...)
2497 "js 1b \n" | 2488 "js 1b \n" |
2498 ) | 2489 ) |
2499 | 2490 |
2500 static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){ | 2491 static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){ |
2501 if(channels==6) | 2492 if(channels==6) |
2502 ff_float_to_int16_interleave6_3dn2(dst, src, len); | 2493 ff_float_to_int16_interleave6_3dn2(dst, src, len); |
2503 else | 2494 else |
2504 float_to_int16_interleave_3dnow(dst, src, len, channels); | 2495 float_to_int16_interleave_3dnow(dst, src, len, channels); |
2505 } | 2496 } |
2506 | 2497 |
2507 | |
2508 void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width); | |
2509 void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width); | |
2510 void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width); | |
2511 void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width); | |
2512 void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | |
2513                                    int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | |
2514 void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, | |
2515                                   int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | |
2516 | |
2517 | |
2518 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); | 2498 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); |
2519 | 2499 |
2520 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | 2500 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) |
2521 { | 2501 { |
2522 mm_flags = mm_support(); | 2502 mm_flags = mm_support(); |
2523 | 2503 |
2524 if (avctx->dsp_mask) { | 2504 if (avctx->dsp_mask) { |
2525 if (avctx->dsp_mask & FF_MM_FORCE) | 2505 if (avctx->dsp_mask & FF_MM_FORCE) |
2526 mm_flags |= (avctx->dsp_mask & 0xffff); | 2506 mm_flags |= (avctx->dsp_mask & 0xffff); |
2527 else | 2507 else |
(...skipping 310 matching lines...)
2838 } | 2818 } |
2839 | 2819 |
2840 | 2820 |
2841 #define H264_QPEL_FUNCS(x, y, CPU)\ | 2821 #define H264_QPEL_FUNCS(x, y, CPU)\ |
2842             c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\ | 2822             c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\ |
2843             c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\ | 2823             c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\ |
2844             c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\ | 2824             c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\ |
2845             c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU; | 2825             c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU; |
2846 if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){ | 2826 if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){ |
2847 // these functions are slower than mmx on AMD, but faster on Intel | 2827 // these functions are slower than mmx on AMD, but faster on Intel |
2848 /* FIXME works in most codecs, but crashes svq1 due to unaligned chroma | |
2849 c->put_pixels_tab[0][0] = put_pixels16_sse2; | 2828 c->put_pixels_tab[0][0] = put_pixels16_sse2; |
2850 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; | 2829 c->avg_pixels_tab[0][0] = avg_pixels16_sse2; |
2851 */ | |
2852 H264_QPEL_FUNCS(0, 0, sse2); | 2830 H264_QPEL_FUNCS(0, 0, sse2); |
2853 } | 2831 } |
2854 if(mm_flags & FF_MM_SSE2){ | 2832 if(mm_flags & FF_MM_SSE2){ |
2855 c->h264_idct8_add = ff_h264_idct8_add_sse2; | 2833 c->h264_idct8_add = ff_h264_idct8_add_sse2; |
2856 c->h264_idct8_add4= ff_h264_idct8_add4_sse2; | 2834 c->h264_idct8_add4= ff_h264_idct8_add4_sse2; |
2857 | 2835 |
2858 H264_QPEL_FUNCS(0, 1, sse2); | 2836 H264_QPEL_FUNCS(0, 1, sse2); |
2859 H264_QPEL_FUNCS(0, 2, sse2); | 2837 H264_QPEL_FUNCS(0, 2, sse2); |
2860 H264_QPEL_FUNCS(0, 3, sse2); | 2838 H264_QPEL_FUNCS(0, 3, sse2); |
2861 H264_QPEL_FUNCS(1, 1, sse2); | 2839 H264_QPEL_FUNCS(1, 1, sse2); |
(...skipping 160 matching lines...)
3022 | 3000 |
3023 avg_no_rnd_pixels_tab[0] = just_return; | 3001 avg_no_rnd_pixels_tab[0] = just_return; |
3024 avg_no_rnd_pixels_tab[1] = just_return; | 3002 avg_no_rnd_pixels_tab[1] = just_return; |
3025 avg_no_rnd_pixels_tab[2] = just_return; | 3003 avg_no_rnd_pixels_tab[2] = just_return; |
3026 avg_no_rnd_pixels_tab[3] = just_return; | 3004 avg_no_rnd_pixels_tab[3] = just_return; |
3027 | 3005 |
3028 //av_fdct = just_return; | 3006 //av_fdct = just_return; |
3029 //ff_idct = just_return; | 3007 //ff_idct = just_return; |
3030 #endif | 3008 #endif |
3031 } | 3009 } |
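
Note on the dominant change above: most hunks in this diff replace the fixed-alignment macros DECLARE_ALIGNED_8(t, v) and DECLARE_ALIGNED_16(t, v) with the parameterized DECLARE_ALIGNED(n, t, v) form, leaving the initializers untouched. The sketch below shows, assuming a GCC- or MSVC-style compiler, roughly what this macro family expands to and how the new form is used. It is illustrative only: the real definitions live in libavutil/mem.h and cover more toolchains, and the constant name pw_5_example is hypothetical.

#include <stdint.h>

/* Illustrative sketch only -- not FFmpeg's actual mem.h. DECLARE_ALIGNED(n, t, v)
 * declares a variable v of type t with n-byte alignment so SIMD code can use
 * aligned loads (movq/movdqa/movaps). */
#if defined(__GNUC__)
#   define DECLARE_ALIGNED(n, t, v) t __attribute__((aligned(n))) v
#elif defined(_MSC_VER)
#   define DECLARE_ALIGNED(n, t, v) __declspec(align(n)) t v
#else
#   define DECLARE_ALIGNED(n, t, v) t v /* no alignment guarantee */
#endif

/* The old fixed-width forms removed by this patch behave like thin wrappers: */
#define DECLARE_ALIGNED_8(t, v)  DECLARE_ALIGNED(8,  t, v)
#define DECLARE_ALIGNED_16(t, v) DECLARE_ALIGNED(16, t, v)

/* Usage in the style of the constants at the top of the file: a 16-byte-aligned
 * pair of 64-bit words, i.e. one 128-bit vector of packed 16-bit fives. */
DECLARE_ALIGNED(16, const uint64_t, pw_5_example)[2] =
    {0x0005000500050005ULL, 0x0005000500050005ULL};

For the declarations shown in this diff, either form yields the same object layout and alignment; the macro change itself is mechanical.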