Index: patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.c
===================================================================
--- patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.c (revision 41250)
+++ patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.c (working copy)
@@ -39,38 +39,38 @@
 int mm_flags; /* multimedia extension flags */
 
 /* pixel operations */
-DECLARE_ALIGNED_8 (const uint64_t, ff_bone) = 0x0101010101010101ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
 
-DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000)[2] =
+DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =
 {0x8000000080000000ULL, 0x8000000080000000ULL};
 
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL;
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
 
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
 
-DECLARE_ALIGNED_16(const double, ff_pd_1)[2] = { 1.0, 1.0 };
-DECLARE_ALIGNED_16(const double, ff_pd_2)[2] = { 2.0, 2.0 };
+DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
+DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
 
 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
@@ -1823,9 +1823,6 @@
 #include "rv40dsp_mmx.c"
 
 /* CAVS specific */
-void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);
-void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx);
-
 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
     put_pixels8_mmx(dst, src, stride, 8);
 }
@@ -1840,8 +1837,6 @@
 }
 
 /* VC1 specific */
-void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
-
 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
     put_pixels8_mmx(dst, src, stride, 8);
 }
@@ -1849,10 +1844,6 @@
     avg_pixels8_mmx2(dst, src, stride, 8);
 }
 
-/* external functions, from idct_mmx.c */
-void ff_mmx_idct(DCTELEM *block);
-void ff_mmxext_idct(DCTELEM *block);
-
 /* XXX: those functions should be suppressed ASAP when all IDCTs are
    converted */
 #if CONFIG_GPL
@@ -2031,7 +2022,7 @@
     } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) {
         MIX5(IF1,IF0);
     } else {
-        DECLARE_ALIGNED_16(float, matrix_simd)[in_ch][2][4];
+        DECLARE_ALIGNED(16, float, matrix_simd)[in_ch][2][4];
         j = 2*in_ch*sizeof(float);
         __asm__ volatile(
             "1: \n"
@@ -2418,7 +2409,7 @@
 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
-    DECLARE_ALIGNED_16(int16_t, tmp)[len];\
+    DECLARE_ALIGNED(16, int16_t, tmp)[len];\
     int i,j,c;\
     for(c=0; c<channels; c++){\
         float_to_int16_##cpu(tmp, src[c], len);\
@@ -2504,17 +2495,6 @@
         float_to_int16_interleave_3dnow(dst, src, len, channels);
 }
 
-
-void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width);
-void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width);
-void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
-void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
-void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
-                                   int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
-void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
-                                  int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
-
-
 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
 
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
@@ -2845,10 +2825,8 @@
             c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
         if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){
             // these functions are slower than mmx on AMD, but faster on Intel
-/* FIXME works in most codecs, but crashes svq1 due to unaligned chroma
             c->put_pixels_tab[0][0] = put_pixels16_sse2;
             c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
-*/
             H264_QPEL_FUNCS(0, 0, sse2);
         }
         if(mm_flags & FF_MM_SSE2){