Index: patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.c
===================================================================
--- patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.c (revision 41250)
+++ patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.c (working copy)
@@ -39,38 +39,38 @@
 int mm_flags; /* multimedia extension flags */
 
 /* pixel operations */
-DECLARE_ALIGNED_8 (const uint64_t, ff_bone) = 0x0101010101010101ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_bone) = 0x0101010101010101ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
 
-DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000)[2] =
+DECLARE_ALIGNED(16, const uint64_t, ff_pdw_80000000)[2] =
 {0x8000000080000000ULL, 0x8000000080000000ULL};
 
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL;
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
-DECLARE_ALIGNED_16(const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64 ) = {0x0040004000400040ULL, 0x0040004000400040ULL};
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
 
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
 
-DECLARE_ALIGNED_16(const double, ff_pd_1)[2] = { 1.0, 1.0 };
-DECLARE_ALIGNED_16(const double, ff_pd_2)[2] = { 2.0, 2.0 };
+DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
+DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
 
 #define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
@@ -1823,9 +1823,6 @@
 #include "rv40dsp_mmx.c"
 
 /* CAVS specific */
-void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);
-void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx);
-
 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
     put_pixels8_mmx(dst, src, stride, 8);
 }
@@ -1840,8 +1837,6 @@
 }
 
 /* VC1 specific */
-void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
-
 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
     put_pixels8_mmx(dst, src, stride, 8);
 }
@@ -1849,10 +1844,6 @@
     avg_pixels8_mmx2(dst, src, stride, 8);
 }
 
-/* external functions, from idct_mmx.c */
-void ff_mmx_idct(DCTELEM *block);
-void ff_mmxext_idct(DCTELEM *block);
-
 /* XXX: those functions should be suppressed ASAP when all IDCTs are
    converted */
 #if CONFIG_GPL
@@ -2031,7 +2022,7 @@
     } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) {
         MIX5(IF1,IF0);
     } else {
-        DECLARE_ALIGNED_16(float, matrix_simd)[in_ch][2][4];
+        DECLARE_ALIGNED(16, float, matrix_simd)[in_ch][2][4];
         j = 2*in_ch*sizeof(float);
         __asm__ volatile(
             "1: \n"
@@ -2418,7 +2409,7 @@
 #define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
 /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
 static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
-    DECLARE_ALIGNED_16(int16_t, tmp)[len];\
+    DECLARE_ALIGNED(16, int16_t, tmp)[len];\
     int i,j,c;\
     for(c=0; c<channels; c++){\
         float_to_int16_##cpu(tmp, src[c], len);\
@@ -2504,17 +2495,6 @@
         float_to_int16_interleave_3dnow(dst, src, len, channels);
 }
 
-
-void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width);
-void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width);
-void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
-void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
-void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
-                                   int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
-void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
-                                  int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
-
-
 float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
 
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
@@ -2845,10 +2825,8 @@
             c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
         if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){
             // these functions are slower than mmx on AMD, but faster on Intel
-/* FIXME works in most codecs, but crashes svq1 due to unaligned chroma
             c->put_pixels_tab[0][0] = put_pixels16_sse2;
             c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
-*/
             H264_QPEL_FUNCS(0, 0, sse2);
         }
         if(mm_flags & FF_MM_SSE2){