| Index: source/patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.h
|
| ===================================================================
|
| --- source/patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.h (revision 59334)
|
| +++ source/patched-ffmpeg-mt/libavcodec/x86/dsputil_mmx.h (working copy)
|
| @@ -57,7 +57,7 @@
|
| extern const uint64_t ff_pb_1F;
|
| extern const uint64_t ff_pb_3F;
|
| extern const uint64_t ff_pb_81;
|
| -extern const uint64_t ff_pb_A1;
|
| +extern const xmm_reg ff_pb_A1;
|
| extern const xmm_reg ff_pb_F8;
|
| extern const uint64_t ff_pb_FC;
|
| extern const xmm_reg ff_pb_FE;
|
| @@ -94,6 +94,35 @@
|
| SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
|
| SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
|
|
|
| +static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ // Transpose a 4x4 block of bytes from src into dst with MMX; consecutive rows are src_stride / dst_stride bytes apart.
|
| + __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
|
| + "movd %4, %%mm0 \n\t" // mm0 = 4 bytes of src row 0
|
| + "movd %5, %%mm1 \n\t" // mm1 = 4 bytes of src row 1
|
| + "movd %6, %%mm2 \n\t" // mm2 = 4 bytes of src row 2
|
| + "movd %7, %%mm3 \n\t" // mm3 = 4 bytes of src row 3
|
| + "punpcklbw %%mm1, %%mm0 \n\t" // mm0 = rows 0 and 1 interleaved byte by byte
|
| + "punpcklbw %%mm3, %%mm2 \n\t" // mm2 = rows 2 and 3 interleaved byte by byte
|
| + "movq %%mm0, %%mm1 \n\t" // keep a copy: the next instruction overwrites mm0 but its high half is still needed
|
| + "punpcklwd %%mm2, %%mm0 \n\t" // mm0 = source column 0 (low dword), source column 1 (high dword)
|
| + "punpckhwd %%mm2, %%mm1 \n\t" // mm1 = source column 2 (low dword), source column 3 (high dword)
|
| + "movd %%mm0, %0 \n\t" // dst row 0 = source column 0
|
| + "punpckhdq %%mm0, %%mm0 \n\t" // move source column 1 down into the low dword
|
| + "movd %%mm0, %1 \n\t" // dst row 1 = source column 1
|
| + "movd %%mm1, %2 \n\t" // dst row 2 = source column 2
|
| + "punpckhdq %%mm1, %%mm1 \n\t" // move source column 3 down into the low dword
|
| + "movd %%mm1, %3 \n\t" // dst row 3 = source column 3
|
| +
|
| + : "=m" (*(uint32_t*)(dst + 0*dst_stride)), // outputs %0..%3: one 32-bit word per dst row
|
| + "=m" (*(uint32_t*)(dst + 1*dst_stride)),
|
| + "=m" (*(uint32_t*)(dst + 2*dst_stride)),
|
| + "=m" (*(uint32_t*)(dst + 3*dst_stride))
|
| + : "m" (*(uint32_t*)(src + 0*src_stride)), // inputs %4..%7: one 32-bit word per src row; all four are loaded before any output is stored
|
| + "m" (*(uint32_t*)(src + 1*src_stride)),
|
| + "m" (*(uint32_t*)(src + 2*src_stride)),
|
| + "m" (*(uint32_t*)(src + 3*src_stride))
|
| + ); // NOTE(review): mm0-mm3 are modified but no clobber list is given, and no emms is issued here - presumably left to the caller; confirm
|
| +}
|
| +
|
| // e,f,g,h can be memory
|
| // out: a,d,t,c
|
| #define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\
|
| @@ -158,9 +187,9 @@
|
| void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
|
| void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
|
|
|
| -void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
| -void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
| -void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
| +void ff_add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
| +void ff_put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
| +void ff_put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size);
|
|
|
| void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
|
| void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
|
|
|