| Index: patched-ffmpeg-mt/libavcodec/x86/h264dsp_mmx.c
| ===================================================================
| --- patched-ffmpeg-mt/libavcodec/x86/h264dsp_mmx.c (revision 41250)
| +++ patched-ffmpeg-mt/libavcodec/x86/h264dsp_mmx.c (working copy)
| @@ -20,8 +20,8 @@
|
|  #include "dsputil_mmx.h"
|
| -DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
| -DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;
| +DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
| +DECLARE_ALIGNED(8, static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;
|
|  /***********************************/
|  /* IDCT */
| @@ -157,12 +157,12 @@
|  static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
|  {
|      int i;
| -    DECLARE_ALIGNED_8(int16_t, b2)[64];
| +    DECLARE_ALIGNED(8, int16_t, b2)[64];
|
|      block[0] += 32;
|
|      for(i=0; i<2; i++){
| -        DECLARE_ALIGNED_8(uint64_t, tmp);
| +        DECLARE_ALIGNED(8, uint64_t, tmp);
|
|          h264_idct8_1d(block+4*i);
|
| @@ -628,7 +628,7 @@
|
|  static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
|  {
| -    DECLARE_ALIGNED_8(uint64_t, tmp0)[2];
| +    DECLARE_ALIGNED(8, uint64_t, tmp0)[2];
|
|      __asm__ volatile(
|          "movq (%2,%4), %%mm0 \n\t" //p1
| @@ -690,7 +690,7 @@
|  {
|      //FIXME: could cut some load/stores by merging transpose with filter
|      // also, it only needs to transpose 6x8
| -    DECLARE_ALIGNED_8(uint8_t, trans)[8*8];
| +    DECLARE_ALIGNED(8, uint8_t, trans)[8*8];
|      int i;
|      for(i=0; i<2; i++, pix+=8*stride, tc0+=2) {
|          if((tc0[0] & tc0[1]) < 0)
| @@ -734,7 +734,7 @@
|  static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
|  {
|      //FIXME: could cut some load/stores by merging transpose with filter
| -    DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
| +    DECLARE_ALIGNED(8, uint8_t, trans)[8*4];
|      transpose4x4(trans, pix-2, 8, stride);
|      transpose4x4(trans+4, pix-2+4*stride, 8, stride);
|      h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0);
| @@ -784,7 +784,7 @@
|  static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
|  {
|      //FIXME: could cut some load/stores by merging transpose with filter
| -    DECLARE_ALIGNED_8(uint8_t, trans)[8*4];
| +    DECLARE_ALIGNED(8, uint8_t, trans)[8*4];
|      transpose4x4(trans, pix-2, 8, stride);
|      transpose4x4(trans+4, pix-2+4*stride, 8, stride);
|      h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1);
| @@ -815,7 +815,7 @@
|      for( dir=1; dir>=0; dir-- ) {
|          const x86_reg d_idx = dir ? -8 : -1;
|          const int mask_mv = dir ? mask_mv1 : mask_mv0;
| -        DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
| +        DECLARE_ALIGNED(8, const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
|          int b_idx, edge;
|          for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) {
|              __asm__ volatile(
| @@ -2106,7 +2106,7 @@
|  #endif
|
|  /* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */
| -DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg)[4] = {
| +DECLARE_ALIGNED(8, static const uint64_t, h264_rnd_reg)[4] = {
|      0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL
|  };
|
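
Note on the change itself: the patch mechanically replaces FFmpeg's old fixed-alignment DECLARE_ALIGNED_8(type, name) macro with the parameterized DECLARE_ALIGNED(n, type, name) form, which takes the byte alignment as its first argument. The snippet below is only an illustrative sketch of the GCC-style branch of that macro; the real definition lives in libavutil/mem.h and also selects the appropriate syntax for other compilers.

#include <stdint.h>

/* Sketch only: GCC-style branch of the parameterized alignment macro.
 * Assumes __attribute__ support; not the full multi-compiler definition. */
#define DECLARE_ALIGNED(n, t, v) t __attribute__ ((aligned (n))) v

/* New form used throughout the patch: the alignment is passed explicitly. */
DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1) = 0x0103010301030103ULL;

/* The same call shape covers arrays and locals, e.g.
 *     DECLARE_ALIGNED(8, int16_t, b2)[64];
 * which, under this sketch, expands to
 *     int16_t __attribute__ ((aligned (8))) b2[64];
 */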