Chromium Code Reviews

Side by Side Diff: patched-ffmpeg-mt/libavcodec/ppc/h264_altivec.c

Issue 789004: ffmpeg roll of source to mar 9 version... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/
Patch Set: '' Created 10 years, 9 months ago
/*
 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
(...skipping 61 matching lines...)
 #undef PREFIX_h264_qpel16_v_lowpass_num
 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
 #undef PREFIX_h264_qpel16_hv_lowpass_num

 #define H264_MC(OPNAME, SIZE, CODETYPE) \
 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\
     OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \
-    DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
     put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
     OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
     put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
     OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
-    DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
     put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
-    DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
     put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
-    DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
     put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
-    DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
     put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
+    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
     OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
-    DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\
-    DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
+    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
     put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
     put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\
-    DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\
-    DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
+    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
     put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
     put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
-    DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\
-    DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
+    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
     put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\
-    DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\
-    DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\
+    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
+    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
     put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
 }

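For illustration, here is what one instantiation of the H264_MC macro above expands to, assuming the file instantiates it as H264_MC(put_, 16, altivec) the way FFmpeg's h264_altivec.c does elsewhere (the put_pixels16_l2_altivec helper it pastes together is defined just below):

/* Expansion sketch for the mc01 (quarter-pel position 0,1) case:
 * vertically half-pel filter into an aligned scratch buffer, then
 * average it with the unshifted source into dst. */
static void put_h264_qpel16_mc01_altivec(uint8_t *dst, uint8_t *src, int stride){
    DECLARE_ALIGNED(16, uint8_t, half)[16*16];
    put_h264_qpel16_v_lowpass_altivec(half, src, 16, stride);
    put_pixels16_l2_altivec(dst, src, half, stride, stride, 16);
}
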
 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                     const uint8_t * src2, int dst_stride,
                                     int src_stride1, int h)
 {
     int i;
(...skipping 233 matching lines...)
     vec_u8 bodyv  = vec_perm( idstsum8, idstsum8, perm_stv );\
     vec_u8 edgelv = vec_perm( sel, zero_u8v, perm_stv );     \
     lv = vec_sel( lv, bodyv, edgelv );                       \
     vec_st( lv, 7, dest );                                   \
     hv = vec_ld( 0, dest );                                  \
     edgehv = vec_perm( zero_u8v, sel, perm_stv );            \
     hv = vec_sel( hv, bodyv, edgehv );                       \
     vec_st( hv, 0, dest );                                   \
 }

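The tail of ALTIVEC_STORE_SUM_CLIP above is the classic AltiVec unaligned-store idiom: vec_st can only write 16-byte-aligned quadwords, so the result is rotated into position with a vec_lvsr permute and blended into the two aligned blocks that straddle the destination. A minimal, self-contained sketch of the same idiom (store16_unaligned is a hypothetical helper, not part of this file; only standard altivec.h intrinsics are used):

#include <altivec.h>
#include <stdint.h>

/* Hypothetical helper: store all 16 bytes of v to a possibly unaligned
 * dest using only aligned vec_ld/vec_st, as the macro above does for
 * its 8-byte window. */
static inline void store16_unaligned(vector unsigned char v, uint8_t *dest)
{
    vector unsigned char perm = vec_lvsr(0, dest);    /* rotate right by dest & 15 */
    /* mask: 0x00 in lanes before the window start, 0xFF from the start on */
    vector unsigned char mask = vec_perm(vec_splat_u8(0), vec_splat_u8(-1), perm);
    vector unsigned char body = vec_perm(v, v, perm); /* v shifted to its offset */
    vector unsigned char hi   = vec_ld(15, dest);     /* aligned block holding the tail */

    /* tail block first: take body below the cut, keep memory past the end */
    vec_st(vec_sel(body, hi, mask), 15, dest);
    /* head block: keep memory before the start, take body from the cut on */
    vec_st(vec_sel(vec_ld(0, dest), body, mask), 0, dest);
}

Storing the tail block before reloading the head block mirrors the macro's ordering and keeps the already-aligned (offset 0) case correct, since both loads then hit the same quadword.
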
-void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) {
+static void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) {
     vec_s16 s0, s1, s2, s3, s4, s5, s6, s7;
     vec_s16 d0, d1, d2, d3, d4, d5, d6, d7;
     vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;

     vec_u8 perm_ldv = vec_lvsl(0, dst);
     vec_u8 perm_stv = vec_lvsr(8, dst);

     const vec_u16 onev = vec_splat_u16(1);
     const vec_u16 twov = vec_splat_u16(2);
     const vec_u16 sixv = vec_splat_u16(6);
(...skipping 28 matching lines...)
     ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel);
     ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel);
     ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);
 }

 static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *block, int stride, int size)
 {
     vec_s16 dc16;
     vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;
     LOAD_ZERO;
-    DECLARE_ALIGNED_16(int, dc);
+    DECLARE_ALIGNED(16, int, dc);
     int i;

     dc = (block[0] + 32) >> 6;
     dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1);

     if (size == 4)
         dc16 = vec_sld(dc16, zero_s16v, 8);
     dcplus = vec_packsu(dc16, zero_s16v);
     dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);

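As in the rest of this roll, the only substantive change in this hunk is the alignment macro: upstream replaced DECLARE_ALIGNED_16(t, v) with the parameterized DECLARE_ALIGNED(16, t, v). For reference, a sketch of roughly how the macro resolves (the real definition lives in libavutil/mem.h and the exact conditionals may differ by version):

#if defined(__GNUC__)
#   define DECLARE_ALIGNED(n, t, v)    t __attribute__ ((aligned (n))) v
#elif defined(_MSC_VER)
#   define DECLARE_ALIGNED(n, t, v)    __declspec(align(n)) t v
#endif

/* So DECLARE_ALIGNED(16, int, dc); above becomes, under GCC:
 *     int __attribute__ ((aligned (16))) dc;
 * pinning dc to the start of its quadword, so that after
 * vec_lde(0, &dc) the vec_splat(..., 1) index selects the low
 * 16 bits of dc (AltiVec loads ignore the low address bits
 * rather than faulting, so the alignment must be guaranteed). */
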
(...skipping 89 matching lines...)
                                                                 \
     r0 = vec_mergeh(r4, r6);  /*all set 0*/                     \
     r1 = vec_mergel(r4, r6);  /*all set 1*/                     \
     r2 = vec_mergeh(r5, r7);  /*all set 2*/                     \
     r3 = vec_mergel(r5, r7);  /*all set 3*/                     \
 }

 static inline void write16x4(uint8_t *dst, int dst_stride,
                              register vec_u8 r0, register vec_u8 r1,
                              register vec_u8 r2, register vec_u8 r3) {
-    DECLARE_ALIGNED_16(unsigned char, result)[64];
+    DECLARE_ALIGNED(16, unsigned char, result)[64];
     uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;
     int int_dst_stride = dst_stride/4;

     vec_st(r0,  0, result);
     vec_st(r1, 16, result);
     vec_st(r2, 32, result);
     vec_st(r3, 48, result);
     /* FIXME: there has to be a better way!!!! */
     *dst_int = *src_int;
     *(dst_int + int_dst_stride) = *(src_int + 1);
(...skipping 159 matching lines...)
     delta = vec_subs(stage2, vec160);          /* d */          \
     deltaneg = vec_min(tc0masked, deltaneg);                    \
     delta = vec_min(tc0masked, delta);                          \
     p0 = vec_subs(p0, deltaneg);                                \
     q0 = vec_subs(q0, delta);                                   \
     p0 = vec_adds(p0, delta);                                   \
     q0 = vec_adds(q0, deltaneg);                                \
 }

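The macro tail above splits the correction into a nonnegative delta and deltaneg pair because it works in unsigned saturating arithmetic (vec_subs/vec_adds). For reference, the scalar form of the standard H.264 p0/q0 update it implements, written as a hypothetical helper using the usual libavutil clip functions:

/* Hypothetical scalar reference, not part of this file. */
static inline void filter_p0_q0_scalar(uint8_t *p0, uint8_t *q0,
                                       int p1, int q1, int tc0)
{
    /* d = clip3(-tc0, tc0, ((q0 - p0) * 4 + (p1 - q1) + 4) >> 3) */
    int delta = av_clip((((*q0 - *p0) * 4) + (p1 - q1) + 4) >> 3, -tc0, tc0);
    *p0 = av_clip_uint8(*p0 + delta);
    *q0 = av_clip_uint8(*q0 - delta);
}
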
 #define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) { \
-    DECLARE_ALIGNED_16(unsigned char, temp)[16];                              \
+    DECLARE_ALIGNED(16, unsigned char, temp)[16];                             \
     register vec_u8 alphavec;                                                 \
     register vec_u8 betavec;                                                  \
     register vec_u8 mask;                                                     \
     register vec_u8 p1mask;                                                   \
     register vec_u8 q1mask;                                                   \
     register vector signed char tc0vec;                                       \
     register vec_u8 finaltc0;                                                 \
     register vec_u8 tc0masked;                                                \
     register vec_u8 newp1;                                                    \
     register vec_u8 newq1;                                                    \
(...skipping 59 matching lines...)
     write16x4(pix-2, stride, line1, line2, line3, line4);
 }

 static av_always_inline
 void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h)
 {
     int y, aligned;
     vec_u8 vblock;
     vec_s16 vtemp, vweight, voffset, v0, v1;
     vec_u16 vlog2_denom;
-    DECLARE_ALIGNED_16(int32_t, temp)[4];
+    DECLARE_ALIGNED(16, int32_t, temp)[4];
     LOAD_ZERO;

     offset <<= log2_denom;
     if(log2_denom) offset += 1<<(log2_denom-1);
     temp[0] = log2_denom;
     temp[1] = weight;
     temp[2] = offset;

     vtemp = (vec_s16)vec_ld(0, temp);
     vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
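The function folds the rounding term into offset before broadcasting it, so each pixel of the (skipped) vector loop reduces to one multiply, add, and shift. A hypothetical scalar equivalent of that per-pixel operation, assuming offset has already been pre-scaled as above, i.e. (orig_offset << log2_denom) + (1 << (log2_denom - 1)):

/* Hypothetical scalar reference, not part of this file. */
static inline uint8_t weight_pixel(uint8_t px, int weight,
                                   int offset, int log2_denom)
{
    return av_clip_uint8((px * weight + offset) >> log2_denom);
}
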
(...skipping 25 matching lines...)
 }

 static av_always_inline
 void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
                                int weightd, int weights, int offset, int w, int h)
 {
     int y, dst_aligned, src_aligned;
     vec_u8 vsrc, vdst;
     vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3;
     vec_u16 vlog2_denom;
-    DECLARE_ALIGNED_16(int32_t, temp)[4];
+    DECLARE_ALIGNED(16, int32_t, temp)[4];
     LOAD_ZERO;

     offset = ((offset + 1) | 1) << log2_denom;
     temp[0] = log2_denom+1;
     temp[1] = weights;
     temp[2] = weightd;
     temp[3] = offset;

     vtemp = (vec_s16)vec_ld(0, temp);
     vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
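The bidirectional case weights both references, adds the folded rounding/offset term computed above, and shifts by log2_denom+1 (the extra bit accounting for the two-term sum, hence temp[0] = log2_denom+1). A hypothetical scalar sketch of what the (skipped) vector loop computes per pixel, with offset as folded above, i.e. ((orig_offset + 1) | 1) << log2_denom:

/* Hypothetical scalar reference, not part of this file. */
static inline uint8_t biweight_pixel(uint8_t d, uint8_t s,
                                     int weightd, int weights,
                                     int offset, int log2_denom)
{
    return av_clip_uint8((d * weightd + s * weights + offset) >> (log2_denom + 1));
}
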
(...skipping 102 matching lines...)
         c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec;
         c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec;
         c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec;
         c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec;
         c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec;
         c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec;
         c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;
         c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;
     }
 }
