OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> | 2 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> |
3 * | 3 * |
4 * This file is part of FFmpeg. | 4 * This file is part of FFmpeg. |
5 * | 5 * |
6 * FFmpeg is free software; you can redistribute it and/or | 6 * FFmpeg is free software; you can redistribute it and/or |
7 * modify it under the terms of the GNU Lesser General Public | 7 * modify it under the terms of the GNU Lesser General Public |
8 * License as published by the Free Software Foundation; either | 8 * License as published by the Free Software Foundation; either |
9 * version 2.1 of the License, or (at your option) any later version. | 9 * version 2.1 of the License, or (at your option) any later version. |
10 * | 10 * |
(...skipping 61 matching lines...)
72 #undef PREFIX_h264_qpel16_v_lowpass_num | 72 #undef PREFIX_h264_qpel16_v_lowpass_num |
73 #undef PREFIX_h264_qpel16_hv_lowpass_altivec | 73 #undef PREFIX_h264_qpel16_hv_lowpass_altivec |
74 #undef PREFIX_h264_qpel16_hv_lowpass_num | 74 #undef PREFIX_h264_qpel16_hv_lowpass_num |
75 | 75 |
76 #define H264_MC(OPNAME, SIZE, CODETYPE) \ | 76 #define H264_MC(OPNAME, SIZE, CODETYPE) \ |
77 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\ | 77 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, int stride){\ |
78 OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\ | 78 OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\ |
79 }\ | 79 }\ |
80 \ | 80 \ |
81 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \ | 81 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \ |
82 DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\ | 82 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\ |
83 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | 83 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ |
84 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ | 84 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ |
85 }\ | 85 }\ |
86 \ | 86 \ |
87 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | 87 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ |
88 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\ | 88 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\ |
89 }\ | 89 }\ |
90 \ | 90 \ |
91 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | 91 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ |
92 DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\ | 92 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\ |
93 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | 93 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ |
94 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\ | 94 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\ |
95 }\ | 95 }\ |
96 \ | 96 \ |
97 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | 97 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ |
98 DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\ | 98 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\ |
99 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | 99 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ |
100 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ | 100 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\ |
101 }\ | 101 }\ |
102 \ | 102 \ |
103 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | 103 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ |
104 OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\ | 104 OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\ |
105 }\ | 105 }\ |
106 \ | 106 \ |
107 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | 107 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ |
108 DECLARE_ALIGNED_16(uint8_t, half)[SIZE*SIZE];\ | 108 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\ |
109 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ | 109 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\ |
110 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\ | 110 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\ |
111 }\ | 111 }\ |
112 \ | 112 \ |
113 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | 113 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ |
114 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | 114 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ |
115 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | 115 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ |
116 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | 116 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ |
117 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ | 117 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ |
118 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ | 118 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ |
119 }\ | 119 }\ |
120 \ | 120 \ |
121 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | 121 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ |
122 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | 122 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ |
123 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | 123 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ |
124 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | 124 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ |
125 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ | 125 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ |
126 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ | 126 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ |
127 }\ | 127 }\ |
128 \ | 128 \ |
129 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | 129 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ |
130 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | 130 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ |
131 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | 131 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ |
132 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ | 132 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ |
133 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ | 133 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ |
134 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ | 134 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ |
135 }\ | 135 }\ |
136 \ | 136 \ |
137 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | 137 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ |
138 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | 138 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ |
139 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | 139 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ |
140 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ | 140 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ |
141 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ | 141 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ |
142 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ | 142 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\ |
143 }\ | 143 }\ |
144 \ | 144 \ |
145 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | 145 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ |
146 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | 146 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ |
147 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\ | 147 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\ |
148 }\ | 148 }\ |
149 \ | 149 \ |
150 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | 150 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ |
151 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | 151 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ |
152 DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\ | 152 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\ |
153 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | 153 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ |
154 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ | 154 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\ |
155 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ | 155 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ |
156 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\ | 156 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\ |
157 }\ | 157 }\ |
158 \ | 158 \ |
159 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | 159 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ |
160 DECLARE_ALIGNED_16(uint8_t, halfH)[SIZE*SIZE];\ | 160 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\ |
161 DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\ | 161 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\ |
162 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | 162 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ |
163 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ | 163 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\ |
164 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ | 164 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ |
165 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\ | 165 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\ |
166 }\ | 166 }\ |
167 \ | 167 \ |
168 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | 168 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ |
169 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | 169 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ |
170 DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\ | 170 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\ |
171 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | 171 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ |
172 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ | 172 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\ |
173 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ | 173 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ |
174 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ | 174 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ |
175 }\ | 175 }\ |
176 \ | 176 \ |
177 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ | 177 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\ |
178 DECLARE_ALIGNED_16(uint8_t, halfV)[SIZE*SIZE];\ | 178 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\ |
179 DECLARE_ALIGNED_16(uint8_t, halfHV)[SIZE*SIZE];\ | 179 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\ |
180 DECLARE_ALIGNED_16(int16_t, tmp)[SIZE*(SIZE+8)];\ | 180 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\ |
181 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ | 181 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ |
182 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ | 182 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ |
183 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ | 183 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ |
184 }\ | 184 }\ |
185 | 185 |
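The only substantive change in the hunk above is mechanical: every DECLARE_ALIGNED_16(type, name) declaration becomes DECLARE_ALIGNED(16, type, name), so the alignment is an explicit argument rather than part of the macro name. As a rough, self-contained sketch of what such a macro boils down to on GCC-style compilers (an illustration only, not FFmpeg's actual libavutil definition, which also covers MSVC and other compilers):

#include <stdint.h>
#include <string.h>

/* Hypothetical stand-in for the FFmpeg macro: on GCC-compatible compilers a
 * parameterized aligned attribute is enough; MSVC would need
 * __declspec(align(n)) instead. */
#define MY_DECLARE_ALIGNED(n, t, v) t __attribute__((aligned(n))) v

int main(void)
{
    /* Same shape as the temporaries in H264_MC above: a 16-byte aligned
     * scratch buffer that AltiVec vec_ld/vec_st can touch without
     * misalignment penalties. */
    MY_DECLARE_ALIGNED(16, uint8_t, half)[16 * 16];

    memset(half, 0, sizeof(half));
    return ((uintptr_t)half % 16 == 0) ? 0 : 1;   /* alignment sanity check */
}

With the alignment as an argument, one macro covers every alignment value instead of one macro per value.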
186 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, | 186 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, |
187 const uint8_t * src2, int dst_stride, | 187 const uint8_t * src2, int dst_stride, |
188 int src_stride1, int h) | 188 int src_stride1, int h) |
189 { | 189 { |
190 int i; | 190 int i; |
(...skipping 233 matching lines...)
424 vec_u8 bodyv = vec_perm( idstsum8, idstsum8, perm_stv );\ | 424 vec_u8 bodyv = vec_perm( idstsum8, idstsum8, perm_stv );\ |
425 vec_u8 edgelv = vec_perm( sel, zero_u8v, perm_stv ); \ | 425 vec_u8 edgelv = vec_perm( sel, zero_u8v, perm_stv ); \ |
426 lv = vec_sel( lv, bodyv, edgelv ); \ | 426 lv = vec_sel( lv, bodyv, edgelv ); \ |
427 vec_st( lv, 7, dest ); \ | 427 vec_st( lv, 7, dest ); \ |
428 hv = vec_ld( 0, dest ); \ | 428 hv = vec_ld( 0, dest ); \ |
429 edgehv = vec_perm( zero_u8v, sel, perm_stv ); \ | 429 edgehv = vec_perm( zero_u8v, sel, perm_stv ); \ |
430 hv = vec_sel( hv, bodyv, edgehv ); \ | 430 hv = vec_sel( hv, bodyv, edgehv ); \ |
431 vec_st( hv, 0, dest ); \ | 431 vec_st( hv, 0, dest ); \ |
432 } | 432 } |
433 | 433 |
434 void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) { | 434 static void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) { |
435 vec_s16 s0, s1, s2, s3, s4, s5, s6, s7; | 435 vec_s16 s0, s1, s2, s3, s4, s5, s6, s7; |
436 vec_s16 d0, d1, d2, d3, d4, d5, d6, d7; | 436 vec_s16 d0, d1, d2, d3, d4, d5, d6, d7; |
437 vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7; | 437 vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7; |
438 | 438 |
439 vec_u8 perm_ldv = vec_lvsl(0, dst); | 439 vec_u8 perm_ldv = vec_lvsl(0, dst); |
440 vec_u8 perm_stv = vec_lvsr(8, dst); | 440 vec_u8 perm_stv = vec_lvsr(8, dst); |
441 | 441 |
442 const vec_u16 onev = vec_splat_u16(1); | 442 const vec_u16 onev = vec_splat_u16(1); |
443 const vec_u16 twov = vec_splat_u16(2); | 443 const vec_u16 twov = vec_splat_u16(2); |
444 const vec_u16 sixv = vec_splat_u16(6); | 444 const vec_u16 sixv = vec_splat_u16(6); |
(...skipping 28 matching lines...)
473 ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel); | 473 ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel); |
474 ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel); | 474 ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel); |
475 ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel); | 475 ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel); |
476 } | 476 } |
477 | 477 |
478 static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *block, int stride, int size) | 478 static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *block, int stride, int size) |
479 { | 479 { |
480 vec_s16 dc16; | 480 vec_s16 dc16; |
481 vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner; | 481 vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner; |
482 LOAD_ZERO; | 482 LOAD_ZERO; |
483 DECLARE_ALIGNED_16(int, dc); | 483 DECLARE_ALIGNED(16, int, dc); |
484 int i; | 484 int i; |
485 | 485 |
486 dc = (block[0] + 32) >> 6; | 486 dc = (block[0] + 32) >> 6; |
487 dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1); | 487 dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1); |
488 | 488 |
489 if (size == 4) | 489 if (size == 4) |
490 dc16 = vec_sld(dc16, zero_s16v, 8); | 490 dc16 = vec_sld(dc16, zero_s16v, 8); |
491 dcplus = vec_packsu(dc16, zero_s16v); | 491 dcplus = vec_packsu(dc16, zero_s16v); |
492 dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v); | 492 dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v); |
493 | 493 |
(...skipping 89 matching lines...)
583 \ | 583 \ |
584 r0 = vec_mergeh(r4, r6); /*all set 0*/ \ | 584 r0 = vec_mergeh(r4, r6); /*all set 0*/ \ |
585 r1 = vec_mergel(r4, r6); /*all set 1*/ \ | 585 r1 = vec_mergel(r4, r6); /*all set 1*/ \ |
586 r2 = vec_mergeh(r5, r7); /*all set 2*/ \ | 586 r2 = vec_mergeh(r5, r7); /*all set 2*/ \ |
587 r3 = vec_mergel(r5, r7); /*all set 3*/ \ | 587 r3 = vec_mergel(r5, r7); /*all set 3*/ \ |
588 } | 588 } |
589 | 589 |
590 static inline void write16x4(uint8_t *dst, int dst_stride, | 590 static inline void write16x4(uint8_t *dst, int dst_stride, |
591 register vec_u8 r0, register vec_u8 r1, | 591 register vec_u8 r0, register vec_u8 r1, |
592 register vec_u8 r2, register vec_u8 r3) { | 592 register vec_u8 r2, register vec_u8 r3) { |
593 DECLARE_ALIGNED_16(unsigned char, result)[64]; | 593 DECLARE_ALIGNED(16, unsigned char, result)[64]; |
594 uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst; | 594 uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst; |
595 int int_dst_stride = dst_stride/4; | 595 int int_dst_stride = dst_stride/4; |
596 | 596 |
597 vec_st(r0, 0, result); | 597 vec_st(r0, 0, result); |
598 vec_st(r1, 16, result); | 598 vec_st(r1, 16, result); |
599 vec_st(r2, 32, result); | 599 vec_st(r2, 32, result); |
600 vec_st(r3, 48, result); | 600 vec_st(r3, 48, result); |
601 /* FIXME: there has to be a better way!!!! */ | 601 /* FIXME: there has to be a better way!!!! */ |
602 *dst_int = *src_int; | 602 *dst_int = *src_int; |
603 *(dst_int+ int_dst_stride) = *(src_int + 1); | 603 *(dst_int+ int_dst_stride) = *(src_int + 1); |
(...skipping 159 matching lines...)
763 delta = vec_subs(stage2, vec160); /* d */ \ | 763 delta = vec_subs(stage2, vec160); /* d */ \ |
764 deltaneg = vec_min(tc0masked, deltaneg); \ | 764 deltaneg = vec_min(tc0masked, deltaneg); \ |
765 delta = vec_min(tc0masked, delta); \ | 765 delta = vec_min(tc0masked, delta); \ |
766 p0 = vec_subs(p0, deltaneg); \ | 766 p0 = vec_subs(p0, deltaneg); \ |
767 q0 = vec_subs(q0, delta); \ | 767 q0 = vec_subs(q0, delta); \ |
768 p0 = vec_adds(p0, delta); \ | 768 p0 = vec_adds(p0, delta); \ |
769 q0 = vec_adds(q0, deltaneg); \ | 769 q0 = vec_adds(q0, deltaneg); \ |
770 } | 770 } |
771 | 771 |
772 #define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) { \ | 772 #define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) { \ |
773 DECLARE_ALIGNED_16(unsigned char, temp)[16]; \ | 773 DECLARE_ALIGNED(16, unsigned char, temp)[16]; \ |
774 register vec_u8 alphavec; \ | 774 register vec_u8 alphavec; \ |
775 register vec_u8 betavec; \ | 775 register vec_u8 betavec; \ |
776 register vec_u8 mask; \ | 776 register vec_u8 mask; \ |
777 register vec_u8 p1mask; \ | 777 register vec_u8 p1mask; \ |
778 register vec_u8 q1mask; \ | 778 register vec_u8 q1mask; \ |
779 register vector signed char tc0vec; \ | 779 register vector signed char tc0vec; \ |
780 register vec_u8 finaltc0; \ | 780 register vec_u8 finaltc0; \ |
781 register vec_u8 tc0masked; \ | 781 register vec_u8 tc0masked; \ |
782 register vec_u8 newp1; \ | 782 register vec_u8 newp1; \ |
783 register vec_u8 newq1; \ | 783 register vec_u8 newq1; \ |
(...skipping 59 matching lines...)
843 write16x4(pix-2, stride, line1, line2, line3, line4); | 843 write16x4(pix-2, stride, line1, line2, line3, line4); |
844 } | 844 } |
845 | 845 |
846 static av_always_inline | 846 static av_always_inline |
847 void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h) | 847 void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h) |
848 { | 848 { |
849 int y, aligned; | 849 int y, aligned; |
850 vec_u8 vblock; | 850 vec_u8 vblock; |
851 vec_s16 vtemp, vweight, voffset, v0, v1; | 851 vec_s16 vtemp, vweight, voffset, v0, v1; |
852 vec_u16 vlog2_denom; | 852 vec_u16 vlog2_denom; |
853 DECLARE_ALIGNED_16(int32_t, temp)[4]; | 853 DECLARE_ALIGNED(16, int32_t, temp)[4]; |
854 LOAD_ZERO; | 854 LOAD_ZERO; |
855 | 855 |
856 offset <<= log2_denom; | 856 offset <<= log2_denom; |
857 if(log2_denom) offset += 1<<(log2_denom-1); | 857 if(log2_denom) offset += 1<<(log2_denom-1); |
858 temp[0] = log2_denom; | 858 temp[0] = log2_denom; |
859 temp[1] = weight; | 859 temp[1] = weight; |
860 temp[2] = offset; | 860 temp[2] = offset; |
861 | 861 |
862 vtemp = (vec_s16)vec_ld(0, temp); | 862 vtemp = (vec_s16)vec_ld(0, temp); |
863 vlog2_denom = (vec_u16)vec_splat(vtemp, 1); | 863 vlog2_denom = (vec_u16)vec_splat(vtemp, 1); |
(...skipping 25 matching lines...)
889 } | 889 } |
890 | 890 |
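For orientation, weight_h264_WxH_altivec above vectorizes H.264 explicit weighted prediction; the visible setup folds the rounding term into offset so that the elided inner loop needs only one multiply-add-shift per pixel. A plain-C sketch of the per-pixel formula (my own scalar illustration of the standard formula, assuming the skipped loop follows it; it is not a transcription of the AltiVec code):

#include <stdint.h>

static uint8_t clip_uint8_w(int v)
{
    return v < 0 ? 0 : (v > 255 ? 255 : (uint8_t)v);
}

/* Scalar reference for unidirectional weighted prediction:
 * out = clip(((in * weight) >> log2_denom) + offset), with rounding.
 * Pre-shifting offset, as the setup above does, collapses this to a single
 * (in * weight + offset) >> log2_denom per pixel. */
static void weight_scalar(uint8_t *block, int stride, int log2_denom,
                          int weight, int offset, int w, int h)
{
    int x, y;

    offset <<= log2_denom;
    if (log2_denom)
        offset += 1 << (log2_denom - 1);   /* rounding term */

    for (y = 0; y < h; y++, block += stride)
        for (x = 0; x < w; x++)
            block[x] = clip_uint8_w((block[x] * weight + offset) >> log2_denom);
}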
891 static av_always_inline | 891 static av_always_inline |
892 void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, | 892 void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, |
893 int weightd, int weights, int offset, int w, int h) | 893 int weightd, int weights, int offset, int w, int h) |
894 { | 894 { |
895 int y, dst_aligned, src_aligned; | 895 int y, dst_aligned, src_aligned; |
896 vec_u8 vsrc, vdst; | 896 vec_u8 vsrc, vdst; |
897 vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3; | 897 vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3; |
898 vec_u16 vlog2_denom; | 898 vec_u16 vlog2_denom; |
899 DECLARE_ALIGNED_16(int32_t, temp)[4]; | 899 DECLARE_ALIGNED(16, int32_t, temp)[4]; |
900 LOAD_ZERO; | 900 LOAD_ZERO; |
901 | 901 |
902 offset = ((offset + 1) | 1) << log2_denom; | 902 offset = ((offset + 1) | 1) << log2_denom; |
903 temp[0] = log2_denom+1; | 903 temp[0] = log2_denom+1; |
904 temp[1] = weights; | 904 temp[1] = weights; |
905 temp[2] = weightd; | 905 temp[2] = weightd; |
906 temp[3] = offset; | 906 temp[3] = offset; |
907 | 907 |
908 vtemp = (vec_s16)vec_ld(0, temp); | 908 vtemp = (vec_s16)vec_ld(0, temp); |
909 vlog2_denom = (vec_u16)vec_splat(vtemp, 1); | 909 vlog2_denom = (vec_u16)vec_splat(vtemp, 1); |
(...skipping 102 matching lines...)
1012 c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec; | 1012 c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec; |
1013 c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec; | 1013 c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec; |
1014 c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec; | 1014 c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec; |
1015 c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec; | 1015 c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec; |
1016 c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec; | 1016 c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec; |
1017 c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec; | 1017 c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec; |
1018 c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec; | 1018 c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec; |
1019 c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec; | 1019 c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec; |
1020 } | 1020 } |
1021 } | 1021 } |
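As a closing reference for h264_idct_dc_add_internal above: the vector code splats dc = (block[0] + 32) >> 6 and keeps both a dcplus and a dcminus vector so that a possibly negative DC term can be applied with unsigned saturating adds and subtracts. A scalar equivalent of that DC-only add path (an illustrative sketch assuming the skipped loop adds dc to every pixel with clamping, which is what the saturating packs suggest):

#include <stdint.h>

static uint8_t clip_uint8_dc(int v)
{
    return v < 0 ? 0 : (v > 255 ? 255 : (uint8_t)v);
}

/* Scalar sketch of a DC-only IDCT add: every pixel of the size x size block
 * receives the same rounded DC term, clamped to 8 bits.  The AltiVec routine
 * gets the same clamping from vec_adds(dst, dcplus) / vec_subs(dst, dcminus). */
static void idct_dc_add_scalar(uint8_t *dst, const int16_t *block,
                               int stride, int size)
{
    int dc = (block[0] + 32) >> 6;
    int x, y;

    for (y = 0; y < size; y++, dst += stride)
        for (x = 0; x < size; x++)
            dst[x] = clip_uint8_dc(dst[x] + dc);
}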