Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(24)

Side by Side Diff: source/row_neon64.cc

Issue 1345643002: neon64 use yuvconstants (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_neon.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
122 "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \ 122 "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \
123 "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \ 123 "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \
124 "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \ 124 "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \
125 "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \ 125 "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \
126 "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \ 126 "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \
127 "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \ 127 "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \
128 "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \ 128 "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \
129 "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \ 129 "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \
130 "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \ 130 "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \
131 131
132 // YUV to RGB conversion constants. 132 // BT.601 YUV to RGB reference
133 // R = (Y - 16) * 1.164 - V * -1.596
134 // G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813
135 // B = (Y - 16) * 1.164 - U * -2.018
136
133 // Y contribution to R,G,B. Scale and bias. 137 // Y contribution to R,G,B. Scale and bias.
138 // TODO(fbarchard): Consider moving constants into a common header.
134 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ 139 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
135 #define YGB 1160 /* 1.164 * 64 * 16 - adjusted for even error distribution */ 140 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
136 141
137 // U and V contributions to R,G,B. 142 // U and V contributions to R,G,B.
138 #define UB -128 /* -min(128, round(2.018 * 64)) */ 143 #define UB -128 /* max(-128, round(-2.018 * 64)) */
139 #define UG 25 /* -round(-0.391 * 64) */ 144 #define UG 25 /* round(0.391 * 64) */
140 #define VG 52 /* -round(-0.813 * 64) */ 145 #define VG 52 /* round(0.813 * 64) */
141 #define VR -102 /* -round(1.596 * 64) */ 146 #define VR -102 /* round(-1.596 * 64) */
142 147
143 // Bias values to subtract 16 from Y and 128 from U and V. 148 // Bias values to subtract 16 from Y and 128 from U and V.
144 #define BB (UB * 128 - YGB) 149 #define BB (UB * 128 + YGB)
145 #define BG (UG * 128 + VG * 128 - YGB) 150 #define BG (UG * 128 + VG * 128 + YGB)
146 #define BR (VR * 128 - YGB) 151 #define BR (VR * 128 + YGB)
147 152
148 static vec16 kUVBiasBGR = { BB, BG, BR, 0, 0, 0, 0, 0 }; 153 YuvConstantsNEON SIMD_ALIGNED(kYuvConstantsNEON) = {
149 static vec32 kYToRgb = { 0x0101 * YG, 0, 0, 0 }; 154 { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 },
155 { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 },
156 { BB, BG, BR, 0, 0, 0, 0, 0 },
157 { 0x0101 * YG, 0, 0, 0 }
158 };
150 159
151 #undef YG 160 #undef YG
152 #undef YGB 161 #undef YGB
153 #undef UB 162 #undef UB
154 #undef UG 163 #undef UG
155 #undef VG 164 #undef VG
156 #undef VR 165 #undef VR
157 #undef BB 166 #undef BB
158 #undef BG 167 #undef BG
159 #undef BR 168 #undef BR
160 169
170 // TODO(fbarchard): Use structure for constants like 32 bit code.
161 #define RGBTOUV_SETUP_REG \ 171 #define RGBTOUV_SETUP_REG \
162 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ 172 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
163 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ 173 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
164 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ 174 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \
165 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ 175 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \
166 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ 176 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
167 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ 177 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
168 178
169
170 #ifdef HAS_I444TOARGBROW_NEON 179 #ifdef HAS_I444TOARGBROW_NEON
171 void I444ToARGBRow_NEON(const uint8* src_y, 180 void I444ToARGBRow_NEON(const uint8* src_y,
172 const uint8* src_u, 181 const uint8* src_u,
173 const uint8* src_v, 182 const uint8* src_v,
174 uint8* dst_argb, 183 uint8* dst_argb,
175 int width) { 184 int width) {
176 asm volatile ( 185 asm volatile (
177 YUV422TORGB_SETUP_REG 186 YUV422TORGB_SETUP_REG
178 "1: \n" 187 "1: \n"
179 READYUV444 188 READYUV444
180 YUV422TORGB(v22, v21, v20) 189 YUV422TORGB(v22, v21, v20)
181 "subs %w4, %w4, #8 \n" 190 "subs %w4, %w4, #8 \n"
182 "movi v23.8b, #255 \n" /* A */ 191 "movi v23.8b, #255 \n" /* A */
183 MEMACCESS(3) 192 MEMACCESS(3)
184 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 193 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
185 "b.gt 1b \n" 194 "b.gt 1b \n"
186 : "+r"(src_y), // %0 195 : "+r"(src_y), // %0
187 "+r"(src_u), // %1 196 "+r"(src_u), // %1
188 "+r"(src_v), // %2 197 "+r"(src_v), // %2
189 "+r"(dst_argb), // %3 198 "+r"(dst_argb), // %3
190 "+r"(width) // %4 199 "+r"(width) // %4
191 : [kUVBiasBGR]"r"(&kUVBiasBGR), 200 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
192 [kYToRgb]"r"(&kYToRgb) 201 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
193 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 202 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
194 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 203 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
195 ); 204 );
196 } 205 }
197 #endif // HAS_I444TOARGBROW_NEON 206 #endif // HAS_I444TOARGBROW_NEON
198 207
199 #ifdef HAS_I422TOARGBROW_NEON 208 #ifdef HAS_I422TOARGBROW_NEON
200 void I422ToARGBRow_NEON(const uint8* src_y, 209 void I422ToARGBRow_NEON(const uint8* src_y,
201 const uint8* src_u, 210 const uint8* src_u,
202 const uint8* src_v, 211 const uint8* src_v,
203 uint8* dst_argb, 212 uint8* dst_argb,
204 int width) { 213 int width) {
205 asm volatile ( 214 asm volatile (
206 YUV422TORGB_SETUP_REG 215 YUV422TORGB_SETUP_REG
207 "1: \n" 216 "1: \n"
208 READYUV422 217 READYUV422
209 YUV422TORGB(v22, v21, v20) 218 YUV422TORGB(v22, v21, v20)
210 "subs %w4, %w4, #8 \n" 219 "subs %w4, %w4, #8 \n"
211 "movi v23.8b, #255 \n" /* A */ 220 "movi v23.8b, #255 \n" /* A */
212 MEMACCESS(3) 221 MEMACCESS(3)
213 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 222 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
214 "b.gt 1b \n" 223 "b.gt 1b \n"
215 : "+r"(src_y), // %0 224 : "+r"(src_y), // %0
216 "+r"(src_u), // %1 225 "+r"(src_u), // %1
217 "+r"(src_v), // %2 226 "+r"(src_v), // %2
218 "+r"(dst_argb), // %3 227 "+r"(dst_argb), // %3
219 "+r"(width) // %4 228 "+r"(width) // %4
220 : [kUVBiasBGR]"r"(&kUVBiasBGR), 229 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
221 [kYToRgb]"r"(&kYToRgb) 230 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
222 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 231 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
223 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 232 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
224 ); 233 );
225 } 234 }
226 #endif // HAS_I422TOARGBROW_NEON 235 #endif // HAS_I422TOARGBROW_NEON
227 236
228 #ifdef HAS_I411TOARGBROW_NEON 237 #ifdef HAS_I411TOARGBROW_NEON
229 void I411ToARGBRow_NEON(const uint8* src_y, 238 void I411ToARGBRow_NEON(const uint8* src_y,
230 const uint8* src_u, 239 const uint8* src_u,
231 const uint8* src_v, 240 const uint8* src_v,
232 uint8* dst_argb, 241 uint8* dst_argb,
233 int width) { 242 int width) {
234 asm volatile ( 243 asm volatile (
235 YUV422TORGB_SETUP_REG 244 YUV422TORGB_SETUP_REG
236 "1: \n" 245 "1: \n"
237 READYUV411 246 READYUV411
238 YUV422TORGB(v22, v21, v20) 247 YUV422TORGB(v22, v21, v20)
239 "subs %w4, %w4, #8 \n" 248 "subs %w4, %w4, #8 \n"
240 "movi v23.8b, #255 \n" /* A */ 249 "movi v23.8b, #255 \n" /* A */
241 MEMACCESS(3) 250 MEMACCESS(3)
242 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 251 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
243 "b.gt 1b \n" 252 "b.gt 1b \n"
244 : "+r"(src_y), // %0 253 : "+r"(src_y), // %0
245 "+r"(src_u), // %1 254 "+r"(src_u), // %1
246 "+r"(src_v), // %2 255 "+r"(src_v), // %2
247 "+r"(dst_argb), // %3 256 "+r"(dst_argb), // %3
248 "+r"(width) // %4 257 "+r"(width) // %4
249 : [kUVBiasBGR]"r"(&kUVBiasBGR), 258 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
250 [kYToRgb]"r"(&kYToRgb) 259 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
251 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 260 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
252 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 261 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
253 ); 262 );
254 } 263 }
255 #endif // HAS_I411TOARGBROW_NEON 264 #endif // HAS_I411TOARGBROW_NEON
256 265
257 #ifdef HAS_I422TOBGRAROW_NEON 266 #ifdef HAS_I422TOBGRAROW_NEON
258 void I422ToBGRARow_NEON(const uint8* src_y, 267 void I422ToBGRARow_NEON(const uint8* src_y,
259 const uint8* src_u, 268 const uint8* src_u,
260 const uint8* src_v, 269 const uint8* src_v,
261 uint8* dst_bgra, 270 uint8* dst_bgra,
262 int width) { 271 int width) {
263 asm volatile ( 272 asm volatile (
264 YUV422TORGB_SETUP_REG 273 YUV422TORGB_SETUP_REG
265 "1: \n" 274 "1: \n"
266 READYUV422 275 READYUV422
267 YUV422TORGB(v21, v22, v23) 276 YUV422TORGB(v21, v22, v23)
268 "subs %w4, %w4, #8 \n" 277 "subs %w4, %w4, #8 \n"
269 "movi v20.8b, #255 \n" /* A */ 278 "movi v20.8b, #255 \n" /* A */
270 MEMACCESS(3) 279 MEMACCESS(3)
271 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 280 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
272 "b.gt 1b \n" 281 "b.gt 1b \n"
273 : "+r"(src_y), // %0 282 : "+r"(src_y), // %0
274 "+r"(src_u), // %1 283 "+r"(src_u), // %1
275 "+r"(src_v), // %2 284 "+r"(src_v), // %2
276 "+r"(dst_bgra), // %3 285 "+r"(dst_bgra), // %3
277 "+r"(width) // %4 286 "+r"(width) // %4
278 : [kUVBiasBGR]"r"(&kUVBiasBGR), 287 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
279 [kYToRgb]"r"(&kYToRgb) 288 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
280 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 289 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
281 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 290 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
282 ); 291 );
283 } 292 }
284 #endif // HAS_I422TOBGRAROW_NEON 293 #endif // HAS_I422TOBGRAROW_NEON
285 294
286 #ifdef HAS_I422TOABGRROW_NEON 295 #ifdef HAS_I422TOABGRROW_NEON
287 void I422ToABGRRow_NEON(const uint8* src_y, 296 void I422ToABGRRow_NEON(const uint8* src_y,
288 const uint8* src_u, 297 const uint8* src_u,
289 const uint8* src_v, 298 const uint8* src_v,
290 uint8* dst_abgr, 299 uint8* dst_abgr,
291 int width) { 300 int width) {
292 asm volatile ( 301 asm volatile (
293 YUV422TORGB_SETUP_REG 302 YUV422TORGB_SETUP_REG
294 "1: \n" 303 "1: \n"
295 READYUV422 304 READYUV422
296 YUV422TORGB(v20, v21, v22) 305 YUV422TORGB(v20, v21, v22)
297 "subs %w4, %w4, #8 \n" 306 "subs %w4, %w4, #8 \n"
298 "movi v23.8b, #255 \n" /* A */ 307 "movi v23.8b, #255 \n" /* A */
299 MEMACCESS(3) 308 MEMACCESS(3)
300 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 309 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
301 "b.gt 1b \n" 310 "b.gt 1b \n"
302 : "+r"(src_y), // %0 311 : "+r"(src_y), // %0
303 "+r"(src_u), // %1 312 "+r"(src_u), // %1
304 "+r"(src_v), // %2 313 "+r"(src_v), // %2
305 "+r"(dst_abgr), // %3 314 "+r"(dst_abgr), // %3
306 "+r"(width) // %4 315 "+r"(width) // %4
307 : [kUVBiasBGR]"r"(&kUVBiasBGR), 316 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
308 [kYToRgb]"r"(&kYToRgb) 317 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
309 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 318 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
310 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 319 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
311 ); 320 );
312 } 321 }
313 #endif // HAS_I422TOABGRROW_NEON 322 #endif // HAS_I422TOABGRROW_NEON
314 323
315 #ifdef HAS_I422TORGBAROW_NEON 324 #ifdef HAS_I422TORGBAROW_NEON
316 void I422ToRGBARow_NEON(const uint8* src_y, 325 void I422ToRGBARow_NEON(const uint8* src_y,
317 const uint8* src_u, 326 const uint8* src_u,
318 const uint8* src_v, 327 const uint8* src_v,
319 uint8* dst_rgba, 328 uint8* dst_rgba,
320 int width) { 329 int width) {
321 asm volatile ( 330 asm volatile (
322 YUV422TORGB_SETUP_REG 331 YUV422TORGB_SETUP_REG
323 "1: \n" 332 "1: \n"
324 READYUV422 333 READYUV422
325 YUV422TORGB(v23, v22, v21) 334 YUV422TORGB(v23, v22, v21)
326 "subs %w4, %w4, #8 \n" 335 "subs %w4, %w4, #8 \n"
327 "movi v20.8b, #255 \n" /* A */ 336 "movi v20.8b, #255 \n" /* A */
328 MEMACCESS(3) 337 MEMACCESS(3)
329 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 338 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
330 "b.gt 1b \n" 339 "b.gt 1b \n"
331 : "+r"(src_y), // %0 340 : "+r"(src_y), // %0
332 "+r"(src_u), // %1 341 "+r"(src_u), // %1
333 "+r"(src_v), // %2 342 "+r"(src_v), // %2
334 "+r"(dst_rgba), // %3 343 "+r"(dst_rgba), // %3
335 "+r"(width) // %4 344 "+r"(width) // %4
336 : [kUVBiasBGR]"r"(&kUVBiasBGR), 345 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
337 [kYToRgb]"r"(&kYToRgb) 346 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
338 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 347 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
339 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 348 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
340 ); 349 );
341 } 350 }
342 #endif // HAS_I422TORGBAROW_NEON 351 #endif // HAS_I422TORGBAROW_NEON
343 352
344 #ifdef HAS_I422TORGB24ROW_NEON 353 #ifdef HAS_I422TORGB24ROW_NEON
345 void I422ToRGB24Row_NEON(const uint8* src_y, 354 void I422ToRGB24Row_NEON(const uint8* src_y,
346 const uint8* src_u, 355 const uint8* src_u,
347 const uint8* src_v, 356 const uint8* src_v,
348 uint8* dst_rgb24, 357 uint8* dst_rgb24,
349 int width) { 358 int width) {
350 asm volatile ( 359 asm volatile (
351 YUV422TORGB_SETUP_REG 360 YUV422TORGB_SETUP_REG
352 "1: \n" 361 "1: \n"
353 READYUV422 362 READYUV422
354 YUV422TORGB(v22, v21, v20) 363 YUV422TORGB(v22, v21, v20)
355 "subs %w4, %w4, #8 \n" 364 "subs %w4, %w4, #8 \n"
356 MEMACCESS(3) 365 MEMACCESS(3)
357 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" 366 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
358 "b.gt 1b \n" 367 "b.gt 1b \n"
359 : "+r"(src_y), // %0 368 : "+r"(src_y), // %0
360 "+r"(src_u), // %1 369 "+r"(src_u), // %1
361 "+r"(src_v), // %2 370 "+r"(src_v), // %2
362 "+r"(dst_rgb24), // %3 371 "+r"(dst_rgb24), // %3
363 "+r"(width) // %4 372 "+r"(width) // %4
364 : [kUVBiasBGR]"r"(&kUVBiasBGR), 373 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
365 [kYToRgb]"r"(&kYToRgb) 374 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
366 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 375 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
367 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 376 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
368 ); 377 );
369 } 378 }
370 #endif // HAS_I422TORGB24ROW_NEON 379 #endif // HAS_I422TORGB24ROW_NEON
371 380
372 #ifdef HAS_I422TORAWROW_NEON 381 #ifdef HAS_I422TORAWROW_NEON
373 void I422ToRAWRow_NEON(const uint8* src_y, 382 void I422ToRAWRow_NEON(const uint8* src_y,
374 const uint8* src_u, 383 const uint8* src_u,
375 const uint8* src_v, 384 const uint8* src_v,
376 uint8* dst_raw, 385 uint8* dst_raw,
377 int width) { 386 int width) {
378 asm volatile ( 387 asm volatile (
379 YUV422TORGB_SETUP_REG 388 YUV422TORGB_SETUP_REG
380 "1: \n" 389 "1: \n"
381 READYUV422 390 READYUV422
382 YUV422TORGB(v20, v21, v22) 391 YUV422TORGB(v20, v21, v22)
383 "subs %w4, %w4, #8 \n" 392 "subs %w4, %w4, #8 \n"
384 MEMACCESS(3) 393 MEMACCESS(3)
385 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" 394 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
386 "b.gt 1b \n" 395 "b.gt 1b \n"
387 : "+r"(src_y), // %0 396 : "+r"(src_y), // %0
388 "+r"(src_u), // %1 397 "+r"(src_u), // %1
389 "+r"(src_v), // %2 398 "+r"(src_v), // %2
390 "+r"(dst_raw), // %3 399 "+r"(dst_raw), // %3
391 "+r"(width) // %4 400 "+r"(width) // %4
392 : [kUVBiasBGR]"r"(&kUVBiasBGR), 401 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
393 [kYToRgb]"r"(&kYToRgb) 402 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
394 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 403 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
395 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 404 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
396 ); 405 );
397 } 406 }
398 #endif // HAS_I422TORAWROW_NEON 407 #endif // HAS_I422TORAWROW_NEON
399 408
400 #define ARGBTORGB565 \ 409 #define ARGBTORGB565 \
401 "shll v0.8h, v22.8b, #8 \n" /* R */ \ 410 "shll v0.8h, v22.8b, #8 \n" /* R */ \
402 "shll v20.8h, v20.8b, #8 \n" /* B */ \ 411 "shll v20.8h, v20.8b, #8 \n" /* B */ \
403 "shll v21.8h, v21.8b, #8 \n" /* G */ \ 412 "shll v21.8h, v21.8b, #8 \n" /* G */ \
(...skipping 14 matching lines...) Expand all
418 "subs %w4, %w4, #8 \n" 427 "subs %w4, %w4, #8 \n"
419 ARGBTORGB565 428 ARGBTORGB565
420 MEMACCESS(3) 429 MEMACCESS(3)
421 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565. 430 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565.
422 "b.gt 1b \n" 431 "b.gt 1b \n"
423 : "+r"(src_y), // %0 432 : "+r"(src_y), // %0
424 "+r"(src_u), // %1 433 "+r"(src_u), // %1
425 "+r"(src_v), // %2 434 "+r"(src_v), // %2
426 "+r"(dst_rgb565), // %3 435 "+r"(dst_rgb565), // %3
427 "+r"(width) // %4 436 "+r"(width) // %4
428 : [kUVBiasBGR]"r"(&kUVBiasBGR), 437 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
429 [kYToRgb]"r"(&kYToRgb) 438 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
430 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 439 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
431 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 440 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
432 ); 441 );
433 } 442 }
434 #endif // HAS_I422TORGB565ROW_NEON 443 #endif // HAS_I422TORGB565ROW_NEON
435 444
436 #define ARGBTOARGB1555 \ 445 #define ARGBTOARGB1555 \
437 "shll v0.8h, v23.8b, #8 \n" /* A */ \ 446 "shll v0.8h, v23.8b, #8 \n" /* A */ \
438 "shll v22.8h, v22.8b, #8 \n" /* R */ \ 447 "shll v22.8h, v22.8b, #8 \n" /* R */ \
439 "shll v20.8h, v20.8b, #8 \n" /* B */ \ 448 "shll v20.8h, v20.8b, #8 \n" /* B */ \
(...skipping 17 matching lines...) Expand all
457 "movi v23.8b, #255 \n" 466 "movi v23.8b, #255 \n"
458 ARGBTOARGB1555 467 ARGBTOARGB1555
459 MEMACCESS(3) 468 MEMACCESS(3)
460 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB1555. 469 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB1555.
461 "b.gt 1b \n" 470 "b.gt 1b \n"
462 : "+r"(src_y), // %0 471 : "+r"(src_y), // %0
463 "+r"(src_u), // %1 472 "+r"(src_u), // %1
464 "+r"(src_v), // %2 473 "+r"(src_v), // %2
465 "+r"(dst_argb1555), // %3 474 "+r"(dst_argb1555), // %3
466 "+r"(width) // %4 475 "+r"(width) // %4
467 : [kUVBiasBGR]"r"(&kUVBiasBGR), 476 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
468 [kYToRgb]"r"(&kYToRgb) 477 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
469 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 478 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
470 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 479 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
471 ); 480 );
472 } 481 }
473 #endif // HAS_I422TOARGB1555ROW_NEON 482 #endif // HAS_I422TOARGB1555ROW_NEON
474 483
475 #define ARGBTOARGB4444 \ 484 #define ARGBTOARGB4444 \
476 /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \ 485 /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \
477 "ushr v20.8b, v20.8b, #4 \n" /* B */ \ 486 "ushr v20.8b, v20.8b, #4 \n" /* B */ \
478 "bic v21.8b, v21.8b, v4.8b \n" /* G */ \ 487 "bic v21.8b, v21.8b, v4.8b \n" /* G */ \
(...skipping 19 matching lines...) Expand all
498 "movi v23.8b, #255 \n" 507 "movi v23.8b, #255 \n"
499 ARGBTOARGB4444 508 ARGBTOARGB4444
500 MEMACCESS(3) 509 MEMACCESS(3)
501 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444. 510 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444.
502 "b.gt 1b \n" 511 "b.gt 1b \n"
503 : "+r"(src_y), // %0 512 : "+r"(src_y), // %0
504 "+r"(src_u), // %1 513 "+r"(src_u), // %1
505 "+r"(src_v), // %2 514 "+r"(src_v), // %2
506 "+r"(dst_argb4444), // %3 515 "+r"(dst_argb4444), // %3
507 "+r"(width) // %4 516 "+r"(width) // %4
508 : [kUVBiasBGR]"r"(&kUVBiasBGR), 517 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
509 [kYToRgb]"r"(&kYToRgb) 518 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
510 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 519 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
511 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 520 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
512 ); 521 );
513 } 522 }
514 #endif // HAS_I422TOARGB4444ROW_NEON 523 #endif // HAS_I422TOARGB4444ROW_NEON
515 524
516 #ifdef HAS_I400TOARGBROW_NEON 525 #ifdef HAS_I400TOARGBROW_NEON
517 void I400ToARGBRow_NEON(const uint8* src_y, 526 void I400ToARGBRow_NEON(const uint8* src_y,
518 uint8* dst_argb, 527 uint8* dst_argb,
519 int width) { 528 int width) {
520 int64 width64 = (int64)(width); 529 int64 width64 = (int64)(width);
521 asm volatile ( 530 asm volatile (
522 YUV422TORGB_SETUP_REG 531 YUV422TORGB_SETUP_REG
523 "1: \n" 532 "1: \n"
524 READYUV400 533 READYUV400
525 YUV422TORGB(v22, v21, v20) 534 YUV422TORGB(v22, v21, v20)
526 "subs %w2, %w2, #8 \n" 535 "subs %w2, %w2, #8 \n"
527 "movi v23.8b, #255 \n" 536 "movi v23.8b, #255 \n"
528 MEMACCESS(1) 537 MEMACCESS(1)
529 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" 538 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
530 "b.gt 1b \n" 539 "b.gt 1b \n"
531 : "+r"(src_y), // %0 540 : "+r"(src_y), // %0
532 "+r"(dst_argb), // %1 541 "+r"(dst_argb), // %1
533 "+r"(width64) // %2 542 "+r"(width64) // %2
534 : [kUVBiasBGR]"r"(&kUVBiasBGR), 543 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
535 [kYToRgb]"r"(&kYToRgb) 544 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
536 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 545 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
537 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 546 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
538 ); 547 );
539 } 548 }
540 #endif // HAS_I400TOARGBROW_NEON 549 #endif // HAS_I400TOARGBROW_NEON
541 550
542 #ifdef HAS_J400TOARGBROW_NEON 551 #ifdef HAS_J400TOARGBROW_NEON
543 void J400ToARGBRow_NEON(const uint8* src_y, 552 void J400ToARGBRow_NEON(const uint8* src_y,
544 uint8* dst_argb, 553 uint8* dst_argb,
545 int width) { 554 int width) {
(...skipping 29 matching lines...) Expand all
575 YUV422TORGB(v22, v21, v20) 584 YUV422TORGB(v22, v21, v20)
576 "subs %w3, %w3, #8 \n" 585 "subs %w3, %w3, #8 \n"
577 "movi v23.8b, #255 \n" 586 "movi v23.8b, #255 \n"
578 MEMACCESS(2) 587 MEMACCESS(2)
579 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" 588 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
580 "b.gt 1b \n" 589 "b.gt 1b \n"
581 : "+r"(src_y), // %0 590 : "+r"(src_y), // %0
582 "+r"(src_uv), // %1 591 "+r"(src_uv), // %1
583 "+r"(dst_argb), // %2 592 "+r"(dst_argb), // %2
584 "+r"(width) // %3 593 "+r"(width) // %3
585 : [kUVBiasBGR]"r"(&kUVBiasBGR), 594 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
586 [kYToRgb]"r"(&kYToRgb) 595 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
587 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 596 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
588 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 597 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
589 ); 598 );
590 } 599 }
591 #endif // HAS_NV12TOARGBROW_NEON 600 #endif // HAS_NV12TOARGBROW_NEON
592 601
593 #ifdef HAS_NV21TOARGBROW_NEON 602 #ifdef HAS_NV21TOARGBROW_NEON
594 void NV21ToARGBRow_NEON(const uint8* src_y, 603 void NV21ToARGBRow_NEON(const uint8* src_y,
595 const uint8* src_uv, 604 const uint8* src_uv,
596 uint8* dst_argb, 605 uint8* dst_argb,
597 int width) { 606 int width) {
598 asm volatile ( 607 asm volatile (
599 YUV422TORGB_SETUP_REG 608 YUV422TORGB_SETUP_REG
600 "1: \n" 609 "1: \n"
601 READNV21 610 READNV21
602 YUV422TORGB(v22, v21, v20) 611 YUV422TORGB(v22, v21, v20)
603 "subs %w3, %w3, #8 \n" 612 "subs %w3, %w3, #8 \n"
604 "movi v23.8b, #255 \n" 613 "movi v23.8b, #255 \n"
605 MEMACCESS(2) 614 MEMACCESS(2)
606 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" 615 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
607 "b.gt 1b \n" 616 "b.gt 1b \n"
608 : "+r"(src_y), // %0 617 : "+r"(src_y), // %0
609 "+r"(src_uv), // %1 618 "+r"(src_uv), // %1
610 "+r"(dst_argb), // %2 619 "+r"(dst_argb), // %2
611 "+r"(width) // %3 620 "+r"(width) // %3
612 : [kUVBiasBGR]"r"(&kUVBiasBGR), 621 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
613 [kYToRgb]"r"(&kYToRgb) 622 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
614 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 623 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
615 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 624 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
616 ); 625 );
617 } 626 }
618 #endif // HAS_NV21TOARGBROW_NEON 627 #endif // HAS_NV21TOARGBROW_NEON
619 628
620 #ifdef HAS_NV12TORGB565ROW_NEON 629 #ifdef HAS_NV12TORGB565ROW_NEON
621 void NV12ToRGB565Row_NEON(const uint8* src_y, 630 void NV12ToRGB565Row_NEON(const uint8* src_y,
622 const uint8* src_uv, 631 const uint8* src_uv,
623 uint8* dst_rgb565, 632 uint8* dst_rgb565,
624 int width) { 633 int width) {
625 asm volatile ( 634 asm volatile (
626 YUV422TORGB_SETUP_REG 635 YUV422TORGB_SETUP_REG
627 "1: \n" 636 "1: \n"
628 READNV12 637 READNV12
629 YUV422TORGB(v22, v21, v20) 638 YUV422TORGB(v22, v21, v20)
630 "subs %w3, %w3, #8 \n" 639 "subs %w3, %w3, #8 \n"
631 ARGBTORGB565 640 ARGBTORGB565
632 MEMACCESS(2) 641 MEMACCESS(2)
633 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565. 642 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565.
634 "b.gt 1b \n" 643 "b.gt 1b \n"
635 : "+r"(src_y), // %0 644 : "+r"(src_y), // %0
636 "+r"(src_uv), // %1 645 "+r"(src_uv), // %1
637 "+r"(dst_rgb565), // %2 646 "+r"(dst_rgb565), // %2
638 "+r"(width) // %3 647 "+r"(width) // %3
639 : [kUVBiasBGR]"r"(&kUVBiasBGR), 648 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
640 [kYToRgb]"r"(&kYToRgb) 649 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
641 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 650 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
642 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 651 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
643 ); 652 );
644 } 653 }
645 #endif // HAS_NV12TORGB565ROW_NEON 654 #endif // HAS_NV12TORGB565ROW_NEON
646 655
647 #ifdef HAS_NV21TORGB565ROW_NEON 656 #ifdef HAS_NV21TORGB565ROW_NEON
648 void NV21ToRGB565Row_NEON(const uint8* src_y, 657 void NV21ToRGB565Row_NEON(const uint8* src_y,
649 const uint8* src_uv, 658 const uint8* src_uv,
650 uint8* dst_rgb565, 659 uint8* dst_rgb565,
651 int width) { 660 int width) {
652 asm volatile ( 661 asm volatile (
653 YUV422TORGB_SETUP_REG 662 YUV422TORGB_SETUP_REG
654 "1: \n" 663 "1: \n"
655 READNV21 664 READNV21
656 YUV422TORGB(v22, v21, v20) 665 YUV422TORGB(v22, v21, v20)
657 "subs %w3, %w3, #8 \n" 666 "subs %w3, %w3, #8 \n"
658 ARGBTORGB565 667 ARGBTORGB565
659 MEMACCESS(2) 668 MEMACCESS(2)
660 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565. 669 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565.
661 "b.gt 1b \n" 670 "b.gt 1b \n"
662 : "+r"(src_y), // %0 671 : "+r"(src_y), // %0
663 "+r"(src_uv), // %1 672 "+r"(src_uv), // %1
664 "+r"(dst_rgb565), // %2 673 "+r"(dst_rgb565), // %2
665 "+r"(width) // %3 674 "+r"(width) // %3
666 : [kUVBiasBGR]"r"(&kUVBiasBGR), 675 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
667 [kYToRgb]"r"(&kYToRgb) 676 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
668 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 677 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
669 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 678 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
670 ); 679 );
671 } 680 }
672 #endif // HAS_NV21TORGB565ROW_NEON 681 #endif // HAS_NV21TORGB565ROW_NEON
673 682
674 #ifdef HAS_YUY2TOARGBROW_NEON 683 #ifdef HAS_YUY2TOARGBROW_NEON
675 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, 684 void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
676 uint8* dst_argb, 685 uint8* dst_argb,
677 int width) { 686 int width) {
678 int64 width64 = (int64)(width); 687 int64 width64 = (int64)(width);
679 asm volatile ( 688 asm volatile (
680 YUV422TORGB_SETUP_REG 689 YUV422TORGB_SETUP_REG
681 "1: \n" 690 "1: \n"
682 READYUY2 691 READYUY2
683 YUV422TORGB(v22, v21, v20) 692 YUV422TORGB(v22, v21, v20)
684 "subs %w2, %w2, #8 \n" 693 "subs %w2, %w2, #8 \n"
685 "movi v23.8b, #255 \n" 694 "movi v23.8b, #255 \n"
686 MEMACCESS(1) 695 MEMACCESS(1)
687 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" 696 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
688 "b.gt 1b \n" 697 "b.gt 1b \n"
689 : "+r"(src_yuy2), // %0 698 : "+r"(src_yuy2), // %0
690 "+r"(dst_argb), // %1 699 "+r"(dst_argb), // %1
691 "+r"(width64) // %2 700 "+r"(width64) // %2
692 : [kUVBiasBGR]"r"(&kUVBiasBGR), 701 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
693 [kYToRgb]"r"(&kYToRgb) 702 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
694 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 703 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
695 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 704 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
696 ); 705 );
697 } 706 }
698 #endif // HAS_YUY2TOARGBROW_NEON 707 #endif // HAS_YUY2TOARGBROW_NEON
699 708
700 #ifdef HAS_UYVYTOARGBROW_NEON 709 #ifdef HAS_UYVYTOARGBROW_NEON
701 void UYVYToARGBRow_NEON(const uint8* src_uyvy, 710 void UYVYToARGBRow_NEON(const uint8* src_uyvy,
702 uint8* dst_argb, 711 uint8* dst_argb,
703 int width) { 712 int width) {
704 int64 width64 = (int64)(width); 713 int64 width64 = (int64)(width);
705 asm volatile ( 714 asm volatile (
706 YUV422TORGB_SETUP_REG 715 YUV422TORGB_SETUP_REG
707 "1: \n" 716 "1: \n"
708 READUYVY 717 READUYVY
709 YUV422TORGB(v22, v21, v20) 718 YUV422TORGB(v22, v21, v20)
710 "subs %w2, %w2, #8 \n" 719 "subs %w2, %w2, #8 \n"
711 "movi v23.8b, #255 \n" 720 "movi v23.8b, #255 \n"
712 MEMACCESS(1) 721 MEMACCESS(1)
713 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" 722 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n"
714 "b.gt 1b \n" 723 "b.gt 1b \n"
715 : "+r"(src_uyvy), // %0 724 : "+r"(src_uyvy), // %0
716 "+r"(dst_argb), // %1 725 "+r"(dst_argb), // %1
717 "+r"(width64) // %2 726 "+r"(width64) // %2
718 : [kUVBiasBGR]"r"(&kUVBiasBGR), 727 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
719 [kYToRgb]"r"(&kYToRgb) 728 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
720 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 729 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
721 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 730 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
722 ); 731 );
723 } 732 }
724 #endif // HAS_UYVYTOARGBROW_NEON 733 #endif // HAS_UYVYTOARGBROW_NEON
725 734
726 // Reads 16 pairs of UV and writes even values to dst_u and odd to dst_v. 735 // Reads 16 pairs of UV and writes even values to dst_u and odd to dst_v.
727 #ifdef HAS_SPLITUVROW_NEON 736 #ifdef HAS_SPLITUVROW_NEON
728 void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 737 void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
729 int width) { 738 int width) {
(...skipping 2348 matching lines...) Expand 10 before | Expand all | Expand 10 after
3078 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 3087 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
3079 ); 3088 );
3080 } 3089 }
3081 #endif // HAS_SOBELYROW_NEON 3090 #endif // HAS_SOBELYROW_NEON
3082 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) 3091 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
3083 3092
3084 #ifdef __cplusplus 3093 #ifdef __cplusplus
3085 } // extern "C" 3094 } // extern "C"
3086 } // namespace libyuv 3095 } // namespace libyuv
3087 #endif 3096 #endif
OLDNEW
« no previous file with comments | « source/row_neon.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698