OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
120 "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \ | 120 "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \ |
121 "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \ | 121 "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \ |
122 "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \ | 122 "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \ |
123 "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \ | 123 "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \ |
124 "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \ | 124 "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \ |
125 "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \ | 125 "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \ |
126 "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \ | 126 "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \ |
127 "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \ | 127 "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \ |
128 "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \ | 128 "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \ |
129 | 129 |
// Review note: these rows are populated in the OLD column only, i.e. this
// copy of RGBTOUV_SETUP_REG (and its TODO) is deleted from this position.
// The macro expands to six movi instructions that fill v20-v25 with constants.
// Each 16-bit immediate equals the magnitude of the coefficient named in its
// row comment multiplied by 64 (e.g. 0.875 * 64 = 56, 0.7344 * 64 = 47).
// v25 gets 0x80 in all 16 bytes, so each 16-bit lane reads 0x8080.
// NOTE(review): the immediates are all positive although several comments
// quote negative coefficients; presumably the sign is applied by the
// instructions that consume these registers - not visible here, confirm.
130 // TODO(fbarchard): Use structure for constants like 32 bit code. | |
131 #define RGBTOUV_SETUP_REG \ | |
132 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ | |
133 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ | |
134 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ | |
135 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ | |
136 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ | |
137 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ | |
138 | |
// I444ToARGBRow_NEON, shown as side-by-side diff rows (OLD column | NEW column).
// The asm expands YUVTORGB_SETUP once, then loops at label 1. Each pass expands
// READYUV444 and YUVTORGB(v22, v21, v20), subtracts 8 from width (%w4), and
// stores 32 bytes to dst_argb (%3) with a post-incrementing st4 of v20-v23.
// b.gt repeats the loop while the subs result is greater than zero, so the
// body always runs at least once and a width that is not a multiple of 8
// still gets a full final pass.
// Operands: %0 src_y, %1 src_u, %2 src_v, %3 dst_argb, %4 width. The four
// yuvconstants members are passed as named read-only register inputs.
// The only change in this function is where v23 is filled with 255 (the row
// comment marks it as A).
// NOTE(review): hoisting that fill out of the loop relies on READYUV444 and
// YUVTORGB never writing v23; their full bodies are not visible here - confirm.
139 #ifdef HAS_I444TOARGBROW_NEON | 130 #ifdef HAS_I444TOARGBROW_NEON |
140 void I444ToARGBRow_NEON(const uint8* src_y, | 131 void I444ToARGBRow_NEON(const uint8* src_y, |
141 const uint8* src_u, | 132 const uint8* src_u, |
142 const uint8* src_v, | 133 const uint8* src_v, |
143 uint8* dst_argb, | 134 uint8* dst_argb, |
144 const struct YuvConstants* yuvconstants, | 135 const struct YuvConstants* yuvconstants, |
145 int width) { | 136 int width) { |
146 asm volatile ( | 137 asm volatile ( |
147 YUVTORGB_SETUP | 138 YUVTORGB_SETUP |
// NEW-only row: v23 is set to 255 once, before the loop label.
| 139 "movi v23.8b, #255 \n" /* A */ |
148 "1: \n" | 140 "1: \n" |
149 READYUV444 | 141 READYUV444 |
150 YUVTORGB(v22, v21, v20) | 142 YUVTORGB(v22, v21, v20) |
151 "subs %w4, %w4, #8 \n" | 143 "subs %w4, %w4, #8 \n" |
// OLD-only row: the same fill used to execute on every loop pass.
152 "movi v23.8b, #255 \n" /* A */ | |
153 MEMACCESS(3) | 144 MEMACCESS(3) |
154 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 145 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
155 "b.gt 1b \n" | 146 "b.gt 1b \n" |
156 : "+r"(src_y), // %0 | 147 : "+r"(src_y), // %0 |
157 "+r"(src_u), // %1 | 148 "+r"(src_u), // %1 |
158 "+r"(src_v), // %2 | 149 "+r"(src_v), // %2 |
159 "+r"(dst_argb), // %3 | 150 "+r"(dst_argb), // %3 |
160 "+r"(width) // %4 | 151 "+r"(width) // %4 |
161 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 152 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
162 [kUVToG]"r"(&yuvconstants->kUVToG), | 153 [kUVToG]"r"(&yuvconstants->kUVToG), |
163 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 154 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
164 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 155 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
165 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 156 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
166 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 157 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
167 ); | 158 ); |
168 } | 159 } |
169 #endif // HAS_I444TOARGBROW_NEON | 160 #endif // HAS_I444TOARGBROW_NEON |
170 | 161 |
// I422ToARGBRow_NEON, shown as side-by-side diff rows (OLD column | NEW column).
// Two changes are visible: the TODO row below exists on the OLD side only, and
// the fill of v23 with 255 (marked A) moves from inside the loop to before it.
// Loop shape: YUVTORGB_SETUP once, then per pass READYUV422 and
// YUVTORGB(v22, v21, v20), width (%w4) reduced by 8, and a post-incrementing
// st4 of v20-v23 writing 32 bytes to dst_argb (%3). b.gt repeats while the
// subs result is greater than zero, so the body runs at least once.
// Operands: %0 src_y, %1 src_u, %2 src_v, %3 dst_argb, %4 width.
// NOTE(review): the hoist assumes READYUV422 and YUVTORGB leave v23 untouched;
// their full bodies are not visible in this view - confirm.
171 // TODO(fbarchard): Switch to Matrix version of this function. | |
172 #ifdef HAS_I422TOARGBROW_NEON | 162 #ifdef HAS_I422TOARGBROW_NEON |
173 void I422ToARGBRow_NEON(const uint8* src_y, | 163 void I422ToARGBRow_NEON(const uint8* src_y, |
174 const uint8* src_u, | 164 const uint8* src_u, |
175 const uint8* src_v, | 165 const uint8* src_v, |
176 uint8* dst_argb, | 166 uint8* dst_argb, |
177 const struct YuvConstants* yuvconstants, | 167 const struct YuvConstants* yuvconstants, |
178 int width) { | 168 int width) { |
179 asm volatile ( | 169 asm volatile ( |
180 YUVTORGB_SETUP | 170 YUVTORGB_SETUP |
// NEW-only row: v23 is set to 255 once, before the loop label.
| 171 "movi v23.8b, #255 \n" /* A */ |
181 "1: \n" | 172 "1: \n" |
182 READYUV422 | 173 READYUV422 |
183 YUVTORGB(v22, v21, v20) | 174 YUVTORGB(v22, v21, v20) |
184 "subs %w4, %w4, #8 \n" | 175 "subs %w4, %w4, #8 \n" |
// OLD-only row: the same fill used to execute on every loop pass.
185 "movi v23.8b, #255 \n" /* A */ | |
186 MEMACCESS(3) | 176 MEMACCESS(3) |
187 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 177 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
188 "b.gt 1b \n" | 178 "b.gt 1b \n" |
189 : "+r"(src_y), // %0 | 179 : "+r"(src_y), // %0 |
190 "+r"(src_u), // %1 | 180 "+r"(src_u), // %1 |
191 "+r"(src_v), // %2 | 181 "+r"(src_v), // %2 |
192 "+r"(dst_argb), // %3 | 182 "+r"(dst_argb), // %3 |
193 "+r"(width) // %4 | 183 "+r"(width) // %4 |
194 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 184 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
195 [kUVToG]"r"(&yuvconstants->kUVToG), | 185 [kUVToG]"r"(&yuvconstants->kUVToG), |
196 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 186 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
197 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 187 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
198 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 188 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
199 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 189 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
200 ); | 190 ); |
201 } | 191 } |
202 #endif // HAS_I422TOARGBROW_NEON | 192 #endif // HAS_I422TOARGBROW_NEON |
203 | 193 |
// I422AlphaToARGBRow_NEON: every row is populated in the NEW column only, so
// the whole function is an addition in this diff.
// It takes an extra src_a pointer and has no constant fill of v23. Instead,
// each loop pass loads 8 bytes from src_a (%3) into v23 with a
// post-incrementing ld1 and stores v23 as the fourth st4 register.
// Per pass: READYUV422, YUVTORGB(v22, v21, v20), the ld1 from src_a, width
// (%w5) reduced by 8, then a post-incrementing st4 of v20-v23 writing 32 bytes
// to dst_argb (%4). b.gt repeats while the subs result is greater than zero,
// so the body runs at least once.
// Operands: %0 src_y, %1 src_u, %2 src_v, %3 src_a, %4 dst_argb, %5 width.
| 194 #ifdef HAS_I422ALPHATOARGBROW_NEON |
| 195 void I422AlphaToARGBRow_NEON(const uint8* src_y, |
| 196 const uint8* src_u, |
| 197 const uint8* src_v, |
| 198 const uint8* src_a, |
| 199 uint8* dst_argb, |
| 200 const struct YuvConstants* yuvconstants, |
| 201 int width) { |
| 202 asm volatile ( |
| 203 YUVTORGB_SETUP |
| 204 "1: \n" |
| 205 READYUV422 |
| 206 YUVTORGB(v22, v21, v20) |
| 207 MEMACCESS(3) |
// v23 is reloaded from src_a on every pass; the pointer advances by 8 bytes.
| 208 "ld1 {v23.8b}, [%3], #8 \n" |
| 209 "subs %w5, %w5, #8 \n" |
| 210 MEMACCESS(4) |
| 211 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%4], #32 \n" |
| 212 "b.gt 1b \n" |
| 213 : "+r"(src_y), // %0 |
| 214 "+r"(src_u), // %1 |
| 215 "+r"(src_v), // %2 |
| 216 "+r"(src_a), // %3 |
| 217 "+r"(dst_argb), // %4 |
| 218 "+r"(width) // %5 |
| 219 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 220 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 221 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 222 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 223 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 224 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 225 ); |
| 226 } |
| 227 #endif // HAS_I422ALPHATOARGBROW_NEON |
| 228 |
// I411ToARGBRow_NEON, shown as side-by-side diff rows (OLD column | NEW column).
// Loop shape: YUVTORGB_SETUP once, then per pass READYUV411 and
// YUVTORGB(v22, v21, v20), width (%w4) reduced by 8, and a post-incrementing
// st4 of v20-v23 writing 32 bytes to dst_argb (%3). b.gt repeats while the
// subs result is greater than zero, so the body runs at least once.
// Operands: %0 src_y, %1 src_u, %2 src_v, %3 dst_argb, %4 width.
// The only change is that the fill of v23 with 255 (marked A) moves from
// inside the loop to just before the loop label.
// NOTE(review): the hoist assumes READYUV411 and YUVTORGB leave v23 untouched;
// their full bodies are not visible in this view - confirm.
204 #ifdef HAS_I411TOARGBROW_NEON | 229 #ifdef HAS_I411TOARGBROW_NEON |
205 void I411ToARGBRow_NEON(const uint8* src_y, | 230 void I411ToARGBRow_NEON(const uint8* src_y, |
206 const uint8* src_u, | 231 const uint8* src_u, |
207 const uint8* src_v, | 232 const uint8* src_v, |
208 uint8* dst_argb, | 233 uint8* dst_argb, |
209 const struct YuvConstants* yuvconstants, | 234 const struct YuvConstants* yuvconstants, |
210 int width) { | 235 int width) { |
211 asm volatile ( | 236 asm volatile ( |
212 YUVTORGB_SETUP | 237 YUVTORGB_SETUP |
// NEW-only row: v23 is set to 255 once, before the loop label.
| 238 "movi v23.8b, #255 \n" /* A */ |
213 "1: \n" | 239 "1: \n" |
214 READYUV411 | 240 READYUV411 |
215 YUVTORGB(v22, v21, v20) | 241 YUVTORGB(v22, v21, v20) |
216 "subs %w4, %w4, #8 \n" | 242 "subs %w4, %w4, #8 \n" |
// OLD-only row: the same fill used to execute on every loop pass.
217 "movi v23.8b, #255 \n" /* A */ | |
218 MEMACCESS(3) | 243 MEMACCESS(3) |
219 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 244 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
220 "b.gt 1b \n" | 245 "b.gt 1b \n" |
221 : "+r"(src_y), // %0 | 246 : "+r"(src_y), // %0 |
222 "+r"(src_u), // %1 | 247 "+r"(src_u), // %1 |
223 "+r"(src_v), // %2 | 248 "+r"(src_v), // %2 |
224 "+r"(dst_argb), // %3 | 249 "+r"(dst_argb), // %3 |
225 "+r"(width) // %4 | 250 "+r"(width) // %4 |
226 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 251 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
227 [kUVToG]"r"(&yuvconstants->kUVToG), | 252 [kUVToG]"r"(&yuvconstants->kUVToG), |
228 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 253 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
229 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 254 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
230 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 255 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
231 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 256 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
232 ); | 257 ); |
233 } | 258 } |
234 #endif // HAS_I411TOARGBROW_NEON | 259 #endif // HAS_I411TOARGBROW_NEON |
235 | 260 |
236 #ifdef HAS_I422TORGBAROW_NEON | 261 #ifdef HAS_I422TORGBAROW_NEON |
237 void I422ToRGBARow_NEON(const uint8* src_y, | 262 void I422ToRGBARow_NEON(const uint8* src_y, |
238 const uint8* src_u, | 263 const uint8* src_u, |
239 const uint8* src_v, | 264 const uint8* src_v, |
240 uint8* dst_rgba, | 265 uint8* dst_rgba, |
241 const struct YuvConstants* yuvconstants, | 266 const struct YuvConstants* yuvconstants, |
242 int width) { | 267 int width) { |
243 asm volatile ( | 268 asm volatile ( |
244 YUVTORGB_SETUP | 269 YUVTORGB_SETUP |
| 270 "movi v20.8b, #255 \n" /* A */ |
245 "1: \n" | 271 "1: \n" |
246 READYUV422 | 272 READYUV422 |
247 YUVTORGB(v23, v22, v21) | 273 YUVTORGB(v23, v22, v21) |
248 "subs %w4, %w4, #8 \n" | 274 "subs %w4, %w4, #8 \n" |
249 "movi v20.8b, #255 \n" /* A */ | |
250 MEMACCESS(3) | 275 MEMACCESS(3) |
251 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 276 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
252 "b.gt 1b \n" | 277 "b.gt 1b \n" |
253 : "+r"(src_y), // %0 | 278 : "+r"(src_y), // %0 |
254 "+r"(src_u), // %1 | 279 "+r"(src_u), // %1 |
255 "+r"(src_v), // %2 | 280 "+r"(src_v), // %2 |
256 "+r"(dst_rgba), // %3 | 281 "+r"(dst_rgba), // %3 |
257 "+r"(width) // %4 | 282 "+r"(width) // %4 |
258 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 283 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
259 [kUVToG]"r"(&yuvconstants->kUVToG), | 284 [kUVToG]"r"(&yuvconstants->kUVToG), |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
346 | 371 |
347 #ifdef HAS_I422TOARGB1555ROW_NEON | 372 #ifdef HAS_I422TOARGB1555ROW_NEON |
348 void I422ToARGB1555Row_NEON(const uint8* src_y, | 373 void I422ToARGB1555Row_NEON(const uint8* src_y, |
349 const uint8* src_u, | 374 const uint8* src_u, |
350 const uint8* src_v, | 375 const uint8* src_v, |
351 uint8* dst_argb1555, | 376 uint8* dst_argb1555, |
352 const struct YuvConstants* yuvconstants, | 377 const struct YuvConstants* yuvconstants, |
353 int width) { | 378 int width) { |
354 asm volatile ( | 379 asm volatile ( |
355 YUVTORGB_SETUP | 380 YUVTORGB_SETUP |
| 381 "movi v23.8b, #255 \n" |
356 "1: \n" | 382 "1: \n" |
357 READYUV422 | 383 READYUV422 |
358 YUVTORGB(v22, v21, v20) | 384 YUVTORGB(v22, v21, v20) |
359 "subs %w4, %w4, #8 \n" | 385 "subs %w4, %w4, #8 \n" |
360 "movi v23.8b, #255 \n" | |
361 ARGBTOARGB1555 | 386 ARGBTOARGB1555 |
362 MEMACCESS(3) | 387 MEMACCESS(3) |
363 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565. | 388 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565. |
364 "b.gt 1b \n" | 389 "b.gt 1b \n" |
365 : "+r"(src_y), // %0 | 390 : "+r"(src_y), // %0 |
366 "+r"(src_u), // %1 | 391 "+r"(src_u), // %1 |
367 "+r"(src_v), // %2 | 392 "+r"(src_v), // %2 |
368 "+r"(dst_argb1555), // %3 | 393 "+r"(dst_argb1555), // %3 |
369 "+r"(width) // %4 | 394 "+r"(width) // %4 |
370 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 395 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
421 } | 446 } |
422 #endif // HAS_I422TOARGB4444ROW_NEON | 447 #endif // HAS_I422TOARGB4444ROW_NEON |
423 | 448 |
424 #ifdef HAS_I400TOARGBROW_NEON | 449 #ifdef HAS_I400TOARGBROW_NEON |
425 void I400ToARGBRow_NEON(const uint8* src_y, | 450 void I400ToARGBRow_NEON(const uint8* src_y, |
426 uint8* dst_argb, | 451 uint8* dst_argb, |
427 int width) { | 452 int width) { |
428 int64 width64 = (int64)(width); | 453 int64 width64 = (int64)(width); |
429 asm volatile ( | 454 asm volatile ( |
430 YUVTORGB_SETUP | 455 YUVTORGB_SETUP |
| 456 "movi v23.8b, #255 \n" |
431 "1: \n" | 457 "1: \n" |
432 READYUV400 | 458 READYUV400 |
433 YUVTORGB(v22, v21, v20) | 459 YUVTORGB(v22, v21, v20) |
434 "subs %w2, %w2, #8 \n" | 460 "subs %w2, %w2, #8 \n" |
435 "movi v23.8b, #255 \n" | |
436 MEMACCESS(1) | 461 MEMACCESS(1) |
437 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" | 462 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" |
438 "b.gt 1b \n" | 463 "b.gt 1b \n" |
439 : "+r"(src_y), // %0 | 464 : "+r"(src_y), // %0 |
440 "+r"(dst_argb), // %1 | 465 "+r"(dst_argb), // %1 |
441 "+r"(width64) // %2 | 466 "+r"(width64) // %2 |
442 : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB), | 467 : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB), |
443 [kUVToG]"r"(&kYuvI601Constants.kUVToG), | 468 [kUVToG]"r"(&kYuvI601Constants.kUVToG), |
444 [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR), | 469 [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR), |
445 [kYToRgb]"r"(&kYuvI601Constants.kYToRgb) | 470 [kYToRgb]"r"(&kYuvI601Constants.kYToRgb) |
(...skipping 28 matching lines...) Expand all Loading... |
474 #endif // HAS_J400TOARGBROW_NEON | 499 #endif // HAS_J400TOARGBROW_NEON |
475 | 500 |
// NV12ToARGBRow_NEON, shown as side-by-side diff rows (OLD column | NEW column).
// Takes two source pointers (src_y and src_uv) rather than three.
// Loop shape: YUVTORGB_SETUP once, then per pass READNV12 and
// YUVTORGB(v22, v21, v20), width (%w3) reduced by 8, and a post-incrementing
// st4 of v20-v23 writing 32 bytes to dst_argb (%2). b.gt repeats while the
// subs result is greater than zero, so the body runs at least once.
// Operands: %0 src_y, %1 src_uv, %2 dst_argb, %3 width.
// The only change is that the fill of v23 with 255 moves from inside the loop
// to just before the loop label.
// NOTE(review): the hoist assumes READNV12 and YUVTORGB leave v23 untouched;
// their full bodies are not visible in this view - confirm.
476 #ifdef HAS_NV12TOARGBROW_NEON | 501 #ifdef HAS_NV12TOARGBROW_NEON |
477 void NV12ToARGBRow_NEON(const uint8* src_y, | 502 void NV12ToARGBRow_NEON(const uint8* src_y, |
478 const uint8* src_uv, | 503 const uint8* src_uv, |
479 uint8* dst_argb, | 504 uint8* dst_argb, |
480 const struct YuvConstants* yuvconstants, | 505 const struct YuvConstants* yuvconstants, |
481 int width) { | 506 int width) { |
482 asm volatile ( | 507 asm volatile ( |
483 YUVTORGB_SETUP | 508 YUVTORGB_SETUP |
// NEW-only row: v23 is set to 255 once, before the loop label.
| 509 "movi v23.8b, #255 \n" |
484 "1: \n" | 510 "1: \n" |
485 READNV12 | 511 READNV12 |
486 YUVTORGB(v22, v21, v20) | 512 YUVTORGB(v22, v21, v20) |
487 "subs %w3, %w3, #8 \n" | 513 "subs %w3, %w3, #8 \n" |
// OLD-only row: the same fill used to execute on every loop pass.
488 "movi v23.8b, #255 \n" | |
489 MEMACCESS(2) | 514 MEMACCESS(2) |
490 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" | 515 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" |
491 "b.gt 1b \n" | 516 "b.gt 1b \n" |
492 : "+r"(src_y), // %0 | 517 : "+r"(src_y), // %0 |
493 "+r"(src_uv), // %1 | 518 "+r"(src_uv), // %1 |
494 "+r"(dst_argb), // %2 | 519 "+r"(dst_argb), // %2 |
495 "+r"(width) // %3 | 520 "+r"(width) // %3 |
496 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 521 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
497 [kUVToG]"r"(&yuvconstants->kUVToG), | 522 [kUVToG]"r"(&yuvconstants->kUVToG), |
498 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 523 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
499 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 524 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
500 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 525 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
501 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 526 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
502 ); | 527 ); |
503 } | 528 } |
504 #endif // HAS_NV12TOARGBROW_NEON | 529 #endif // HAS_NV12TOARGBROW_NEON |
505 | 530 |
506 #ifdef HAS_NV12TOARGBROW_NEON | 531 #ifdef HAS_NV12TOARGBROW_NEON |
507 void NV21ToARGBRow_NEON(const uint8* src_y, | 532 void NV21ToARGBRow_NEON(const uint8* src_y, |
508 const uint8* src_vu, | 533 const uint8* src_vu, |
509 uint8* dst_argb, | 534 uint8* dst_argb, |
510 const struct YuvConstants* yuvconstants, | 535 const struct YuvConstants* yuvconstants, |
511 int width) { | 536 int width) { |
512 asm volatile ( | 537 asm volatile ( |
513 YUVTORGB_SETUP | 538 YUVTORGB_SETUP |
| 539 "movi v23.8b, #255 \n" |
514 "1: \n" | 540 "1: \n" |
515 READNV21 | 541 READNV21 |
516 YUVTORGB(v22, v21, v20) | 542 YUVTORGB(v22, v21, v20) |
517 "subs %w3, %w3, #8 \n" | 543 "subs %w3, %w3, #8 \n" |
518 "movi v23.8b, #255 \n" | |
519 MEMACCESS(2) | 544 MEMACCESS(2) |
520 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" | 545 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" |
521 "b.gt 1b \n" | 546 "b.gt 1b \n" |
522 : "+r"(src_y), // %0 | 547 : "+r"(src_y), // %0 |
523 "+r"(src_vu), // %1 | 548 "+r"(src_vu), // %1 |
524 "+r"(dst_argb), // %2 | 549 "+r"(dst_argb), // %2 |
525 "+r"(width) // %3 | 550 "+r"(width) // %3 |
526 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 551 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
527 [kUVToG]"r"(&yuvconstants->kUVToG), | 552 [kUVToG]"r"(&yuvconstants->kUVToG), |
528 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 553 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
564 #endif // HAS_NV12TORGB565ROW_NEON | 589 #endif // HAS_NV12TORGB565ROW_NEON |
565 | 590 |
// YUY2ToARGBRow_NEON, shown as side-by-side diff rows (OLD column | NEW column).
// Takes a single source pointer (src_yuy2).
// The loop counter operand is width64, an int64 copy of width, although the
// asm addresses it through the 32-bit %w2 form.
// Loop shape: YUVTORGB_SETUP once, then per pass READYUY2 and
// YUVTORGB(v22, v21, v20), the counter reduced by 8, and a post-incrementing
// st4 of v20-v23 writing 32 bytes to dst_argb (%1). b.gt repeats while the
// subs result is greater than zero, so the body runs at least once.
// Operands: %0 src_yuy2, %1 dst_argb, %2 width64.
// The only change is that the fill of v23 with 255 moves from inside the loop
// to just before the loop label.
// NOTE(review): the hoist assumes READYUY2 and YUVTORGB leave v23 untouched;
// their full bodies are not visible in this view - confirm.
566 #ifdef HAS_YUY2TOARGBROW_NEON | 591 #ifdef HAS_YUY2TOARGBROW_NEON |
567 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, | 592 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, |
568 uint8* dst_argb, | 593 uint8* dst_argb, |
569 const struct YuvConstants* yuvconstants, | 594 const struct YuvConstants* yuvconstants, |
570 int width) { | 595 int width) { |
571 int64 width64 = (int64)(width); | 596 int64 width64 = (int64)(width); |
572 asm volatile ( | 597 asm volatile ( |
573 YUVTORGB_SETUP | 598 YUVTORGB_SETUP |
// NEW-only row: v23 is set to 255 once, before the loop label.
| 599 "movi v23.8b, #255 \n" |
574 "1: \n" | 600 "1: \n" |
575 READYUY2 | 601 READYUY2 |
576 YUVTORGB(v22, v21, v20) | 602 YUVTORGB(v22, v21, v20) |
577 "subs %w2, %w2, #8 \n" | 603 "subs %w2, %w2, #8 \n" |
// OLD-only row: the same fill used to execute on every loop pass.
578 "movi v23.8b, #255 \n" | |
579 MEMACCESS(1) | 604 MEMACCESS(1) |
580 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" | 605 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" |
581 "b.gt 1b \n" | 606 "b.gt 1b \n" |
582 : "+r"(src_yuy2), // %0 | 607 : "+r"(src_yuy2), // %0 |
583 "+r"(dst_argb), // %1 | 608 "+r"(dst_argb), // %1 |
584 "+r"(width64) // %2 | 609 "+r"(width64) // %2 |
585 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 610 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
586 [kUVToG]"r"(&yuvconstants->kUVToG), | 611 [kUVToG]"r"(&yuvconstants->kUVToG), |
587 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 612 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
588 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 613 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
589 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 614 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
590 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 615 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
591 ); | 616 ); |
592 } | 617 } |
593 #endif // HAS_YUY2TOARGBROW_NEON | 618 #endif // HAS_YUY2TOARGBROW_NEON |
594 | 619 |
595 #ifdef HAS_UYVYTOARGBROW_NEON | 620 #ifdef HAS_UYVYTOARGBROW_NEON |
596 void UYVYToARGBRow_NEON(const uint8* src_uyvy, | 621 void UYVYToARGBRow_NEON(const uint8* src_uyvy, |
597 uint8* dst_argb, | 622 uint8* dst_argb, |
598 const struct YuvConstants* yuvconstants, | 623 const struct YuvConstants* yuvconstants, |
599 int width) { | 624 int width) { |
600 int64 width64 = (int64)(width); | 625 int64 width64 = (int64)(width); |
601 asm volatile ( | 626 asm volatile ( |
602 YUVTORGB_SETUP | 627 YUVTORGB_SETUP |
| 628 "movi v23.8b, #255 \n" |
603 "1: \n" | 629 "1: \n" |
604 READUYVY | 630 READUYVY |
605 YUVTORGB(v22, v21, v20) | 631 YUVTORGB(v22, v21, v20) |
606 "subs %w2, %w2, #8 \n" | 632 "subs %w2, %w2, #8 \n" |
607 "movi v23.8b, #255 \n" | |
608 MEMACCESS(1) | 633 MEMACCESS(1) |
609 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" | 634 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" |
610 "b.gt 1b \n" | 635 "b.gt 1b \n" |
611 : "+r"(src_uyvy), // %0 | 636 : "+r"(src_uyvy), // %0 |
612 "+r"(dst_argb), // %1 | 637 "+r"(dst_argb), // %1 |
613 "+r"(width64) // %2 | 638 "+r"(width64) // %2 |
614 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 639 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
615 [kUVToG]"r"(&yuvconstants->kUVToG), | 640 [kUVToG]"r"(&yuvconstants->kUVToG), |
616 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 641 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
617 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 642 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
(...skipping 819 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1437 "+r"(dst_u), // %1 | 1462 "+r"(dst_u), // %1 |
1438 "+r"(dst_v), // %2 | 1463 "+r"(dst_v), // %2 |
1439 "+r"(width) // %3 | 1464 "+r"(width) // %3 |
1440 : | 1465 : |
1441 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", | 1466 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", |
1442 "v24", "v25", "v26", "v27", "v28", "v29" | 1467 "v24", "v25", "v26", "v27", "v28", "v29" |
1443 ); | 1468 ); |
1444 } | 1469 } |
1445 #endif // HAS_ARGBTOUV444ROW_NEON | 1470 #endif // HAS_ARGBTOUV444ROW_NEON |
1446 | 1471 |
// Review note: these rows are populated in the NEW column only, i.e.
// RGBTOUV_SETUP_REG is added at this position, directly ahead of
// ARGBToUV422Row_NEON, which expands it as its first asm line.
// The macro expands to six movi instructions that fill v20-v25 with constants.
// Each 16-bit immediate equals the magnitude of the coefficient named in its
// row comment multiplied by 64 (e.g. 0.875 * 64 = 56, 0.7344 * 64 = 47).
// v25 gets 0x80 in all 16 bytes, so each 16-bit lane reads 0x8080.
// NOTE(review): the immediates are all positive although several comments
// quote negative coefficients; presumably the sign is applied by the
// instructions that consume these registers - not visible here, confirm.
| 1472 #define RGBTOUV_SETUP_REG \ |
| 1473 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ |
| 1474 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ |
| 1475 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ |
| 1476 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ |
| 1477 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ |
| 1478 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ |
| 1479 |
1447 // 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16. | 1480 // 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16. |
1448 #ifdef HAS_ARGBTOUV422ROW_NEON | 1481 #ifdef HAS_ARGBTOUV422ROW_NEON |
1449 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | 1482 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, |
1450 int width) { | 1483 int width) { |
1451 asm volatile ( | 1484 asm volatile ( |
1452 RGBTOUV_SETUP_REG | 1485 RGBTOUV_SETUP_REG |
1453 "1: \n" | 1486 "1: \n" |
1454 MEMACCESS(0) | 1487 MEMACCESS(0) |
1455 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. | 1488 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. |
1456 | 1489 |
(...skipping 1538 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2995 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List | 3028 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List |
2996 ); | 3029 ); |
2997 } | 3030 } |
2998 #endif // HAS_SOBELYROW_NEON | 3031 #endif // HAS_SOBELYROW_NEON |
2999 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) | 3032 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) |
3000 | 3033 |
3001 #ifdef __cplusplus | 3034 #ifdef __cplusplus |
3002 } // extern "C" | 3035 } // extern "C" |
3003 } // namespace libyuv | 3036 } // namespace libyuv |
3004 #endif | 3037 #endif |
OLD | NEW |