| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 120 "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \ | 120 "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \ |
| 121 "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \ | 121 "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \ |
| 122 "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \ | 122 "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \ |
| 123 "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \ | 123 "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \ |
| 124 "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \ | 124 "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \ |
| 125 "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \ | 125 "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \ |
| 126 "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \ | 126 "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \ |
| 127 "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \ | 127 "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \ |
| 128 "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \ | 128 "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \ |
| 129 | 129 |
| 130 // TODO(fbarchard): Use structure for constants like 32 bit code. | |
| 131 #define RGBTOUV_SETUP_REG \ | |
| 132 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ | |
| 133 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ | |
| 134 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ | |
| 135 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ | |
| 136 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ | |
| 137 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ | |
| 138 | |
| 139 #ifdef HAS_I444TOARGBROW_NEON | 130 #ifdef HAS_I444TOARGBROW_NEON |
| 140 void I444ToARGBRow_NEON(const uint8* src_y, | 131 void I444ToARGBRow_NEON(const uint8* src_y, |
| 141 const uint8* src_u, | 132 const uint8* src_u, |
| 142 const uint8* src_v, | 133 const uint8* src_v, |
| 143 uint8* dst_argb, | 134 uint8* dst_argb, |
| 144 const struct YuvConstants* yuvconstants, | 135 const struct YuvConstants* yuvconstants, |
| 145 int width) { | 136 int width) { |
| 146 asm volatile ( | 137 asm volatile ( |
| 147 YUVTORGB_SETUP | 138 YUVTORGB_SETUP |
| 139 "movi v23.8b, #255 \n" /* A */ |
| 148 "1: \n" | 140 "1: \n" |
| 149 READYUV444 | 141 READYUV444 |
| 150 YUVTORGB(v22, v21, v20) | 142 YUVTORGB(v22, v21, v20) |
| 151 "subs %w4, %w4, #8 \n" | 143 "subs %w4, %w4, #8 \n" |
| 152 "movi v23.8b, #255 \n" /* A */ | |
| 153 MEMACCESS(3) | 144 MEMACCESS(3) |
| 154 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 145 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
| 155 "b.gt 1b \n" | 146 "b.gt 1b \n" |
| 156 : "+r"(src_y), // %0 | 147 : "+r"(src_y), // %0 |
| 157 "+r"(src_u), // %1 | 148 "+r"(src_u), // %1 |
| 158 "+r"(src_v), // %2 | 149 "+r"(src_v), // %2 |
| 159 "+r"(dst_argb), // %3 | 150 "+r"(dst_argb), // %3 |
| 160 "+r"(width) // %4 | 151 "+r"(width) // %4 |
| 161 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 152 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 162 [kUVToG]"r"(&yuvconstants->kUVToG), | 153 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 163 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 154 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 164 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 155 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 165 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 156 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 166 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 157 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 167 ); | 158 ); |
| 168 } | 159 } |
| 169 #endif // HAS_I444TOARGBROW_NEON | 160 #endif // HAS_I444TOARGBROW_NEON |
| 170 | 161 |
| 171 // TODO(fbarchard): Switch to Matrix version of this function. | |
| 172 #ifdef HAS_I422TOARGBROW_NEON | 162 #ifdef HAS_I422TOARGBROW_NEON |
| 173 void I422ToARGBRow_NEON(const uint8* src_y, | 163 void I422ToARGBRow_NEON(const uint8* src_y, |
| 174 const uint8* src_u, | 164 const uint8* src_u, |
| 175 const uint8* src_v, | 165 const uint8* src_v, |
| 176 uint8* dst_argb, | 166 uint8* dst_argb, |
| 177 const struct YuvConstants* yuvconstants, | 167 const struct YuvConstants* yuvconstants, |
| 178 int width) { | 168 int width) { |
| 179 asm volatile ( | 169 asm volatile ( |
| 180 YUVTORGB_SETUP | 170 YUVTORGB_SETUP |
| 171 "movi v23.8b, #255 \n" /* A */ |
| 181 "1: \n" | 172 "1: \n" |
| 182 READYUV422 | 173 READYUV422 |
| 183 YUVTORGB(v22, v21, v20) | 174 YUVTORGB(v22, v21, v20) |
| 184 "subs %w4, %w4, #8 \n" | 175 "subs %w4, %w4, #8 \n" |
| 185 "movi v23.8b, #255 \n" /* A */ | |
| 186 MEMACCESS(3) | 176 MEMACCESS(3) |
| 187 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 177 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
| 188 "b.gt 1b \n" | 178 "b.gt 1b \n" |
| 189 : "+r"(src_y), // %0 | 179 : "+r"(src_y), // %0 |
| 190 "+r"(src_u), // %1 | 180 "+r"(src_u), // %1 |
| 191 "+r"(src_v), // %2 | 181 "+r"(src_v), // %2 |
| 192 "+r"(dst_argb), // %3 | 182 "+r"(dst_argb), // %3 |
| 193 "+r"(width) // %4 | 183 "+r"(width) // %4 |
| 194 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 184 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 195 [kUVToG]"r"(&yuvconstants->kUVToG), | 185 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 196 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 186 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 197 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 187 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 198 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 188 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 199 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 189 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 200 ); | 190 ); |
| 201 } | 191 } |
| 202 #endif // HAS_I422TOARGBROW_NEON | 192 #endif // HAS_I422TOARGBROW_NEON |
| 203 | 193 |
| 194 #ifdef HAS_I422ALPHATOARGBROW_NEON |
| 195 void I422AlphaToARGBRow_NEON(const uint8* src_y, |
| 196 const uint8* src_u, |
| 197 const uint8* src_v, |
| 198 const uint8* src_a, |
| 199 uint8* dst_argb, |
| 200 const struct YuvConstants* yuvconstants, |
| 201 int width) { |
| 202 asm volatile ( |
| 203 YUVTORGB_SETUP |
| 204 "1: \n" |
| 205 READYUV422 |
| 206 YUVTORGB(v22, v21, v20) |
| 207 MEMACCESS(3) |
| 208 "ld1 {v23.8b}, [%3], #8 \n" |
| 209 "subs %w5, %w5, #8 \n" |
| 210 MEMACCESS(4) |
| 211 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%4], #32 \n" |
| 212 "b.gt 1b \n" |
| 213 : "+r"(src_y), // %0 |
| 214 "+r"(src_u), // %1 |
| 215 "+r"(src_v), // %2 |
| 216 "+r"(src_a), // %3 |
| 217 "+r"(dst_argb), // %4 |
| 218 "+r"(width) // %5 |
| 219 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 220 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 221 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 222 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 223 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 224 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 225 ); |
| 226 } |
| 227 #endif // HAS_I422ALPHATOARGBROW_NEON |
| 228 |
| 204 #ifdef HAS_I411TOARGBROW_NEON | 229 #ifdef HAS_I411TOARGBROW_NEON |
| 205 void I411ToARGBRow_NEON(const uint8* src_y, | 230 void I411ToARGBRow_NEON(const uint8* src_y, |
| 206 const uint8* src_u, | 231 const uint8* src_u, |
| 207 const uint8* src_v, | 232 const uint8* src_v, |
| 208 uint8* dst_argb, | 233 uint8* dst_argb, |
| 209 const struct YuvConstants* yuvconstants, | 234 const struct YuvConstants* yuvconstants, |
| 210 int width) { | 235 int width) { |
| 211 asm volatile ( | 236 asm volatile ( |
| 212 YUVTORGB_SETUP | 237 YUVTORGB_SETUP |
| 238 "movi v23.8b, #255 \n" /* A */ |
| 213 "1: \n" | 239 "1: \n" |
| 214 READYUV411 | 240 READYUV411 |
| 215 YUVTORGB(v22, v21, v20) | 241 YUVTORGB(v22, v21, v20) |
| 216 "subs %w4, %w4, #8 \n" | 242 "subs %w4, %w4, #8 \n" |
| 217 "movi v23.8b, #255 \n" /* A */ | |
| 218 MEMACCESS(3) | 243 MEMACCESS(3) |
| 219 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 244 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
| 220 "b.gt 1b \n" | 245 "b.gt 1b \n" |
| 221 : "+r"(src_y), // %0 | 246 : "+r"(src_y), // %0 |
| 222 "+r"(src_u), // %1 | 247 "+r"(src_u), // %1 |
| 223 "+r"(src_v), // %2 | 248 "+r"(src_v), // %2 |
| 224 "+r"(dst_argb), // %3 | 249 "+r"(dst_argb), // %3 |
| 225 "+r"(width) // %4 | 250 "+r"(width) // %4 |
| 226 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 251 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 227 [kUVToG]"r"(&yuvconstants->kUVToG), | 252 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 228 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 253 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 229 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 254 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 230 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 255 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 231 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 256 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 232 ); | 257 ); |
| 233 } | 258 } |
| 234 #endif // HAS_I411TOARGBROW_NEON | 259 #endif // HAS_I411TOARGBROW_NEON |
| 235 | 260 |
| 236 #ifdef HAS_I422TORGBAROW_NEON | 261 #ifdef HAS_I422TORGBAROW_NEON |
| 237 void I422ToRGBARow_NEON(const uint8* src_y, | 262 void I422ToRGBARow_NEON(const uint8* src_y, |
| 238 const uint8* src_u, | 263 const uint8* src_u, |
| 239 const uint8* src_v, | 264 const uint8* src_v, |
| 240 uint8* dst_rgba, | 265 uint8* dst_rgba, |
| 241 const struct YuvConstants* yuvconstants, | 266 const struct YuvConstants* yuvconstants, |
| 242 int width) { | 267 int width) { |
| 243 asm volatile ( | 268 asm volatile ( |
| 244 YUVTORGB_SETUP | 269 YUVTORGB_SETUP |
| 270 "movi v20.8b, #255 \n" /* A */ |
| 245 "1: \n" | 271 "1: \n" |
| 246 READYUV422 | 272 READYUV422 |
| 247 YUVTORGB(v23, v22, v21) | 273 YUVTORGB(v23, v22, v21) |
| 248 "subs %w4, %w4, #8 \n" | 274 "subs %w4, %w4, #8 \n" |
| 249 "movi v20.8b, #255 \n" /* A */ | |
| 250 MEMACCESS(3) | 275 MEMACCESS(3) |
| 251 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 276 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
| 252 "b.gt 1b \n" | 277 "b.gt 1b \n" |
| 253 : "+r"(src_y), // %0 | 278 : "+r"(src_y), // %0 |
| 254 "+r"(src_u), // %1 | 279 "+r"(src_u), // %1 |
| 255 "+r"(src_v), // %2 | 280 "+r"(src_v), // %2 |
| 256 "+r"(dst_rgba), // %3 | 281 "+r"(dst_rgba), // %3 |
| 257 "+r"(width) // %4 | 282 "+r"(width) // %4 |
| 258 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 283 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 259 [kUVToG]"r"(&yuvconstants->kUVToG), | 284 [kUVToG]"r"(&yuvconstants->kUVToG), |
| (...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 346 | 371 |
| 347 #ifdef HAS_I422TOARGB1555ROW_NEON | 372 #ifdef HAS_I422TOARGB1555ROW_NEON |
| 348 void I422ToARGB1555Row_NEON(const uint8* src_y, | 373 void I422ToARGB1555Row_NEON(const uint8* src_y, |
| 349 const uint8* src_u, | 374 const uint8* src_u, |
| 350 const uint8* src_v, | 375 const uint8* src_v, |
| 351 uint8* dst_argb1555, | 376 uint8* dst_argb1555, |
| 352 const struct YuvConstants* yuvconstants, | 377 const struct YuvConstants* yuvconstants, |
| 353 int width) { | 378 int width) { |
| 354 asm volatile ( | 379 asm volatile ( |
| 355 YUVTORGB_SETUP | 380 YUVTORGB_SETUP |
| 381 "movi v23.8b, #255 \n" |
| 356 "1: \n" | 382 "1: \n" |
| 357 READYUV422 | 383 READYUV422 |
| 358 YUVTORGB(v22, v21, v20) | 384 YUVTORGB(v22, v21, v20) |
| 359 "subs %w4, %w4, #8 \n" | 385 "subs %w4, %w4, #8 \n" |
| 360 "movi v23.8b, #255 \n" | |
| 361 ARGBTOARGB1555 | 386 ARGBTOARGB1555 |
| 362 MEMACCESS(3) | 387 MEMACCESS(3) |
| 363 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB1555. | 388 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB1555. |
| 364 "b.gt 1b \n" | 389 "b.gt 1b \n" |
| 365 : "+r"(src_y), // %0 | 390 : "+r"(src_y), // %0 |
| 366 "+r"(src_u), // %1 | 391 "+r"(src_u), // %1 |
| 367 "+r"(src_v), // %2 | 392 "+r"(src_v), // %2 |
| 368 "+r"(dst_argb1555), // %3 | 393 "+r"(dst_argb1555), // %3 |
| 369 "+r"(width) // %4 | 394 "+r"(width) // %4 |
| 370 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 395 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 421 } | 446 } |
| 422 #endif // HAS_I422TOARGB4444ROW_NEON | 447 #endif // HAS_I422TOARGB4444ROW_NEON |
| 423 | 448 |
| 424 #ifdef HAS_I400TOARGBROW_NEON | 449 #ifdef HAS_I400TOARGBROW_NEON |
| 425 void I400ToARGBRow_NEON(const uint8* src_y, | 450 void I400ToARGBRow_NEON(const uint8* src_y, |
| 426 uint8* dst_argb, | 451 uint8* dst_argb, |
| 427 int width) { | 452 int width) { |
| 428 int64 width64 = (int64)(width); | 453 int64 width64 = (int64)(width); |
| 429 asm volatile ( | 454 asm volatile ( |
| 430 YUVTORGB_SETUP | 455 YUVTORGB_SETUP |
| 456 "movi v23.8b, #255 \n" |
| 431 "1: \n" | 457 "1: \n" |
| 432 READYUV400 | 458 READYUV400 |
| 433 YUVTORGB(v22, v21, v20) | 459 YUVTORGB(v22, v21, v20) |
| 434 "subs %w2, %w2, #8 \n" | 460 "subs %w2, %w2, #8 \n" |
| 435 "movi v23.8b, #255 \n" | |
| 436 MEMACCESS(1) | 461 MEMACCESS(1) |
| 437 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" | 462 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" |
| 438 "b.gt 1b \n" | 463 "b.gt 1b \n" |
| 439 : "+r"(src_y), // %0 | 464 : "+r"(src_y), // %0 |
| 440 "+r"(dst_argb), // %1 | 465 "+r"(dst_argb), // %1 |
| 441 "+r"(width64) // %2 | 466 "+r"(width64) // %2 |
| 442 : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB), | 467 : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB), |
| 443 [kUVToG]"r"(&kYuvI601Constants.kUVToG), | 468 [kUVToG]"r"(&kYuvI601Constants.kUVToG), |
| 444 [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR), | 469 [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR), |
| 445 [kYToRgb]"r"(&kYuvI601Constants.kYToRgb) | 470 [kYToRgb]"r"(&kYuvI601Constants.kYToRgb) |
| (...skipping 28 matching lines...) Expand all Loading... |
| 474 #endif // HAS_J400TOARGBROW_NEON | 499 #endif // HAS_J400TOARGBROW_NEON |
| 475 | 500 |
| 476 #ifdef HAS_NV12TOARGBROW_NEON | 501 #ifdef HAS_NV12TOARGBROW_NEON |
| 477 void NV12ToARGBRow_NEON(const uint8* src_y, | 502 void NV12ToARGBRow_NEON(const uint8* src_y, |
| 478 const uint8* src_uv, | 503 const uint8* src_uv, |
| 479 uint8* dst_argb, | 504 uint8* dst_argb, |
| 480 const struct YuvConstants* yuvconstants, | 505 const struct YuvConstants* yuvconstants, |
| 481 int width) { | 506 int width) { |
| 482 asm volatile ( | 507 asm volatile ( |
| 483 YUVTORGB_SETUP | 508 YUVTORGB_SETUP |
| 509 "movi v23.8b, #255 \n" |
| 484 "1: \n" | 510 "1: \n" |
| 485 READNV12 | 511 READNV12 |
| 486 YUVTORGB(v22, v21, v20) | 512 YUVTORGB(v22, v21, v20) |
| 487 "subs %w3, %w3, #8 \n" | 513 "subs %w3, %w3, #8 \n" |
| 488 "movi v23.8b, #255 \n" | |
| 489 MEMACCESS(2) | 514 MEMACCESS(2) |
| 490 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" | 515 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" |
| 491 "b.gt 1b \n" | 516 "b.gt 1b \n" |
| 492 : "+r"(src_y), // %0 | 517 : "+r"(src_y), // %0 |
| 493 "+r"(src_uv), // %1 | 518 "+r"(src_uv), // %1 |
| 494 "+r"(dst_argb), // %2 | 519 "+r"(dst_argb), // %2 |
| 495 "+r"(width) // %3 | 520 "+r"(width) // %3 |
| 496 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 521 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 497 [kUVToG]"r"(&yuvconstants->kUVToG), | 522 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 498 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 523 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 499 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 524 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 500 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 525 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 501 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 526 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 502 ); | 527 ); |
| 503 } | 528 } |
| 504 #endif // HAS_NV12TOARGBROW_NEON | 529 #endif // HAS_NV12TOARGBROW_NEON |
| 505 | 530 |
| 506 #ifdef HAS_NV12TOARGBROW_NEON | 531 #ifdef HAS_NV12TOARGBROW_NEON |
| 507 void NV21ToARGBRow_NEON(const uint8* src_y, | 532 void NV21ToARGBRow_NEON(const uint8* src_y, |
| 508 const uint8* src_vu, | 533 const uint8* src_vu, |
| 509 uint8* dst_argb, | 534 uint8* dst_argb, |
| 510 const struct YuvConstants* yuvconstants, | 535 const struct YuvConstants* yuvconstants, |
| 511 int width) { | 536 int width) { |
| 512 asm volatile ( | 537 asm volatile ( |
| 513 YUVTORGB_SETUP | 538 YUVTORGB_SETUP |
| 539 "movi v23.8b, #255 \n" |
| 514 "1: \n" | 540 "1: \n" |
| 515 READNV21 | 541 READNV21 |
| 516 YUVTORGB(v22, v21, v20) | 542 YUVTORGB(v22, v21, v20) |
| 517 "subs %w3, %w3, #8 \n" | 543 "subs %w3, %w3, #8 \n" |
| 518 "movi v23.8b, #255 \n" | |
| 519 MEMACCESS(2) | 544 MEMACCESS(2) |
| 520 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" | 545 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" |
| 521 "b.gt 1b \n" | 546 "b.gt 1b \n" |
| 522 : "+r"(src_y), // %0 | 547 : "+r"(src_y), // %0 |
| 523 "+r"(src_vu), // %1 | 548 "+r"(src_vu), // %1 |
| 524 "+r"(dst_argb), // %2 | 549 "+r"(dst_argb), // %2 |
| 525 "+r"(width) // %3 | 550 "+r"(width) // %3 |
| 526 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 551 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 527 [kUVToG]"r"(&yuvconstants->kUVToG), | 552 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 528 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 553 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 564 #endif // HAS_NV12TORGB565ROW_NEON | 589 #endif // HAS_NV12TORGB565ROW_NEON |
| 565 | 590 |
| 566 #ifdef HAS_YUY2TOARGBROW_NEON | 591 #ifdef HAS_YUY2TOARGBROW_NEON |
| 567 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, | 592 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, |
| 568 uint8* dst_argb, | 593 uint8* dst_argb, |
| 569 const struct YuvConstants* yuvconstants, | 594 const struct YuvConstants* yuvconstants, |
| 570 int width) { | 595 int width) { |
| 571 int64 width64 = (int64)(width); | 596 int64 width64 = (int64)(width); |
| 572 asm volatile ( | 597 asm volatile ( |
| 573 YUVTORGB_SETUP | 598 YUVTORGB_SETUP |
| 599 "movi v23.8b, #255 \n" |
| 574 "1: \n" | 600 "1: \n" |
| 575 READYUY2 | 601 READYUY2 |
| 576 YUVTORGB(v22, v21, v20) | 602 YUVTORGB(v22, v21, v20) |
| 577 "subs %w2, %w2, #8 \n" | 603 "subs %w2, %w2, #8 \n" |
| 578 "movi v23.8b, #255 \n" | |
| 579 MEMACCESS(1) | 604 MEMACCESS(1) |
| 580 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" | 605 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" |
| 581 "b.gt 1b \n" | 606 "b.gt 1b \n" |
| 582 : "+r"(src_yuy2), // %0 | 607 : "+r"(src_yuy2), // %0 |
| 583 "+r"(dst_argb), // %1 | 608 "+r"(dst_argb), // %1 |
| 584 "+r"(width64) // %2 | 609 "+r"(width64) // %2 |
| 585 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 610 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 586 [kUVToG]"r"(&yuvconstants->kUVToG), | 611 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 587 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 612 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 588 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 613 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 589 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 614 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 590 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 615 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 591 ); | 616 ); |
| 592 } | 617 } |
| 593 #endif // HAS_YUY2TOARGBROW_NEON | 618 #endif // HAS_YUY2TOARGBROW_NEON |
| 594 | 619 |
| 595 #ifdef HAS_UYVYTOARGBROW_NEON | 620 #ifdef HAS_UYVYTOARGBROW_NEON |
| 596 void UYVYToARGBRow_NEON(const uint8* src_uyvy, | 621 void UYVYToARGBRow_NEON(const uint8* src_uyvy, |
| 597 uint8* dst_argb, | 622 uint8* dst_argb, |
| 598 const struct YuvConstants* yuvconstants, | 623 const struct YuvConstants* yuvconstants, |
| 599 int width) { | 624 int width) { |
| 600 int64 width64 = (int64)(width); | 625 int64 width64 = (int64)(width); |
| 601 asm volatile ( | 626 asm volatile ( |
| 602 YUVTORGB_SETUP | 627 YUVTORGB_SETUP |
| 628 "movi v23.8b, #255 \n" |
| 603 "1: \n" | 629 "1: \n" |
| 604 READUYVY | 630 READUYVY |
| 605 YUVTORGB(v22, v21, v20) | 631 YUVTORGB(v22, v21, v20) |
| 606 "subs %w2, %w2, #8 \n" | 632 "subs %w2, %w2, #8 \n" |
| 607 "movi v23.8b, #255 \n" | |
| 608 MEMACCESS(1) | 633 MEMACCESS(1) |
| 609 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" | 634 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" |
| 610 "b.gt 1b \n" | 635 "b.gt 1b \n" |
| 611 : "+r"(src_uyvy), // %0 | 636 : "+r"(src_uyvy), // %0 |
| 612 "+r"(dst_argb), // %1 | 637 "+r"(dst_argb), // %1 |
| 613 "+r"(width64) // %2 | 638 "+r"(width64) // %2 |
| 614 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 639 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 615 [kUVToG]"r"(&yuvconstants->kUVToG), | 640 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 616 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 641 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 617 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 642 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| (...skipping 819 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1437 "+r"(dst_u), // %1 | 1462 "+r"(dst_u), // %1 |
| 1438 "+r"(dst_v), // %2 | 1463 "+r"(dst_v), // %2 |
| 1439 "+r"(width) // %3 | 1464 "+r"(width) // %3 |
| 1440 : | 1465 : |
| 1441 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", | 1466 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", |
| 1442 "v24", "v25", "v26", "v27", "v28", "v29" | 1467 "v24", "v25", "v26", "v27", "v28", "v29" |
| 1443 ); | 1468 ); |
| 1444 } | 1469 } |
| 1445 #endif // HAS_ARGBTOUV444ROW_NEON | 1470 #endif // HAS_ARGBTOUV444ROW_NEON |
| 1446 | 1471 |
| 1472 #define RGBTOUV_SETUP_REG \ |
| 1473 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ |
| 1474 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ |
| 1475 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ |
| 1476 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ |
| 1477 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ |
| 1478 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ |
| 1479 |
| 1447 // 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16. | 1480 // 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16. |
| 1448 #ifdef HAS_ARGBTOUV422ROW_NEON | 1481 #ifdef HAS_ARGBTOUV422ROW_NEON |
| 1449 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | 1482 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, |
| 1450 int width) { | 1483 int width) { |
| 1451 asm volatile ( | 1484 asm volatile ( |
| 1452 RGBTOUV_SETUP_REG | 1485 RGBTOUV_SETUP_REG |
| 1453 "1: \n" | 1486 "1: \n" |
| 1454 MEMACCESS(0) | 1487 MEMACCESS(0) |
| 1455 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. | 1488 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. |
| 1456 | 1489 |
| (...skipping 1538 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2995 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List | 3028 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List |
| 2996 ); | 3029 ); |
| 2997 } | 3030 } |
| 2998 #endif // HAS_SOBELYROW_NEON | 3031 #endif // HAS_SOBELYROW_NEON |
| 2999 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) | 3032 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) |
| 3000 | 3033 |
| 3001 #ifdef __cplusplus | 3034 #ifdef __cplusplus |
| 3002 } // extern "C" | 3035 } // extern "C" |
| 3003 } // namespace libyuv | 3036 } // namespace libyuv |
| 3004 #endif | 3037 #endif |
| OLD | NEW |