| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 122 "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \ | 122 "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \ |
| 123 "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \ | 123 "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \ |
| 124 "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \ | 124 "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \ |
| 125 "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \ | 125 "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \ |
| 126 "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \ | 126 "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \ |
| 127 "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \ | 127 "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \ |
| 128 "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \ | 128 "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \ |
| 129 "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \ | 129 "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \ |
| 130 "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \ | 130 "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \ |
| 131 | 131 |
| 132 // YUV to RGB conversion constants. | 132 // BT.601 YUV to RGB reference |
| 133 // R = (Y - 16) * 1.164 - V * -1.596 |
| 134 // G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813 |
| 135 // B = (Y - 16) * 1.164 - U * -2.018 |
| 136 |
| 133 // Y contribution to R,G,B. Scale and bias. | 137 // Y contribution to R,G,B. Scale and bias. |
| 138 // TODO(fbarchard): Consider moving constants into a common header. |
| 134 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ | 139 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ |
| 135 #define YGB 1160 /* 1.164 * 64 * 16 - adjusted for even error distribution */ | 140 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ |
| 136 | 141 |
| 137 // U and V contributions to R,G,B. | 142 // U and V contributions to R,G,B. |
| 138 #define UB -128 /* -min(128, round(2.018 * 64)) */ | 143 #define UB -128 /* max(-128, round(-2.018 * 64)) */ |
| 139 #define UG 25 /* -round(-0.391 * 64) */ | 144 #define UG 25 /* round(0.391 * 64) */ |
| 140 #define VG 52 /* -round(-0.813 * 64) */ | 145 #define VG 52 /* round(0.813 * 64) */ |
| 141 #define VR -102 /* -round(1.596 * 64) */ | 146 #define VR -102 /* round(-1.596 * 64) */ |
| 142 | 147 |
| 143 // Bias values to subtract 16 from Y and 128 from U and V. | 148 // Bias values to subtract 16 from Y and 128 from U and V. |
| 144 #define BB (UB * 128 - YGB) | 149 #define BB (UB * 128 + YGB) |
| 145 #define BG (UG * 128 + VG * 128 - YGB) | 150 #define BG (UG * 128 + VG * 128 + YGB) |
| 146 #define BR (VR * 128 - YGB) | 151 #define BR (VR * 128 + YGB) |
| 147 | 152 |
| 148 static vec16 kUVBiasBGR = { BB, BG, BR, 0, 0, 0, 0, 0 }; | 153 YuvConstantsNEON SIMD_ALIGNED(kYuvConstantsNEON) = { |
| 149 static vec32 kYToRgb = { 0x0101 * YG, 0, 0, 0 }; | 154 { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 }, |
| 155 { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 }, |
| 156 { BB, BG, BR, 0, 0, 0, 0, 0 }, |
| 157 { 0x0101 * YG, 0, 0, 0 } |
| 158 }; |
| 150 | 159 |
| 151 #undef YG | 160 #undef YG |
| 152 #undef YGB | 161 #undef YGB |
| 153 #undef UB | 162 #undef UB |
| 154 #undef UG | 163 #undef UG |
| 155 #undef VG | 164 #undef VG |
| 156 #undef VR | 165 #undef VR |
| 157 #undef BB | 166 #undef BB |
| 158 #undef BG | 167 #undef BG |
| 159 #undef BR | 168 #undef BR |
| 160 | 169 |
| 170 // TODO(fbarchard): Use structure for constants like 32 bit code. |
| 161 #define RGBTOUV_SETUP_REG \ | 171 #define RGBTOUV_SETUP_REG \ |
| 162 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ | 172 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ |
| 163 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ | 173 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ |
| 164 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ | 174 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ |
| 165 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ | 175 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ |
| 166 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ | 176 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ |
| 167 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ | 177 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ |
| 168 | 178 |
| 169 | |
| 170 #ifdef HAS_I444TOARGBROW_NEON | 179 #ifdef HAS_I444TOARGBROW_NEON |
| 171 void I444ToARGBRow_NEON(const uint8* src_y, | 180 void I444ToARGBRow_NEON(const uint8* src_y, |
| 172 const uint8* src_u, | 181 const uint8* src_u, |
| 173 const uint8* src_v, | 182 const uint8* src_v, |
| 174 uint8* dst_argb, | 183 uint8* dst_argb, |
| 175 int width) { | 184 int width) { |
| 176 asm volatile ( | 185 asm volatile ( |
| 177 YUV422TORGB_SETUP_REG | 186 YUV422TORGB_SETUP_REG |
| 178 "1: \n" | 187 "1: \n" |
| 179 READYUV444 | 188 READYUV444 |
| 180 YUV422TORGB(v22, v21, v20) | 189 YUV422TORGB(v22, v21, v20) |
| 181 "subs %w4, %w4, #8 \n" | 190 "subs %w4, %w4, #8 \n" |
| 182 "movi v23.8b, #255 \n" /* A */ | 191 "movi v23.8b, #255 \n" /* A */ |
| 183 MEMACCESS(3) | 192 MEMACCESS(3) |
| 184 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 193 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
| 185 "b.gt 1b \n" | 194 "b.gt 1b \n" |
| 186 : "+r"(src_y), // %0 | 195 : "+r"(src_y), // %0 |
| 187 "+r"(src_u), // %1 | 196 "+r"(src_u), // %1 |
| 188 "+r"(src_v), // %2 | 197 "+r"(src_v), // %2 |
| 189 "+r"(dst_argb), // %3 | 198 "+r"(dst_argb), // %3 |
| 190 "+r"(width) // %4 | 199 "+r"(width) // %4 |
| 191 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 200 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 192 [kYToRgb]"r"(&kYToRgb) | 201 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 193 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 202 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 194 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 203 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 195 ); | 204 ); |
| 196 } | 205 } |
| 197 #endif // HAS_I444TOARGBROW_NEON | 206 #endif // HAS_I444TOARGBROW_NEON |
| 198 | 207 |
| 199 #ifdef HAS_I422TOARGBROW_NEON | 208 #ifdef HAS_I422TOARGBROW_NEON |
| 200 void I422ToARGBRow_NEON(const uint8* src_y, | 209 void I422ToARGBRow_NEON(const uint8* src_y, |
| 201 const uint8* src_u, | 210 const uint8* src_u, |
| 202 const uint8* src_v, | 211 const uint8* src_v, |
| 203 uint8* dst_argb, | 212 uint8* dst_argb, |
| 204 int width) { | 213 int width) { |
| 205 asm volatile ( | 214 asm volatile ( |
| 206 YUV422TORGB_SETUP_REG | 215 YUV422TORGB_SETUP_REG |
| 207 "1: \n" | 216 "1: \n" |
| 208 READYUV422 | 217 READYUV422 |
| 209 YUV422TORGB(v22, v21, v20) | 218 YUV422TORGB(v22, v21, v20) |
| 210 "subs %w4, %w4, #8 \n" | 219 "subs %w4, %w4, #8 \n" |
| 211 "movi v23.8b, #255 \n" /* A */ | 220 "movi v23.8b, #255 \n" /* A */ |
| 212 MEMACCESS(3) | 221 MEMACCESS(3) |
| 213 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 222 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
| 214 "b.gt 1b \n" | 223 "b.gt 1b \n" |
| 215 : "+r"(src_y), // %0 | 224 : "+r"(src_y), // %0 |
| 216 "+r"(src_u), // %1 | 225 "+r"(src_u), // %1 |
| 217 "+r"(src_v), // %2 | 226 "+r"(src_v), // %2 |
| 218 "+r"(dst_argb), // %3 | 227 "+r"(dst_argb), // %3 |
| 219 "+r"(width) // %4 | 228 "+r"(width) // %4 |
| 220 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 229 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 221 [kYToRgb]"r"(&kYToRgb) | 230 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 222 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 231 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 223 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 232 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 224 ); | 233 ); |
| 225 } | 234 } |
| 226 #endif // HAS_I422TOARGBROW_NEON | 235 #endif // HAS_I422TOARGBROW_NEON |
| 227 | 236 |
| 228 #ifdef HAS_I411TOARGBROW_NEON | 237 #ifdef HAS_I411TOARGBROW_NEON |
| 229 void I411ToARGBRow_NEON(const uint8* src_y, | 238 void I411ToARGBRow_NEON(const uint8* src_y, |
| 230 const uint8* src_u, | 239 const uint8* src_u, |
| 231 const uint8* src_v, | 240 const uint8* src_v, |
| 232 uint8* dst_argb, | 241 uint8* dst_argb, |
| 233 int width) { | 242 int width) { |
| 234 asm volatile ( | 243 asm volatile ( |
| 235 YUV422TORGB_SETUP_REG | 244 YUV422TORGB_SETUP_REG |
| 236 "1: \n" | 245 "1: \n" |
| 237 READYUV411 | 246 READYUV411 |
| 238 YUV422TORGB(v22, v21, v20) | 247 YUV422TORGB(v22, v21, v20) |
| 239 "subs %w4, %w4, #8 \n" | 248 "subs %w4, %w4, #8 \n" |
| 240 "movi v23.8b, #255 \n" /* A */ | 249 "movi v23.8b, #255 \n" /* A */ |
| 241 MEMACCESS(3) | 250 MEMACCESS(3) |
| 242 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 251 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
| 243 "b.gt 1b \n" | 252 "b.gt 1b \n" |
| 244 : "+r"(src_y), // %0 | 253 : "+r"(src_y), // %0 |
| 245 "+r"(src_u), // %1 | 254 "+r"(src_u), // %1 |
| 246 "+r"(src_v), // %2 | 255 "+r"(src_v), // %2 |
| 247 "+r"(dst_argb), // %3 | 256 "+r"(dst_argb), // %3 |
| 248 "+r"(width) // %4 | 257 "+r"(width) // %4 |
| 249 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 258 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 250 [kYToRgb]"r"(&kYToRgb) | 259 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 251 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 260 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 252 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 261 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 253 ); | 262 ); |
| 254 } | 263 } |
| 255 #endif // HAS_I411TOARGBROW_NEON | 264 #endif // HAS_I411TOARGBROW_NEON |
| 256 | 265 |
| 257 #ifdef HAS_I422TOBGRAROW_NEON | 266 #ifdef HAS_I422TOBGRAROW_NEON |
| 258 void I422ToBGRARow_NEON(const uint8* src_y, | 267 void I422ToBGRARow_NEON(const uint8* src_y, |
| 259 const uint8* src_u, | 268 const uint8* src_u, |
| 260 const uint8* src_v, | 269 const uint8* src_v, |
| 261 uint8* dst_bgra, | 270 uint8* dst_bgra, |
| 262 int width) { | 271 int width) { |
| 263 asm volatile ( | 272 asm volatile ( |
| 264 YUV422TORGB_SETUP_REG | 273 YUV422TORGB_SETUP_REG |
| 265 "1: \n" | 274 "1: \n" |
| 266 READYUV422 | 275 READYUV422 |
| 267 YUV422TORGB(v21, v22, v23) | 276 YUV422TORGB(v21, v22, v23) |
| 268 "subs %w4, %w4, #8 \n" | 277 "subs %w4, %w4, #8 \n" |
| 269 "movi v20.8b, #255 \n" /* A */ | 278 "movi v20.8b, #255 \n" /* A */ |
| 270 MEMACCESS(3) | 279 MEMACCESS(3) |
| 271 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 280 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
| 272 "b.gt 1b \n" | 281 "b.gt 1b \n" |
| 273 : "+r"(src_y), // %0 | 282 : "+r"(src_y), // %0 |
| 274 "+r"(src_u), // %1 | 283 "+r"(src_u), // %1 |
| 275 "+r"(src_v), // %2 | 284 "+r"(src_v), // %2 |
| 276 "+r"(dst_bgra), // %3 | 285 "+r"(dst_bgra), // %3 |
| 277 "+r"(width) // %4 | 286 "+r"(width) // %4 |
| 278 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 287 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 279 [kYToRgb]"r"(&kYToRgb) | 288 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 280 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 289 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 281 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 290 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 282 ); | 291 ); |
| 283 } | 292 } |
| 284 #endif // HAS_I422TOBGRAROW_NEON | 293 #endif // HAS_I422TOBGRAROW_NEON |
| 285 | 294 |
| 286 #ifdef HAS_I422TOABGRROW_NEON | 295 #ifdef HAS_I422TOABGRROW_NEON |
| 287 void I422ToABGRRow_NEON(const uint8* src_y, | 296 void I422ToABGRRow_NEON(const uint8* src_y, |
| 288 const uint8* src_u, | 297 const uint8* src_u, |
| 289 const uint8* src_v, | 298 const uint8* src_v, |
| 290 uint8* dst_abgr, | 299 uint8* dst_abgr, |
| 291 int width) { | 300 int width) { |
| 292 asm volatile ( | 301 asm volatile ( |
| 293 YUV422TORGB_SETUP_REG | 302 YUV422TORGB_SETUP_REG |
| 294 "1: \n" | 303 "1: \n" |
| 295 READYUV422 | 304 READYUV422 |
| 296 YUV422TORGB(v20, v21, v22) | 305 YUV422TORGB(v20, v21, v22) |
| 297 "subs %w4, %w4, #8 \n" | 306 "subs %w4, %w4, #8 \n" |
| 298 "movi v23.8b, #255 \n" /* A */ | 307 "movi v23.8b, #255 \n" /* A */ |
| 299 MEMACCESS(3) | 308 MEMACCESS(3) |
| 300 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 309 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
| 301 "b.gt 1b \n" | 310 "b.gt 1b \n" |
| 302 : "+r"(src_y), // %0 | 311 : "+r"(src_y), // %0 |
| 303 "+r"(src_u), // %1 | 312 "+r"(src_u), // %1 |
| 304 "+r"(src_v), // %2 | 313 "+r"(src_v), // %2 |
| 305 "+r"(dst_abgr), // %3 | 314 "+r"(dst_abgr), // %3 |
| 306 "+r"(width) // %4 | 315 "+r"(width) // %4 |
| 307 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 316 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 308 [kYToRgb]"r"(&kYToRgb) | 317 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 309 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 318 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 310 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 319 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 311 ); | 320 ); |
| 312 } | 321 } |
| 313 #endif // HAS_I422TOABGRROW_NEON | 322 #endif // HAS_I422TOABGRROW_NEON |
| 314 | 323 |
| 315 #ifdef HAS_I422TORGBAROW_NEON | 324 #ifdef HAS_I422TORGBAROW_NEON |
| 316 void I422ToRGBARow_NEON(const uint8* src_y, | 325 void I422ToRGBARow_NEON(const uint8* src_y, |
| 317 const uint8* src_u, | 326 const uint8* src_u, |
| 318 const uint8* src_v, | 327 const uint8* src_v, |
| 319 uint8* dst_rgba, | 328 uint8* dst_rgba, |
| 320 int width) { | 329 int width) { |
| 321 asm volatile ( | 330 asm volatile ( |
| 322 YUV422TORGB_SETUP_REG | 331 YUV422TORGB_SETUP_REG |
| 323 "1: \n" | 332 "1: \n" |
| 324 READYUV422 | 333 READYUV422 |
| 325 YUV422TORGB(v23, v22, v21) | 334 YUV422TORGB(v23, v22, v21) |
| 326 "subs %w4, %w4, #8 \n" | 335 "subs %w4, %w4, #8 \n" |
| 327 "movi v20.8b, #255 \n" /* A */ | 336 "movi v20.8b, #255 \n" /* A */ |
| 328 MEMACCESS(3) | 337 MEMACCESS(3) |
| 329 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" | 338 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" |
| 330 "b.gt 1b \n" | 339 "b.gt 1b \n" |
| 331 : "+r"(src_y), // %0 | 340 : "+r"(src_y), // %0 |
| 332 "+r"(src_u), // %1 | 341 "+r"(src_u), // %1 |
| 333 "+r"(src_v), // %2 | 342 "+r"(src_v), // %2 |
| 334 "+r"(dst_rgba), // %3 | 343 "+r"(dst_rgba), // %3 |
| 335 "+r"(width) // %4 | 344 "+r"(width) // %4 |
| 336 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 345 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 337 [kYToRgb]"r"(&kYToRgb) | 346 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 338 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 347 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 339 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 348 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 340 ); | 349 ); |
| 341 } | 350 } |
| 342 #endif // HAS_I422TORGBAROW_NEON | 351 #endif // HAS_I422TORGBAROW_NEON |
| 343 | 352 |
| 344 #ifdef HAS_I422TORGB24ROW_NEON | 353 #ifdef HAS_I422TORGB24ROW_NEON |
| 345 void I422ToRGB24Row_NEON(const uint8* src_y, | 354 void I422ToRGB24Row_NEON(const uint8* src_y, |
| 346 const uint8* src_u, | 355 const uint8* src_u, |
| 347 const uint8* src_v, | 356 const uint8* src_v, |
| 348 uint8* dst_rgb24, | 357 uint8* dst_rgb24, |
| 349 int width) { | 358 int width) { |
| 350 asm volatile ( | 359 asm volatile ( |
| 351 YUV422TORGB_SETUP_REG | 360 YUV422TORGB_SETUP_REG |
| 352 "1: \n" | 361 "1: \n" |
| 353 READYUV422 | 362 READYUV422 |
| 354 YUV422TORGB(v22, v21, v20) | 363 YUV422TORGB(v22, v21, v20) |
| 355 "subs %w4, %w4, #8 \n" | 364 "subs %w4, %w4, #8 \n" |
| 356 MEMACCESS(3) | 365 MEMACCESS(3) |
| 357 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" | 366 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" |
| 358 "b.gt 1b \n" | 367 "b.gt 1b \n" |
| 359 : "+r"(src_y), // %0 | 368 : "+r"(src_y), // %0 |
| 360 "+r"(src_u), // %1 | 369 "+r"(src_u), // %1 |
| 361 "+r"(src_v), // %2 | 370 "+r"(src_v), // %2 |
| 362 "+r"(dst_rgb24), // %3 | 371 "+r"(dst_rgb24), // %3 |
| 363 "+r"(width) // %4 | 372 "+r"(width) // %4 |
| 364 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 373 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 365 [kYToRgb]"r"(&kYToRgb) | 374 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 366 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 375 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 367 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 376 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 368 ); | 377 ); |
| 369 } | 378 } |
| 370 #endif // HAS_I422TORGB24ROW_NEON | 379 #endif // HAS_I422TORGB24ROW_NEON |
| 371 | 380 |
| 372 #ifdef HAS_I422TORAWROW_NEON | 381 #ifdef HAS_I422TORAWROW_NEON |
| 373 void I422ToRAWRow_NEON(const uint8* src_y, | 382 void I422ToRAWRow_NEON(const uint8* src_y, |
| 374 const uint8* src_u, | 383 const uint8* src_u, |
| 375 const uint8* src_v, | 384 const uint8* src_v, |
| 376 uint8* dst_raw, | 385 uint8* dst_raw, |
| 377 int width) { | 386 int width) { |
| 378 asm volatile ( | 387 asm volatile ( |
| 379 YUV422TORGB_SETUP_REG | 388 YUV422TORGB_SETUP_REG |
| 380 "1: \n" | 389 "1: \n" |
| 381 READYUV422 | 390 READYUV422 |
| 382 YUV422TORGB(v20, v21, v22) | 391 YUV422TORGB(v20, v21, v22) |
| 383 "subs %w4, %w4, #8 \n" | 392 "subs %w4, %w4, #8 \n" |
| 384 MEMACCESS(3) | 393 MEMACCESS(3) |
| 385 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" | 394 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" |
| 386 "b.gt 1b \n" | 395 "b.gt 1b \n" |
| 387 : "+r"(src_y), // %0 | 396 : "+r"(src_y), // %0 |
| 388 "+r"(src_u), // %1 | 397 "+r"(src_u), // %1 |
| 389 "+r"(src_v), // %2 | 398 "+r"(src_v), // %2 |
| 390 "+r"(dst_raw), // %3 | 399 "+r"(dst_raw), // %3 |
| 391 "+r"(width) // %4 | 400 "+r"(width) // %4 |
| 392 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 401 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 393 [kYToRgb]"r"(&kYToRgb) | 402 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 394 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 403 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 395 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 404 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 396 ); | 405 ); |
| 397 } | 406 } |
| 398 #endif // HAS_I422TORAWROW_NEON | 407 #endif // HAS_I422TORAWROW_NEON |
| 399 | 408 |
| 400 #define ARGBTORGB565 \ | 409 #define ARGBTORGB565 \ |
| 401 "shll v0.8h, v22.8b, #8 \n" /* R */ \ | 410 "shll v0.8h, v22.8b, #8 \n" /* R */ \ |
| 402 "shll v20.8h, v20.8b, #8 \n" /* B */ \ | 411 "shll v20.8h, v20.8b, #8 \n" /* B */ \ |
| 403 "shll v21.8h, v21.8b, #8 \n" /* G */ \ | 412 "shll v21.8h, v21.8b, #8 \n" /* G */ \ |
| (...skipping 14 matching lines...) Expand all Loading... |
| 418 "subs %w4, %w4, #8 \n" | 427 "subs %w4, %w4, #8 \n" |
| 419 ARGBTORGB565 | 428 ARGBTORGB565 |
| 420 MEMACCESS(3) | 429 MEMACCESS(3) |
| 421 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565. | 430 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565. |
| 422 "b.gt 1b \n" | 431 "b.gt 1b \n" |
| 423 : "+r"(src_y), // %0 | 432 : "+r"(src_y), // %0 |
| 424 "+r"(src_u), // %1 | 433 "+r"(src_u), // %1 |
| 425 "+r"(src_v), // %2 | 434 "+r"(src_v), // %2 |
| 426 "+r"(dst_rgb565), // %3 | 435 "+r"(dst_rgb565), // %3 |
| 427 "+r"(width) // %4 | 436 "+r"(width) // %4 |
| 428 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 437 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 429 [kYToRgb]"r"(&kYToRgb) | 438 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 430 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 439 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 431 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 440 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 432 ); | 441 ); |
| 433 } | 442 } |
| 434 #endif // HAS_I422TORGB565ROW_NEON | 443 #endif // HAS_I422TORGB565ROW_NEON |
| 435 | 444 |
| 436 #define ARGBTOARGB1555 \ | 445 #define ARGBTOARGB1555 \ |
| 437 "shll v0.8h, v23.8b, #8 \n" /* A */ \ | 446 "shll v0.8h, v23.8b, #8 \n" /* A */ \ |
| 438 "shll v22.8h, v22.8b, #8 \n" /* R */ \ | 447 "shll v22.8h, v22.8b, #8 \n" /* R */ \ |
| 439 "shll v20.8h, v20.8b, #8 \n" /* B */ \ | 448 "shll v20.8h, v20.8b, #8 \n" /* B */ \ |
| (...skipping 17 matching lines...) Expand all Loading... |
| 457 "movi v23.8b, #255 \n" | 466 "movi v23.8b, #255 \n" |
| 458 ARGBTOARGB1555 | 467 ARGBTOARGB1555 |
| 459 MEMACCESS(3) | 468 MEMACCESS(3) |
| 460 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB1555. | 469 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB1555. |
| 461 "b.gt 1b \n" | 470 "b.gt 1b \n" |
| 462 : "+r"(src_y), // %0 | 471 : "+r"(src_y), // %0 |
| 463 "+r"(src_u), // %1 | 472 "+r"(src_u), // %1 |
| 464 "+r"(src_v), // %2 | 473 "+r"(src_v), // %2 |
| 465 "+r"(dst_argb1555), // %3 | 474 "+r"(dst_argb1555), // %3 |
| 466 "+r"(width) // %4 | 475 "+r"(width) // %4 |
| 467 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 476 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 468 [kYToRgb]"r"(&kYToRgb) | 477 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 469 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 478 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 470 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 479 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 471 ); | 480 ); |
| 472 } | 481 } |
| 473 #endif // HAS_I422TOARGB1555ROW_NEON | 482 #endif // HAS_I422TOARGB1555ROW_NEON |
| 474 | 483 |
| 475 #define ARGBTOARGB4444 \ | 484 #define ARGBTOARGB4444 \ |
| 476 /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \ | 485 /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \ |
| 477 "ushr v20.8b, v20.8b, #4 \n" /* B */ \ | 486 "ushr v20.8b, v20.8b, #4 \n" /* B */ \ |
| 478 "bic v21.8b, v21.8b, v4.8b \n" /* G */ \ | 487 "bic v21.8b, v21.8b, v4.8b \n" /* G */ \ |
| (...skipping 19 matching lines...) Expand all Loading... |
| 498 "movi v23.8b, #255 \n" | 507 "movi v23.8b, #255 \n" |
| 499 ARGBTOARGB4444 | 508 ARGBTOARGB4444 |
| 500 MEMACCESS(3) | 509 MEMACCESS(3) |
| 501 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444. | 510 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444. |
| 502 "b.gt 1b \n" | 511 "b.gt 1b \n" |
| 503 : "+r"(src_y), // %0 | 512 : "+r"(src_y), // %0 |
| 504 "+r"(src_u), // %1 | 513 "+r"(src_u), // %1 |
| 505 "+r"(src_v), // %2 | 514 "+r"(src_v), // %2 |
| 506 "+r"(dst_argb4444), // %3 | 515 "+r"(dst_argb4444), // %3 |
| 507 "+r"(width) // %4 | 516 "+r"(width) // %4 |
| 508 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 517 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 509 [kYToRgb]"r"(&kYToRgb) | 518 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 510 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 519 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 511 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 520 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 512 ); | 521 ); |
| 513 } | 522 } |
| 514 #endif // HAS_I422TOARGB4444ROW_NEON | 523 #endif // HAS_I422TOARGB4444ROW_NEON |
| 515 | 524 |
| 516 #ifdef HAS_I400TOARGBROW_NEON | 525 #ifdef HAS_I400TOARGBROW_NEON |
| 517 void I400ToARGBRow_NEON(const uint8* src_y, | 526 void I400ToARGBRow_NEON(const uint8* src_y, |
| 518 uint8* dst_argb, | 527 uint8* dst_argb, |
| 519 int width) { | 528 int width) { |
| 520 int64 width64 = (int64)(width); | 529 int64 width64 = (int64)(width); |
| 521 asm volatile ( | 530 asm volatile ( |
| 522 YUV422TORGB_SETUP_REG | 531 YUV422TORGB_SETUP_REG |
| 523 "1: \n" | 532 "1: \n" |
| 524 READYUV400 | 533 READYUV400 |
| 525 YUV422TORGB(v22, v21, v20) | 534 YUV422TORGB(v22, v21, v20) |
| 526 "subs %w2, %w2, #8 \n" | 535 "subs %w2, %w2, #8 \n" |
| 527 "movi v23.8b, #255 \n" | 536 "movi v23.8b, #255 \n" |
| 528 MEMACCESS(1) | 537 MEMACCESS(1) |
| 529 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" | 538 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" |
| 530 "b.gt 1b \n" | 539 "b.gt 1b \n" |
| 531 : "+r"(src_y), // %0 | 540 : "+r"(src_y), // %0 |
| 532 "+r"(dst_argb), // %1 | 541 "+r"(dst_argb), // %1 |
| 533 "+r"(width64) // %2 | 542 "+r"(width64) // %2 |
| 534 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 543 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 535 [kYToRgb]"r"(&kYToRgb) | 544 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 536 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 545 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 537 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 546 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 538 ); | 547 ); |
| 539 } | 548 } |
| 540 #endif // HAS_I400TOARGBROW_NEON | 549 #endif // HAS_I400TOARGBROW_NEON |
| 541 | 550 |
| 542 #ifdef HAS_J400TOARGBROW_NEON | 551 #ifdef HAS_J400TOARGBROW_NEON |
| 543 void J400ToARGBRow_NEON(const uint8* src_y, | 552 void J400ToARGBRow_NEON(const uint8* src_y, |
| 544 uint8* dst_argb, | 553 uint8* dst_argb, |
| 545 int width) { | 554 int width) { |
| (...skipping 29 matching lines...) Expand all Loading... |
| 575 YUV422TORGB(v22, v21, v20) | 584 YUV422TORGB(v22, v21, v20) |
| 576 "subs %w3, %w3, #8 \n" | 585 "subs %w3, %w3, #8 \n" |
| 577 "movi v23.8b, #255 \n" | 586 "movi v23.8b, #255 \n" |
| 578 MEMACCESS(2) | 587 MEMACCESS(2) |
| 579 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" | 588 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" |
| 580 "b.gt 1b \n" | 589 "b.gt 1b \n" |
| 581 : "+r"(src_y), // %0 | 590 : "+r"(src_y), // %0 |
| 582 "+r"(src_uv), // %1 | 591 "+r"(src_uv), // %1 |
| 583 "+r"(dst_argb), // %2 | 592 "+r"(dst_argb), // %2 |
| 584 "+r"(width) // %3 | 593 "+r"(width) // %3 |
| 585 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 594 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 586 [kYToRgb]"r"(&kYToRgb) | 595 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 587 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 596 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 588 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 597 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 589 ); | 598 ); |
| 590 } | 599 } |
| 591 #endif // HAS_NV12TOARGBROW_NEON | 600 #endif // HAS_NV12TOARGBROW_NEON |
| 592 | 601 |
| 593 #ifdef HAS_NV21TOARGBROW_NEON | 602 #ifdef HAS_NV21TOARGBROW_NEON |
| 594 void NV21ToARGBRow_NEON(const uint8* src_y, | 603 void NV21ToARGBRow_NEON(const uint8* src_y, |
| 595 const uint8* src_uv, | 604 const uint8* src_uv, |
| 596 uint8* dst_argb, | 605 uint8* dst_argb, |
| 597 int width) { | 606 int width) { |
| 598 asm volatile ( | 607 asm volatile ( |
| 599 YUV422TORGB_SETUP_REG | 608 YUV422TORGB_SETUP_REG |
| 600 "1: \n" | 609 "1: \n" |
| 601 READNV21 | 610 READNV21 |
| 602 YUV422TORGB(v22, v21, v20) | 611 YUV422TORGB(v22, v21, v20) |
| 603 "subs %w3, %w3, #8 \n" | 612 "subs %w3, %w3, #8 \n" |
| 604 "movi v23.8b, #255 \n" | 613 "movi v23.8b, #255 \n" |
| 605 MEMACCESS(2) | 614 MEMACCESS(2) |
| 606 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" | 615 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" |
| 607 "b.gt 1b \n" | 616 "b.gt 1b \n" |
| 608 : "+r"(src_y), // %0 | 617 : "+r"(src_y), // %0 |
| 609 "+r"(src_uv), // %1 | 618 "+r"(src_uv), // %1 |
| 610 "+r"(dst_argb), // %2 | 619 "+r"(dst_argb), // %2 |
| 611 "+r"(width) // %3 | 620 "+r"(width) // %3 |
| 612 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 621 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 613 [kYToRgb]"r"(&kYToRgb) | 622 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 614 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 623 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 615 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 624 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 616 ); | 625 ); |
| 617 } | 626 } |
| 618 #endif // HAS_NV21TOARGBROW_NEON | 627 #endif // HAS_NV21TOARGBROW_NEON |
| 619 | 628 |
| 620 #ifdef HAS_NV12TORGB565ROW_NEON | 629 #ifdef HAS_NV12TORGB565ROW_NEON |
| 621 void NV12ToRGB565Row_NEON(const uint8* src_y, | 630 void NV12ToRGB565Row_NEON(const uint8* src_y, |
| 622 const uint8* src_uv, | 631 const uint8* src_uv, |
| 623 uint8* dst_rgb565, | 632 uint8* dst_rgb565, |
| 624 int width) { | 633 int width) { |
| 625 asm volatile ( | 634 asm volatile ( |
| 626 YUV422TORGB_SETUP_REG | 635 YUV422TORGB_SETUP_REG |
| 627 "1: \n" | 636 "1: \n" |
| 628 READNV12 | 637 READNV12 |
| 629 YUV422TORGB(v22, v21, v20) | 638 YUV422TORGB(v22, v21, v20) |
| 630 "subs %w3, %w3, #8 \n" | 639 "subs %w3, %w3, #8 \n" |
| 631 ARGBTORGB565 | 640 ARGBTORGB565 |
| 632 MEMACCESS(2) | 641 MEMACCESS(2) |
| 633 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565. | 642 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565. |
| 634 "b.gt 1b \n" | 643 "b.gt 1b \n" |
| 635 : "+r"(src_y), // %0 | 644 : "+r"(src_y), // %0 |
| 636 "+r"(src_uv), // %1 | 645 "+r"(src_uv), // %1 |
| 637 "+r"(dst_rgb565), // %2 | 646 "+r"(dst_rgb565), // %2 |
| 638 "+r"(width) // %3 | 647 "+r"(width) // %3 |
| 639 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 648 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 640 [kYToRgb]"r"(&kYToRgb) | 649 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 641 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 650 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 642 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 651 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 643 ); | 652 ); |
| 644 } | 653 } |
| 645 #endif // HAS_NV12TORGB565ROW_NEON | 654 #endif // HAS_NV12TORGB565ROW_NEON |
| 646 | 655 |
| 647 #ifdef HAS_NV21TORGB565ROW_NEON | 656 #ifdef HAS_NV21TORGB565ROW_NEON |
| 648 void NV21ToRGB565Row_NEON(const uint8* src_y, | 657 void NV21ToRGB565Row_NEON(const uint8* src_y, |
| 649 const uint8* src_uv, | 658 const uint8* src_uv, |
| 650 uint8* dst_rgb565, | 659 uint8* dst_rgb565, |
| 651 int width) { | 660 int width) { |
| 652 asm volatile ( | 661 asm volatile ( |
| 653 YUV422TORGB_SETUP_REG | 662 YUV422TORGB_SETUP_REG |
| 654 "1: \n" | 663 "1: \n" |
| 655 READNV21 | 664 READNV21 |
| 656 YUV422TORGB(v22, v21, v20) | 665 YUV422TORGB(v22, v21, v20) |
| 657 "subs %w3, %w3, #8 \n" | 666 "subs %w3, %w3, #8 \n" |
| 658 ARGBTORGB565 | 667 ARGBTORGB565 |
| 659 MEMACCESS(2) | 668 MEMACCESS(2) |
| 660 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565. | 669 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565. |
| 661 "b.gt 1b \n" | 670 "b.gt 1b \n" |
| 662 : "+r"(src_y), // %0 | 671 : "+r"(src_y), // %0 |
| 663 "+r"(src_uv), // %1 | 672 "+r"(src_uv), // %1 |
| 664 "+r"(dst_rgb565), // %2 | 673 "+r"(dst_rgb565), // %2 |
| 665 "+r"(width) // %3 | 674 "+r"(width) // %3 |
| 666 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 675 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 667 [kYToRgb]"r"(&kYToRgb) | 676 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 668 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 677 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 669 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 678 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 670 ); | 679 ); |
| 671 } | 680 } |
| 672 #endif // HAS_NV21TORGB565ROW_NEON | 681 #endif // HAS_NV21TORGB565ROW_NEON |
| 673 | 682 |
| 674 #ifdef HAS_YUY2TOARGBROW_NEON | 683 #ifdef HAS_YUY2TOARGBROW_NEON |
| 675 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, | 684 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, |
| 676 uint8* dst_argb, | 685 uint8* dst_argb, |
| 677 int width) { | 686 int width) { |
| 678 int64 width64 = (int64)(width); | 687 int64 width64 = (int64)(width); |
| 679 asm volatile ( | 688 asm volatile ( |
| 680 YUV422TORGB_SETUP_REG | 689 YUV422TORGB_SETUP_REG |
| 681 "1: \n" | 690 "1: \n" |
| 682 READYUY2 | 691 READYUY2 |
| 683 YUV422TORGB(v22, v21, v20) | 692 YUV422TORGB(v22, v21, v20) |
| 684 "subs %w2, %w2, #8 \n" | 693 "subs %w2, %w2, #8 \n" |
| 685 "movi v23.8b, #255 \n" | 694 "movi v23.8b, #255 \n" |
| 686 MEMACCESS(1) | 695 MEMACCESS(1) |
| 687 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" | 696 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" |
| 688 "b.gt 1b \n" | 697 "b.gt 1b \n" |
| 689 : "+r"(src_yuy2), // %0 | 698 : "+r"(src_yuy2), // %0 |
| 690 "+r"(dst_argb), // %1 | 699 "+r"(dst_argb), // %1 |
| 691 "+r"(width64) // %2 | 700 "+r"(width64) // %2 |
| 692 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 701 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 693 [kYToRgb]"r"(&kYToRgb) | 702 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 694 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 703 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 695 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 704 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 696 ); | 705 ); |
| 697 } | 706 } |
| 698 #endif // HAS_YUY2TOARGBROW_NEON | 707 #endif // HAS_YUY2TOARGBROW_NEON |
| 699 | 708 |
| 700 #ifdef HAS_UYVYTOARGBROW_NEON | 709 #ifdef HAS_UYVYTOARGBROW_NEON |
| 701 void UYVYToARGBRow_NEON(const uint8* src_uyvy, | 710 void UYVYToARGBRow_NEON(const uint8* src_uyvy, |
| 702 uint8* dst_argb, | 711 uint8* dst_argb, |
| 703 int width) { | 712 int width) { |
| 704 int64 width64 = (int64)(width); | 713 int64 width64 = (int64)(width); |
| 705 asm volatile ( | 714 asm volatile ( |
| 706 YUV422TORGB_SETUP_REG | 715 YUV422TORGB_SETUP_REG |
| 707 "1: \n" | 716 "1: \n" |
| 708 READUYVY | 717 READUYVY |
| 709 YUV422TORGB(v22, v21, v20) | 718 YUV422TORGB(v22, v21, v20) |
| 710 "subs %w2, %w2, #8 \n" | 719 "subs %w2, %w2, #8 \n" |
| 711 "movi v23.8b, #255 \n" | 720 "movi v23.8b, #255 \n" |
| 712 MEMACCESS(1) | 721 MEMACCESS(1) |
| 713 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" | 722 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" |
| 714 "b.gt 1b \n" | 723 "b.gt 1b \n" |
| 715 : "+r"(src_uyvy), // %0 | 724 : "+r"(src_uyvy), // %0 |
| 716 "+r"(dst_argb), // %1 | 725 "+r"(dst_argb), // %1 |
| 717 "+r"(width64) // %2 | 726 "+r"(width64) // %2 |
| 718 : [kUVBiasBGR]"r"(&kUVBiasBGR), | 727 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), |
| 719 [kYToRgb]"r"(&kYToRgb) | 728 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) |
| 720 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", | 729 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", |
| 721 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" | 730 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" |
| 722 ); | 731 ); |
| 723 } | 732 } |
| 724 #endif // HAS_UYVYTOARGBROW_NEON | 733 #endif // HAS_UYVYTOARGBROW_NEON |
| 725 | 734 |
| 726 // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. | 735 // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. |
| 727 #ifdef HAS_SPLITUVROW_NEON | 736 #ifdef HAS_SPLITUVROW_NEON |
| 728 void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, | 737 void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, |
| 729 int width) { | 738 int width) { |
| (...skipping 2348 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3078 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List | 3087 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List |
| 3079 ); | 3088 ); |
| 3080 } | 3089 } |
| 3081 #endif // HAS_SOBELYROW_NEON | 3090 #endif // HAS_SOBELYROW_NEON |
| 3082 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) | 3091 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) |
| 3083 | 3092 |
| 3084 #ifdef __cplusplus | 3093 #ifdef __cplusplus |
| 3085 } // extern "C" | 3094 } // extern "C" |
| 3086 } // namespace libyuv | 3095 } // namespace libyuv |
| 3087 #endif | 3096 #endif |
| OLD | NEW |