| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 131 "vqadd.s16 q9, q9, q10 \n" /* R */ \ | 131 "vqadd.s16 q9, q9, q10 \n" /* R */ \ |
| 132 "vqsub.s16 q0, q0, q3 \n" /* G */ \ | 132 "vqsub.s16 q0, q0, q3 \n" /* G */ \ |
| 133 "vqshrun.s16 d20, q8, #6 \n" /* B */ \ | 133 "vqshrun.s16 d20, q8, #6 \n" /* B */ \ |
| 134 "vqshrun.s16 d22, q9, #6 \n" /* R */ \ | 134 "vqshrun.s16 d22, q9, #6 \n" /* R */ \ |
| 135 "vqshrun.s16 d21, q0, #6 \n" /* G */ | 135 "vqshrun.s16 d21, q0, #6 \n" /* G */ |
| 136 | 136 |
| 137 void I444ToARGBRow_NEON(const uint8* src_y, | 137 void I444ToARGBRow_NEON(const uint8* src_y, |
| 138 const uint8* src_u, | 138 const uint8* src_u, |
| 139 const uint8* src_v, | 139 const uint8* src_v, |
| 140 uint8* dst_argb, | 140 uint8* dst_argb, |
| 141 struct YuvConstants* yuvconstants, | 141 const struct YuvConstants* yuvconstants, |
| 142 int width) { | 142 int width) { |
| 143 asm volatile ( | 143 asm volatile ( |
| 144 YUVTORGB_SETUP | 144 YUVTORGB_SETUP |
| 145 "1: \n" | 145 "1: \n" |
| 146 READYUV444 | 146 READYUV444 |
| 147 YUVTORGB | 147 YUVTORGB |
| 148 "subs %4, %4, #8 \n" | 148 "subs %4, %4, #8 \n" |
| 149 "vmov.u8 d23, #255 \n" | 149 "vmov.u8 d23, #255 \n" |
| 150 MEMACCESS(3) | 150 MEMACCESS(3) |
| 151 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" | 151 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" |
| 152 "bgt 1b \n" | 152 "bgt 1b \n" |
| 153 : "+r"(src_y), // %0 | 153 : "+r"(src_y), // %0 |
| 154 "+r"(src_u), // %1 | 154 "+r"(src_u), // %1 |
| 155 "+r"(src_v), // %2 | 155 "+r"(src_v), // %2 |
| 156 "+r"(dst_argb), // %3 | 156 "+r"(dst_argb), // %3 |
| 157 "+r"(width) // %4 | 157 "+r"(width) // %4 |
| 158 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 158 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 159 [kUVToG]"r"(&yuvconstants->kUVToG), | 159 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 160 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 160 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 161 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 161 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 162 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 162 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
| 163 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 163 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
| 164 ); | 164 ); |
| 165 } | 165 } |
| 166 | 166 |
| 167 void I422ToARGBRow_NEON(const uint8* src_y, | 167 void I422ToARGBRow_NEON(const uint8* src_y, |
| 168 const uint8* src_u, | 168 const uint8* src_u, |
| 169 const uint8* src_v, | 169 const uint8* src_v, |
| 170 uint8* dst_argb, | 170 uint8* dst_argb, |
| 171 struct YuvConstants* yuvconstants, | 171 const struct YuvConstants* yuvconstants, |
| 172 int width) { | 172 int width) { |
| 173 asm volatile ( | 173 asm volatile ( |
| 174 YUVTORGB_SETUP | 174 YUVTORGB_SETUP |
| 175 "1: \n" | 175 "1: \n" |
| 176 READYUV422 | 176 READYUV422 |
| 177 YUVTORGB | 177 YUVTORGB |
| 178 "subs %4, %4, #8 \n" | 178 "subs %4, %4, #8 \n" |
| 179 "vmov.u8 d23, #255 \n" | 179 "vmov.u8 d23, #255 \n" |
| 180 MEMACCESS(3) | 180 MEMACCESS(3) |
| 181 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" | 181 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" |
| 182 "bgt 1b \n" | 182 "bgt 1b \n" |
| 183 : "+r"(src_y), // %0 | 183 : "+r"(src_y), // %0 |
| 184 "+r"(src_u), // %1 | 184 "+r"(src_u), // %1 |
| 185 "+r"(src_v), // %2 | 185 "+r"(src_v), // %2 |
| 186 "+r"(dst_argb), // %3 | 186 "+r"(dst_argb), // %3 |
| 187 "+r"(width) // %4 | 187 "+r"(width) // %4 |
| 188 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 188 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 189 [kUVToG]"r"(&yuvconstants->kUVToG), | 189 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 190 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 190 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 191 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 191 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 192 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 192 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
| 193 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 193 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
| 194 ); | 194 ); |
| 195 } | 195 } |
| 196 | 196 |
| 197 void I411ToARGBRow_NEON(const uint8* src_y, | 197 void I411ToARGBRow_NEON(const uint8* src_y, |
| 198 const uint8* src_u, | 198 const uint8* src_u, |
| 199 const uint8* src_v, | 199 const uint8* src_v, |
| 200 uint8* dst_argb, | 200 uint8* dst_argb, |
| 201 struct YuvConstants* yuvconstants, | 201 const struct YuvConstants* yuvconstants, |
| 202 int width) { | 202 int width) { |
| 203 asm volatile ( | 203 asm volatile ( |
| 204 YUVTORGB_SETUP | 204 YUVTORGB_SETUP |
| 205 "1: \n" | 205 "1: \n" |
| 206 READYUV411 | 206 READYUV411 |
| 207 YUVTORGB | 207 YUVTORGB |
| 208 "subs %4, %4, #8 \n" | 208 "subs %4, %4, #8 \n" |
| 209 "vmov.u8 d23, #255 \n" | 209 "vmov.u8 d23, #255 \n" |
| 210 MEMACCESS(3) | 210 MEMACCESS(3) |
| 211 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" | 211 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" |
| 212 "bgt 1b \n" | 212 "bgt 1b \n" |
| 213 : "+r"(src_y), // %0 | 213 : "+r"(src_y), // %0 |
| 214 "+r"(src_u), // %1 | 214 "+r"(src_u), // %1 |
| 215 "+r"(src_v), // %2 | 215 "+r"(src_v), // %2 |
| 216 "+r"(dst_argb), // %3 | 216 "+r"(dst_argb), // %3 |
| 217 "+r"(width) // %4 | 217 "+r"(width) // %4 |
| 218 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 218 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 219 [kUVToG]"r"(&yuvconstants->kUVToG), | 219 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 220 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 220 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 221 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 221 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 222 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 222 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
| 223 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 223 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
| 224 ); | 224 ); |
| 225 } | 225 } |
| 226 | 226 |
| 227 void I422ToBGRARow_NEON(const uint8* src_y, | 227 void I422ToBGRARow_NEON(const uint8* src_y, |
| 228 const uint8* src_u, | 228 const uint8* src_u, |
| 229 const uint8* src_v, | 229 const uint8* src_v, |
| 230 uint8* dst_bgra, | 230 uint8* dst_bgra, |
| 231 struct YuvConstants* yuvconstants, | 231 const struct YuvConstants* yuvconstants, |
| 232 int width) { | 232 int width) { |
| 233 asm volatile ( | 233 asm volatile ( |
| 234 YUVTORGB_SETUP | 234 YUVTORGB_SETUP |
| 235 "1: \n" | 235 "1: \n" |
| 236 READYUV422 | 236 READYUV422 |
| 237 YUVTORGB | 237 YUVTORGB |
| 238 "subs %4, %4, #8 \n" | 238 "subs %4, %4, #8 \n" |
| 239 "vswp.u8 d20, d22 \n" | 239 "vswp.u8 d20, d22 \n" |
| 240 "vmov.u8 d19, #255 \n" | 240 "vmov.u8 d19, #255 \n" |
| 241 MEMACCESS(3) | 241 MEMACCESS(3) |
| (...skipping 10 matching lines...) Expand all Loading... |
| 252 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 252 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 253 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 253 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
| 254 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 254 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
| 255 ); | 255 ); |
| 256 } | 256 } |
| 257 | 257 |
| 258 void I422ToABGRRow_NEON(const uint8* src_y, | 258 void I422ToABGRRow_NEON(const uint8* src_y, |
| 259 const uint8* src_u, | 259 const uint8* src_u, |
| 260 const uint8* src_v, | 260 const uint8* src_v, |
| 261 uint8* dst_abgr, | 261 uint8* dst_abgr, |
| 262 struct YuvConstants* yuvconstants, | 262 const struct YuvConstants* yuvconstants, |
| 263 int width) { | 263 int width) { |
| 264 asm volatile ( | 264 asm volatile ( |
| 265 YUVTORGB_SETUP | 265 YUVTORGB_SETUP |
| 266 "1: \n" | 266 "1: \n" |
| 267 READYUV422 | 267 READYUV422 |
| 268 YUVTORGB | 268 YUVTORGB |
| 269 "subs %4, %4, #8 \n" | 269 "subs %4, %4, #8 \n" |
| 270 "vswp.u8 d20, d22 \n" | 270 "vswp.u8 d20, d22 \n" |
| 271 "vmov.u8 d23, #255 \n" | 271 "vmov.u8 d23, #255 \n" |
| 272 MEMACCESS(3) | 272 MEMACCESS(3) |
| (...skipping 10 matching lines...) Expand all Loading... |
| 283 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 283 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 284 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 284 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
| 285 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 285 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
| 286 ); | 286 ); |
| 287 } | 287 } |
| 288 | 288 |
| 289 void I422ToRGBARow_NEON(const uint8* src_y, | 289 void I422ToRGBARow_NEON(const uint8* src_y, |
| 290 const uint8* src_u, | 290 const uint8* src_u, |
| 291 const uint8* src_v, | 291 const uint8* src_v, |
| 292 uint8* dst_rgba, | 292 uint8* dst_rgba, |
| 293 struct YuvConstants* yuvconstants, | 293 const struct YuvConstants* yuvconstants, |
| 294 int width) { | 294 int width) { |
| 295 asm volatile ( | 295 asm volatile ( |
| 296 YUVTORGB_SETUP | 296 YUVTORGB_SETUP |
| 297 "1: \n" | 297 "1: \n" |
| 298 READYUV422 | 298 READYUV422 |
| 299 YUVTORGB | 299 YUVTORGB |
| 300 "subs %4, %4, #8 \n" | 300 "subs %4, %4, #8 \n" |
| 301 "vmov.u8 d19, #255 \n" | 301 "vmov.u8 d19, #255 \n" |
| 302 MEMACCESS(3) | 302 MEMACCESS(3) |
| 303 "vst4.8 {d19, d20, d21, d22}, [%3]! \n" | 303 "vst4.8 {d19, d20, d21, d22}, [%3]! \n" |
| 304 "bgt 1b \n" | 304 "bgt 1b \n" |
| 305 : "+r"(src_y), // %0 | 305 : "+r"(src_y), // %0 |
| 306 "+r"(src_u), // %1 | 306 "+r"(src_u), // %1 |
| 307 "+r"(src_v), // %2 | 307 "+r"(src_v), // %2 |
| 308 "+r"(dst_rgba), // %3 | 308 "+r"(dst_rgba), // %3 |
| 309 "+r"(width) // %4 | 309 "+r"(width) // %4 |
| 310 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 310 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 311 [kUVToG]"r"(&yuvconstants->kUVToG), | 311 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 312 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 312 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 313 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 313 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 314 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 314 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
| 315 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 315 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
| 316 ); | 316 ); |
| 317 } | 317 } |
| 318 | 318 |
| 319 void I422ToRGB24Row_NEON(const uint8* src_y, | 319 void I422ToRGB24Row_NEON(const uint8* src_y, |
| 320 const uint8* src_u, | 320 const uint8* src_u, |
| 321 const uint8* src_v, | 321 const uint8* src_v, |
| 322 uint8* dst_rgb24, | 322 uint8* dst_rgb24, |
| 323 struct YuvConstants* yuvconstants, | 323 const struct YuvConstants* yuvconstants, |
| 324 int width) { | 324 int width) { |
| 325 asm volatile ( | 325 asm volatile ( |
| 326 YUVTORGB_SETUP | 326 YUVTORGB_SETUP |
| 327 "1: \n" | 327 "1: \n" |
| 328 READYUV422 | 328 READYUV422 |
| 329 YUVTORGB | 329 YUVTORGB |
| 330 "subs %4, %4, #8 \n" | 330 "subs %4, %4, #8 \n" |
| 331 MEMACCESS(3) | 331 MEMACCESS(3) |
| 332 "vst3.8 {d20, d21, d22}, [%3]! \n" | 332 "vst3.8 {d20, d21, d22}, [%3]! \n" |
| 333 "bgt 1b \n" | 333 "bgt 1b \n" |
| 334 : "+r"(src_y), // %0 | 334 : "+r"(src_y), // %0 |
| 335 "+r"(src_u), // %1 | 335 "+r"(src_u), // %1 |
| 336 "+r"(src_v), // %2 | 336 "+r"(src_v), // %2 |
| 337 "+r"(dst_rgb24), // %3 | 337 "+r"(dst_rgb24), // %3 |
| 338 "+r"(width) // %4 | 338 "+r"(width) // %4 |
| 339 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 339 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 340 [kUVToG]"r"(&yuvconstants->kUVToG), | 340 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 341 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 341 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 342 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 342 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 343 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 343 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
| 344 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 344 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
| 345 ); | 345 ); |
| 346 } | 346 } |
| 347 | 347 |
| 348 void I422ToRAWRow_NEON(const uint8* src_y, | 348 void I422ToRAWRow_NEON(const uint8* src_y, |
| 349 const uint8* src_u, | 349 const uint8* src_u, |
| 350 const uint8* src_v, | 350 const uint8* src_v, |
| 351 uint8* dst_raw, | 351 uint8* dst_raw, |
| 352 struct YuvConstants* yuvconstants, | 352 const struct YuvConstants* yuvconstants, |
| 353 int width) { | 353 int width) { |
| 354 asm volatile ( | 354 asm volatile ( |
| 355 YUVTORGB_SETUP | 355 YUVTORGB_SETUP |
| 356 "1: \n" | 356 "1: \n" |
| 357 READYUV422 | 357 READYUV422 |
| 358 YUVTORGB | 358 YUVTORGB |
| 359 "subs %4, %4, #8 \n" | 359 "subs %4, %4, #8 \n" |
| 360 "vswp.u8 d20, d22 \n" | 360 "vswp.u8 d20, d22 \n" |
| 361 MEMACCESS(3) | 361 MEMACCESS(3) |
| 362 "vst3.8 {d20, d21, d22}, [%3]! \n" | 362 "vst3.8 {d20, d21, d22}, [%3]! \n" |
| (...skipping 21 matching lines...) Expand all Loading... |
| 384 "vmovl.u8 q10, d22 \n" /* R */ \ | 384 "vmovl.u8 q10, d22 \n" /* R */ \ |
| 385 "vshl.u16 q9, q9, #5 \n" /* G */ \ | 385 "vshl.u16 q9, q9, #5 \n" /* G */ \ |
| 386 "vshl.u16 q10, q10, #11 \n" /* R */ \ | 386 "vshl.u16 q10, q10, #11 \n" /* R */ \ |
| 387 "vorr q0, q8, q9 \n" /* BG */ \ | 387 "vorr q0, q8, q9 \n" /* BG */ \ |
| 388 "vorr q0, q0, q10 \n" /* BGR */ | 388 "vorr q0, q0, q10 \n" /* BGR */ |
| 389 | 389 |
| 390 void I422ToRGB565Row_NEON(const uint8* src_y, | 390 void I422ToRGB565Row_NEON(const uint8* src_y, |
| 391 const uint8* src_u, | 391 const uint8* src_u, |
| 392 const uint8* src_v, | 392 const uint8* src_v, |
| 393 uint8* dst_rgb565, | 393 uint8* dst_rgb565, |
| 394 struct YuvConstants* yuvconstants, | 394 const struct YuvConstants* yuvconstants, |
| 395 int width) { | 395 int width) { |
| 396 asm volatile ( | 396 asm volatile ( |
| 397 YUVTORGB_SETUP | 397 YUVTORGB_SETUP |
| 398 "1: \n" | 398 "1: \n" |
| 399 READYUV422 | 399 READYUV422 |
| 400 YUVTORGB | 400 YUVTORGB |
| 401 "subs %4, %4, #8 \n" | 401 "subs %4, %4, #8 \n" |
| 402 ARGBTORGB565 | 402 ARGBTORGB565 |
| 403 MEMACCESS(3) | 403 MEMACCESS(3) |
| 404 "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565. | 404 "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565. |
| (...skipping 24 matching lines...) Expand all Loading... |
| 429 "vshl.u16 q10, q10, #10 \n" /* R */ \ | 429 "vshl.u16 q10, q10, #10 \n" /* R */ \ |
| 430 "vshl.u16 q11, q11, #15 \n" /* A */ \ | 430 "vshl.u16 q11, q11, #15 \n" /* A */ \ |
| 431 "vorr q0, q8, q9 \n" /* BG */ \ | 431 "vorr q0, q8, q9 \n" /* BG */ \ |
| 432 "vorr q1, q10, q11 \n" /* RA */ \ | 432 "vorr q1, q10, q11 \n" /* RA */ \ |
| 433 "vorr q0, q0, q1 \n" /* BGRA */ | 433 "vorr q0, q0, q1 \n" /* BGRA */ |
| 434 | 434 |
| 435 void I422ToARGB1555Row_NEON(const uint8* src_y, | 435 void I422ToARGB1555Row_NEON(const uint8* src_y, |
| 436 const uint8* src_u, | 436 const uint8* src_u, |
| 437 const uint8* src_v, | 437 const uint8* src_v, |
| 438 uint8* dst_argb1555, | 438 uint8* dst_argb1555, |
| 439 struct YuvConstants* yuvconstants, | 439 const struct YuvConstants* yuvconstants, |
| 440 int width) { | 440 int width) { |
| 441 asm volatile ( | 441 asm volatile ( |
| 442 YUVTORGB_SETUP | 442 YUVTORGB_SETUP |
| 443 "1: \n" | 443 "1: \n" |
| 444 READYUV422 | 444 READYUV422 |
| 445 YUVTORGB | 445 YUVTORGB |
| 446 "subs %4, %4, #8 \n" | 446 "subs %4, %4, #8 \n" |
| 447 "vmov.u8 d23, #255 \n" | 447 "vmov.u8 d23, #255 \n" |
| 448 ARGBTOARGB1555 | 448 ARGBTOARGB1555 |
| 449 MEMACCESS(3) | 449 MEMACCESS(3) |
| (...skipping 19 matching lines...) Expand all Loading... |
| 469 "vshr.u8 d22, d22, #4 \n" /* R */ \ | 469 "vshr.u8 d22, d22, #4 \n" /* R */ \ |
| 470 "vbic.32 d23, d23, d4 \n" /* A */ \ | 470 "vbic.32 d23, d23, d4 \n" /* A */ \ |
| 471 "vorr d0, d20, d21 \n" /* BG */ \ | 471 "vorr d0, d20, d21 \n" /* BG */ \ |
| 472 "vorr d1, d22, d23 \n" /* RA */ \ | 472 "vorr d1, d22, d23 \n" /* RA */ \ |
| 473 "vzip.u8 d0, d1 \n" /* BGRA */ | 473 "vzip.u8 d0, d1 \n" /* BGRA */ |
| 474 | 474 |
| 475 void I422ToARGB4444Row_NEON(const uint8* src_y, | 475 void I422ToARGB4444Row_NEON(const uint8* src_y, |
| 476 const uint8* src_u, | 476 const uint8* src_u, |
| 477 const uint8* src_v, | 477 const uint8* src_v, |
| 478 uint8* dst_argb4444, | 478 uint8* dst_argb4444, |
| 479 struct YuvConstants* yuvconstants, | 479 const struct YuvConstants* yuvconstants, |
| 480 int width) { | 480 int width) { |
| 481 asm volatile ( | 481 asm volatile ( |
| 482 YUVTORGB_SETUP | 482 YUVTORGB_SETUP |
| 483 "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. | 483 "vmov.u8 d4, #0x0f \n" // bits to clear with vbic. |
| 484 "1: \n" | 484 "1: \n" |
| 485 READYUV422 | 485 READYUV422 |
| 486 YUVTORGB | 486 YUVTORGB |
| 487 "subs %4, %4, #8 \n" | 487 "subs %4, %4, #8 \n" |
| 488 "vmov.u8 d23, #255 \n" | 488 "vmov.u8 d23, #255 \n" |
| 489 ARGBTOARGB4444 | 489 ARGBTOARGB4444 |
| (...skipping 23 matching lines...) Expand all Loading... |
| 513 READYUV400 | 513 READYUV400 |
| 514 YUVTORGB | 514 YUVTORGB |
| 515 "subs %2, %2, #8 \n" | 515 "subs %2, %2, #8 \n" |
| 516 "vmov.u8 d23, #255 \n" | 516 "vmov.u8 d23, #255 \n" |
| 517 MEMACCESS(1) | 517 MEMACCESS(1) |
| 518 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" | 518 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" |
| 519 "bgt 1b \n" | 519 "bgt 1b \n" |
| 520 : "+r"(src_y), // %0 | 520 : "+r"(src_y), // %0 |
| 521 "+r"(dst_argb), // %1 | 521 "+r"(dst_argb), // %1 |
| 522 "+r"(width) // %2 | 522 "+r"(width) // %2 |
| 523 : [kUVToRB]"r"(&kYuvConstants.kUVToRB), | 523 : [kUVToRB]"r"(&kYuvIConstants.kUVToRB), |
| 524 [kUVToG]"r"(&kYuvConstants.kUVToG), | 524 [kUVToG]"r"(&kYuvIConstants.kUVToG), |
| 525 [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR), | 525 [kUVBiasBGR]"r"(&kYuvIConstants.kUVBiasBGR), |
| 526 [kYToRgb]"r"(&kYuvConstants.kYToRgb) | 526 [kYToRgb]"r"(&kYuvIConstants.kYToRgb) |
| 527 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 527 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
| 528 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 528 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
| 529 ); | 529 ); |
| 530 } | 530 } |
| 531 | 531 |
| 532 void J400ToARGBRow_NEON(const uint8* src_y, | 532 void J400ToARGBRow_NEON(const uint8* src_y, |
| 533 uint8* dst_argb, | 533 uint8* dst_argb, |
| 534 int width) { | 534 int width) { |
| 535 asm volatile ( | 535 asm volatile ( |
| 536 "vmov.u8 d23, #255 \n" | 536 "vmov.u8 d23, #255 \n" |
| (...skipping 10 matching lines...) Expand all Loading... |
| 547 "+r"(dst_argb), // %1 | 547 "+r"(dst_argb), // %1 |
| 548 "+r"(width) // %2 | 548 "+r"(width) // %2 |
| 549 : | 549 : |
| 550 : "cc", "memory", "d20", "d21", "d22", "d23" | 550 : "cc", "memory", "d20", "d21", "d22", "d23" |
| 551 ); | 551 ); |
| 552 } | 552 } |
| 553 | 553 |
| 554 void NV12ToARGBRow_NEON(const uint8* src_y, | 554 void NV12ToARGBRow_NEON(const uint8* src_y, |
| 555 const uint8* src_uv, | 555 const uint8* src_uv, |
| 556 uint8* dst_argb, | 556 uint8* dst_argb, |
| 557 struct YuvConstants* yuvconstants, | 557 const struct YuvConstants* yuvconstants, |
| 558 int width) { | 558 int width) { |
| 559 asm volatile ( | 559 asm volatile ( |
| 560 YUVTORGB_SETUP | 560 YUVTORGB_SETUP |
| 561 "1: \n" | 561 "1: \n" |
| 562 READNV12 | 562 READNV12 |
| 563 YUVTORGB | 563 YUVTORGB |
| 564 "subs %3, %3, #8 \n" | 564 "subs %3, %3, #8 \n" |
| 565 "vmov.u8 d23, #255 \n" | 565 "vmov.u8 d23, #255 \n" |
| 566 MEMACCESS(2) | 566 MEMACCESS(2) |
| 567 "vst4.8 {d20, d21, d22, d23}, [%2]! \n" | 567 "vst4.8 {d20, d21, d22, d23}, [%2]! \n" |
| 568 "bgt 1b \n" | 568 "bgt 1b \n" |
| 569 : "+r"(src_y), // %0 | 569 : "+r"(src_y), // %0 |
| 570 "+r"(src_uv), // %1 | 570 "+r"(src_uv), // %1 |
| 571 "+r"(dst_argb), // %2 | 571 "+r"(dst_argb), // %2 |
| 572 "+r"(width) // %3 | 572 "+r"(width) // %3 |
| 573 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 573 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 574 [kUVToG]"r"(&yuvconstants->kUVToG), | 574 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 575 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 575 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 576 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 576 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 577 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 577 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
| 578 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 578 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
| 579 ); | 579 ); |
| 580 } | 580 } |
| 581 | 581 |
| 582 void NV21ToARGBRow_NEON(const uint8* src_y, | 582 void NV21ToARGBRow_NEON(const uint8* src_y, |
| 583 const uint8* src_vu, | 583 const uint8* src_vu, |
| 584 uint8* dst_argb, | 584 uint8* dst_argb, |
| 585 struct YuvConstants* yuvconstants, | 585 const struct YuvConstants* yuvconstants, |
| 586 int width) { | 586 int width) { |
| 587 asm volatile ( | 587 asm volatile ( |
| 588 YUVTORGB_SETUP | 588 YUVTORGB_SETUP |
| 589 "1: \n" | 589 "1: \n" |
| 590 READNV21 | 590 READNV21 |
| 591 YUVTORGB | 591 YUVTORGB |
| 592 "subs %3, %3, #8 \n" | 592 "subs %3, %3, #8 \n" |
| 593 "vmov.u8 d23, #255 \n" | 593 "vmov.u8 d23, #255 \n" |
| 594 MEMACCESS(2) | 594 MEMACCESS(2) |
| 595 "vst4.8 {d20, d21, d22, d23}, [%2]! \n" | 595 "vst4.8 {d20, d21, d22, d23}, [%2]! \n" |
| 596 "bgt 1b \n" | 596 "bgt 1b \n" |
| 597 : "+r"(src_y), // %0 | 597 : "+r"(src_y), // %0 |
| 598 "+r"(src_vu), // %1 | 598 "+r"(src_vu), // %1 |
| 599 "+r"(dst_argb), // %2 | 599 "+r"(dst_argb), // %2 |
| 600 "+r"(width) // %3 | 600 "+r"(width) // %3 |
| 601 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 601 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 602 [kUVToG]"r"(&yuvconstants->kUVToG), | 602 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 603 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 603 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 604 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 604 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 605 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 605 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
| 606 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 606 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
| 607 ); | 607 ); |
| 608 } | 608 } |
| 609 | 609 |
| 610 void NV12ToRGB565Row_NEON(const uint8* src_y, | 610 void NV12ToRGB565Row_NEON(const uint8* src_y, |
| 611 const uint8* src_uv, | 611 const uint8* src_uv, |
| 612 uint8* dst_rgb565, | 612 uint8* dst_rgb565, |
| 613 struct YuvConstants* yuvconstants, | 613 const struct YuvConstants* yuvconstants, |
| 614 int width) { | 614 int width) { |
| 615 asm volatile ( | 615 asm volatile ( |
| 616 YUVTORGB_SETUP | 616 YUVTORGB_SETUP |
| 617 "1: \n" | 617 "1: \n" |
| 618 READNV12 | 618 READNV12 |
| 619 YUVTORGB | 619 YUVTORGB |
| 620 "subs %3, %3, #8 \n" | 620 "subs %3, %3, #8 \n" |
| 621 ARGBTORGB565 | 621 ARGBTORGB565 |
| 622 MEMACCESS(2) | 622 MEMACCESS(2) |
| 623 "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. | 623 "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. |
| 624 "bgt 1b \n" | 624 "bgt 1b \n" |
| 625 : "+r"(src_y), // %0 | 625 : "+r"(src_y), // %0 |
| 626 "+r"(src_uv), // %1 | 626 "+r"(src_uv), // %1 |
| 627 "+r"(dst_rgb565), // %2 | 627 "+r"(dst_rgb565), // %2 |
| 628 "+r"(width) // %3 | 628 "+r"(width) // %3 |
| 629 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 629 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 630 [kUVToG]"r"(&yuvconstants->kUVToG), | 630 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 631 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 631 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 632 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 632 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 633 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 633 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
| 634 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 634 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
| 635 ); | 635 ); |
| 636 } | 636 } |
| 637 | 637 |
| 638 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, | 638 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, |
| 639 uint8* dst_argb, | 639 uint8* dst_argb, |
| 640 struct YuvConstants* yuvconstants, | 640 const struct YuvConstants* yuvconstants, |
| 641 int width) { | 641 int width) { |
| 642 asm volatile ( | 642 asm volatile ( |
| 643 YUVTORGB_SETUP | 643 YUVTORGB_SETUP |
| 644 "1: \n" | 644 "1: \n" |
| 645 READYUY2 | 645 READYUY2 |
| 646 YUVTORGB | 646 YUVTORGB |
| 647 "subs %2, %2, #8 \n" | 647 "subs %2, %2, #8 \n" |
| 648 "vmov.u8 d23, #255 \n" | 648 "vmov.u8 d23, #255 \n" |
| 649 MEMACCESS(1) | 649 MEMACCESS(1) |
| 650 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" | 650 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" |
| 651 "bgt 1b \n" | 651 "bgt 1b \n" |
| 652 : "+r"(src_yuy2), // %0 | 652 : "+r"(src_yuy2), // %0 |
| 653 "+r"(dst_argb), // %1 | 653 "+r"(dst_argb), // %1 |
| 654 "+r"(width) // %2 | 654 "+r"(width) // %2 |
| 655 : [kUVToRB]"r"(&yuvconstants->kUVToRB), | 655 : [kUVToRB]"r"(&yuvconstants->kUVToRB), |
| 656 [kUVToG]"r"(&yuvconstants->kUVToG), | 656 [kUVToG]"r"(&yuvconstants->kUVToG), |
| 657 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), | 657 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), |
| 658 [kYToRgb]"r"(&yuvconstants->kYToRgb) | 658 [kYToRgb]"r"(&yuvconstants->kYToRgb) |
| 659 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", | 659 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", |
| 660 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | 660 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" |
| 661 ); | 661 ); |
| 662 } | 662 } |
| 663 | 663 |
| 664 void UYVYToARGBRow_NEON(const uint8* src_uyvy, | 664 void UYVYToARGBRow_NEON(const uint8* src_uyvy, |
| 665 uint8* dst_argb, | 665 uint8* dst_argb, |
| 666 struct YuvConstants* yuvconstants, | 666 const struct YuvConstants* yuvconstants, |
| 667 int width) { | 667 int width) { |
| 668 asm volatile ( | 668 asm volatile ( |
| 669 YUVTORGB_SETUP | 669 YUVTORGB_SETUP |
| 670 "1: \n" | 670 "1: \n" |
| 671 READUYVY | 671 READUYVY |
| 672 YUVTORGB | 672 YUVTORGB |
| 673 "subs %2, %2, #8 \n" | 673 "subs %2, %2, #8 \n" |
| 674 "vmov.u8 d23, #255 \n" | 674 "vmov.u8 d23, #255 \n" |
| 675 MEMACCESS(1) | 675 MEMACCESS(1) |
| 676 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" | 676 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" |
| (...skipping 2274 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2951 "r"(6) // %5 | 2951 "r"(6) // %5 |
| 2952 : "cc", "memory", "q0", "q1" // Clobber List | 2952 : "cc", "memory", "q0", "q1" // Clobber List |
| 2953 ); | 2953 ); |
| 2954 } | 2954 } |
| 2955 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) | 2955 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) |
| 2956 | 2956 |
| 2957 #ifdef __cplusplus | 2957 #ifdef __cplusplus |
| 2958 } // extern "C" | 2958 } // extern "C" |
| 2959 } // namespace libyuv | 2959 } // namespace libyuv |
| 2960 #endif | 2960 #endif |
| OLD | NEW |