| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "libyuv/row.h" | 11 #include "libyuv/row.h" |
| 12 #include "libyuv/rotate_row.h" | 12 #include "libyuv/rotate_row.h" |
| 13 | 13 |
| 14 #include "libyuv/basic_types.h" | 14 #include "libyuv/basic_types.h" |
| 15 | 15 |
| 16 #ifdef __cplusplus | 16 #ifdef __cplusplus |
| 17 namespace libyuv { | 17 namespace libyuv { |
| 18 extern "C" { | 18 extern "C" { |
| 19 #endif | 19 #endif |
| 20 | 20 |
| 21 #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ | 21 #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ |
| 22 !defined(__aarch64__) | 22 !defined(__aarch64__) |
| 23 | 23 |
| 24 static uvec8 kVTbl4x4Transpose = | 24 static uvec8 kVTbl4x4Transpose = |
| 25 { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; | 25 { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; |
| 26 | 26 |
| 27 void TransposeWx8_NEON(const uint8* src, int src_stride, | 27 void TransposeWx8_NEON(const uint8* src, int src_stride, |
| 28 uint8* dst, int dst_stride, | 28 uint8* dst, int dst_stride, |
| 29 int width) { | 29 int width) { |
| 30 const uint8* src_temp = NULL; | 30 const uint8* src_temp; |
| 31 asm volatile ( | 31 asm volatile ( |
| 32 // loops are on blocks of 8. loop will stop when | 32 // loops are on blocks of 8. loop will stop when |
| 33 // counter gets to or below 0. starting the counter | 33 // counter gets to or below 0. starting the counter |
| 34 // at w-8 allows for this | 34 // at w-8 allows for this |
| 35 "sub %5, #8 \n" | 35 "sub %5, #8 \n" |
| 36 | 36 |
| 37 // handle 8x8 blocks. this should be the majority of the plane | 37 // handle 8x8 blocks. this should be the majority of the plane |
| 38 "1: \n" | 38 "1: \n" |
| 39 "mov %0, %1 \n" | 39 "mov %0, %1 \n" |
| 40 | 40 |
| (...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 222 MEMACCESS(1) | 222 MEMACCESS(1) |
| 223 "vld1.8 {d0[6]}, [%1], %2 \n" | 223 "vld1.8 {d0[6]}, [%1], %2 \n" |
| 224 MEMACCESS(1) | 224 MEMACCESS(1) |
| 225 "vld1.8 {d0[7]}, [%1] \n" | 225 "vld1.8 {d0[7]}, [%1] \n" |
| 226 | 226 |
| 227 MEMACCESS(3) | 227 MEMACCESS(3) |
| 228 "vst1.64 {d0}, [%3] \n" | 228 "vst1.64 {d0}, [%3] \n" |
| 229 | 229 |
| 230 "4: \n" | 230 "4: \n" |
| 231 | 231 |
| 232 : "+r"(src_temp), // %0 | 232 : "=&r"(src_temp), // %0 |
| 233 "+r"(src), // %1 | 233 "+r"(src), // %1 |
| 234 "+r"(src_stride), // %2 | 234 "+r"(src_stride), // %2 |
| 235 "+r"(dst), // %3 | 235 "+r"(dst), // %3 |
| 236 "+r"(dst_stride), // %4 | 236 "+r"(dst_stride), // %4 |
| 237 "+r"(width) // %5 | 237 "+r"(width) // %5 |
| 238 : "r"(&kVTbl4x4Transpose) // %6 | 238 : "r"(&kVTbl4x4Transpose) // %6 |
| 239 : "memory", "cc", "q0", "q1", "q2", "q3" | 239 : "memory", "cc", "q0", "q1", "q2", "q3" |
| 240 ); | 240 ); |
| 241 } | 241 } |
| 242 | 242 |
| 243 static uvec8 kVTbl4x4TransposeDi = | 243 static uvec8 kVTbl4x4TransposeDi = |
| 244 { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 }; | 244 { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 }; |
| 245 | 245 |
| 246 void TransposeUVWx8_NEON(const uint8* src, int src_stride, | 246 void TransposeUVWx8_NEON(const uint8* src, int src_stride, |
| 247 uint8* dst_a, int dst_stride_a, | 247 uint8* dst_a, int dst_stride_a, |
| 248 uint8* dst_b, int dst_stride_b, | 248 uint8* dst_b, int dst_stride_b, |
| 249 int width) { | 249 int width) { |
| 250 const uint8* src_temp = NULL; | 250 const uint8* src_temp; |
| 251 asm volatile ( | 251 asm volatile ( |
| 252 // loops are on blocks of 8. loop will stop when | 252 // loops are on blocks of 8. loop will stop when |
| 253 // counter gets to or below 0. starting the counter | 253 // counter gets to or below 0. starting the counter |
| 254 // at w-8 allows for this | 254 // at w-8 allows for this |
| 255 "sub %7, #8 \n" | 255 "sub %7, #8 \n" |
| 256 | 256 |
| 257 // handle 8x8 blocks. this should be the majority of the plane | 257 // handle 8x8 blocks. this should be the majority of the plane |
| 258 "1: \n" | 258 "1: \n" |
| 259 "mov %0, %1 \n" | 259 "mov %0, %1 \n" |
| 260 | 260 |
| (...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 505 MEMACCESS(1) | 505 MEMACCESS(1) |
| 506 "vld2.8 {d0[7], d1[7]}, [%1] \n" | 506 "vld2.8 {d0[7], d1[7]}, [%1] \n" |
| 507 | 507 |
| 508 MEMACCESS(3) | 508 MEMACCESS(3) |
| 509 "vst1.64 {d0}, [%3] \n" | 509 "vst1.64 {d0}, [%3] \n" |
| 510 MEMACCESS(5) | 510 MEMACCESS(5) |
| 511 "vst1.64 {d1}, [%5] \n" | 511 "vst1.64 {d1}, [%5] \n" |
| 512 | 512 |
| 513 "4: \n" | 513 "4: \n" |
| 514 | 514 |
| 515 : "+r"(src_temp), // %0 | 515 : "=&r"(src_temp), // %0 |
| 516 "+r"(src), // %1 | 516 "+r"(src), // %1 |
| 517 "+r"(src_stride), // %2 | 517 "+r"(src_stride), // %2 |
| 518 "+r"(dst_a), // %3 | 518 "+r"(dst_a), // %3 |
| 519 "+r"(dst_stride_a), // %4 | 519 "+r"(dst_stride_a), // %4 |
| 520 "+r"(dst_b), // %5 | 520 "+r"(dst_b), // %5 |
| 521 "+r"(dst_stride_b), // %6 | 521 "+r"(dst_stride_b), // %6 |
| 522 "+r"(width) // %7 | 522 "+r"(width) // %7 |
| 523 : "r"(&kVTbl4x4TransposeDi) // %8 | 523 : "r"(&kVTbl4x4TransposeDi) // %8 |
| 524 : "memory", "cc", | 524 : "memory", "cc", |
| 525 "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11" | 525 "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11" |
| 526 ); | 526 ); |
| 527 } | 527 } |
| 528 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) | 528 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) |
| 529 | 529 |
| 530 #ifdef __cplusplus | 530 #ifdef __cplusplus |
| 531 } // extern "C" | 531 } // extern "C" |
| 532 } // namespace libyuv | 532 } // namespace libyuv |
| 533 #endif | 533 #endif |
| OLD | NEW |