OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 554 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
565 | 565 |
566 // TODO(Yang Zhang): Investigate less load instructions for | 566 // TODO(Yang Zhang): Investigate less load instructions for |
567 // the x/dx stepping | 567 // the x/dx stepping |
568 #define LOAD2_DATA8_LANE(n) \ | 568 #define LOAD2_DATA8_LANE(n) \ |
569 "lsr %5, %3, #16 \n" \ | 569 "lsr %5, %3, #16 \n" \ |
570 "add %6, %1, %5 \n" \ | 570 "add %6, %1, %5 \n" \ |
571 "add %3, %3, %4 \n" \ | 571 "add %3, %3, %4 \n" \ |
572 MEMACCESS(6) \ | 572 MEMACCESS(6) \ |
573 "vld2.8 {d6["#n"], d7["#n"]}, [%6] \n" | 573 "vld2.8 {d6["#n"], d7["#n"]}, [%6] \n" |
574 | 574 |
575 // The NEON version mimics this formula: | 575 // The NEON version mimics this formula (from row_common.cc): |
576 // #define BLENDER(a, b, f) (uint8)((int)(a) + | 576 // #define BLENDER(a, b, f) (uint8)((int)(a) + \ |
577 // ((int)(f) * ((int)(b) - (int)(a)) >> 16)) | 577 // ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) |
578 | 578 |
579 void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, | 579 void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, |
580 int dst_width, int x, int dx) { | 580 int dst_width, int x, int dx) { |
581 int dx_offset[4] = {0, 1, 2, 3}; | 581 int dx_offset[4] = {0, 1, 2, 3}; |
582 int* tmp = dx_offset; | 582 int* tmp = dx_offset; |
583 const uint8* src_tmp = src_ptr; | 583 const uint8* src_tmp = src_ptr; |
584 asm volatile ( | 584 asm volatile ( |
585 "vdup.32 q0, %3 \n" // x | 585 "vdup.32 q0, %3 \n" // x |
586 "vdup.32 q1, %4 \n" // dx | 586 "vdup.32 q1, %4 \n" // dx |
587 "vld1.32 {q2}, [%5] \n" // 0 1 2 3 | 587 "vld1.32 {q2}, [%5] \n" // 0 1 2 3 |
(...skipping 424 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1012 } | 1012 } |
1013 | 1013 |
1014 #undef LOAD2_DATA32_LANE | 1014 #undef LOAD2_DATA32_LANE |
1015 | 1015 |
1016 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) | 1016 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) |
1017 | 1017 |
1018 #ifdef __cplusplus | 1018 #ifdef __cplusplus |
1019 } // extern "C" | 1019 } // extern "C" |
1020 } // namespace libyuv | 1020 } // namespace libyuv |
1021 #endif | 1021 #endif |
OLD | NEW |