Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(49)

Side by Side Diff: source/scale_neon.cc

Issue 2084533006: YUV scale filter columns improved filtering accuracy (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: bump version Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/scale_gcc.cc ('k') | source/scale_win.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 554 matching lines...) Expand 10 before | Expand all | Expand 10 after
565 565
566 // TODO(Yang Zhang): Investigate fewer load instructions for 566 // TODO(Yang Zhang): Investigate fewer load instructions for
567 // the x/dx stepping 567 // the x/dx stepping
568 #define LOAD2_DATA8_LANE(n) \ 568 #define LOAD2_DATA8_LANE(n) \
569 "lsr %5, %3, #16 \n" \ 569 "lsr %5, %3, #16 \n" \
570 "add %6, %1, %5 \n" \ 570 "add %6, %1, %5 \n" \
571 "add %3, %3, %4 \n" \ 571 "add %3, %3, %4 \n" \
572 MEMACCESS(6) \ 572 MEMACCESS(6) \
573 "vld2.8 {d6["#n"], d7["#n"]}, [%6] \n" 573 "vld2.8 {d6["#n"], d7["#n"]}, [%6] \n"
574 574
575 // The NEON version mimics this formula:
576 // #define BLENDER(a, b, f) (uint8)((int)(a) +
577 // ((int)(f) * ((int)(b) - (int)(a)) >> 16))
578
575 void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr, 579 void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
576 int dst_width, int x, int dx) { 580 int dst_width, int x, int dx) {
577 int dx_offset[4] = {0, 1, 2, 3}; 581 int dx_offset[4] = {0, 1, 2, 3};
578 int* tmp = dx_offset; 582 int* tmp = dx_offset;
579 const uint8* src_tmp = src_ptr; 583 const uint8* src_tmp = src_ptr;
580 asm volatile ( 584 asm volatile (
581 "vdup.32 q0, %3 \n" // x 585 "vdup.32 q0, %3 \n" // x
582 "vdup.32 q1, %4 \n" // dx 586 "vdup.32 q1, %4 \n" // dx
583 "vld1.32 {q2}, [%5] \n" // 0 1 2 3 587 "vld1.32 {q2}, [%5] \n" // 0 1 2 3
584 "vshl.i32 q3, q1, #2 \n" // 4 * dx 588 "vshl.i32 q3, q1, #2 \n" // 4 * dx
(...skipping 423 matching lines...) Expand 10 before | Expand all | Expand 10 after
1008 } 1012 }
1009 1013
1010 #undef LOAD2_DATA32_LANE 1014 #undef LOAD2_DATA32_LANE
1011 1015
1012 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) 1016 #endif // defined(__ARM_NEON__) && !defined(__aarch64__)
1013 1017
1014 #ifdef __cplusplus 1018 #ifdef __cplusplus
1015 } // extern "C" 1019 } // extern "C"
1016 } // namespace libyuv 1020 } // namespace libyuv
1017 #endif 1021 #endif
OLD | NEW
« no previous file with comments | « source/scale_gcc.cc ('k') | source/scale_win.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698