Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(78)

Side by Side Diff: source/row_neon.cc

Issue 1535833003: avx2 interpolate use 8 bit (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: gcc version of interpolate Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_gcc.cc ('k') | source/row_neon64.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 2241 matching lines...) Expand 10 before | Expand all | Expand 10 after
2252 "+r"(width) // %2 2252 "+r"(width) // %2
2253 : 2253 :
2254 : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 2254 : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
2255 ); 2255 );
2256 } 2256 }
2257 2257
2258 // Bilinear filter 16x2 -> 16x1 2258 // Bilinear filter 16x2 -> 16x1
2259 void InterpolateRow_NEON(uint8* dst_ptr, 2259 void InterpolateRow_NEON(uint8* dst_ptr,
2260 const uint8* src_ptr, ptrdiff_t src_stride, 2260 const uint8* src_ptr, ptrdiff_t src_stride,
2261 int dst_width, int source_y_fraction) { 2261 int dst_width, int source_y_fraction) {
2262 int y1_fraction = source_y_fraction >> 1; 2262 int y1_fraction = source_y_fraction;
2263 asm volatile ( 2263 asm volatile (
2264 "cmp %4, #0 \n" 2264 "cmp %4, #0 \n"
2265 "beq 100f \n" 2265 "beq 100f \n"
2266 "add %2, %1 \n" 2266 "add %2, %1 \n"
2267 "cmp %4, #64 \n" 2267 "cmp %4, #128 \n"
2268 "beq 50f \n" 2268 "beq 50f \n"
2269 2269
2270 "vdup.8 d5, %4 \n" 2270 "vdup.8 d5, %4 \n"
2271 "rsb %4, #128 \n" 2271 "rsb %4, #256 \n"
2272 "vdup.8 d4, %4 \n" 2272 "vdup.8 d4, %4 \n"
2273 // General purpose row blend. 2273 // General purpose row blend.
2274 "1: \n" 2274 "1: \n"
2275 MEMACCESS(1) 2275 MEMACCESS(1)
2276 "vld1.8 {q0}, [%1]! \n" 2276 "vld1.8 {q0}, [%1]! \n"
2277 MEMACCESS(2) 2277 MEMACCESS(2)
2278 "vld1.8 {q1}, [%2]! \n" 2278 "vld1.8 {q1}, [%2]! \n"
2279 "subs %3, %3, #16 \n" 2279 "subs %3, %3, #16 \n"
2280 "vmull.u8 q13, d0, d4 \n" 2280 "vmull.u8 q13, d0, d4 \n"
2281 "vmull.u8 q14, d1, d4 \n" 2281 "vmull.u8 q14, d1, d4 \n"
2282 "vmlal.u8 q13, d2, d5 \n" 2282 "vmlal.u8 q13, d2, d5 \n"
2283 "vmlal.u8 q14, d3, d5 \n" 2283 "vmlal.u8 q14, d3, d5 \n"
2284 "vrshrn.u16 d0, q13, #7 \n" 2284 "vrshrn.u16 d0, q13, #8 \n"
2285 "vrshrn.u16 d1, q14, #7 \n" 2285 "vrshrn.u16 d1, q14, #8 \n"
2286 MEMACCESS(0) 2286 MEMACCESS(0)
2287 "vst1.8 {q0}, [%0]! \n" 2287 "vst1.8 {q0}, [%0]! \n"
2288 "bgt 1b \n" 2288 "bgt 1b \n"
2289 "b 99f \n" 2289 "b 99f \n"
2290 2290
2291 // Blend 50 / 50. 2291 // Blend 50 / 50.
2292 "50: \n" 2292 "50: \n"
2293 MEMACCESS(1) 2293 MEMACCESS(1)
2294 "vld1.8 {q0}, [%1]! \n" 2294 "vld1.8 {q0}, [%1]! \n"
2295 MEMACCESS(2) 2295 MEMACCESS(2)
(...skipping 583 matching lines...) Expand 10 before | Expand all | Expand 10 after
2879 "r"(6) // %5 2879 "r"(6) // %5
2880 : "cc", "memory", "q0", "q1" // Clobber List 2880 : "cc", "memory", "q0", "q1" // Clobber List
2881 ); 2881 );
2882 } 2882 }
2883 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) 2883 #endif // defined(__ARM_NEON__) && !defined(__aarch64__)
2884 2884
2885 #ifdef __cplusplus 2885 #ifdef __cplusplus
2886 } // extern "C" 2886 } // extern "C"
2887 } // namespace libyuv 2887 } // namespace libyuv
2888 #endif 2888 #endif
OLD | NEW
« no previous file with comments | « source/row_gcc.cc ('k') | source/row_neon64.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698