Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(130)

Side by Side Diff: source/row_neon64.cc

Issue 1535833003: avx2 interpolate use 8 bit (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: gcc version of interpolate (Created 5 years ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_neon.cc ('k') | source/row_win.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 2318 matching lines...) Expand 10 before | Expand all | Expand 10 after
2329 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" 2329 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"
2330 ); 2330 );
2331 } 2331 }
2332 #endif // HAS_RAWTOYROW_NEON 2332 #endif // HAS_RAWTOYROW_NEON
2333 2333
2334 // Bilinear filter 16x2 -> 16x1 2334 // Bilinear filter 16x2 -> 16x1
2335 #ifdef HAS_INTERPOLATEROW_NEON 2335 #ifdef HAS_INTERPOLATEROW_NEON
2336 void InterpolateRow_NEON(uint8* dst_ptr, 2336 void InterpolateRow_NEON(uint8* dst_ptr,
2337 const uint8* src_ptr, ptrdiff_t src_stride, 2337 const uint8* src_ptr, ptrdiff_t src_stride,
2338 int dst_width, int source_y_fraction) { 2338 int dst_width, int source_y_fraction) {
2339 int y1_fraction = source_y_fraction >> 1; 2339 int y1_fraction = source_y_fraction;
2340 int y0_fraction = 128 - y1_fraction; 2340 int y0_fraction = 256 - y1_fraction;
2341 const uint8* src_ptr1 = src_ptr + src_stride; 2341 const uint8* src_ptr1 = src_ptr + src_stride;
2342 asm volatile ( 2342 asm volatile (
2343 "cmp %w4, #0 \n" 2343 "cmp %w4, #0 \n"
2344 "b.eq 100f \n" 2344 "b.eq 100f \n"
2345 "cmp %w4, #64 \n" 2345 "cmp %w4, #128 \n"
2346 "b.eq 50f \n" 2346 "b.eq 50f \n"
2347 2347
2348 "dup v5.16b, %w4 \n" 2348 "dup v5.16b, %w4 \n"
2349 "dup v4.16b, %w5 \n" 2349 "dup v4.16b, %w5 \n"
2350 // General purpose row blend. 2350 // General purpose row blend.
2351 "1: \n" 2351 "1: \n"
2352 MEMACCESS(1) 2352 MEMACCESS(1)
2353 "ld1 {v0.16b}, [%1], #16 \n" 2353 "ld1 {v0.16b}, [%1], #16 \n"
2354 MEMACCESS(2) 2354 MEMACCESS(2)
2355 "ld1 {v1.16b}, [%2], #16 \n" 2355 "ld1 {v1.16b}, [%2], #16 \n"
2356 "subs %w3, %w3, #16 \n" 2356 "subs %w3, %w3, #16 \n"
2357 "umull v2.8h, v0.8b, v4.8b \n" 2357 "umull v2.8h, v0.8b, v4.8b \n"
2358 "umull2 v3.8h, v0.16b, v4.16b \n" 2358 "umull2 v3.8h, v0.16b, v4.16b \n"
2359 "umlal v2.8h, v1.8b, v5.8b \n" 2359 "umlal v2.8h, v1.8b, v5.8b \n"
2360 "umlal2 v3.8h, v1.16b, v5.16b \n" 2360 "umlal2 v3.8h, v1.16b, v5.16b \n"
2361 "rshrn v0.8b, v2.8h, #7 \n" 2361 "rshrn v0.8b, v2.8h, #8 \n"
2362 "rshrn2 v0.16b, v3.8h, #7 \n" 2362 "rshrn2 v0.16b, v3.8h, #8 \n"
2363 MEMACCESS(0) 2363 MEMACCESS(0)
2364 "st1 {v0.16b}, [%0], #16 \n" 2364 "st1 {v0.16b}, [%0], #16 \n"
2365 "b.gt 1b \n" 2365 "b.gt 1b \n"
2366 "b 99f \n" 2366 "b 99f \n"
2367 2367
2368 // Blend 50 / 50. 2368 // Blend 50 / 50.
2369 "50: \n" 2369 "50: \n"
2370 MEMACCESS(1) 2370 MEMACCESS(1)
2371 "ld1 {v0.16b}, [%1], #16 \n" 2371 "ld1 {v0.16b}, [%1], #16 \n"
2372 MEMACCESS(2) 2372 MEMACCESS(2)
(...skipping 623 matching lines...) Expand 10 before | Expand all | Expand 10 after
2996 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 2996 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
2997 ); 2997 );
2998 } 2998 }
2999 #endif // HAS_SOBELYROW_NEON 2999 #endif // HAS_SOBELYROW_NEON
3000 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) 3000 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
3001 3001
3002 #ifdef __cplusplus 3002 #ifdef __cplusplus
3003 } // extern "C" 3003 } // extern "C"
3004 } // namespace libyuv 3004 } // namespace libyuv
3005 #endif 3005 #endif
OLD | NEW
« no previous file with comments | « source/row_neon.cc ('k') | source/row_win.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698