OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 2318 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2329 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" | 2329 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16" |
2330 ); | 2330 ); |
2331 } | 2331 } |
2332 #endif // HAS_RAWTOYROW_NEON | 2332 #endif // HAS_RAWTOYROW_NEON |
2333 | 2333 |
2334 // Bilinear filter 16x2 -> 16x1 | 2334 // Bilinear filter 16x2 -> 16x1 |
2335 #ifdef HAS_INTERPOLATEROW_NEON | 2335 #ifdef HAS_INTERPOLATEROW_NEON |
2336 void InterpolateRow_NEON(uint8* dst_ptr, | 2336 void InterpolateRow_NEON(uint8* dst_ptr, |
2337 const uint8* src_ptr, ptrdiff_t src_stride, | 2337 const uint8* src_ptr, ptrdiff_t src_stride, |
2338 int dst_width, int source_y_fraction) { | 2338 int dst_width, int source_y_fraction) { |
2339 int y1_fraction = source_y_fraction; | 2339 int y1_fraction = source_y_fraction >> 1; |
2340 int y0_fraction = 256 - y1_fraction; | 2340 int y0_fraction = 128 - y1_fraction; |
2341 const uint8* src_ptr1 = src_ptr + src_stride; | 2341 const uint8* src_ptr1 = src_ptr + src_stride; |
2342 asm volatile ( | 2342 asm volatile ( |
2343 "cmp %w4, #0 \n" | 2343 "cmp %w4, #0 \n" |
2344 "b.eq 100f \n" | 2344 "b.eq 100f \n" |
2345 "cmp %w4, #64 \n" | 2345 "cmp %w4, #64 \n" |
2346 "b.eq 75f \n" | |
2347 "cmp %w4, #128 \n" | |
2348 "b.eq 50f \n" | 2346 "b.eq 50f \n" |
2349 "cmp %w4, #192 \n" | |
2350 "b.eq 25f \n" | |
2351 | 2347 |
2352 "dup v5.16b, %w4 \n" | 2348 "dup v5.16b, %w4 \n" |
2353 "dup v4.16b, %w5 \n" | 2349 "dup v4.16b, %w5 \n" |
2354 // General purpose row blend. | 2350 // General purpose row blend. |
2355 "1: \n" | 2351 "1: \n" |
2356 MEMACCESS(1) | 2352 MEMACCESS(1) |
2357 "ld1 {v0.16b}, [%1], #16 \n" | 2353 "ld1 {v0.16b}, [%1], #16 \n" |
2358 MEMACCESS(2) | 2354 MEMACCESS(2) |
2359 "ld1 {v1.16b}, [%2], #16 \n" | 2355 "ld1 {v1.16b}, [%2], #16 \n" |
2360 "subs %w3, %w3, #16 \n" | 2356 "subs %w3, %w3, #16 \n" |
2361 "umull v2.8h, v0.8b, v4.8b \n" | 2357 "umull v2.8h, v0.8b, v4.8b \n" |
2362 "umull2 v3.8h, v0.16b, v4.16b \n" | 2358 "umull2 v3.8h, v0.16b, v4.16b \n" |
2363 "umlal v2.8h, v1.8b, v5.8b \n" | 2359 "umlal v2.8h, v1.8b, v5.8b \n" |
2364 "umlal2 v3.8h, v1.16b, v5.16b \n" | 2360 "umlal2 v3.8h, v1.16b, v5.16b \n" |
2365 "rshrn v0.8b, v2.8h, #8 \n" | 2361 "rshrn v0.8b, v2.8h, #7 \n" |
2366 "rshrn2 v0.16b, v3.8h, #8 \n" | 2362 "rshrn2 v0.16b, v3.8h, #7 \n" |
2367 MEMACCESS(0) | 2363 MEMACCESS(0) |
2368 "st1 {v0.16b}, [%0], #16 \n" | 2364 "st1 {v0.16b}, [%0], #16 \n" |
2369 "b.gt 1b \n" | 2365 "b.gt 1b \n" |
2370 "b 99f \n" | 2366 "b 99f \n" |
2371 | 2367 |
2372 // Blend 25 / 75. | |
2373 "25: \n" | |
2374 MEMACCESS(1) | |
2375 "ld1 {v0.16b}, [%1], #16 \n" | |
2376 MEMACCESS(2) | |
2377 "ld1 {v1.16b}, [%2], #16 \n" | |
2378 "subs %w3, %w3, #16 \n" | |
2379 "urhadd v0.16b, v0.16b, v1.16b \n" | |
2380 "urhadd v0.16b, v0.16b, v1.16b \n" | |
2381 MEMACCESS(0) | |
2382 "st1 {v0.16b}, [%0], #16 \n" | |
2383 "b.gt 25b \n" | |
2384 "b 99f \n" | |
2385 | |
2386 // Blend 50 / 50. | 2368 // Blend 50 / 50. |
2387 "50: \n" | 2369 "50: \n" |
2388 MEMACCESS(1) | 2370 MEMACCESS(1) |
2389 "ld1 {v0.16b}, [%1], #16 \n" | 2371 "ld1 {v0.16b}, [%1], #16 \n" |
2390 MEMACCESS(2) | 2372 MEMACCESS(2) |
2391 "ld1 {v1.16b}, [%2], #16 \n" | 2373 "ld1 {v1.16b}, [%2], #16 \n" |
2392 "subs %w3, %w3, #16 \n" | 2374 "subs %w3, %w3, #16 \n" |
2393 "urhadd v0.16b, v0.16b, v1.16b \n" | 2375 "urhadd v0.16b, v0.16b, v1.16b \n" |
2394 MEMACCESS(0) | 2376 MEMACCESS(0) |
2395 "st1 {v0.16b}, [%0], #16 \n" | 2377 "st1 {v0.16b}, [%0], #16 \n" |
2396 "b.gt 50b \n" | 2378 "b.gt 50b \n" |
2397 "b 99f \n" | 2379 "b 99f \n" |
2398 | 2380 |
2399 // Blend 75 / 25. | |
2400 "75: \n" | |
2401 MEMACCESS(1) | |
2402 "ld1 {v1.16b}, [%1], #16 \n" | |
2403 MEMACCESS(2) | |
2404 "ld1 {v0.16b}, [%2], #16 \n" | |
2405 "subs %w3, %w3, #16 \n" | |
2406 "urhadd v0.16b, v0.16b, v1.16b \n" | |
2407 "urhadd v0.16b, v0.16b, v1.16b \n" | |
2408 MEMACCESS(0) | |
2409 "st1 {v0.16b}, [%0], #16 \n" | |
2410 "b.gt 75b \n" | |
2411 "b 99f \n" | |
2412 | |
2413 // Blend 100 / 0 - Copy row unchanged. | 2381 // Blend 100 / 0 - Copy row unchanged. |
2414 "100: \n" | 2382 "100: \n" |
2415 MEMACCESS(1) | 2383 MEMACCESS(1) |
2416 "ld1 {v0.16b}, [%1], #16 \n" | 2384 "ld1 {v0.16b}, [%1], #16 \n" |
2417 "subs %w3, %w3, #16 \n" | 2385 "subs %w3, %w3, #16 \n" |
2418 MEMACCESS(0) | 2386 MEMACCESS(0) |
2419 "st1 {v0.16b}, [%0], #16 \n" | 2387 "st1 {v0.16b}, [%0], #16 \n" |
2420 "b.gt 100b \n" | 2388 "b.gt 100b \n" |
2421 | 2389 |
2422 "99: \n" | 2390 "99: \n" |
(...skipping 605 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3028 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List | 2996 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List |
3029 ); | 2997 ); |
3030 } | 2998 } |
3031 #endif // HAS_SOBELYROW_NEON | 2999 #endif // HAS_SOBELYROW_NEON |
3032 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) | 3000 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) |
3033 | 3001 |
3034 #ifdef __cplusplus | 3002 #ifdef __cplusplus |
3035 } // extern "C" | 3003 } // extern "C" |
3036 } // namespace libyuv | 3004 } // namespace libyuv |
3037 #endif | 3005 #endif |
OLD | NEW |