OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 2241 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2252 "+r"(width) // %2 | 2252 "+r"(width) // %2 |
2253 : | 2253 : |
2254 : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" | 2254 : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" |
2255 ); | 2255 ); |
2256 } | 2256 } |
2257 | 2257 |
2258 // Bilinear filter 16x2 -> 16x1 | 2258 // Bilinear filter 16x2 -> 16x1 |
2259 void InterpolateRow_NEON(uint8* dst_ptr, | 2259 void InterpolateRow_NEON(uint8* dst_ptr, |
2260 const uint8* src_ptr, ptrdiff_t src_stride, | 2260 const uint8* src_ptr, ptrdiff_t src_stride, |
2261 int dst_width, int source_y_fraction) { | 2261 int dst_width, int source_y_fraction) { |
2262 int y1_fraction = source_y_fraction >> 1; | 2262 int y1_fraction = source_y_fraction; |
2263 asm volatile ( | 2263 asm volatile ( |
2264 "cmp %4, #0 \n" | 2264 "cmp %4, #0 \n" |
2265 "beq 100f \n" | 2265 "beq 100f \n" |
2266 "add %2, %1 \n" | 2266 "add %2, %1 \n" |
2267 "cmp %4, #64 \n" | 2267 "cmp %4, #128 \n" |
2268 "beq 50f \n" | 2268 "beq 50f \n" |
2269 | 2269 |
2270 "vdup.8 d5, %4 \n" | 2270 "vdup.8 d5, %4 \n" |
2271 "rsb %4, #128 \n" | 2271 "rsb %4, #256 \n" |
2272 "vdup.8 d4, %4 \n" | 2272 "vdup.8 d4, %4 \n" |
2273 // General purpose row blend. | 2273 // General purpose row blend. |
2274 "1: \n" | 2274 "1: \n" |
2275 MEMACCESS(1) | 2275 MEMACCESS(1) |
2276 "vld1.8 {q0}, [%1]! \n" | 2276 "vld1.8 {q0}, [%1]! \n" |
2277 MEMACCESS(2) | 2277 MEMACCESS(2) |
2278 "vld1.8 {q1}, [%2]! \n" | 2278 "vld1.8 {q1}, [%2]! \n" |
2279 "subs %3, %3, #16 \n" | 2279 "subs %3, %3, #16 \n" |
2280 "vmull.u8 q13, d0, d4 \n" | 2280 "vmull.u8 q13, d0, d4 \n" |
2281 "vmull.u8 q14, d1, d4 \n" | 2281 "vmull.u8 q14, d1, d4 \n" |
2282 "vmlal.u8 q13, d2, d5 \n" | 2282 "vmlal.u8 q13, d2, d5 \n" |
2283 "vmlal.u8 q14, d3, d5 \n" | 2283 "vmlal.u8 q14, d3, d5 \n" |
2284 "vrshrn.u16 d0, q13, #7 \n" | 2284 "vrshrn.u16 d0, q13, #8 \n" |
2285 "vrshrn.u16 d1, q14, #7 \n" | 2285 "vrshrn.u16 d1, q14, #8 \n" |
2286 MEMACCESS(0) | 2286 MEMACCESS(0) |
2287 "vst1.8 {q0}, [%0]! \n" | 2287 "vst1.8 {q0}, [%0]! \n" |
2288 "bgt 1b \n" | 2288 "bgt 1b \n" |
2289 "b 99f \n" | 2289 "b 99f \n" |
2290 | 2290 |
2291 // Blend 50 / 50. | 2291 // Blend 50 / 50. |
2292 "50: \n" | 2292 "50: \n" |
2293 MEMACCESS(1) | 2293 MEMACCESS(1) |
2294 "vld1.8 {q0}, [%1]! \n" | 2294 "vld1.8 {q0}, [%1]! \n" |
2295 MEMACCESS(2) | 2295 MEMACCESS(2) |
(...skipping 583 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2879 "r"(6) // %5 | 2879 "r"(6) // %5 |
2880 : "cc", "memory", "q0", "q1" // Clobber List | 2880 : "cc", "memory", "q0", "q1" // Clobber List |
2881 ); | 2881 ); |
2882 } | 2882 } |
2883 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) | 2883 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) |
2884 | 2884 |
2885 #ifdef __cplusplus | 2885 #ifdef __cplusplus |
2886 } // extern "C" | 2886 } // extern "C" |
2887 } // namespace libyuv | 2887 } // namespace libyuv |
2888 #endif | 2888 #endif |
OLD | NEW |