| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 2241 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2252 "+r"(width) // %2 | 2252 "+r"(width) // %2 |
| 2253 : | 2253 : |
| 2254 : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" | 2254 : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" |
| 2255 ); | 2255 ); |
| 2256 } | 2256 } |
| 2257 | 2257 |
| 2258 // Bilinear filter 16x2 -> 16x1 | 2258 // Bilinear filter 16x2 -> 16x1 |
| 2259 void InterpolateRow_NEON(uint8* dst_ptr, | 2259 void InterpolateRow_NEON(uint8* dst_ptr, |
| 2260 const uint8* src_ptr, ptrdiff_t src_stride, | 2260 const uint8* src_ptr, ptrdiff_t src_stride, |
| 2261 int dst_width, int source_y_fraction) { | 2261 int dst_width, int source_y_fraction) { |
| 2262 int y1_fraction = source_y_fraction >> 1; | 2262 int y1_fraction = source_y_fraction; |
| 2263 asm volatile ( | 2263 asm volatile ( |
| 2264 "cmp %4, #0 \n" | 2264 "cmp %4, #0 \n" |
| 2265 "beq 100f \n" | 2265 "beq 100f \n" |
| 2266 "add %2, %1 \n" | 2266 "add %2, %1 \n" |
| 2267 "cmp %4, #64 \n" | 2267 "cmp %4, #128 \n" |
| 2268 "beq 50f \n" | 2268 "beq 50f \n" |
| 2269 | 2269 |
| 2270 "vdup.8 d5, %4 \n" | 2270 "vdup.8 d5, %4 \n" |
| 2271 "rsb %4, #128 \n" | 2271 "rsb %4, #256 \n" |
| 2272 "vdup.8 d4, %4 \n" | 2272 "vdup.8 d4, %4 \n" |
| 2273 // General purpose row blend. | 2273 // General purpose row blend. |
| 2274 "1: \n" | 2274 "1: \n" |
| 2275 MEMACCESS(1) | 2275 MEMACCESS(1) |
| 2276 "vld1.8 {q0}, [%1]! \n" | 2276 "vld1.8 {q0}, [%1]! \n" |
| 2277 MEMACCESS(2) | 2277 MEMACCESS(2) |
| 2278 "vld1.8 {q1}, [%2]! \n" | 2278 "vld1.8 {q1}, [%2]! \n" |
| 2279 "subs %3, %3, #16 \n" | 2279 "subs %3, %3, #16 \n" |
| 2280 "vmull.u8 q13, d0, d4 \n" | 2280 "vmull.u8 q13, d0, d4 \n" |
| 2281 "vmull.u8 q14, d1, d4 \n" | 2281 "vmull.u8 q14, d1, d4 \n" |
| 2282 "vmlal.u8 q13, d2, d5 \n" | 2282 "vmlal.u8 q13, d2, d5 \n" |
| 2283 "vmlal.u8 q14, d3, d5 \n" | 2283 "vmlal.u8 q14, d3, d5 \n" |
| 2284 "vrshrn.u16 d0, q13, #7 \n" | 2284 "vrshrn.u16 d0, q13, #8 \n" |
| 2285 "vrshrn.u16 d1, q14, #7 \n" | 2285 "vrshrn.u16 d1, q14, #8 \n" |
| 2286 MEMACCESS(0) | 2286 MEMACCESS(0) |
| 2287 "vst1.8 {q0}, [%0]! \n" | 2287 "vst1.8 {q0}, [%0]! \n" |
| 2288 "bgt 1b \n" | 2288 "bgt 1b \n" |
| 2289 "b 99f \n" | 2289 "b 99f \n" |
| 2290 | 2290 |
| 2291 // Blend 50 / 50. | 2291 // Blend 50 / 50. |
| 2292 "50: \n" | 2292 "50: \n" |
| 2293 MEMACCESS(1) | 2293 MEMACCESS(1) |
| 2294 "vld1.8 {q0}, [%1]! \n" | 2294 "vld1.8 {q0}, [%1]! \n" |
| 2295 MEMACCESS(2) | 2295 MEMACCESS(2) |
| (...skipping 583 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2879 "r"(6) // %5 | 2879 "r"(6) // %5 |
| 2880 : "cc", "memory", "q0", "q1" // Clobber List | 2880 : "cc", "memory", "q0", "q1" // Clobber List |
| 2881 ); | 2881 ); |
| 2882 } | 2882 } |
| 2883 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) | 2883 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) |
| 2884 | 2884 |
| 2885 #ifdef __cplusplus | 2885 #ifdef __cplusplus |
| 2886 } // extern "C" | 2886 } // extern "C" |
| 2887 } // namespace libyuv | 2887 } // namespace libyuv |
| 2888 #endif | 2888 #endif |
| OLD | NEW |