OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(... 2241 unchanged lines omitted from this view ...)
2252 "+r"(width) // %2 | 2252 "+r"(width) // %2 |
2253 : | 2253 : |
2254 : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" | 2254 : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" |
2255 ); | 2255 ); |
2256 } | 2256 } |
2257 | 2257 |
2258 // Bilinear filter 16x2 -> 16x1 | 2258 // Bilinear filter 16x2 -> 16x1 |
// Reading this view: on every line the text left of the first '|' is the
// pre-change (OLD) code and the text right of it is the post-change (NEW)
// code. A side that is empty means the line exists in only one version.
//
// InterpolateRow_NEON blends two vertically adjacent source rows into one
// destination row, 16 bytes per loop iteration.
//   dst_ptr           - destination row (%0); advanced by every store.
//   src_ptr           - first source row (%1); advanced by every load.
//   src_stride        - byte offset from the first to the second source row
//                       (%2). "add %2, %1" converts it in place into a
//                       pointer to the second row.
//   dst_width         - bytes to produce (%3). It is decremented by 16 per
//                       iteration and the loops repeat while the result is
//                       > 0, so a trailing partial group is still loaded and
//                       stored as a full 16 bytes.
//                       NOTE(review): presumably callers pass a multiple of
//                       16 or pad their buffers - confirm.
//   source_y_fraction - weight of the second row. OLD feeds it to the asm
//                       directly on a 0..256 scale. NEW first halves it into
//                       y1_fraction and works on a 0..128 scale.
2259 void InterpolateRow_NEON(uint8* dst_ptr, | 2259 void InterpolateRow_NEON(uint8* dst_ptr, |
2260 const uint8* src_ptr, ptrdiff_t src_stride, | 2260 const uint8* src_ptr, ptrdiff_t src_stride, |
2261 int dst_width, int source_y_fraction) { | 2261 int dst_width, int source_y_fraction) { |
| 2262 int y1_fraction = source_y_fraction >> 1; |
2262 asm volatile ( | 2263 asm volatile ( |
// Fraction 0: the second row contributes nothing, so take the plain copy
// path at label 100 (before %2 has been turned into a pointer).
2263 "cmp %4, #0 \n" | 2264 "cmp %4, #0 \n" |
2264 "beq 100f \n" | 2265 "beq 100f \n" |
// %2 = src_ptr + src_stride: from here on %2 addresses the second row.
2265 "add %2, %1 \n" | 2266 "add %2, %1 \n" |
// Fast-path dispatch. OLD: 64 -> label 75, 128 -> label 50, 192 -> label 25.
// NEW: only the halved fraction 64 (an even 50/50 split) -> label 50; every
// other non-zero value falls through to the general blend.
2266 "cmp %4, #64 \n" | 2267 "cmp %4, #64 \n" |
2267 "beq 75f \n" | |
2268 "cmp %4, #128 \n" | |
2269 "beq 50f \n" | 2268 "beq 50f \n" |
2270 "cmp %4, #192 \n" | |
2271 "beq 25f \n" | |
2272 | 2269 |
// Build the per-lane weights: d5 = fraction (second row), then %4 is
// overwritten with scale - fraction (OLD scale 256, NEW scale 128) and
// replicated into d4 (first row).
2273 "vdup.8 d5, %4 \n" | 2270 "vdup.8 d5, %4 \n" |
2274 "rsb %4, #256 \n" | 2271 "rsb %4, #128 \n" |
2275 "vdup.8 d4, %4 \n" | 2272 "vdup.8 d4, %4 \n" |
2276 // General purpose row blend. | 2273 // General purpose row blend. |
2277 "1: \n" | 2274 "1: \n" |
2278 MEMACCESS(1) | 2275 MEMACCESS(1) |
2279 "vld1.8 {q0}, [%1]! \n" | 2276 "vld1.8 {q0}, [%1]! \n" |
2280 MEMACCESS(2) | 2277 MEMACCESS(2) |
2281 "vld1.8 {q1}, [%2]! \n" | 2278 "vld1.8 {q1}, [%2]! \n" |
2282 "subs %3, %3, #16 \n" | 2279 "subs %3, %3, #16 \n" |
// Widening multiply-accumulate into 16-bit lanes:
// q13/q14 = first_row * d4 + second_row * d5.
2283 "vmull.u8 q13, d0, d4 \n" | 2280 "vmull.u8 q13, d0, d4 \n" |
2284 "vmull.u8 q14, d1, d4 \n" | 2281 "vmull.u8 q14, d1, d4 \n" |
2285 "vmlal.u8 q13, d2, d5 \n" | 2282 "vmlal.u8 q13, d2, d5 \n" |
2286 "vmlal.u8 q14, d3, d5 \n" | 2283 "vmlal.u8 q14, d3, d5 \n" |
// Rounding shift right and narrow back to 8 bits; the shift amount matches
// the weight scale (OLD: 8 for 256, NEW: 7 for 128).
2287 "vrshrn.u16 d0, q13, #8 \n" | 2284 "vrshrn.u16 d0, q13, #7 \n" |
2288 "vrshrn.u16 d1, q14, #8 \n" | 2285 "vrshrn.u16 d1, q14, #7 \n" |
2289 MEMACCESS(0) | 2286 MEMACCESS(0) |
2290 "vst1.8 {q0}, [%0]! \n" | 2287 "vst1.8 {q0}, [%0]! \n" |
// The flags tested here were set by the "subs" above; the vector
// instructions in between do not touch them.
2291 "bgt 1b \n" | 2288 "bgt 1b \n" |
2292 "b 99f \n" | 2289 "b 99f \n" |
2293 | 2290 |
// OLD only (removed in NEW): q0 = first row, q1 = second row. Two rounding
// halving adds against q1 give roughly 1/4 first row + 3/4 second row.
2294 // Blend 25 / 75. | |
2295 "25: \n" | |
2296 MEMACCESS(1) | |
2297 "vld1.8 {q0}, [%1]! \n" | |
2298 MEMACCESS(2) | |
2299 "vld1.8 {q1}, [%2]! \n" | |
2300 "subs %3, %3, #16 \n" | |
2301 "vrhadd.u8 q0, q1 \n" | |
2302 "vrhadd.u8 q0, q1 \n" | |
2303 MEMACCESS(0) | |
2304 "vst1.8 {q0}, [%0]! \n" | |
2305 "bgt 25b \n" | |
2306 "b 99f \n" | |
2307 | |
// Kept in both versions: a single rounding halving add is the rounded
// average of the two rows.
2308 // Blend 50 / 50. | 2291 // Blend 50 / 50. |
2309 "50: \n" | 2292 "50: \n" |
2310 MEMACCESS(1) | 2293 MEMACCESS(1) |
2311 "vld1.8 {q0}, [%1]! \n" | 2294 "vld1.8 {q0}, [%1]! \n" |
2312 MEMACCESS(2) | 2295 MEMACCESS(2) |
2313 "vld1.8 {q1}, [%2]! \n" | 2296 "vld1.8 {q1}, [%2]! \n" |
2314 "subs %3, %3, #16 \n" | 2297 "subs %3, %3, #16 \n" |
2315 "vrhadd.u8 q0, q1 \n" | 2298 "vrhadd.u8 q0, q1 \n" |
2316 MEMACCESS(0) | 2299 MEMACCESS(0) |
2317 "vst1.8 {q0}, [%0]! \n" | 2300 "vst1.8 {q0}, [%0]! \n" |
2318 "bgt 50b \n" | 2301 "bgt 50b \n" |
2319 "b 99f \n" | 2302 "b 99f \n" |
2320 | 2303 |
// OLD only (removed in NEW): the loads are swapped relative to label 25
// (q1 = first row, q0 = second row), so the same double halving add gives
// roughly 3/4 first row + 1/4 second row.
2321 // Blend 75 / 25. | |
2322 "75: \n" | |
2323 MEMACCESS(1) | |
2324 "vld1.8 {q1}, [%1]! \n" | |
2325 MEMACCESS(2) | |
2326 "vld1.8 {q0}, [%2]! \n" | |
2327 "subs %3, %3, #16 \n" | |
2328 "vrhadd.u8 q0, q1 \n" | |
2329 "vrhadd.u8 q0, q1 \n" | |
2330 MEMACCESS(0) | |
2331 "vst1.8 {q0}, [%0]! \n" | |
2332 "bgt 75b \n" | |
2333 "b 99f \n" | |
2334 | |
// Only the first row is read on this path. It is the last loop, so it falls
// through to the exit label instead of branching.
2335 // Blend 100 / 0 - Copy row unchanged. | 2304 // Blend 100 / 0 - Copy row unchanged. |
2336 "100: \n" | 2305 "100: \n" |
2337 MEMACCESS(1) | 2306 MEMACCESS(1) |
2338 "vld1.8 {q0}, [%1]! \n" | 2307 "vld1.8 {q0}, [%1]! \n" |
2339 "subs %3, %3, #16 \n" | 2308 "subs %3, %3, #16 \n" |
2340 MEMACCESS(0) | 2309 MEMACCESS(0) |
2341 "vst1.8 {q0}, [%0]! \n" | 2310 "vst1.8 {q0}, [%0]! \n" |
2342 "bgt 100b \n" | 2311 "bgt 100b \n" |
2343 | 2312 |
// Common exit for every blend path.
2344 "99: \n" | 2313 "99: \n" |
// All five operands are read-write ("+r"): the asm post-increments the
// three pointers, counts the width down and overwrites the fraction register
// with its complement. NEW passes the halved local y1_fraction as %4 in
// place of the source_y_fraction parameter.
2345 : "+r"(dst_ptr), // %0 | 2314 : "+r"(dst_ptr), // %0 |
2346 "+r"(src_ptr), // %1 | 2315 "+r"(src_ptr), // %1 |
2347 "+r"(src_stride), // %2 | 2316 "+r"(src_stride), // %2 |
2348 "+r"(dst_width), // %3 | 2317 "+r"(dst_width), // %3 |
2349 "+r"(source_y_fraction) // %4 | 2318 "+r"(y1_fraction) // %4 |
2350 : | 2319 : |
// Clobbers: condition flags (cmp/subs), memory (stores through %0) and every
// NEON register written above.
2351 : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14" | 2320 : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14" |
2352 ); | 2321 ); |
2353 } | 2322 } |
2354 | 2323 |
2355 // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr | 2324 // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr |
2356 void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1, | 2325 void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1, |
2357 uint8* dst_argb, int width) { | 2326 uint8* dst_argb, int width) { |
2358 asm volatile ( | 2327 asm volatile ( |
2359 "subs %3, #8 \n" | 2328 "subs %3, #8 \n" |
(... 550 unchanged lines omitted from this view ...)
2910 "r"(6) // %5 | 2879 "r"(6) // %5 |
2911 : "cc", "memory", "q0", "q1" // Clobber List | 2880 : "cc", "memory", "q0", "q1" // Clobber List |
2912 ); | 2881 ); |
2913 } | 2882 } |
2914 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) | 2883 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) |
2915 | 2884 |
2916 #ifdef __cplusplus | 2885 #ifdef __cplusplus |
2917 } // extern "C" | 2886 } // extern "C" |
2918 } // namespace libyuv | 2887 } // namespace libyuv |
2919 #endif | 2888 #endif |
OLD | NEW |