Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(78)

Side by Side Diff: source/row_neon.cc

Issue 1533643005: Add rounding to InterpolateRow for improved quality and consistency. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: lint fix in unittest (created 5 years ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_gcc.cc ('k') | source/row_neon64.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 2241 matching lines...) Expand 10 before | Expand all | Expand 10 after
2252 "+r"(width) // %2 2252 "+r"(width) // %2
2253 : 2253 :
2254 : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8" 2254 : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"
2255 ); 2255 );
2256 } 2256 }
2257 2257
2258 // Bilinear filter 16x2 -> 16x1 2258 // Bilinear filter 16x2 -> 16x1
2259 void InterpolateRow_NEON(uint8* dst_ptr, 2259 void InterpolateRow_NEON(uint8* dst_ptr,
2260 const uint8* src_ptr, ptrdiff_t src_stride, 2260 const uint8* src_ptr, ptrdiff_t src_stride,
2261 int dst_width, int source_y_fraction) { 2261 int dst_width, int source_y_fraction) {
2262 int y1_fraction = source_y_fraction >> 1;
2262 asm volatile ( 2263 asm volatile (
2263 "cmp %4, #0 \n" 2264 "cmp %4, #0 \n"
2264 "beq 100f \n" 2265 "beq 100f \n"
2265 "add %2, %1 \n" 2266 "add %2, %1 \n"
2266 "cmp %4, #64 \n" 2267 "cmp %4, #64 \n"
2267 "beq 75f \n"
2268 "cmp %4, #128 \n"
2269 "beq 50f \n" 2268 "beq 50f \n"
2270 "cmp %4, #192 \n"
2271 "beq 25f \n"
2272 2269
2273 "vdup.8 d5, %4 \n" 2270 "vdup.8 d5, %4 \n"
2274 "rsb %4, #256 \n" 2271 "rsb %4, #128 \n"
2275 "vdup.8 d4, %4 \n" 2272 "vdup.8 d4, %4 \n"
2276 // General purpose row blend. 2273 // General purpose row blend.
2277 "1: \n" 2274 "1: \n"
2278 MEMACCESS(1) 2275 MEMACCESS(1)
2279 "vld1.8 {q0}, [%1]! \n" 2276 "vld1.8 {q0}, [%1]! \n"
2280 MEMACCESS(2) 2277 MEMACCESS(2)
2281 "vld1.8 {q1}, [%2]! \n" 2278 "vld1.8 {q1}, [%2]! \n"
2282 "subs %3, %3, #16 \n" 2279 "subs %3, %3, #16 \n"
2283 "vmull.u8 q13, d0, d4 \n" 2280 "vmull.u8 q13, d0, d4 \n"
2284 "vmull.u8 q14, d1, d4 \n" 2281 "vmull.u8 q14, d1, d4 \n"
2285 "vmlal.u8 q13, d2, d5 \n" 2282 "vmlal.u8 q13, d2, d5 \n"
2286 "vmlal.u8 q14, d3, d5 \n" 2283 "vmlal.u8 q14, d3, d5 \n"
2287 "vrshrn.u16 d0, q13, #8 \n" 2284 "vrshrn.u16 d0, q13, #7 \n"
2288 "vrshrn.u16 d1, q14, #8 \n" 2285 "vrshrn.u16 d1, q14, #7 \n"
2289 MEMACCESS(0) 2286 MEMACCESS(0)
2290 "vst1.8 {q0}, [%0]! \n" 2287 "vst1.8 {q0}, [%0]! \n"
2291 "bgt 1b \n" 2288 "bgt 1b \n"
2292 "b 99f \n" 2289 "b 99f \n"
2293 2290
2294 // Blend 25 / 75.
2295 "25: \n"
2296 MEMACCESS(1)
2297 "vld1.8 {q0}, [%1]! \n"
2298 MEMACCESS(2)
2299 "vld1.8 {q1}, [%2]! \n"
2300 "subs %3, %3, #16 \n"
2301 "vrhadd.u8 q0, q1 \n"
2302 "vrhadd.u8 q0, q1 \n"
2303 MEMACCESS(0)
2304 "vst1.8 {q0}, [%0]! \n"
2305 "bgt 25b \n"
2306 "b 99f \n"
2307
2308 // Blend 50 / 50. 2291 // Blend 50 / 50.
2309 "50: \n" 2292 "50: \n"
2310 MEMACCESS(1) 2293 MEMACCESS(1)
2311 "vld1.8 {q0}, [%1]! \n" 2294 "vld1.8 {q0}, [%1]! \n"
2312 MEMACCESS(2) 2295 MEMACCESS(2)
2313 "vld1.8 {q1}, [%2]! \n" 2296 "vld1.8 {q1}, [%2]! \n"
2314 "subs %3, %3, #16 \n" 2297 "subs %3, %3, #16 \n"
2315 "vrhadd.u8 q0, q1 \n" 2298 "vrhadd.u8 q0, q1 \n"
2316 MEMACCESS(0) 2299 MEMACCESS(0)
2317 "vst1.8 {q0}, [%0]! \n" 2300 "vst1.8 {q0}, [%0]! \n"
2318 "bgt 50b \n" 2301 "bgt 50b \n"
2319 "b 99f \n" 2302 "b 99f \n"
2320 2303
2321 // Blend 75 / 25.
2322 "75: \n"
2323 MEMACCESS(1)
2324 "vld1.8 {q1}, [%1]! \n"
2325 MEMACCESS(2)
2326 "vld1.8 {q0}, [%2]! \n"
2327 "subs %3, %3, #16 \n"
2328 "vrhadd.u8 q0, q1 \n"
2329 "vrhadd.u8 q0, q1 \n"
2330 MEMACCESS(0)
2331 "vst1.8 {q0}, [%0]! \n"
2332 "bgt 75b \n"
2333 "b 99f \n"
2334
2335 // Blend 100 / 0 - Copy row unchanged. 2304 // Blend 100 / 0 - Copy row unchanged.
2336 "100: \n" 2305 "100: \n"
2337 MEMACCESS(1) 2306 MEMACCESS(1)
2338 "vld1.8 {q0}, [%1]! \n" 2307 "vld1.8 {q0}, [%1]! \n"
2339 "subs %3, %3, #16 \n" 2308 "subs %3, %3, #16 \n"
2340 MEMACCESS(0) 2309 MEMACCESS(0)
2341 "vst1.8 {q0}, [%0]! \n" 2310 "vst1.8 {q0}, [%0]! \n"
2342 "bgt 100b \n" 2311 "bgt 100b \n"
2343 2312
2344 "99: \n" 2313 "99: \n"
2345 : "+r"(dst_ptr), // %0 2314 : "+r"(dst_ptr), // %0
2346 "+r"(src_ptr), // %1 2315 "+r"(src_ptr), // %1
2347 "+r"(src_stride), // %2 2316 "+r"(src_stride), // %2
2348 "+r"(dst_width), // %3 2317 "+r"(dst_width), // %3
2349 "+r"(source_y_fraction) // %4 2318 "+r"(y1_fraction) // %4
2350 : 2319 :
2351 : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14" 2320 : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14"
2352 ); 2321 );
2353 } 2322 }
2354 2323
2355 // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr 2324 // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
2356 void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1, 2325 void ARGBBlendRow_NEON(const uint8* src_argb0, const uint8* src_argb1,
2357 uint8* dst_argb, int width) { 2326 uint8* dst_argb, int width) {
2358 asm volatile ( 2327 asm volatile (
2359 "subs %3, #8 \n" 2328 "subs %3, #8 \n"
(...skipping 550 matching lines...) Expand 10 before | Expand all | Expand 10 after
2910 "r"(6) // %5 2879 "r"(6) // %5
2911 : "cc", "memory", "q0", "q1" // Clobber List 2880 : "cc", "memory", "q0", "q1" // Clobber List
2912 ); 2881 );
2913 } 2882 }
2914 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) 2883 #endif // defined(__ARM_NEON__) && !defined(__aarch64__)
2915 2884
2916 #ifdef __cplusplus 2885 #ifdef __cplusplus
2917 } // extern "C" 2886 } // extern "C"
2918 } // namespace libyuv 2887 } // namespace libyuv
2919 #endif 2888 #endif
OLD | NEW
« no previous file with comments | « source/row_gcc.cc ('k') | source/row_neon64.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698