Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(311)

Side by Side Diff: src/opts/SkBitmapProcState_filter_neon.h

Issue 280403005: Always inline Filter_32_*_neon functions (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 1
2 /* 2 /*
3 * Copyright 2012 The Android Open Source Project 3 * Copyright 2012 The Android Open Source Project
4 * 4 *
5 * Use of this source code is governed by a BSD-style license that can be 5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file. 6 * found in the LICENSE file.
7 */ 7 */
8 8
9 9
10 #include <arm_neon.h> 10 #include <arm_neon.h>
11 #include "SkColorPriv.h" 11 #include "SkColorPriv.h"
12 12
13 /* 13 /*
14 * Filter_32_opaque 14 * Filter_32_opaque
15 * 15 *
16 * There is no hard-n-fast rule that the filtering must produce 16 * There is no hard-n-fast rule that the filtering must produce
17 * exact results for the color components, but if the 4 incoming colors are 17 * exact results for the color components, but if the 4 incoming colors are
18 * all opaque, then the output color must also be opaque. Subsequent parts of 18 * all opaque, then the output color must also be opaque. Subsequent parts of
19 * the drawing pipeline may rely on this (e.g. which blitrow proc to use). 19 * the drawing pipeline may rely on this (e.g. which blitrow proc to use).
20 *
20 */ 21 */
21 22 // Chrome on Android uses -Os so we need to force these inline. Otherwise
22 static inline void Filter_32_opaque_neon(unsigned x, unsigned y, 23 // calling the function in the inner loops will cause significant overhead on
23 SkPMColor a00, SkPMColor a01, 24 // some platforms.
24 SkPMColor a10, SkPMColor a11, 25 static SK_ALWAYS_INLINE void Filter_32_opaque_neon(unsigned x, unsigned y,
25 SkPMColor *dst) { 26 SkPMColor a00, SkPMColor a01,
27 SkPMColor a10, SkPMColor a11,
28 SkPMColor *dst) {
26 uint8x8_t vy, vconst16_8, v16_y, vres; 29 uint8x8_t vy, vconst16_8, v16_y, vres;
27 uint16x4_t vx, vconst16_16, v16_x, tmp; 30 uint16x4_t vx, vconst16_16, v16_x, tmp;
28 uint32x2_t va0, va1; 31 uint32x2_t va0, va1;
29 uint16x8_t tmp1, tmp2; 32 uint16x8_t tmp1, tmp2;
30 33
31 vy = vdup_n_u8(y); // duplicate y into vy 34 vy = vdup_n_u8(y); // duplicate y into vy
32 vconst16_8 = vmov_n_u8(16); // set up constant in vconst16_8 35 vconst16_8 = vmov_n_u8(16); // set up constant in vconst16_8
33 v16_y = vsub_u8(vconst16_8, vy); // v16_y = 16-y 36 v16_y = vsub_u8(vconst16_8, vy); // v16_y = 16-y
34 37
35 va0 = vdup_n_u32(a00); // duplicate a00 38 va0 = vdup_n_u32(a00); // duplicate a00
(...skipping 10 matching lines...) Expand all
46 49
47 tmp = vmul_u16(vget_high_u16(tmp1), vx); // tmp = a01 * x 50 tmp = vmul_u16(vget_high_u16(tmp1), vx); // tmp = a01 * x
48 tmp = vmla_u16(tmp, vget_high_u16(tmp2), vx); // tmp += a11 * x 51 tmp = vmla_u16(tmp, vget_high_u16(tmp2), vx); // tmp += a11 * x
49 tmp = vmla_u16(tmp, vget_low_u16(tmp1), v16_x); // tmp += a00 * (16-x) 52 tmp = vmla_u16(tmp, vget_low_u16(tmp1), v16_x); // tmp += a00 * (16-x)
50 tmp = vmla_u16(tmp, vget_low_u16(tmp2), v16_x); // tmp += a10 * (16-x) 53 tmp = vmla_u16(tmp, vget_low_u16(tmp2), v16_x); // tmp += a10 * (16-x)
51 54
52 vres = vshrn_n_u16(vcombine_u16(tmp, vcreate_u16(0)), 8); // shift down result by 8 55 vres = vshrn_n_u16(vcombine_u16(tmp, vcreate_u16(0)), 8); // shift down result by 8
53 vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); // store result 56 vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); // store result
54 } 57 }
55 58
56 static inline void Filter_32_alpha_neon(unsigned x, unsigned y, 59 static SK_ALWAYS_INLINE void Filter_32_alpha_neon(unsigned x, unsigned y,
57 SkPMColor a00, SkPMColor a01, 60 SkPMColor a00, SkPMColor a01,
58 SkPMColor a10, SkPMColor a11, 61 SkPMColor a10, SkPMColor a11,
59 SkPMColor *dst, uint16_t scale) { 62 SkPMColor *dst,
63 uint16_t scale) {
60 uint8x8_t vy, vconst16_8, v16_y, vres; 64 uint8x8_t vy, vconst16_8, v16_y, vres;
61 uint16x4_t vx, vconst16_16, v16_x, tmp, vscale; 65 uint16x4_t vx, vconst16_16, v16_x, tmp, vscale;
62 uint32x2_t va0, va1; 66 uint32x2_t va0, va1;
63 uint16x8_t tmp1, tmp2; 67 uint16x8_t tmp1, tmp2;
64 68
65 vy = vdup_n_u8(y); // duplicate y into vy 69 vy = vdup_n_u8(y); // duplicate y into vy
66 vconst16_8 = vmov_n_u8(16); // set up constant in vconst16_8 70 vconst16_8 = vmov_n_u8(16); // set up constant in vconst16_8
67 v16_y = vsub_u8(vconst16_8, vy); // v16_y = 16-y 71 v16_y = vsub_u8(vconst16_8, vy); // v16_y = 16-y
68 72
69 va0 = vdup_n_u32(a00); // duplicate a00 73 va0 = vdup_n_u32(a00); // duplicate a00
(...skipping 13 matching lines...) Expand all
83 tmp = vmla_u16(tmp, vget_low_u16(tmp1), v16_x); // tmp += a00 * (16-x) 87 tmp = vmla_u16(tmp, vget_low_u16(tmp1), v16_x); // tmp += a00 * (16-x)
84 tmp = vmla_u16(tmp, vget_low_u16(tmp2), v16_x); // tmp += a10 * (16-x) 88 tmp = vmla_u16(tmp, vget_low_u16(tmp2), v16_x); // tmp += a10 * (16-x)
85 89
86 vscale = vdup_n_u16(scale); // duplicate scale 90 vscale = vdup_n_u16(scale); // duplicate scale
87 tmp = vshr_n_u16(tmp, 8); // shift down result by 8 91 tmp = vshr_n_u16(tmp, 8); // shift down result by 8
88 tmp = vmul_u16(tmp, vscale); // multiply result by scale 92 tmp = vmul_u16(tmp, vscale); // multiply result by scale
89 93
90 vres = vshrn_n_u16(vcombine_u16(tmp, vcreate_u16(0)), 8); // shift down result by 8 94 vres = vshrn_n_u16(vcombine_u16(tmp, vcreate_u16(0)), 8); // shift down result by 8
91 vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); // store result 95 vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); // store result
92 } 96 }
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698