Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(442)

Side by Side Diff: src/opts/SkBlitMask_opts_arm_neon.cpp

Issue 23719002: ARM Skia NEON patches - 16/17 - Blitmask (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: Address review comments Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1
2 #include "SkBlitMask.h"
3 #include "SkColor_opts_neon.h"
4
5 static void D32_A8_Black_neon(void* SK_RESTRICT dst, size_t dstRB,
mtklein 2013/11/26 14:44:01 Typically we make static methods all_lowercase_lik
kevin.petit.not.used.account 2013/11/26 16:23:32 I did that for consistency with all the static fun
6 const void* SK_RESTRICT maskPtr, size_t maskRB,
7 SkColor, int width, int height) {
8 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
9 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
10
11 maskRB -= width;
12 dstRB -= (width << 2);
13 do {
14 int w = width;
15 while (w >= 8) {
16 uint8x8_t vmask = vld1_u8(mask);
17 uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask);
18 uint8x8x4_t vdevice = vld4_u8((uint8_t*)device);
19
20 vdevice = SkAlphaMulQ_neon8(vdevice, vscale);
21 vdevice.val[NEON_A] += vmask;
22
23 vst4_u8((uint8_t*)device, vdevice);
24
25 mask += 8;
26 device += 8;
27 w -= 8;
28 }
29 while (w-- > 0) {
30 unsigned aa = *mask++;
31 *device = (aa << SK_A32_SHIFT)
32 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
33 device += 1;
34 };
35 device = (uint32_t*)((char*)device + dstRB);
36 mask += maskRB;
37 } while (--height != 0);
38 }
39
40 static void D32_A8_Opaque_neon(void* SK_RESTRICT dst, size_t dstRB,
mtklein 2013/11/26 14:44:01 Can you use templates to share the bulk of the cod
kevin.petit.not.used.account 2013/11/26 16:23:32 Done. You're right, that was a bit silly to not ha
41 const void* SK_RESTRICT maskPtr, size_t maskRB,
42 SkColor color, int width, int height) {
43 SkPMColor pmc = SkPreMultiplyColor(color);
44 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst;
45 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
46 uint8x8x4_t vpmc;
47
48 maskRB -= width;
49 dstRB -= (width << 2);
50 if (width >= 8) {
51 vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc));
52 vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc));
53 vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc));
54 vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc));
55 }
56 do {
57 int w = width;
58 while (w >= 8) {
59 uint8x8_t vmask = vld1_u8(mask);
60 uint16x8_t vmask256 = SkAlpha255To256_neon8(vmask);
61 uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask);
62 uint8x8x4_t vdev = vld4_u8((uint8_t*)device);
63
64 vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)
65 + SkAlphaMul_neon8(vdev.val[NEON_A], vscale);
66 vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256)
67 + SkAlphaMul_neon8(vdev.val[NEON_R], vscale);
68 vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256)
69 + SkAlphaMul_neon8(vdev.val[NEON_G], vscale);
70 vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256)
71 + SkAlphaMul_neon8(vdev.val[NEON_B], vscale);
72
73 vst4_u8((uint8_t*)device, vdev);
74
75 mask += 8;
76 device += 8;
77 w -= 8;
78 }
79 while (w-- > 0) {
80 unsigned aa = *mask++;
81 *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa))
82 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa));
83 device += 1;
84 };
85 device = (uint32_t*)((char*)device + dstRB);
86 mask += maskRB;
87 } while (--height != 0);
88 }
89
90 static void D32_A8_Color_neon(void* SK_RESTRICT dst, size_t dstRB,
91 const void* SK_RESTRICT maskPtr, size_t maskRB,
92 SkColor color, int width, int height) {
93 SkPMColor pmc = SkPreMultiplyColor(color);
94 size_t dstOffset = dstRB - (width << 2);
95 size_t maskOffset = maskRB - width;
96 SkPMColor* SK_RESTRICT device = (SkPMColor *)dst;
97 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr;
98 uint8x8x4_t vpmc;
99
100 if (width >= 8) {
101 vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc));
102 vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc));
103 vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc));
104 vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc));
105 }
106 do {
107 int w = width;
108 while (w >= 8) {
109 uint8x8_t vmask = vld1_u8(mask);
110 uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask);
111 uint8x8x4_t vdev = vld4_u8((uint8_t*)device);
112 vscale = vsubw_u8(vdupq_n_u16(256),
113 SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256));
114
115 vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)
116 + SkAlphaMul_neon8(vdev.val[NEON_A], vscale);
117 vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256)
118 + SkAlphaMul_neon8(vdev.val[NEON_R], vscale);
119 vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256)
120 + SkAlphaMul_neon8(vdev.val[NEON_G], vscale);
121 vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256)
122 + SkAlphaMul_neon8(vdev.val[NEON_B], vscale);
123
124 vst4_u8((uint8_t*)device, vdev);
125
126 mask += 8;
127 device += 8;
128 w -= 8;
129 }
130 while (w--) {
131 unsigned aa = *mask++;
132 *device = SkBlendARGB32(pmc, *device, aa);
133 device += 1;
134 };
135 device = (uint32_t*)((char*)device + dstOffset);
136 mask += maskOffset;
137 } while (--height != 0);
138 }
139
140 SkBlitMask::ColorProc D32_A8_Factory_neon(SkColor color) {
141 if (SK_ColorBLACK == color) {
142 return D32_A8_Black_neon;
143 } else if (0xFF == SkColorGetA(color)) {
144 return D32_A8_Opaque_neon;
145 } else {
146 return D32_A8_Color_neon;
147 }
148 }
149
150 ////////////////////////////////////////////////////////////////////////////////
151
152 void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[],
153 SkColor color, int width,
154 SkPMColor opaqueDst) {
155 int colR = SkColorGetR(color);
156 int colG = SkColorGetG(color);
157 int colB = SkColorGetB(color);
158
159 uint8x8_t vcolR, vcolG, vcolB;
160 uint8x8_t vopqDstA, vopqDstR, vopqDstG, vopqDstB;
161
162 if (width >= 8) {
163 vcolR = vdup_n_u8(colR);
164 vcolG = vdup_n_u8(colG);
165 vcolB = vdup_n_u8(colB);
166 vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst));
167 vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst));
168 vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst));
169 vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst));
170 }
171
172 while (width >= 8) {
173 uint8x8x4_t vdst;
174 uint16x8_t vmask;
175 uint16x8_t vmaskR, vmaskG, vmaskB;
176 uint8x8_t vsel_trans, vsel_opq;
177
178 vdst = vld4_u8((uint8_t*)dst);
179 vmask = vld1q_u16(src);
180
181 // Prepare compare masks
182 vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0)));
183 vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF)));
184
185 // Get all the color masks on 5 bits
186 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
187 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
188 SK_B16_BITS + SK_R16_BITS + 1);
189 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
190
191 // Upscale to 0..32
192 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
193 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
194 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
195
196 vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF) );
197 vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]);
198
199 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR);
200 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG);
201 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB);
202
203 vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]);
204 vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]);
205 vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]);
206
207 vst4_u8((uint8_t*)dst, vdst);
208
209 dst += 8;
210 src += 8;
211 width -= 8;
212 }
213
214 // Leftovers
215 for (int i = 0; i < width; i++) {
216 dst[i] = SkBlendLCD16Opaque(colR, colG, colB, dst[i], src[i],
217 opaqueDst);
218 }
219 }
220
221 void SkBlitLCD16Row_neon(SkPMColor dst[], const uint16_t src[],
222 SkColor color, int width, SkPMColor) {
223 int colA = SkColorGetA(color);
224 int colR = SkColorGetR(color);
225 int colG = SkColorGetG(color);
226 int colB = SkColorGetB(color);
227
228 colA = SkAlpha255To256(colA);
229
230 uint8x8_t vcolR, vcolG, vcolB;
231 uint16x8_t vcolA;
232
233 if (width >= 8) {
234 vcolA = vdupq_n_u16(colA);
235 vcolR = vdup_n_u8(colR);
236 vcolG = vdup_n_u8(colG);
237 vcolB = vdup_n_u8(colB);
238 }
239
240 while (width >= 8) {
241 uint8x8x4_t vdst;
242 uint16x8_t vmask;
243 uint16x8_t vmaskR, vmaskG, vmaskB;
244
245 vdst = vld4_u8((uint8_t*)dst);
246 vmask = vld1q_u16(src);
247
248 // Get all the color masks on 5 bits
249 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT);
250 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS),
251 SK_B16_BITS + SK_R16_BITS + 1);
252 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK);
253
254 // Upscale to 0..32
255 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4);
256 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4);
257 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4);
258
259 vmaskR = vshrq_n_u16(vmaskR * vcolA, 8);
260 vmaskG = vshrq_n_u16(vmaskG * vcolA, 8);
261 vmaskB = vshrq_n_u16(vmaskB * vcolA, 8);
262
263 vdst.val[NEON_A] = vdup_n_u8(0xFF);
264 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR);
265 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG);
266 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB);
267
268 vst4_u8((uint8_t*)dst, vdst);
269
270 dst += 8;
271 src += 8;
272 width -= 8;
273 }
274
275 for (int i = 0; i < width; i++) {
276 dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]);
277 }
278 }
279
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698