OLD | NEW |
---|---|
(Empty) | |
1 | |
2 #include "SkBlitMask.h" | |
3 #include "SkColor_opts_neon.h" | |
4 | |
5 static void D32_A8_Black_neon(void* SK_RESTRICT dst, size_t dstRB, | |
mtklein
2013/11/26 14:44:01
Typically we make static methods all_lowercase_lik
kevin.petit.not.used.account
2013/11/26 16:23:32
I did that for consistency with all the static fun
| |
6 const void* SK_RESTRICT maskPtr, size_t maskRB, | |
7 SkColor, int width, int height) { | |
8 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; | |
9 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; | |
10 | |
11 maskRB -= width; | |
12 dstRB -= (width << 2); | |
13 do { | |
14 int w = width; | |
15 while (w >= 8) { | |
16 uint8x8_t vmask = vld1_u8(mask); | |
17 uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask); | |
18 uint8x8x4_t vdevice = vld4_u8((uint8_t*)device); | |
19 | |
20 vdevice = SkAlphaMulQ_neon8(vdevice, vscale); | |
21 vdevice.val[NEON_A] += vmask; | |
22 | |
23 vst4_u8((uint8_t*)device, vdevice); | |
24 | |
25 mask += 8; | |
26 device += 8; | |
27 w -= 8; | |
28 } | |
29 while (w-- > 0) { | |
30 unsigned aa = *mask++; | |
31 *device = (aa << SK_A32_SHIFT) | |
32 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); | |
33 device += 1; | |
34 }; | |
35 device = (uint32_t*)((char*)device + dstRB); | |
36 mask += maskRB; | |
37 } while (--height != 0); | |
38 } | |
39 | |
40 static void D32_A8_Opaque_neon(void* SK_RESTRICT dst, size_t dstRB, | |
mtklein
2013/11/26 14:44:01
Can you use templates to share the bulk of the cod
kevin.petit.not.used.account
2013/11/26 16:23:32
Done. You're right, that was a bit silly to not ha
| |
41 const void* SK_RESTRICT maskPtr, size_t maskRB, | |
42 SkColor color, int width, int height) { | |
43 SkPMColor pmc = SkPreMultiplyColor(color); | |
44 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; | |
45 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; | |
46 uint8x8x4_t vpmc; | |
47 | |
48 maskRB -= width; | |
49 dstRB -= (width << 2); | |
50 if (width >= 8) { | |
51 vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc)); | |
52 vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc)); | |
53 vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc)); | |
54 vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc)); | |
55 } | |
56 do { | |
57 int w = width; | |
58 while (w >= 8) { | |
59 uint8x8_t vmask = vld1_u8(mask); | |
60 uint16x8_t vmask256 = SkAlpha255To256_neon8(vmask); | |
61 uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask); | |
62 uint8x8x4_t vdev = vld4_u8((uint8_t*)device); | |
63 | |
64 vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256) | |
65 + SkAlphaMul_neon8(vdev.val[NEON_A], vscale); | |
66 vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256) | |
67 + SkAlphaMul_neon8(vdev.val[NEON_R], vscale); | |
68 vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256) | |
69 + SkAlphaMul_neon8(vdev.val[NEON_G], vscale); | |
70 vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256) | |
71 + SkAlphaMul_neon8(vdev.val[NEON_B], vscale); | |
72 | |
73 vst4_u8((uint8_t*)device, vdev); | |
74 | |
75 mask += 8; | |
76 device += 8; | |
77 w -= 8; | |
78 } | |
79 while (w-- > 0) { | |
80 unsigned aa = *mask++; | |
81 *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa)) | |
82 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); | |
83 device += 1; | |
84 }; | |
85 device = (uint32_t*)((char*)device + dstRB); | |
86 mask += maskRB; | |
87 } while (--height != 0); | |
88 } | |
89 | |
90 static void D32_A8_Color_neon(void* SK_RESTRICT dst, size_t dstRB, | |
91 const void* SK_RESTRICT maskPtr, size_t maskRB, | |
92 SkColor color, int width, int height) { | |
93 SkPMColor pmc = SkPreMultiplyColor(color); | |
94 size_t dstOffset = dstRB - (width << 2); | |
95 size_t maskOffset = maskRB - width; | |
96 SkPMColor* SK_RESTRICT device = (SkPMColor *)dst; | |
97 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; | |
98 uint8x8x4_t vpmc; | |
99 | |
100 if (width >= 8) { | |
101 vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc)); | |
102 vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc)); | |
103 vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc)); | |
104 vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc)); | |
105 } | |
106 do { | |
107 int w = width; | |
108 while (w >= 8) { | |
109 uint8x8_t vmask = vld1_u8(mask); | |
110 uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask); | |
111 uint8x8x4_t vdev = vld4_u8((uint8_t*)device); | |
112 vscale = vsubw_u8(vdupq_n_u16(256), | |
113 SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)); | |
114 | |
115 vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256) | |
116 + SkAlphaMul_neon8(vdev.val[NEON_A], vscale); | |
117 vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256) | |
118 + SkAlphaMul_neon8(vdev.val[NEON_R], vscale); | |
119 vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256) | |
120 + SkAlphaMul_neon8(vdev.val[NEON_G], vscale); | |
121 vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256) | |
122 + SkAlphaMul_neon8(vdev.val[NEON_B], vscale); | |
123 | |
124 vst4_u8((uint8_t*)device, vdev); | |
125 | |
126 mask += 8; | |
127 device += 8; | |
128 w -= 8; | |
129 } | |
130 while (w--) { | |
131 unsigned aa = *mask++; | |
132 *device = SkBlendARGB32(pmc, *device, aa); | |
133 device += 1; | |
134 }; | |
135 device = (uint32_t*)((char*)device + dstOffset); | |
136 mask += maskOffset; | |
137 } while (--height != 0); | |
138 } | |
139 | |
140 SkBlitMask::ColorProc D32_A8_Factory_neon(SkColor color) { | |
141 if (SK_ColorBLACK == color) { | |
142 return D32_A8_Black_neon; | |
143 } else if (0xFF == SkColorGetA(color)) { | |
144 return D32_A8_Opaque_neon; | |
145 } else { | |
146 return D32_A8_Color_neon; | |
147 } | |
148 } | |
149 | |
150 //////////////////////////////////////////////////////////////////////////////// | |
151 | |
152 void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[], | |
153 SkColor color, int width, | |
154 SkPMColor opaqueDst) { | |
155 int colR = SkColorGetR(color); | |
156 int colG = SkColorGetG(color); | |
157 int colB = SkColorGetB(color); | |
158 | |
159 uint8x8_t vcolR, vcolG, vcolB; | |
160 uint8x8_t vopqDstA, vopqDstR, vopqDstG, vopqDstB; | |
161 | |
162 if (width >= 8) { | |
163 vcolR = vdup_n_u8(colR); | |
164 vcolG = vdup_n_u8(colG); | |
165 vcolB = vdup_n_u8(colB); | |
166 vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst)); | |
167 vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst)); | |
168 vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst)); | |
169 vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst)); | |
170 } | |
171 | |
172 while (width >= 8) { | |
173 uint8x8x4_t vdst; | |
174 uint16x8_t vmask; | |
175 uint16x8_t vmaskR, vmaskG, vmaskB; | |
176 uint8x8_t vsel_trans, vsel_opq; | |
177 | |
178 vdst = vld4_u8((uint8_t*)dst); | |
179 vmask = vld1q_u16(src); | |
180 | |
181 // Prepare compare masks | |
182 vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0))); | |
183 vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF))); | |
184 | |
185 // Get all the color masks on 5 bits | |
186 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT); | |
187 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS), | |
188 SK_B16_BITS + SK_R16_BITS + 1); | |
189 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK); | |
190 | |
191 // Upscale to 0..32 | |
192 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4); | |
193 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4); | |
194 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4); | |
195 | |
196 vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF) ); | |
197 vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]); | |
198 | |
199 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR); | |
200 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG); | |
201 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB); | |
202 | |
203 vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]); | |
204 vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]); | |
205 vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]); | |
206 | |
207 vst4_u8((uint8_t*)dst, vdst); | |
208 | |
209 dst += 8; | |
210 src += 8; | |
211 width -= 8; | |
212 } | |
213 | |
214 // Leftovers | |
215 for (int i = 0; i < width; i++) { | |
216 dst[i] = SkBlendLCD16Opaque(colR, colG, colB, dst[i], src[i], | |
217 opaqueDst); | |
218 } | |
219 } | |
220 | |
221 void SkBlitLCD16Row_neon(SkPMColor dst[], const uint16_t src[], | |
222 SkColor color, int width, SkPMColor) { | |
223 int colA = SkColorGetA(color); | |
224 int colR = SkColorGetR(color); | |
225 int colG = SkColorGetG(color); | |
226 int colB = SkColorGetB(color); | |
227 | |
228 colA = SkAlpha255To256(colA); | |
229 | |
230 uint8x8_t vcolR, vcolG, vcolB; | |
231 uint16x8_t vcolA; | |
232 | |
233 if (width >= 8) { | |
234 vcolA = vdupq_n_u16(colA); | |
235 vcolR = vdup_n_u8(colR); | |
236 vcolG = vdup_n_u8(colG); | |
237 vcolB = vdup_n_u8(colB); | |
238 } | |
239 | |
240 while (width >= 8) { | |
241 uint8x8x4_t vdst; | |
242 uint16x8_t vmask; | |
243 uint16x8_t vmaskR, vmaskG, vmaskB; | |
244 | |
245 vdst = vld4_u8((uint8_t*)dst); | |
246 vmask = vld1q_u16(src); | |
247 | |
248 // Get all the color masks on 5 bits | |
249 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT); | |
250 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS), | |
251 SK_B16_BITS + SK_R16_BITS + 1); | |
252 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK); | |
253 | |
254 // Upscale to 0..32 | |
255 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4); | |
256 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4); | |
257 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4); | |
258 | |
259 vmaskR = vshrq_n_u16(vmaskR * vcolA, 8); | |
260 vmaskG = vshrq_n_u16(vmaskG * vcolA, 8); | |
261 vmaskB = vshrq_n_u16(vmaskB * vcolA, 8); | |
262 | |
263 vdst.val[NEON_A] = vdup_n_u8(0xFF); | |
264 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR); | |
265 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG); | |
266 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB); | |
267 | |
268 vst4_u8((uint8_t*)dst, vdst); | |
269 | |
270 dst += 8; | |
271 src += 8; | |
272 width -= 8; | |
273 } | |
274 | |
275 for (int i = 0; i < width; i++) { | |
276 dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]); | |
277 } | |
278 } | |
279 | |
OLD | NEW |