OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkSwizzler_opts_DEFINED | 8 #ifndef SkSwizzler_opts_DEFINED |
9 #define SkSwizzler_opts_DEFINED | 9 #define SkSwizzler_opts_DEFINED |
10 | 10 |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
53 r = src[i] >> 16, | 53 r = src[i] >> 16, |
54 g = src[i] >> 8, | 54 g = src[i] >> 8, |
55 b = src[i] >> 0; | 55 b = src[i] >> 0; |
56 dst[i] = (uint32_t)a << 24 | 56 dst[i] = (uint32_t)a << 24 |
57 | (uint32_t)b << 16 | 57 | (uint32_t)b << 16 |
58 | (uint32_t)g << 8 | 58 | (uint32_t)g << 8 |
59 | (uint32_t)r << 0; | 59 | (uint32_t)r << 0; |
60 } | 60 } |
61 } | 61 } |
62 | 62 |
// Expands `count` packed 3-byte pixels into 4-byte pixels with an opaque
// (0xFF) alpha byte, keeping the colour bytes in their source order.
//
// For each pixel, source bytes 0, 1 and 2 land in bits 0-7, 8-15 and 16-23 of
// the matching dst word; bits 24-31 are set to 0xFF.
//
// NOTE(review): src is declared as uint32_t[] but is read byte-wise as
// 3 * count bytes. A reviewer flagged the pointer type as odd; confirm with
// callers before changing the signature.
static void xxx_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) {
    // Keep the const qualifier when reinterpreting src as raw bytes.
    const uint8_t* src8 = (const uint8_t*) src;
    for (int i = 0; i < count; i++) {
        uint8_t b = src8[0],
                g = src8[1],
                r = src8[2];
        src8 += 3;
        dst[i] = (uint32_t)0xFF << 24
               | (uint32_t)r    << 16
               | (uint32_t)g    <<  8
               | (uint32_t)b    <<  0;
    }
}
76 | |
// Expands `count` packed 3-byte pixels into 4-byte pixels with an opaque
// (0xFF) alpha byte, swapping the first and third colour bytes.
//
// For each pixel, source bytes 0, 1 and 2 land in bits 16-23, 8-15 and 0-7 of
// the matching dst word; bits 24-31 are set to 0xFF.
//
// src is declared as uint32_t[] but is read byte-wise as 3 * count bytes.
static void xxx_swaprb_xxxa_portable(uint32_t dst[], const uint32_t src[], int count) {
    // Keep the const qualifier when reinterpreting src as raw bytes.
    const uint8_t* src8 = (const uint8_t*) src;
    for (int i = 0; i < count; i++) {
        uint8_t b = src8[0],
                g = src8[1],
                r = src8[2];
        src8 += 3;
        dst[i] = (uint32_t)0xFF << 24
               | (uint32_t)b    << 16
               | (uint32_t)g    <<  8
               | (uint32_t)r    <<  0;
    }
}
90 | |
63 #if defined(SK_ARM_HAS_NEON) | 91 #if defined(SK_ARM_HAS_NEON) |
64 | 92 |
65 // Rounded divide by 255, (x + 127) / 255 | 93 // Rounded divide by 255, (x + 127) / 255 |
66 static uint8x8_t div255_round(uint16x8_t x) { | 94 static uint8x8_t div255_round(uint16x8_t x) { |
67 // result = (x + 127) / 255 | 95 // result = (x + 127) / 255 |
68 // result = (x + 127) / 256 + error1 | 96 // result = (x + 127) / 256 + error1 |
69 // | 97 // |
70 // error1 = (x + 127) / (255 * 256) | 98 // error1 = (x + 127) / (255 * 256) |
71 // error1 = (x + 127) / (256 * 256) + error2 | 99 // error1 = (x + 127) / (256 * 256) + error2 |
72 // | 100 // |
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
161 // Store 8 pixels. | 189 // Store 8 pixels. |
162 vst4_u8((uint8_t*) dst, bgra); | 190 vst4_u8((uint8_t*) dst, bgra); |
163 src += 8; | 191 src += 8; |
164 dst += 8; | 192 dst += 8; |
165 count -= 8; | 193 count -= 8; |
166 } | 194 } |
167 | 195 |
168 swaprb_xxxa_portable(dst, src, count); | 196 swaprb_xxxa_portable(dst, src, count); |
169 } | 197 } |
170 | 198 |
199 static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
200 const uint8_t* src8 = (const uint8_t*) src; | |
201 while (count >= 16) { | |
202 // Load 16 pixels. | |
203 uint8x16x3_t bgr = vld3q_u8(src8); | |
204 | |
205 // Insert an opaque alpha channel. | |
206 uint8x16x4_t bgra; | |
207 bgra.val[0] = bgr.val[0]; | |
208 bgra.val[1] = bgr.val[1]; | |
209 bgra.val[2] = bgr.val[2]; | |
210 bgra.val[3] = vdupq_n_u8(0xFF); | |
211 | |
212 // Store 16 pixels. | |
213 vst4q_u8((uint8_t*) dst, bgra); | |
214 src8 += 48; | |
215 dst += 16; | |
216 count -= 16; | |
217 } | |
218 | |
219 if (count >= 8) { | |
220 // Load 8 pixels. | |
221 uint8x8x3_t bgr = vld3_u8(src8); | |
222 | |
223 // Insert an opaque alpha channel. | |
224 uint8x8x4_t bgra; | |
225 bgra.val[0] = bgr.val[0]; | |
226 bgra.val[1] = bgr.val[1]; | |
227 bgra.val[2] = bgr.val[2]; | |
228 bgra.val[3] = vdup_n_u8(0xFF); | |
229 | |
230 // Store 8 pixels. | |
231 vst4_u8((uint8_t*) dst, bgra); | |
232 src8 += 24; | |
233 dst += 8; | |
234 count -= 8; | |
235 } | |
236 | |
237 xxx_xxxa_portable(dst, src, count); | |
238 } | |
239 | |
240 static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
241 const uint8_t* src8 = (const uint8_t*) src; | |
242 while (count >= 16) { | |
243 // Load 16 pixels. | |
244 uint8x16x3_t bgr = vld3q_u8(src8); | |
245 | |
246 // Swap r and b and insert an opaque alpha channel. | |
247 uint8x16x4_t rgba; | |
248 rgba.val[0] = bgr.val[2]; | |
249 rgba.val[1] = bgr.val[1]; | |
250 rgba.val[2] = bgr.val[0]; | |
251 rgba.val[3] = vdupq_n_u8(0xFF); | |
252 | |
253 // Store 16 pixels. | |
254 vst4q_u8((uint8_t*) dst, rgba); | |
255 src8 += 48; | |
256 dst += 16; | |
257 count -= 16; | |
258 } | |
259 | |
260 if (count >= 8) { | |
261 // Load 8 pixels. | |
262 uint8x8x3_t bgr = vld3_u8(src8); | |
263 | |
264 // Swap r and b and insert an opaque alpha channel. | |
265 uint8x8x4_t rgba; | |
266 rgba.val[0] = bgr.val[2]; | |
267 rgba.val[1] = bgr.val[1]; | |
268 rgba.val[2] = bgr.val[0]; | |
269 rgba.val[3] = vdup_n_u8(0xFF); | |
270 | |
271 // Store 8 pixels. | |
272 vst4_u8((uint8_t*) dst, rgba); | |
273 src8 += 24; | |
274 dst += 8; | |
275 count -= 8; | |
276 } | |
277 | |
278 xxx_swaprb_xxxa_portable(dst, src, count); | |
279 } | |
280 | |
171 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | 281 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
172 | 282 |
173 template <bool kSwapRB> | 283 template <bool kSwapRB> |
174 static void premul_xxxa_should_swaprb(uint32_t dst[], const uint32_t src[], int count) { | 284 static void premul_xxxa_should_swaprb(uint32_t dst[], const uint32_t src[], int count) { |
175 | 285 |
176 auto premul8 = [](__m128i* lo, __m128i* hi) { | 286 auto premul8 = [](__m128i* lo, __m128i* hi) { |
177 const __m128i zeros = _mm_setzero_si128(); | 287 const __m128i zeros = _mm_setzero_si128(); |
178 const __m128i _128 = _mm_set1_epi16(128); | 288 const __m128i _128 = _mm_set1_epi16(128); |
179 const __m128i _257 = _mm_set1_epi16(257); | 289 const __m128i _257 = _mm_set1_epi16(257); |
180 __m128i planar; | 290 __m128i planar; |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
257 _mm_storeu_si128((__m128i*) dst, rgba); | 367 _mm_storeu_si128((__m128i*) dst, rgba); |
258 | 368 |
259 src += 4; | 369 src += 4; |
260 dst += 4; | 370 dst += 4; |
261 count -= 4; | 371 count -= 4; |
262 } | 372 } |
263 | 373 |
264 swaprb_xxxa_portable(dst, src, count); | 374 swaprb_xxxa_portable(dst, src, count); |
265 } | 375 } |
266 | 376 |
377 static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
378 xxx_xxxa_portable(dst, src, count); | |
379 } | |
380 | |
381 static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
382 xxx_swaprb_xxxa_portable(dst, src, count); | |
383 } | |
384 | |
267 #else | 385 #else |
268 | 386 |
269 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { | 387 static void premul_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
270 premul_xxxa_portable(dst, src, count); | 388 premul_xxxa_portable(dst, src, count); |
271 } | 389 } |
272 | 390 |
273 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | 391 static void premul_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
274 premul_swaprb_xxxa_portable(dst, src, count); | 392 premul_swaprb_xxxa_portable(dst, src, count); |
275 } | 393 } |
276 | 394 |
277 static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | 395 static void swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { |
278 swaprb_xxxa_portable(dst, src, count); | 396 swaprb_xxxa_portable(dst, src, count); |
279 } | 397 } |
280 | 398 |
399 static void xxx_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
400 xxx_xxxa_portable(dst, src, count); | |
401 } | |
402 | |
403 static void xxx_swaprb_xxxa(uint32_t dst[], const uint32_t src[], int count) { | |
404 xxx_swaprb_xxxa_portable(dst, src, count); | |
405 } | |
406 | |
281 #endif | 407 #endif |
282 | 408 |
283 } | 409 } |
284 | 410 |
285 #endif // SkSwizzler_opts_DEFINED | 411 #endif // SkSwizzler_opts_DEFINED |
OLD | NEW |