OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkSwizzler_opts_DEFINED | 8 #ifndef SkSwizzler_opts_DEFINED |
9 #define SkSwizzler_opts_DEFINED | 9 #define SkSwizzler_opts_DEFINED |
10 | 10 |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
// Expands `count` 8-bit gray samples read from `vsrc` into 32-bit pixels in
// `dst`.  Each output pixel holds the gray value replicated into its three low
// bytes and a constant 0xFF in its top byte.  A `count` of zero or less writes
// nothing.
static void gray_to_RGB1_portable(uint32_t dst[], const void* vsrc, int count) {
    const uint8_t* src = (const uint8_t*)vsrc;
    for (int i = 0; i < count; i++) {
        // Widen once so the shifts below are done in 32-bit unsigned arithmetic.
        const uint32_t g = src[i];
        dst[i] = (uint32_t)0xFF << 24
               | g << 16
               | g <<  8
               | g <<  0;
    }
}
100 | 100 |
// Expands `count` interleaved (gray, alpha) byte pairs read from `vsrc` into
// 32-bit pixels in `dst`.  Alpha is copied unchanged into the top byte and the
// gray value, not scaled by alpha, is replicated into the three low bytes.
// Reads 2 * count bytes from `vsrc`; a `count` of zero or less writes nothing.
static void grayA_to_RGBA_portable(uint32_t dst[], const void* vsrc, int count) {
    const uint8_t* src = (const uint8_t*)vsrc;
    for (int i = 0; i < count; i++) {
        const uint32_t g = src[0];
        const uint32_t a = src[1];
        src += 2;  // two source bytes per pixel
        dst[i] = a << 24
               | g << 16
               | g <<  8
               | g <<  0;
    }
}
113 | |
// Expands `count` interleaved (gray, alpha) byte pairs read from `vsrc` into
// 32-bit pixels in `dst`, premultiplying gray by alpha.  Alpha is copied
// unchanged into the top byte; the three low bytes each hold
// (gray * alpha + 127) / 255.
// Reads 2 * count bytes from `vsrc`; a `count` of zero or less writes nothing.
static void grayA_to_rgbA_portable(uint32_t dst[], const void* vsrc, int count) {
    const uint8_t* src = (const uint8_t*)vsrc;
    for (int i = 0; i < count; i++) {
        const uint32_t a = src[1];
        // Rounded divide by 255.  The numerator is at most 255*255 + 127, so the
        // quotient never exceeds 255 and needs no masking or narrowing cast.
        const uint32_t g = (src[0] * a + 127) / 255;
        src += 2;  // two source bytes per pixel
        dst[i] = a << 24
               | g << 16
               | g <<  8
               | g <<  0;
    }
}
127 | |
101 #if defined(SK_ARM_HAS_NEON) | 128 #if defined(SK_ARM_HAS_NEON) |
102 | 129 |
103 // Rounded divide by 255, (x + 127) / 255 | 130 // Rounded divide by 255, (x + 127) / 255 |
104 static uint8x8_t div255_round(uint16x8_t x) { | 131 static uint8x8_t div255_round(uint16x8_t x) { |
105 // result = (x + 127) / 255 | 132 // result = (x + 127) / 255 |
106 // result = (x + 127) / 256 + error1 | 133 // result = (x + 127) / 256 + error1 |
107 // | 134 // |
108 // error1 = (x + 127) / (255 * 256) | 135 // error1 = (x + 127) / (255 * 256) |
109 // error1 = (x + 127) / (256 * 256) + error2 | 136 // error1 = (x + 127) / (256 * 256) + error2 |
110 // | 137 // |
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
304 // Store 8 pixels. | 331 // Store 8 pixels. |
305 vst4_u8((uint8_t*) dst, rgba); | 332 vst4_u8((uint8_t*) dst, rgba); |
306 src += 8; | 333 src += 8; |
307 dst += 8; | 334 dst += 8; |
308 count -= 8; | 335 count -= 8; |
309 } | 336 } |
310 | 337 |
311 gray_to_RGB1_portable(dst, src, count); | 338 gray_to_RGB1_portable(dst, src, count); |
312 } | 339 } |
313 | 340 |
// expand_grayA<kPremul>: NEON expansion of `count` interleaved (gray, alpha)
// byte pairs from `vsrc` into 32-bit pixels in `dst`.
//   - kPremul == true : gray is first passed through scale(gray, alpha).
//     NOTE(review): scale() is defined outside this excerpt; presumably it is the
//     rounded gray*alpha/255 used by grayA_to_rgbA_portable -- confirm.
//   - kPremul == false: gray is used as loaded.
// Pixels are processed 16 at a time, then at most one batch of 8, and whatever
// is left (fewer than 8) is handed to the matching portable routine.
341 template <bool kPremul> | |
342 static void expand_grayA(uint32_t dst[], const void* vsrc, int count) { | |
343 const uint8_t* src = (const uint8_t*) vsrc; | |
344 while (count >= 16) { | |
// vld2q_u8 de-interleaves 32 bytes: val[0] receives the gray bytes, val[1] the alpha bytes.
345 // Load 16 pixels. | |
346 uint8x16x2_t ga = vld2q_u8(src); | |
mtklein
2016/02/03 01:08:15
We sure are getting to use all the vldN / vstN, eh
msarett
2016/02/03 14:48:51
Yeah it's fun to have good uses for all the instru
| |
347 | |
// scale() is called on 8-lane vectors here, so the 16-lane registers are split into halves and recombined.
348 // Premultiply if requested. | |
349 if (kPremul) { | |
350 ga.val[0] = vcombine_u8( | |
351 scale(vget_low_u8(ga.val[0]), vget_low_u8(ga.val[1])), | |
352 scale(vget_high_u8(ga.val[0]), vget_high_u8(ga.val[1]))); | |
353 } | |
354 | |
// Gray fills the first three output channels; alpha is the fourth.
355 // Set each of the color channels. | |
356 uint8x16x4_t rgba; | |
357 rgba.val[0] = ga.val[0]; | |
358 rgba.val[1] = ga.val[0]; | |
359 rgba.val[2] = ga.val[0]; | |
360 rgba.val[3] = ga.val[1]; | |
361 | |
// vst4q_u8 re-interleaves the four vectors, writing 4 bytes per pixel.
362 // Store 16 pixels. | |
363 vst4q_u8((uint8_t*) dst, rgba); | |
// Each source pixel is 2 bytes (gray, alpha); each destination pixel is one uint32_t.
364 src += 16*2; | |
365 dst += 16; | |
366 count -= 16; | |
367 } | |
368 | |
// count is below 16 after the loop above, so a single `if` (not a loop) covers the 8-pixel case.
369 if (count >= 8) { | |
370 // Load 8 pixels. | |
371 uint8x8x2_t ga = vld2_u8(src); | |
372 | |
373 // Premultiply if requested. | |
374 if (kPremul) { | |
375 ga.val[0] = scale(ga.val[0], ga.val[1]); | |
376 } | |
377 | |
378 // Set each of the color channels. | |
379 uint8x8x4_t rgba; | |
380 rgba.val[0] = ga.val[0]; | |
381 rgba.val[1] = ga.val[0]; | |
382 rgba.val[2] = ga.val[0]; | |
383 rgba.val[3] = ga.val[1]; | |
384 | |
385 // Store 8 pixels. | |
386 vst4_u8((uint8_t*) dst, rgba); | |
387 src += 8*2; | |
388 dst += 8; | |
389 count -= 8; | |
390 } | |
391 | |
// Remaining pixels (fewer than 8) go through the portable routine matching kPremul.
392 auto proc = kPremul ? grayA_to_rgbA_portable : grayA_to_RGBA_portable; | |
393 proc(dst, src, count); | |
394 } | |
395 | |
// NEON build: gray+alpha expansion without premultiplication; forwards to expand_grayA with kPremul = false.
396 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { | |
397 expand_grayA<false>(dst, src, count); | |
398 } | |
399 | |
// NEON build: gray+alpha expansion with premultiplication; forwards to expand_grayA with kPremul = true.
400 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { | |
401 expand_grayA<true>(dst, src, count); | |
402 } | |
403 | |
314 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | 404 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
315 | 405 |
316 template <bool kSwapRB> | 406 template <bool kSwapRB> |
317 static void premul_should_swapRB(uint32_t* dst, const void* vsrc, int count) { | 407 static void premul_should_swapRB(uint32_t* dst, const void* vsrc, int count) { |
318 auto src = (const uint32_t*)vsrc; | 408 auto src = (const uint32_t*)vsrc; |
319 | 409 |
320 auto premul8 = [](__m128i* lo, __m128i* hi) { | 410 auto premul8 = [](__m128i* lo, __m128i* hi) { |
321 const __m128i zeros = _mm_setzero_si128(); | 411 const __m128i zeros = _mm_setzero_si128(); |
322 const __m128i _128 = _mm_set1_epi16(128); | 412 const __m128i _128 = _mm_set1_epi16(128); |
323 const __m128i _257 = _mm_set1_epi16(257); | 413 const __m128i _257 = _mm_set1_epi16(257); |
(...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
475 _mm_storeu_si128((__m128i*) (dst + 12), ggga3); | 565 _mm_storeu_si128((__m128i*) (dst + 12), ggga3); |
476 | 566 |
477 src += 16; | 567 src += 16; |
478 dst += 16; | 568 dst += 16; |
479 count -= 16; | 569 count -= 16; |
480 } | 570 } |
481 | 571 |
482 gray_to_RGB1_portable(dst, src, count); | 572 gray_to_RGB1_portable(dst, src, count); |
483 } | 573 } |
484 | 574 |
// SSSE3 branch: no vectorized path here; forwards all arguments unchanged to grayA_to_RGBA_portable.
575 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { | |
576 grayA_to_RGBA_portable(dst, src, count); | |
577 } | |
578 | |
// SSSE3 branch: no vectorized path here; forwards all arguments unchanged to grayA_to_rgbA_portable.
579 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { | |
580 grayA_to_rgbA_portable(dst, src, count); | |
581 } | |
582 | |
485 #else | 583 #else |
486 | 584 |
// Fallback (#else) branch of the SIMD selection: forwards all arguments unchanged to RGBA_to_rgbA_portable.
487 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { | 585 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { |
488 RGBA_to_rgbA_portable(dst, src, count); | 586 RGBA_to_rgbA_portable(dst, src, count); |
489 } | 587 } |
490 | 588 |
// Fallback (#else) branch of the SIMD selection: forwards all arguments unchanged to RGBA_to_bgrA_portable.
491 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { | 589 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { |
492 RGBA_to_bgrA_portable(dst, src, count); | 590 RGBA_to_bgrA_portable(dst, src, count); |
493 } | 591 } |
494 | 592 |
// Fallback (#else) branch of the SIMD selection: forwards all arguments unchanged to RGBA_to_BGRA_portable.
495 static void RGBA_to_BGRA(uint32_t* dst, const void* src, int count) { | 593 static void RGBA_to_BGRA(uint32_t* dst, const void* src, int count) { |
496 RGBA_to_BGRA_portable(dst, src, count); | 594 RGBA_to_BGRA_portable(dst, src, count); |
497 } | 595 } |
498 | 596 |
// Fallback (#else) branch of the SIMD selection: forwards all arguments unchanged to RGB_to_RGB1_portable.
499 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { | 597 static void RGB_to_RGB1(uint32_t dst[], const void* src, int count) { |
500 RGB_to_RGB1_portable(dst, src, count); | 598 RGB_to_RGB1_portable(dst, src, count); |
501 } | 599 } |
502 | 600 |
// Fallback (#else) branch of the SIMD selection: forwards all arguments unchanged to RGB_to_BGR1_portable.
503 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { | 601 static void RGB_to_BGR1(uint32_t dst[], const void* src, int count) { |
504 RGB_to_BGR1_portable(dst, src, count); | 602 RGB_to_BGR1_portable(dst, src, count); |
505 } | 603 } |
506 | 604 |
// Fallback (#else) branch of the SIMD selection: forwards all arguments unchanged to gray_to_RGB1_portable.
507 static void gray_to_RGB1(uint32_t dst[], const void* src, int count) { | 605 static void gray_to_RGB1(uint32_t dst[], const void* src, int count) { |
508 gray_to_RGB1_portable(dst, src, count); | 606 gray_to_RGB1_portable(dst, src, count); |
509 } | 607 } |
510 | 608 |
// Fallback (#else) branch of the SIMD selection: forwards all arguments unchanged to grayA_to_RGBA_portable.
609 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { | |
610 grayA_to_RGBA_portable(dst, src, count); | |
611 } | |
612 | |
// Fallback (#else) branch of the SIMD selection: forwards all arguments unchanged to grayA_to_rgbA_portable.
613 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { | |
614 grayA_to_rgbA_portable(dst, src, count); | |
615 } | |
616 | |
511 #endif | 617 #endif |
512 | 618 |
513 } | 619 } |
514 | 620 |
515 #endif // SkSwizzler_opts_DEFINED | 621 #endif // SkSwizzler_opts_DEFINED |
OLD | NEW |