OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkSwizzler_opts_DEFINED | 8 #ifndef SkSwizzler_opts_DEFINED |
9 #define SkSwizzler_opts_DEFINED | 9 #define SkSwizzler_opts_DEFINED |
10 | 10 |
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
118 a = src[1]; | 118 a = src[1]; |
119 src += 2; | 119 src += 2; |
120 g = (g*a+127)/255; | 120 g = (g*a+127)/255; |
121 dst[i] = (uint32_t)a << 24 | 121 dst[i] = (uint32_t)a << 24 |
122 | (uint32_t)g << 16 | 122 | (uint32_t)g << 16 |
123 | (uint32_t)g << 8 | 123 | (uint32_t)g << 8 |
124 | (uint32_t)g << 0; | 124 | (uint32_t)g << 0; |
125 } | 125 } |
126 } | 126 } |
127 | 127 |
128 static void inverted_CMYK_to_RGB1_portable(uint32_t* dst, const void* vsrc, int count) { | |
129 const uint32_t* src = (const uint32_t*)vsrc; | |
130 for (int i = 0; i < count; i++) { | |
131 uint8_t k = src[i] >> 24, | |
132 y = src[i] >> 16, | |
133 m = src[i] >> 8, | |
134 c = src[i] >> 0; | |
135 // See comments in SkSwizzler.cpp for details on the conversion formula. | |
136 uint8_t b = (y*k+127)/255, | |
137 g = (m*k+127)/255, | |
138 r = (c*k+127)/255; | |
139 dst[i] = (uint32_t)0xFF << 24 | |
140 | (uint32_t) b << 16 | |
141 | (uint32_t) g << 8 | |
142 | (uint32_t) r << 0; | |
143 } | |
144 } | |
145 | |
146 static void inverted_CMYK_to_BGR1_portable(uint32_t* dst, const void* vsrc, int count) { | |
147 const uint32_t* src = (const uint32_t*)vsrc; | |
148 for (int i = 0; i < count; i++) { | |
149 uint8_t k = src[i] >> 24, | |
150 y = src[i] >> 16, | |
151 m = src[i] >> 8, | |
152 c = src[i] >> 0; | |
153 uint8_t b = (y*k+127)/255, | |
154 g = (m*k+127)/255, | |
155 r = (c*k+127)/255; | |
156 dst[i] = (uint32_t)0xFF << 24 | |
157 | (uint32_t) r << 16 | |
158 | (uint32_t) g << 8 | |
159 | (uint32_t) b << 0; | |
160 } | |
161 } | |
162 | |
128 #if defined(SK_ARM_HAS_NEON) | 163 #if defined(SK_ARM_HAS_NEON) |
129 | 164 |
130 // Rounded divide by 255, (x + 127) / 255 | 165 // Rounded divide by 255, (x + 127) / 255 |
131 static uint8x8_t div255_round(uint16x8_t x) { | 166 static uint8x8_t div255_round(uint16x8_t x) { |
132 // result = (x + 127) / 255 | 167 // result = (x + 127) / 255 |
133 // result = (x + 127) / 256 + error1 | 168 // result = (x + 127) / 256 + error1 |
134 // | 169 // |
135 // error1 = (x + 127) / (255 * 256) | 170 // error1 = (x + 127) / (255 * 256) |
136 // error1 = (x + 127) / (256 * 256) + error2 | 171 // error1 = (x + 127) / (256 * 256) + error2 |
137 // | 172 // |
(...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
394 } | 429 } |
395 | 430 |
396 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { | 431 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { |
397 expand_grayA<false>(dst, src, count); | 432 expand_grayA<false>(dst, src, count); |
398 } | 433 } |
399 | 434 |
400 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { | 435 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { |
401 expand_grayA<true>(dst, src, count); | 436 expand_grayA<true>(dst, src, count); |
402 } | 437 } |
403 | 438 |
439 template <bool kSwapRB> | |
mtklein
2016/02/08 17:31:26
Let's do some renaming to get "swap" out of these
msarett
2016/02/08 18:09:47
Done.
| |
440 static void cmyk_should_swapRB(uint32_t* dst, const void* vsrc, int count) { | |
441 auto src = (const uint32_t*)vsrc; | |
442 while (count >= 8) { | |
443 // Load 8 cmyk pixels. | |
444 uint8x8x4_t pixels = vld4_u8((const uint8_t*) src); | |
445 | |
446 uint8x8_t k = pixels.val[3], | |
447 y = pixels.val[2], | |
448 m = pixels.val[1], | |
449 c = pixels.val[0]; | |
450 | |
451 // Scale to r, g, b. | |
452 uint8x8_t b = scale(y, k); | |
453 uint8x8_t g = scale(m, k); | |
454 uint8x8_t r = scale(c, k); | |
455 | |
456 // Store 8 rgba pixels. | |
457 if (kSwapRB) { | |
458 pixels.val[3] = vdup_n_u8(0xFF); | |
459 pixels.val[2] = r; | |
460 pixels.val[1] = g; | |
461 pixels.val[0] = b; | |
462 } else { | |
463 pixels.val[3] = vdup_n_u8(0xFF); | |
464 pixels.val[2] = b; | |
465 pixels.val[1] = g; | |
466 pixels.val[0] = r; | |
467 } | |
468 vst4_u8((uint8_t*) dst, pixels); | |
469 src += 8; | |
470 dst += 8; | |
471 count -= 8; | |
472 } | |
473 | |
474 auto proc = kSwapRB ? inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable; | |
475 proc(dst, src, count); | |
476 } | |
477 | |
478 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { | |
479 cmyk_should_swapRB<false>(dst, src, count); | |
480 } | |
481 | |
482 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { | |
483 cmyk_should_swapRB<true>(dst, src, count); | |
484 } | |
485 | |
404 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | 486 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
405 | 487 |
406 // Scale a byte by another. | 488 // Scale a byte by another. |
407 // Inputs are stored in 16-bit lanes, but are not larger than 8-bits. | 489 // Inputs are stored in 16-bit lanes, but are not larger than 8-bits. |
408 static __m128i scale(__m128i x, __m128i y) { | 490 static __m128i scale(__m128i x, __m128i y) { |
409 const __m128i _128 = _mm_set1_epi16(128); | 491 const __m128i _128 = _mm_set1_epi16(128); |
410 const __m128i _257 = _mm_set1_epi16(257); | 492 const __m128i _257 = _mm_set1_epi16(257); |
411 | 493 |
412 // (x+127)/255 == ((x+128)*257)>>16 for 0 <= x <= 255*255. | 494 // (x+127)/255 == ((x+128)*257)>>16 for 0 <= x <= 255*255. |
413 return _mm_mulhi_epu16(_mm_add_epi16(_mm_mullo_epi16(x, y), _128), _257); | 495 return _mm_mulhi_epu16(_mm_add_epi16(_mm_mullo_epi16(x, y), _128), _257); |
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
624 _mm_storeu_si128((__m128i*) (dst + 4), ggga_hi); | 706 _mm_storeu_si128((__m128i*) (dst + 4), ggga_hi); |
625 | 707 |
626 src += 8*2; | 708 src += 8*2; |
627 dst += 8; | 709 dst += 8; |
628 count -= 8; | 710 count -= 8; |
629 } | 711 } |
630 | 712 |
631 grayA_to_rgbA_portable(dst, src, count); | 713 grayA_to_rgbA_portable(dst, src, count); |
632 } | 714 } |
633 | 715 |
716 template <bool kSwapRB> | |
717 static void cmyk_should_swapRB(uint32_t* dst, const void* vsrc, int count) { | |
718 auto src = (const uint32_t*)vsrc; | |
719 | |
720 auto convert8 = [](__m128i* lo, __m128i* hi) { | |
721 const __m128i zeros = _mm_setzero_si128(); | |
722 __m128i planar; | |
723 if (kSwapRB) { | |
724 planar = _mm_setr_epi8(2,6,10,14, 1,5,9,13, 0,4,8,12, 3,7,11,15); | |
725 } else { | |
726 planar = _mm_setr_epi8(0,4,8,12, 1,5,9,13, 2,6,10,14, 3,7,11,15); | |
727 } | |
728 | |
729 // Swizzle the pixels to 8-bit planar. | |
730 *lo = _mm_shuffle_epi8(*lo, planar); // ccccmmmm yyyykkkk | |
731 *hi = _mm_shuffle_epi8(*hi, planar); // CCCCMMMM YYYYKKKK | |
732 __m128i cm = _mm_unpacklo_epi32(*lo, *hi), // ccccCCCC mmmmMMMM | |
733 yk = _mm_unpackhi_epi32(*lo, *hi); // yyyyYYYY kkkkKKKK | |
734 | |
735 // Unpack to 16-bit planar. | |
736 __m128i c = _mm_unpacklo_epi8(cm, zeros), // c_c_c_c_ C_C_C_C_ | |
737 m = _mm_unpackhi_epi8(cm, zeros), // m_m_m_m_ M_M_M_M_ | |
738 y = _mm_unpacklo_epi8(yk, zeros), // y_y_y_y_ Y_Y_Y_Y_ | |
739 k = _mm_unpackhi_epi8(yk, zeros); // k_k_k_k_ K_K_K_K_ | |
740 | |
741 // Scale to r, g, b. | |
742 __m128i r = scale(c, k), | |
743 g = scale(m, k), | |
744 b = scale(y, k); | |
745 | |
746 // Repack into interlaced pixels. | |
747 __m128i rg = _mm_or_si128(r, _mm_slli_epi16(g, 8)), // rgrgrgrg RGRGRGRG | |
748 ba = _mm_or_si128(b, _mm_set1_epi16(0xFF00)); // b1b1b1b1 B1B1B1B1 | |
749 *lo = _mm_unpacklo_epi16(rg, ba); // rgb1rgb1 rgb1rgb1 | |
750 *hi = _mm_unpackhi_epi16(rg, ba); // RGB1RGB1 RGB1RGB1 | |
751 }; | |
752 | |
753 while (count >= 8) { | |
754 __m128i lo = _mm_loadu_si128((const __m128i*) (src + 0)), | |
755 hi = _mm_loadu_si128((const __m128i*) (src + 4)); | |
756 | |
757 convert8(&lo, &hi); | |
758 | |
759 _mm_storeu_si128((__m128i*) (dst + 0), lo); | |
760 _mm_storeu_si128((__m128i*) (dst + 4), hi); | |
761 | |
762 src += 8; | |
763 dst += 8; | |
764 count -= 8; | |
765 } | |
766 | |
767 if (count >= 4) { | |
768 __m128i lo = _mm_loadu_si128((const __m128i*) src), | |
769 hi = _mm_setzero_si128(); | |
770 | |
771 convert8(&lo, &hi); | |
772 | |
773 _mm_storeu_si128((__m128i*) dst, lo); | |
774 | |
775 src += 4; | |
776 dst += 4; | |
777 count -= 4; | |
778 } | |
779 | |
780 auto proc = kSwapRB ? inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable; | |
781 proc(dst, src, count); | |
782 } | |
783 | |
784 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { | |
785 cmyk_should_swapRB<false>(dst, src, count); | |
786 } | |
787 | |
788 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { | |
789 cmyk_should_swapRB<true>(dst, src, count); | |
790 } | |
791 | |
634 #else | 792 #else |
635 | 793 |
636 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { | 794 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { |
637 RGBA_to_rgbA_portable(dst, src, count); | 795 RGBA_to_rgbA_portable(dst, src, count); |
638 } | 796 } |
639 | 797 |
640 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { | 798 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { |
641 RGBA_to_bgrA_portable(dst, src, count); | 799 RGBA_to_bgrA_portable(dst, src, count); |
642 } | 800 } |
643 | 801 |
(...skipping 14 matching lines...) Expand all Loading... | |
658 } | 816 } |
659 | 817 |
660 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { | 818 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { |
661 grayA_to_RGBA_portable(dst, src, count); | 819 grayA_to_RGBA_portable(dst, src, count); |
662 } | 820 } |
663 | 821 |
664 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { | 822 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { |
665 grayA_to_rgbA_portable(dst, src, count); | 823 grayA_to_rgbA_portable(dst, src, count); |
666 } | 824 } |
667 | 825 |
826 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { | |
827 inverted_CMYK_to_RGB1_portable(dst, src, count); | |
828 } | |
829 | |
830 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { | |
831 inverted_CMYK_to_BGR1_portable(dst, src, count); | |
832 } | |
833 | |
668 #endif | 834 #endif |
669 | 835 |
670 } | 836 } |
671 | 837 |
672 #endif // SkSwizzler_opts_DEFINED | 838 #endif // SkSwizzler_opts_DEFINED |
OLD | NEW |