OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkSwizzler_opts_DEFINED | 8 #ifndef SkSwizzler_opts_DEFINED |
9 #define SkSwizzler_opts_DEFINED | 9 #define SkSwizzler_opts_DEFINED |
10 | 10 |
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
118 a = src[1]; | 118 a = src[1]; |
119 src += 2; | 119 src += 2; |
120 g = (g*a+127)/255; | 120 g = (g*a+127)/255; |
121 dst[i] = (uint32_t)a << 24 | 121 dst[i] = (uint32_t)a << 24 |
122 | (uint32_t)g << 16 | 122 | (uint32_t)g << 16 |
123 | (uint32_t)g << 8 | 123 | (uint32_t)g << 8 |
124 | (uint32_t)g << 0; | 124 | (uint32_t)g << 0; |
125 } | 125 } |
126 } | 126 } |
127 | 127 |
// Portable conversion of inverted CMYK pixels to opaque RGB1 pixels.
//
// Each 32-bit source pixel is read as c in bits 0-7, m in bits 8-15, y in bits
// 16-23 and k in bits 24-31.  Every colour channel is multiplied by k with a
// rounded divide by 255, and the alpha byte of the result is always 0xFF.
// See comments in SkSwizzler.cpp for details on the conversion formula.
//
// dst:   receives `count` pixels packed as (0xFF << 24 | b << 16 | g << 8 | r).
// vsrc:  read as an array of `count` uint32_t pixels.
// count: number of pixels to convert; nothing is written when count <= 0.
static void inverted_CMYK_to_RGB1_portable(uint32_t* dst, const void* vsrc, int count) {
    const uint32_t* src = (const uint32_t*)vsrc;
    for (int i = 0; i < count; i++) {
        const uint32_t px = src[i];
        const uint32_t c = (px >>  0) & 0xFF,
                       m = (px >>  8) & 0xFF,
                       y = (px >> 16) & 0xFF,
                       k = (px >> 24) & 0xFF;

        // Rounded (x * k) / 255.  The product is at most 255 * 255, so each
        // quotient fits in a single byte.
        const uint32_t r = (c * k + 127) / 255,
                       g = (m * k + 127) / 255,
                       b = (y * k + 127) / 255;

        dst[i] = 0xFF000000u | (b << 16) | (g << 8) | r;
    }
}
| 145 |
// Portable conversion of inverted CMYK pixels to opaque BGR1 pixels.
//
// Each 32-bit source pixel is read as c in bits 0-7, m in bits 8-15, y in bits
// 16-23 and k in bits 24-31.  Every colour channel is multiplied by k with a
// rounded divide by 255, and the alpha byte of the result is always 0xFF.
//
// dst:   receives `count` pixels packed as (0xFF << 24 | r << 16 | g << 8 | b).
// vsrc:  read as an array of `count` uint32_t pixels.
// count: number of pixels to convert; nothing is written when count <= 0.
static void inverted_CMYK_to_BGR1_portable(uint32_t* dst, const void* vsrc, int count) {
    const uint32_t* src = (const uint32_t*)vsrc;
    for (int i = 0; i < count; i++) {
        const uint32_t px = src[i];
        const uint32_t c = (px >>  0) & 0xFF,
                       m = (px >>  8) & 0xFF,
                       y = (px >> 16) & 0xFF,
                       k = (px >> 24) & 0xFF;

        // Rounded (x * k) / 255.  The product is at most 255 * 255, so each
        // quotient fits in a single byte.
        const uint32_t r = (c * k + 127) / 255,
                       g = (m * k + 127) / 255,
                       b = (y * k + 127) / 255;

        dst[i] = 0xFF000000u | (r << 16) | (g << 8) | b;
    }
}
| 162 |
128 #if defined(SK_ARM_HAS_NEON) | 163 #if defined(SK_ARM_HAS_NEON) |
129 | 164 |
130 // Rounded divide by 255, (x + 127) / 255 | 165 // Rounded divide by 255, (x + 127) / 255 |
131 static uint8x8_t div255_round(uint16x8_t x) { | 166 static uint8x8_t div255_round(uint16x8_t x) { |
132 // result = (x + 127) / 255 | 167 // result = (x + 127) / 255 |
133 // result = (x + 127) / 256 + error1 | 168 // result = (x + 127) / 256 + error1 |
134 // | 169 // |
135 // error1 = (x + 127) / (255 * 256) | 170 // error1 = (x + 127) / (255 * 256) |
136 // error1 = (x + 127) / (256 * 256) + error2 | 171 // error1 = (x + 127) / (256 * 256) + error2 |
137 // | 172 // |
(...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
394 } | 429 } |
395 | 430 |
396 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { | 431 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { |
397 expand_grayA<false>(dst, src, count); | 432 expand_grayA<false>(dst, src, count); |
398 } | 433 } |
399 | 434 |
400 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { | 435 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { |
401 expand_grayA<true>(dst, src, count); | 436 expand_grayA<true>(dst, src, count); |
402 } | 437 } |
403 | 438 |
| 439 enum Format { kRGB1, kBGR1 }; |
| 440 template <Format format> |
| 441 static void inverted_cmyk_to(uint32_t* dst, const void* vsrc, int count) { |
| 442 auto src = (const uint32_t*)vsrc; |
| 443 while (count >= 8) { |
| 444 // Load 8 cmyk pixels. |
| 445 uint8x8x4_t pixels = vld4_u8((const uint8_t*) src); |
| 446 |
| 447 uint8x8_t k = pixels.val[3], |
| 448 y = pixels.val[2], |
| 449 m = pixels.val[1], |
| 450 c = pixels.val[0]; |
| 451 |
| 452 // Scale to r, g, b. |
| 453 uint8x8_t b = scale(y, k); |
| 454 uint8x8_t g = scale(m, k); |
| 455 uint8x8_t r = scale(c, k); |
| 456 |
| 457 // Store 8 rgba pixels. |
| 458 if (kBGR1 == format) { |
| 459 pixels.val[3] = vdup_n_u8(0xFF); |
| 460 pixels.val[2] = r; |
| 461 pixels.val[1] = g; |
| 462 pixels.val[0] = b; |
| 463 } else { |
| 464 pixels.val[3] = vdup_n_u8(0xFF); |
| 465 pixels.val[2] = b; |
| 466 pixels.val[1] = g; |
| 467 pixels.val[0] = r; |
| 468 } |
| 469 vst4_u8((uint8_t*) dst, pixels); |
| 470 src += 8; |
| 471 dst += 8; |
| 472 count -= 8; |
| 473 } |
| 474 |
| 475 auto proc = (kBGR1 == format) ? inverted_CMYK_to_BGR1_portable : inverted_CM
YK_to_RGB1_portable; |
| 476 proc(dst, src, count); |
| 477 } |
| 478 |
| 479 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { |
| 480 inverted_cmyk_to<kRGB1>(dst, src, count); |
| 481 } |
| 482 |
| 483 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { |
| 484 inverted_cmyk_to<kBGR1>(dst, src, count); |
| 485 } |
| 486 |
404 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | 487 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
405 | 488 |
406 // Scale a byte by another. | 489 // Scale a byte by another. |
407 // Inputs are stored in 16-bit lanes, but are not larger than 8-bits. | 490 // Inputs are stored in 16-bit lanes, but are not larger than 8-bits. |
408 static __m128i scale(__m128i x, __m128i y) { | 491 static __m128i scale(__m128i x, __m128i y) { |
409 const __m128i _128 = _mm_set1_epi16(128); | 492 const __m128i _128 = _mm_set1_epi16(128); |
410 const __m128i _257 = _mm_set1_epi16(257); | 493 const __m128i _257 = _mm_set1_epi16(257); |
411 | 494 |
412 // (x+127)/255 == ((x+128)*257)>>16 for 0 <= x <= 255*255. | 495 // (x+127)/255 == ((x+128)*257)>>16 for 0 <= x <= 255*255. |
413 return _mm_mulhi_epu16(_mm_add_epi16(_mm_mullo_epi16(x, y), _128), _257); | 496 return _mm_mulhi_epu16(_mm_add_epi16(_mm_mullo_epi16(x, y), _128), _257); |
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
624 _mm_storeu_si128((__m128i*) (dst + 4), ggga_hi); | 707 _mm_storeu_si128((__m128i*) (dst + 4), ggga_hi); |
625 | 708 |
626 src += 8*2; | 709 src += 8*2; |
627 dst += 8; | 710 dst += 8; |
628 count -= 8; | 711 count -= 8; |
629 } | 712 } |
630 | 713 |
631 grayA_to_rgbA_portable(dst, src, count); | 714 grayA_to_rgbA_portable(dst, src, count); |
632 } | 715 } |
633 | 716 |
| 717 enum Format { kRGB1, kBGR1 }; |
| 718 template <Format format> |
| 719 static void inverted_cmyk_to(uint32_t* dst, const void* vsrc, int count) { |
| 720 auto src = (const uint32_t*)vsrc; |
| 721 |
| 722 auto convert8 = [](__m128i* lo, __m128i* hi) { |
| 723 const __m128i zeros = _mm_setzero_si128(); |
| 724 __m128i planar; |
| 725 if (kBGR1 == format) { |
| 726 planar = _mm_setr_epi8(2,6,10,14, 1,5,9,13, 0,4,8,12, 3,7,11,15); |
| 727 } else { |
| 728 planar = _mm_setr_epi8(0,4,8,12, 1,5,9,13, 2,6,10,14, 3,7,11,15); |
| 729 } |
| 730 |
| 731 // Swizzle the pixels to 8-bit planar. |
| 732 *lo = _mm_shuffle_epi8(*lo, planar); //
ccccmmmm yyyykkkk |
| 733 *hi = _mm_shuffle_epi8(*hi, planar); //
CCCCMMMM YYYYKKKK |
| 734 __m128i cm = _mm_unpacklo_epi32(*lo, *hi), //
ccccCCCC mmmmMMMM |
| 735 yk = _mm_unpackhi_epi32(*lo, *hi); //
yyyyYYYY kkkkKKKK |
| 736 |
| 737 // Unpack to 16-bit planar. |
| 738 __m128i c = _mm_unpacklo_epi8(cm, zeros), //
c_c_c_c_ C_C_C_C_ |
| 739 m = _mm_unpackhi_epi8(cm, zeros), //
m_m_m_m_ M_M_M_M_ |
| 740 y = _mm_unpacklo_epi8(yk, zeros), //
y_y_y_y_ Y_Y_Y_Y_ |
| 741 k = _mm_unpackhi_epi8(yk, zeros); //
k_k_k_k_ K_K_K_K_ |
| 742 |
| 743 // Scale to r, g, b. |
| 744 __m128i r = scale(c, k), |
| 745 g = scale(m, k), |
| 746 b = scale(y, k); |
| 747 |
| 748 // Repack into interlaced pixels. |
| 749 __m128i rg = _mm_or_si128(r, _mm_slli_epi16(g, 8)), //
rgrgrgrg RGRGRGRG |
| 750 ba = _mm_or_si128(b, _mm_set1_epi16((uint16_t) 0xFF00)); //
b1b1b1b1 B1B1B1B1 |
| 751 *lo = _mm_unpacklo_epi16(rg, ba); //
rgbargba rgbargba |
| 752 *hi = _mm_unpackhi_epi16(rg, ba); //
RGB1RGB1 RGB1RGB1 |
| 753 }; |
| 754 |
| 755 while (count >= 8) { |
| 756 __m128i lo = _mm_loadu_si128((const __m128i*) (src + 0)), |
| 757 hi = _mm_loadu_si128((const __m128i*) (src + 4)); |
| 758 |
| 759 convert8(&lo, &hi); |
| 760 |
| 761 _mm_storeu_si128((__m128i*) (dst + 0), lo); |
| 762 _mm_storeu_si128((__m128i*) (dst + 4), hi); |
| 763 |
| 764 src += 8; |
| 765 dst += 8; |
| 766 count -= 8; |
| 767 } |
| 768 |
| 769 if (count >= 4) { |
| 770 __m128i lo = _mm_loadu_si128((const __m128i*) src), |
| 771 hi = _mm_setzero_si128(); |
| 772 |
| 773 convert8(&lo, &hi); |
| 774 |
| 775 _mm_storeu_si128((__m128i*) dst, lo); |
| 776 |
| 777 src += 4; |
| 778 dst += 4; |
| 779 count -= 4; |
| 780 } |
| 781 |
| 782 auto proc = (kBGR1 == format) ? inverted_CMYK_to_BGR1_portable : inverted_CM
YK_to_RGB1_portable; |
| 783 proc(dst, src, count); |
| 784 } |
| 785 |
| 786 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { |
| 787 inverted_cmyk_to<kRGB1>(dst, src, count); |
| 788 } |
| 789 |
| 790 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { |
| 791 inverted_cmyk_to<kBGR1>(dst, src, count); |
| 792 } |
| 793 |
634 #else | 794 #else |
635 | 795 |
636 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { | 796 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { |
637 RGBA_to_rgbA_portable(dst, src, count); | 797 RGBA_to_rgbA_portable(dst, src, count); |
638 } | 798 } |
639 | 799 |
640 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { | 800 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { |
641 RGBA_to_bgrA_portable(dst, src, count); | 801 RGBA_to_bgrA_portable(dst, src, count); |
642 } | 802 } |
643 | 803 |
(...skipping 14 matching lines...) Expand all Loading... |
658 } | 818 } |
659 | 819 |
660 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { | 820 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { |
661 grayA_to_RGBA_portable(dst, src, count); | 821 grayA_to_RGBA_portable(dst, src, count); |
662 } | 822 } |
663 | 823 |
664 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { | 824 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { |
665 grayA_to_rgbA_portable(dst, src, count); | 825 grayA_to_rgbA_portable(dst, src, count); |
666 } | 826 } |
667 | 827 |
| 828 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { |
| 829 inverted_CMYK_to_RGB1_portable(dst, src, count); |
| 830 } |
| 831 |
| 832 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { |
| 833 inverted_CMYK_to_BGR1_portable(dst, src, count); |
| 834 } |
| 835 |
668 #endif | 836 #endif |
669 | 837 |
670 } | 838 } |
671 | 839 |
672 #endif // SkSwizzler_opts_DEFINED | 840 #endif // SkSwizzler_opts_DEFINED |
OLD | NEW |