Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkSwizzler_opts_DEFINED | 8 #ifndef SkSwizzler_opts_DEFINED |
| 9 #define SkSwizzler_opts_DEFINED | 9 #define SkSwizzler_opts_DEFINED |
| 10 | 10 |
| (...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 118 a = src[1]; | 118 a = src[1]; |
| 119 src += 2; | 119 src += 2; |
| 120 g = (g*a+127)/255; | 120 g = (g*a+127)/255; |
| 121 dst[i] = (uint32_t)a << 24 | 121 dst[i] = (uint32_t)a << 24 |
| 122 | (uint32_t)g << 16 | 122 | (uint32_t)g << 16 |
| 123 | (uint32_t)g << 8 | 123 | (uint32_t)g << 8 |
| 124 | (uint32_t)g << 0; | 124 | (uint32_t)g << 0; |
| 125 } | 125 } |
| 126 } | 126 } |
| 127 | 127 |
| 128 static void CMYK_to_RGB1_portable(uint32_t* dst, const void* vsrc, int count) { | |
| 129 const uint32_t* src = (const uint32_t*)vsrc; | |
| 130 for (int i = 0; i < count; i++) { | |
| 131 uint8_t k = src[i] >> 24, | |
| 132 y = src[i] >> 16, | |
| 133 m = src[i] >> 8, | |
| 134 c = src[i] >> 0; | |
| 135 uint8_t b = (y*k+127)/255, | |
|
scroggo
2016/02/08 14:59:43
This pattern appears a lot. Should it be a macro?
mtklein
2016/02/08 15:08:47
Don't think so. When we put it in a macro, people…
| |
| 136 g = (m*k+127)/255, | |
| 137 r = (c*k+127)/255; | |
|
mtklein
2016/02/08 15:14:26
This is really the math? Having never seen it bef…
msarett
2016/02/08 15:23:08
The short answer is that libjpeg-turbo actually ou…
mtklein
2016/02/08 16:47:15
Let's go with a name change (inverted_CMYK_to_...)
msarett
2016/02/08 17:22:42
Done.
| |
| 138 dst[i] = (uint32_t)0xFF << 24 | |
| 139 | (uint32_t) b << 16 | |
| 140 | (uint32_t) g << 8 | |
| 141 | (uint32_t) r << 0; | |
| 142 } | |
| 143 } | |
| 144 | |
| 145 static void CMYK_to_BGR1_portable(uint32_t* dst, const void* vsrc, int count) { | |
| 146 const uint32_t* src = (const uint32_t*)vsrc; | |
| 147 for (int i = 0; i < count; i++) { | |
| 148 uint8_t k = src[i] >> 24, | |
| 149 y = src[i] >> 16, | |
| 150 m = src[i] >> 8, | |
| 151 c = src[i] >> 0; | |
| 152 uint8_t b = (y*k+127)/255, | |
| 153 g = (m*k+127)/255, | |
| 154 r = (c*k+127)/255; | |
| 155 dst[i] = (uint32_t)0xFF << 24 | |
| 156 | (uint32_t) r << 16 | |
| 157 | (uint32_t) g << 8 | |
| 158 | (uint32_t) b << 0; | |
| 159 } | |
| 160 } | |
| 161 | |
| 128 #if defined(SK_ARM_HAS_NEON) | 162 #if defined(SK_ARM_HAS_NEON) |
| 129 | 163 |
| 130 // Rounded divide by 255, (x + 127) / 255 | 164 // Rounded divide by 255, (x + 127) / 255 |
| 131 static uint8x8_t div255_round(uint16x8_t x) { | 165 static uint8x8_t div255_round(uint16x8_t x) { |
| 132 // result = (x + 127) / 255 | 166 // result = (x + 127) / 255 |
| 133 // result = (x + 127) / 256 + error1 | 167 // result = (x + 127) / 256 + error1 |
| 134 // | 168 // |
| 135 // error1 = (x + 127) / (255 * 256) | 169 // error1 = (x + 127) / (255 * 256) |
| 136 // error1 = (x + 127) / (256 * 256) + error2 | 170 // error1 = (x + 127) / (256 * 256) + error2 |
| 137 // | 171 // |
| (...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 394 } | 428 } |
| 395 | 429 |
| 396 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { | 430 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { |
| 397 expand_grayA<false>(dst, src, count); | 431 expand_grayA<false>(dst, src, count); |
| 398 } | 432 } |
| 399 | 433 |
| 400 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { | 434 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { |
| 401 expand_grayA<true>(dst, src, count); | 435 expand_grayA<true>(dst, src, count); |
| 402 } | 436 } |
| 403 | 437 |
| 438 template <bool kSwapRB> | |
| 439 static void cmyk_should_swapRB(uint32_t* dst, const void* vsrc, int count) { | |
| 440 auto src = (const uint32_t*)vsrc; | |
| 441 while (count >= 8) { | |
| 442 // Load 8 cmyk pixels. | |
| 443 uint8x8x4_t pixels = vld4_u8((const uint8_t*) src); | |
| 444 | |
| 445 uint8x8_t k = pixels.val[3], | |
| 446 y = pixels.val[2], | |
| 447 m = pixels.val[1], | |
| 448 c = pixels.val[0]; | |
| 449 | |
| 450 // Scale to r, g, b. | |
| 451 uint8x8_t b = scale(y, k); | |
| 452 uint8x8_t g = scale(m, k); | |
| 453 uint8x8_t r = scale(c, k); | |
| 454 | |
| 455 // Store 8 rgba pixels. | |
| 456 if (kSwapRB) { | |
| 457 pixels.val[3] = vdup_n_u8(0xFF); | |
| 458 pixels.val[2] = r; | |
| 459 pixels.val[1] = g; | |
| 460 pixels.val[0] = b; | |
| 461 } else { | |
| 462 pixels.val[3] = vdup_n_u8(0xFF); | |
| 463 pixels.val[2] = b; | |
| 464 pixels.val[1] = g; | |
| 465 pixels.val[0] = r; | |
| 466 } | |
| 467 vst4_u8((uint8_t*) dst, pixels); | |
| 468 src += 8; | |
| 469 dst += 8; | |
| 470 count -= 8; | |
| 471 } | |
| 472 | |
| 473 auto proc = kSwapRB ? CMYK_to_BGR1_portable : CMYK_to_RGB1_portable; | |
| 474 proc(dst, src, count); | |
| 475 } | |
| 476 | |
| 477 static void CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { | |
| 478 cmyk_should_swapRB<false>(dst, src, count); | |
| 479 } | |
| 480 | |
| 481 static void CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { | |
| 482 cmyk_should_swapRB<true>(dst, src, count); | |
| 483 } | |
| 484 | |
| 404 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | 485 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
| 405 | 486 |
| 406 // Scale a byte by another. | 487 // Scale a byte by another. |
| 407 // Inputs are stored in 16-bit lanes, but are not larger than 8-bits. | 488 // Inputs are stored in 16-bit lanes, but are not larger than 8-bits. |
| 408 static __m128i scale(__m128i x, __m128i y) { | 489 static __m128i scale(__m128i x, __m128i y) { |
| 409 const __m128i _128 = _mm_set1_epi16(128); | 490 const __m128i _128 = _mm_set1_epi16(128); |
| 410 const __m128i _257 = _mm_set1_epi16(257); | 491 const __m128i _257 = _mm_set1_epi16(257); |
| 411 | 492 |
| 412 // (x+127)/255 == ((x+128)*257)>>16 for 0 <= x <= 255*255. | 493 // (x+127)/255 == ((x+128)*257)>>16 for 0 <= x <= 255*255. |
| 413 return _mm_mulhi_epu16(_mm_add_epi16(_mm_mullo_epi16(x, y), _128), _257); | 494 return _mm_mulhi_epu16(_mm_add_epi16(_mm_mullo_epi16(x, y), _128), _257); |
| (...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 624 _mm_storeu_si128((__m128i*) (dst + 4), ggga_hi); | 705 _mm_storeu_si128((__m128i*) (dst + 4), ggga_hi); |
| 625 | 706 |
| 626 src += 8*2; | 707 src += 8*2; |
| 627 dst += 8; | 708 dst += 8; |
| 628 count -= 8; | 709 count -= 8; |
| 629 } | 710 } |
| 630 | 711 |
| 631 grayA_to_rgbA_portable(dst, src, count); | 712 grayA_to_rgbA_portable(dst, src, count); |
| 632 } | 713 } |
| 633 | 714 |
| 715 template <bool kSwapRB> | |
| 716 static void cmyk_should_swapRB(uint32_t* dst, const void* vsrc, int count) { | |
| 717 auto src = (const uint32_t*)vsrc; | |
| 718 | |
| 719 auto convert8 = [](__m128i* lo, __m128i* hi) { | |
| 720 const __m128i zeros = _mm_setzero_si128(); | |
| 721 __m128i planar; | |
| 722 if (kSwapRB) { | |
| 723 planar = _mm_setr_epi8(2,6,10,14, 1,5,9,13, 0,4,8,12, 3,7,11,15); | |
| 724 } else { | |
| 725 planar = _mm_setr_epi8(0,4,8,12, 1,5,9,13, 2,6,10,14, 3,7,11,15); | |
| 726 } | |
| 727 | |
| 728 // Swizzle the pixels to 8-bit planar. | |
| 729 *lo = _mm_shuffle_epi8(*lo, planar); // ccccmmmm yyyykkkk | |
| 730 *hi = _mm_shuffle_epi8(*hi, planar); // CCCCMMMM YYYYKKKK | |
| 731 __m128i cm = _mm_unpacklo_epi32(*lo, *hi), // ccccCCCC mmmmMMMM | |
| 732 yk = _mm_unpackhi_epi32(*lo, *hi); // yyyyYYYY kkkkKKKK | |
| 733 | |
| 734 // Unpack to 16-bit planar. | |
| 735 __m128i c = _mm_unpacklo_epi8(cm, zeros), // c_c_c_c_ C_C_C_C_ | |
| 736 m = _mm_unpackhi_epi8(cm, zeros), // m_m_m_m_ M_M_M_M_ | |
| 737 y = _mm_unpacklo_epi8(yk, zeros), // y_y_y_y_ Y_Y_Y_Y_ | |
| 738 k = _mm_unpackhi_epi8(yk, zeros); // k_k_k_k_ K_K_K_K_ | |
| 739 | |
| 740 // Scale to r, g, b. | |
| 741 __m128i r = scale(c, k), | |
| 742 g = scale(m, k), | |
| 743 b = scale(y, k); | |
| 744 | |
| 745 // Repack into interlaced pixels. | |
| 746 __m128i rg = _mm_or_si128(r, _mm_slli_epi16(g, 8)), // rgrgrgrg RGRGRGRG | |
| 747 ba = _mm_or_si128(b, _mm_set1_epi16(0xFF00)); // b1b1b1b1 B1B1B1B1 | |
| 748 *lo = _mm_unpacklo_epi16(rg, ba); // rgbargba rgbargba | |
| 749 *hi = _mm_unpackhi_epi16(rg, ba); // RGB1RGB1 RGB1RGB1 | |
| 750 }; | |
| 751 | |
| 752 while (count >= 8) { | |
| 753 __m128i lo = _mm_loadu_si128((const __m128i*) (src + 0)), | |
| 754 hi = _mm_loadu_si128((const __m128i*) (src + 4)); | |
| 755 | |
| 756 convert8(&lo, &hi); | |
| 757 | |
| 758 _mm_storeu_si128((__m128i*) (dst + 0), lo); | |
| 759 _mm_storeu_si128((__m128i*) (dst + 4), hi); | |
| 760 | |
| 761 src += 8; | |
| 762 dst += 8; | |
| 763 count -= 8; | |
| 764 } | |
| 765 | |
| 766 if (count >= 4) { | |
| 767 __m128i lo = _mm_loadu_si128((const __m128i*) src), | |
| 768 hi = _mm_setzero_si128(); | |
| 769 | |
| 770 convert8(&lo, &hi); | |
| 771 | |
| 772 _mm_storeu_si128((__m128i*) dst, lo); | |
| 773 | |
| 774 src += 4; | |
| 775 dst += 4; | |
| 776 count -= 4; | |
| 777 } | |
| 778 | |
| 779 auto proc = kSwapRB ? CMYK_to_BGR1_portable : CMYK_to_RGB1_portable; | |
| 780 proc(dst, src, count); | |
| 781 } | |
| 782 | |
| 783 static void CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { | |
| 784 cmyk_should_swapRB<false>(dst, src, count); | |
| 785 } | |
| 786 | |
| 787 static void CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { | |
| 788 cmyk_should_swapRB<true>(dst, src, count); | |
| 789 } | |
| 790 | |
| 634 #else | 791 #else |
| 635 | 792 |
| 636 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { | 793 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { |
| 637 RGBA_to_rgbA_portable(dst, src, count); | 794 RGBA_to_rgbA_portable(dst, src, count); |
| 638 } | 795 } |
| 639 | 796 |
| 640 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { | 797 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { |
| 641 RGBA_to_bgrA_portable(dst, src, count); | 798 RGBA_to_bgrA_portable(dst, src, count); |
| 642 } | 799 } |
| 643 | 800 |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 658 } | 815 } |
| 659 | 816 |
| 660 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { | 817 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { |
| 661 grayA_to_RGBA_portable(dst, src, count); | 818 grayA_to_RGBA_portable(dst, src, count); |
| 662 } | 819 } |
| 663 | 820 |
| 664 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { | 821 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { |
| 665 grayA_to_rgbA_portable(dst, src, count); | 822 grayA_to_rgbA_portable(dst, src, count); |
| 666 } | 823 } |
| 667 | 824 |
| 825 static void CMYK_to_RGB1(uint32_t dst[], const void* src, int count) { | |
| 826 CMYK_to_RGB1_portable(dst, src, count); | |
| 827 } | |
| 828 | |
| 829 static void CMYK_to_BGR1(uint32_t dst[], const void* src, int count) { | |
| 830 CMYK_to_BGR1_portable(dst, src, count); | |
| 831 } | |
| 832 | |
| 668 #endif | 833 #endif |
| 669 | 834 |
| 670 } | 835 } |
| 671 | 836 |
| 672 #endif // SkSwizzler_opts_DEFINED | 837 #endif // SkSwizzler_opts_DEFINED |
| OLD | NEW |