Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(103)

Side by Side Diff: src/opts/SkSwizzler_opts.h

Issue 1676773003: Optimize CMYK->RGBA (BGRA) transform for jpeg decodes (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Windows Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkOpts_ssse3.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkSwizzler_opts_DEFINED 8 #ifndef SkSwizzler_opts_DEFINED
9 #define SkSwizzler_opts_DEFINED 9 #define SkSwizzler_opts_DEFINED
10 10
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after
118 a = src[1]; 118 a = src[1];
119 src += 2; 119 src += 2;
120 g = (g*a+127)/255; 120 g = (g*a+127)/255;
121 dst[i] = (uint32_t)a << 24 121 dst[i] = (uint32_t)a << 24
122 | (uint32_t)g << 16 122 | (uint32_t)g << 16
123 | (uint32_t)g << 8 123 | (uint32_t)g << 8
124 | (uint32_t)g << 0; 124 | (uint32_t)g << 0;
125 } 125 }
126 } 126 }
127 127
128 static void inverted_CMYK_to_RGB1_portable(uint32_t* dst, const void* vsrc, int count) {
129 const uint32_t* src = (const uint32_t*)vsrc;
130 for (int i = 0; i < count; i++) {
131 uint8_t k = src[i] >> 24,
132 y = src[i] >> 16,
133 m = src[i] >> 8,
134 c = src[i] >> 0;
135 // See comments in SkSwizzler.cpp for details on the conversion formula.
136 uint8_t b = (y*k+127)/255,
137 g = (m*k+127)/255,
138 r = (c*k+127)/255;
139 dst[i] = (uint32_t)0xFF << 24
140 | (uint32_t) b << 16
141 | (uint32_t) g << 8
142 | (uint32_t) r << 0;
143 }
144 }
145
146 static void inverted_CMYK_to_BGR1_portable(uint32_t* dst, const void* vsrc, int count) {
147 const uint32_t* src = (const uint32_t*)vsrc;
148 for (int i = 0; i < count; i++) {
149 uint8_t k = src[i] >> 24,
150 y = src[i] >> 16,
151 m = src[i] >> 8,
152 c = src[i] >> 0;
153 uint8_t b = (y*k+127)/255,
154 g = (m*k+127)/255,
155 r = (c*k+127)/255;
156 dst[i] = (uint32_t)0xFF << 24
157 | (uint32_t) r << 16
158 | (uint32_t) g << 8
159 | (uint32_t) b << 0;
160 }
161 }
162
128 #if defined(SK_ARM_HAS_NEON) 163 #if defined(SK_ARM_HAS_NEON)
129 164
130 // Rounded divide by 255, (x + 127) / 255 165 // Rounded divide by 255, (x + 127) / 255
131 static uint8x8_t div255_round(uint16x8_t x) { 166 static uint8x8_t div255_round(uint16x8_t x) {
132 // result = (x + 127) / 255 167 // result = (x + 127) / 255
133 // result = (x + 127) / 256 + error1 168 // result = (x + 127) / 256 + error1
134 // 169 //
135 // error1 = (x + 127) / (255 * 256) 170 // error1 = (x + 127) / (255 * 256)
136 // error1 = (x + 127) / (256 * 256) + error2 171 // error1 = (x + 127) / (256 * 256) + error2
137 // 172 //
(...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after
394 } 429 }
395 430
396 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { 431 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) {
397 expand_grayA<false>(dst, src, count); 432 expand_grayA<false>(dst, src, count);
398 } 433 }
399 434
400 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { 435 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {
401 expand_grayA<true>(dst, src, count); 436 expand_grayA<true>(dst, src, count);
402 } 437 }
403 438
439 enum Format { kRGB1, kBGR1 };
440 template <Format format>
441 static void inverted_cmyk_to(uint32_t* dst, const void* vsrc, int count) {
442 auto src = (const uint32_t*)vsrc;
443 while (count >= 8) {
444 // Load 8 cmyk pixels.
445 uint8x8x4_t pixels = vld4_u8((const uint8_t*) src);
446
447 uint8x8_t k = pixels.val[3],
448 y = pixels.val[2],
449 m = pixels.val[1],
450 c = pixels.val[0];
451
452 // Scale to r, g, b.
453 uint8x8_t b = scale(y, k);
454 uint8x8_t g = scale(m, k);
455 uint8x8_t r = scale(c, k);
456
457 // Store 8 rgba pixels.
458 if (kBGR1 == format) {
459 pixels.val[3] = vdup_n_u8(0xFF);
460 pixels.val[2] = r;
461 pixels.val[1] = g;
462 pixels.val[0] = b;
463 } else {
464 pixels.val[3] = vdup_n_u8(0xFF);
465 pixels.val[2] = b;
466 pixels.val[1] = g;
467 pixels.val[0] = r;
468 }
469 vst4_u8((uint8_t*) dst, pixels);
470 src += 8;
471 dst += 8;
472 count -= 8;
473 }
474
475 auto proc = (kBGR1 == format) ? inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable;
476 proc(dst, src, count);
477 }
478
479 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {
480 inverted_cmyk_to<kRGB1>(dst, src, count);
481 }
482
483 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {
484 inverted_cmyk_to<kBGR1>(dst, src, count);
485 }
486
404 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 487 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
405 488
406 // Scale a byte by another. 489 // Scale a byte by another.
407 // Inputs are stored in 16-bit lanes, but are not larger than 8-bits. 490 // Inputs are stored in 16-bit lanes, but are not larger than 8-bits.
408 static __m128i scale(__m128i x, __m128i y) { 491 static __m128i scale(__m128i x, __m128i y) {
409 const __m128i _128 = _mm_set1_epi16(128); 492 const __m128i _128 = _mm_set1_epi16(128);
410 const __m128i _257 = _mm_set1_epi16(257); 493 const __m128i _257 = _mm_set1_epi16(257);
411 494
412 // (x+127)/255 == ((x+128)*257)>>16 for 0 <= x <= 255*255. 495 // (x+127)/255 == ((x+128)*257)>>16 for 0 <= x <= 255*255.
413 return _mm_mulhi_epu16(_mm_add_epi16(_mm_mullo_epi16(x, y), _128), _257); 496 return _mm_mulhi_epu16(_mm_add_epi16(_mm_mullo_epi16(x, y), _128), _257);
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after
624 _mm_storeu_si128((__m128i*) (dst + 4), ggga_hi); 707 _mm_storeu_si128((__m128i*) (dst + 4), ggga_hi);
625 708
626 src += 8*2; 709 src += 8*2;
627 dst += 8; 710 dst += 8;
628 count -= 8; 711 count -= 8;
629 } 712 }
630 713
631 grayA_to_rgbA_portable(dst, src, count); 714 grayA_to_rgbA_portable(dst, src, count);
632 } 715 }
633 716
717 enum Format { kRGB1, kBGR1 };
718 template <Format format>
719 static void inverted_cmyk_to(uint32_t* dst, const void* vsrc, int count) {
720 auto src = (const uint32_t*)vsrc;
721
722 auto convert8 = [](__m128i* lo, __m128i* hi) {
723 const __m128i zeros = _mm_setzero_si128();
724 __m128i planar;
725 if (kBGR1 == format) {
726 planar = _mm_setr_epi8(2,6,10,14, 1,5,9,13, 0,4,8,12, 3,7,11,15);
727 } else {
728 planar = _mm_setr_epi8(0,4,8,12, 1,5,9,13, 2,6,10,14, 3,7,11,15);
729 }
730
731 // Swizzle the pixels to 8-bit planar.
732 *lo = _mm_shuffle_epi8(*lo, planar); // ccccmmmm yyyykkkk
733 *hi = _mm_shuffle_epi8(*hi, planar); // CCCCMMMM YYYYKKKK
734 __m128i cm = _mm_unpacklo_epi32(*lo, *hi), // ccccCCCC mmmmMMMM
735 yk = _mm_unpackhi_epi32(*lo, *hi); // yyyyYYYY kkkkKKKK
736
737 // Unpack to 16-bit planar.
738 __m128i c = _mm_unpacklo_epi8(cm, zeros), // c_c_c_c_ C_C_C_C_
739 m = _mm_unpackhi_epi8(cm, zeros), // m_m_m_m_ M_M_M_M_
740 y = _mm_unpacklo_epi8(yk, zeros), // y_y_y_y_ Y_Y_Y_Y_
741 k = _mm_unpackhi_epi8(yk, zeros); // k_k_k_k_ K_K_K_K_
742
743 // Scale to r, g, b.
744 __m128i r = scale(c, k),
745 g = scale(m, k),
746 b = scale(y, k);
747
748 // Repack into interlaced pixels.
749 __m128i rg = _mm_or_si128(r, _mm_slli_epi16(g, 8)), // rgrgrgrg RGRGRGRG
750 ba = _mm_or_si128(b, _mm_set1_epi16((uint16_t) 0xFF00)); // b1b1b1b1 B1B1B1B1
751 *lo = _mm_unpacklo_epi16(rg, ba); // rgb1rgb1 rgb1rgb1
752 *hi = _mm_unpackhi_epi16(rg, ba); // RGB1RGB1 RGB1RGB1
753 };
754
755 while (count >= 8) {
756 __m128i lo = _mm_loadu_si128((const __m128i*) (src + 0)),
757 hi = _mm_loadu_si128((const __m128i*) (src + 4));
758
759 convert8(&lo, &hi);
760
761 _mm_storeu_si128((__m128i*) (dst + 0), lo);
762 _mm_storeu_si128((__m128i*) (dst + 4), hi);
763
764 src += 8;
765 dst += 8;
766 count -= 8;
767 }
768
769 if (count >= 4) {
770 __m128i lo = _mm_loadu_si128((const __m128i*) src),
771 hi = _mm_setzero_si128();
772
773 convert8(&lo, &hi);
774
775 _mm_storeu_si128((__m128i*) dst, lo);
776
777 src += 4;
778 dst += 4;
779 count -= 4;
780 }
781
782 auto proc = (kBGR1 == format) ? inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable;
783 proc(dst, src, count);
784 }
785
786 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {
787 inverted_cmyk_to<kRGB1>(dst, src, count);
788 }
789
790 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {
791 inverted_cmyk_to<kBGR1>(dst, src, count);
792 }
793
634 #else 794 #else
635 795
636 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { 796 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) {
637 RGBA_to_rgbA_portable(dst, src, count); 797 RGBA_to_rgbA_portable(dst, src, count);
638 } 798 }
639 799
640 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { 800 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) {
641 RGBA_to_bgrA_portable(dst, src, count); 801 RGBA_to_bgrA_portable(dst, src, count);
642 } 802 }
643 803
(...skipping 14 matching lines...) Expand all
658 } 818 }
659 819
660 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { 820 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) {
661 grayA_to_RGBA_portable(dst, src, count); 821 grayA_to_RGBA_portable(dst, src, count);
662 } 822 }
663 823
664 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { 824 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {
665 grayA_to_rgbA_portable(dst, src, count); 825 grayA_to_rgbA_portable(dst, src, count);
666 } 826 }
667 827
828 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {
829 inverted_CMYK_to_RGB1_portable(dst, src, count);
830 }
831
832 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {
833 inverted_CMYK_to_BGR1_portable(dst, src, count);
834 }
835
668 #endif 836 #endif
669 837
670 } 838 }
671 839
672 #endif // SkSwizzler_opts_DEFINED 840 #endif // SkSwizzler_opts_DEFINED
OLDNEW
« no previous file with comments | « src/opts/SkOpts_ssse3.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698