Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(43)

Side by Side Diff: src/opts/SkSwizzler_opts.h

Issue 1676773003: Optimize CMYK->RGBA (BGRA) transform for jpeg decodes (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Rename inverted_CMYK Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkOpts_ssse3.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkSwizzler_opts_DEFINED 8 #ifndef SkSwizzler_opts_DEFINED
9 #define SkSwizzler_opts_DEFINED 9 #define SkSwizzler_opts_DEFINED
10 10
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after
118 a = src[1]; 118 a = src[1];
119 src += 2; 119 src += 2;
120 g = (g*a+127)/255; 120 g = (g*a+127)/255;
121 dst[i] = (uint32_t)a << 24 121 dst[i] = (uint32_t)a << 24
122 | (uint32_t)g << 16 122 | (uint32_t)g << 16
123 | (uint32_t)g << 8 123 | (uint32_t)g << 8
124 | (uint32_t)g << 0; 124 | (uint32_t)g << 0;
125 } 125 }
126 } 126 }
127 127
128 static void inverted_CMYK_to_RGB1_portable(uint32_t* dst, const void* vsrc, int count) {
129 const uint32_t* src = (const uint32_t*)vsrc;
130 for (int i = 0; i < count; i++) {
131 uint8_t k = src[i] >> 24,
132 y = src[i] >> 16,
133 m = src[i] >> 8,
134 c = src[i] >> 0;
135 // See comments in SkSwizzler.cpp for details on the conversion formula.
136 uint8_t b = (y*k+127)/255,
137 g = (m*k+127)/255,
138 r = (c*k+127)/255;
139 dst[i] = (uint32_t)0xFF << 24
140 | (uint32_t) b << 16
141 | (uint32_t) g << 8
142 | (uint32_t) r << 0;
143 }
144 }
145
146 static void inverted_CMYK_to_BGR1_portable(uint32_t* dst, const void* vsrc, int count) {
147 const uint32_t* src = (const uint32_t*)vsrc;
148 for (int i = 0; i < count; i++) {
149 uint8_t k = src[i] >> 24,
150 y = src[i] >> 16,
151 m = src[i] >> 8,
152 c = src[i] >> 0;
153 uint8_t b = (y*k+127)/255,
154 g = (m*k+127)/255,
155 r = (c*k+127)/255;
156 dst[i] = (uint32_t)0xFF << 24
157 | (uint32_t) r << 16
158 | (uint32_t) g << 8
159 | (uint32_t) b << 0;
160 }
161 }
162
128 #if defined(SK_ARM_HAS_NEON) 163 #if defined(SK_ARM_HAS_NEON)
129 164
130 // Rounded divide by 255, (x + 127) / 255 165 // Rounded divide by 255, (x + 127) / 255
131 static uint8x8_t div255_round(uint16x8_t x) { 166 static uint8x8_t div255_round(uint16x8_t x) {
132 // result = (x + 127) / 255 167 // result = (x + 127) / 255
133 // result = (x + 127) / 256 + error1 168 // result = (x + 127) / 256 + error1
134 // 169 //
135 // error1 = (x + 127) / (255 * 256) 170 // error1 = (x + 127) / (255 * 256)
136 // error1 = (x + 127) / (256 * 256) + error2 171 // error1 = (x + 127) / (256 * 256) + error2
137 // 172 //
(...skipping 256 matching lines...) Expand 10 before | Expand all | Expand 10 after
394 } 429 }
395 430
396 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { 431 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) {
397 expand_grayA<false>(dst, src, count); 432 expand_grayA<false>(dst, src, count);
398 } 433 }
399 434
400 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { 435 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {
401 expand_grayA<true>(dst, src, count); 436 expand_grayA<true>(dst, src, count);
402 } 437 }
403 438
439 template <bool kSwapRB>
mtklein 2016/02/08 17:31:26 Let's do some renaming to get "swap" out of these
msarett 2016/02/08 18:09:47 Done.
440 static void cmyk_should_swapRB(uint32_t* dst, const void* vsrc, int count) {
441 auto src = (const uint32_t*)vsrc;
442 while (count >= 8) {
443 // Load 8 cmyk pixels.
444 uint8x8x4_t pixels = vld4_u8((const uint8_t*) src);
445
446 uint8x8_t k = pixels.val[3],
447 y = pixels.val[2],
448 m = pixels.val[1],
449 c = pixels.val[0];
450
451 // Scale to r, g, b.
452 uint8x8_t b = scale(y, k);
453 uint8x8_t g = scale(m, k);
454 uint8x8_t r = scale(c, k);
455
456 // Store 8 rgba pixels.
457 if (kSwapRB) {
458 pixels.val[3] = vdup_n_u8(0xFF);
459 pixels.val[2] = r;
460 pixels.val[1] = g;
461 pixels.val[0] = b;
462 } else {
463 pixels.val[3] = vdup_n_u8(0xFF);
464 pixels.val[2] = b;
465 pixels.val[1] = g;
466 pixels.val[0] = r;
467 }
468 vst4_u8((uint8_t*) dst, pixels);
469 src += 8;
470 dst += 8;
471 count -= 8;
472 }
473
474 auto proc = kSwapRB ? inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable;
475 proc(dst, src, count);
476 }
477
478 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {
479 cmyk_should_swapRB<false>(dst, src, count);
480 }
481
482 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {
483 cmyk_should_swapRB<true>(dst, src, count);
484 }
485
404 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 486 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
405 487
406 // Scale a byte by another. 488 // Scale a byte by another.
407 // Inputs are stored in 16-bit lanes, but are not larger than 8-bits. 489 // Inputs are stored in 16-bit lanes, but are not larger than 8-bits.
408 static __m128i scale(__m128i x, __m128i y) { 490 static __m128i scale(__m128i x, __m128i y) {
409 const __m128i _128 = _mm_set1_epi16(128); 491 const __m128i _128 = _mm_set1_epi16(128);
410 const __m128i _257 = _mm_set1_epi16(257); 492 const __m128i _257 = _mm_set1_epi16(257);
411 493
412 // (x+127)/255 == ((x+128)*257)>>16 for 0 <= x <= 255*255. 494 // (x+127)/255 == ((x+128)*257)>>16 for 0 <= x <= 255*255.
413 return _mm_mulhi_epu16(_mm_add_epi16(_mm_mullo_epi16(x, y), _128), _257); 495 return _mm_mulhi_epu16(_mm_add_epi16(_mm_mullo_epi16(x, y), _128), _257);
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after
624 _mm_storeu_si128((__m128i*) (dst + 4), ggga_hi); 706 _mm_storeu_si128((__m128i*) (dst + 4), ggga_hi);
625 707
626 src += 8*2; 708 src += 8*2;
627 dst += 8; 709 dst += 8;
628 count -= 8; 710 count -= 8;
629 } 711 }
630 712
631 grayA_to_rgbA_portable(dst, src, count); 713 grayA_to_rgbA_portable(dst, src, count);
632 } 714 }
633 715
716 template <bool kSwapRB>
717 static void cmyk_should_swapRB(uint32_t* dst, const void* vsrc, int count) {
718 auto src = (const uint32_t*)vsrc;
719
720 auto convert8 = [](__m128i* lo, __m128i* hi) {
721 const __m128i zeros = _mm_setzero_si128();
722 __m128i planar;
723 if (kSwapRB) {
724 planar = _mm_setr_epi8(2,6,10,14, 1,5,9,13, 0,4,8,12, 3,7,11,15);
725 } else {
726 planar = _mm_setr_epi8(0,4,8,12, 1,5,9,13, 2,6,10,14, 3,7,11,15);
727 }
728
729 // Swizzle the pixels to 8-bit planar.
730 *lo = _mm_shuffle_epi8(*lo, planar); // ccccmmmm yyyykkkk
731 *hi = _mm_shuffle_epi8(*hi, planar); // CCCCMMMM YYYYKKKK
732 __m128i cm = _mm_unpacklo_epi32(*lo, *hi), // ccccCCCC mmmmMMMM
733 yk = _mm_unpackhi_epi32(*lo, *hi); // yyyyYYYY kkkkKKKK
734
735 // Unpack to 16-bit planar.
736 __m128i c = _mm_unpacklo_epi8(cm, zeros), // c_c_c_c_ C_C_C_C_
737 m = _mm_unpackhi_epi8(cm, zeros), // m_m_m_m_ M_M_M_M_
738 y = _mm_unpacklo_epi8(yk, zeros), // y_y_y_y_ Y_Y_Y_Y_
739 k = _mm_unpackhi_epi8(yk, zeros); // k_k_k_k_ K_K_K_K_
740
741 // Scale to r, g, b.
742 __m128i r = scale(c, k),
743 g = scale(m, k),
744 b = scale(y, k);
745
746 // Repack into interlaced pixels.
747 __m128i rg = _mm_or_si128(r, _mm_slli_epi16(g, 8)), // rgrgrgrg RGRGRGRG
748 ba = _mm_or_si128(b, _mm_set1_epi16(0xFF00)); // b1b1b1b1 B1B1B1B1
749 *lo = _mm_unpacklo_epi16(rg, ba); // rgbargba rgbargba
750 *hi = _mm_unpackhi_epi16(rg, ba); // RGB1RGB1 RGB1RGB1
751 };
752
753 while (count >= 8) {
754 __m128i lo = _mm_loadu_si128((const __m128i*) (src + 0)),
755 hi = _mm_loadu_si128((const __m128i*) (src + 4));
756
757 convert8(&lo, &hi);
758
759 _mm_storeu_si128((__m128i*) (dst + 0), lo);
760 _mm_storeu_si128((__m128i*) (dst + 4), hi);
761
762 src += 8;
763 dst += 8;
764 count -= 8;
765 }
766
767 if (count >= 4) {
768 __m128i lo = _mm_loadu_si128((const __m128i*) src),
769 hi = _mm_setzero_si128();
770
771 convert8(&lo, &hi);
772
773 _mm_storeu_si128((__m128i*) dst, lo);
774
775 src += 4;
776 dst += 4;
777 count -= 4;
778 }
779
780 auto proc = kSwapRB ? inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable;
781 proc(dst, src, count);
782 }
783
784 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {
785 cmyk_should_swapRB<false>(dst, src, count);
786 }
787
788 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {
789 cmyk_should_swapRB<true>(dst, src, count);
790 }
791
634 #else 792 #else
635 793
636 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) { 794 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) {
637 RGBA_to_rgbA_portable(dst, src, count); 795 RGBA_to_rgbA_portable(dst, src, count);
638 } 796 }
639 797
640 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) { 798 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) {
641 RGBA_to_bgrA_portable(dst, src, count); 799 RGBA_to_bgrA_portable(dst, src, count);
642 } 800 }
643 801
(...skipping 14 matching lines...) Expand all
658 } 816 }
659 817
660 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) { 818 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) {
661 grayA_to_RGBA_portable(dst, src, count); 819 grayA_to_RGBA_portable(dst, src, count);
662 } 820 }
663 821
664 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) { 822 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {
665 grayA_to_rgbA_portable(dst, src, count); 823 grayA_to_rgbA_portable(dst, src, count);
666 } 824 }
667 825
826 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {
827 inverted_CMYK_to_RGB1_portable(dst, src, count);
828 }
829
830 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {
831 inverted_CMYK_to_BGR1_portable(dst, src, count);
832 }
833
668 #endif 834 #endif
669 835
670 } 836 }
671 837
672 #endif // SkSwizzler_opts_DEFINED 838 #endif // SkSwizzler_opts_DEFINED
OLDNEW
« no previous file with comments | « src/opts/SkOpts_ssse3.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698