src/opts/SkSwizzler_opts.h - Issue 1676773003: Optimize CMYK->RGBA (BGRA) transform for jpeg decodes

Side by Side Diff: src/opts/SkSwizzler_opts.h

Issue 1676773003: Optimize CMYK->RGBA (BGRA) transform for jpeg decodes (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Rename inverted_CMYK Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2016 Google Inc.	2 * Copyright 2016 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #ifndef SkSwizzler_opts_DEFINED	8 #ifndef SkSwizzler_opts_DEFINED

9 #define SkSwizzler_opts_DEFINED	9 #define SkSwizzler_opts_DEFINED

10	10

(...skipping 107 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
118 a = src[1];	118 a = src[1];

119 src += 2;	119 src += 2;

120 g = (g*a+127)/255;	120 g = (g*a+127)/255;

121 dst[i] = (uint32_t)a << 24	121 dst[i] = (uint32_t)a << 24

122 \| (uint32_t)g << 16	122 \| (uint32_t)g << 16

123 \| (uint32_t)g << 8	123 \| (uint32_t)g << 8

124 \| (uint32_t)g << 0;	124 \| (uint32_t)g << 0;

125 }	125 }

126 }	126 }

127	127

	128 static void inverted_CMYK_to_RGB1_portable(uint32_t* dst, const void* vsrc, int count) {

	129 const uint32_t* src = (const uint32_t*)vsrc;

	130 for (int i = 0; i < count; i++) {

	131 uint8_t k = src[i] >> 24,

	132 y = src[i] >> 16,

	133 m = src[i] >> 8,

	134 c = src[i] >> 0;

	135 // See comments in SkSwizzler.cpp for details on the conversion formula.

	136 uint8_t b = (y*k+127)/255,

	137 g = (m*k+127)/255,

	138 r = (c*k+127)/255;

	139 dst[i] = (uint32_t)0xFF << 24

	140 \| (uint32_t) b << 16

	141 \| (uint32_t) g << 8

	142 \| (uint32_t) r << 0;

	143 }

	144 }

	145

	146 static void inverted_CMYK_to_BGR1_portable(uint32_t* dst, const void* vsrc, int count) {

	147 const uint32_t* src = (const uint32_t*)vsrc;

	148 for (int i = 0; i < count; i++) {

	149 uint8_t k = src[i] >> 24,

	150 y = src[i] >> 16,

	151 m = src[i] >> 8,

	152 c = src[i] >> 0;

	153 uint8_t b = (y*k+127)/255,

	154 g = (m*k+127)/255,

	155 r = (c*k+127)/255;

	156 dst[i] = (uint32_t)0xFF << 24

	157 \| (uint32_t) r << 16

	158 \| (uint32_t) g << 8

	159 \| (uint32_t) b << 0;

	160 }

	161 }

	162

128 #if defined(SK_ARM_HAS_NEON)	163 #if defined(SK_ARM_HAS_NEON)

129	164

130 // Rounded divide by 255, (x + 127) / 255	165 // Rounded divide by 255, (x + 127) / 255

131 static uint8x8_t div255_round(uint16x8_t x) {	166 static uint8x8_t div255_round(uint16x8_t x) {

132 // result = (x + 127) / 255	167 // result = (x + 127) / 255

133 // result = (x + 127) / 256 + error1	168 // result = (x + 127) / 256 + error1

134 //	169 //

135 // error1 = (x + 127) / (255 * 256)	170 // error1 = (x + 127) / (255 * 256)

136 // error1 = (x + 127) / (256 * 256) + error2	171 // error1 = (x + 127) / (256 * 256) + error2

137 //	172 //

(...skipping 256 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
394 }	429 }

395	430

396 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) {	431 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) {

397 expand_grayA<false>(dst, src, count);	432 expand_grayA<false>(dst, src, count);

398 }	433 }

399	434

400 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {	435 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {

401 expand_grayA<true>(dst, src, count);	436 expand_grayA<true>(dst, src, count);

402 }	437 }

403	438

	439 template <bool kSwapRB>
	mtklein 2016/02/08 17:31:26: Let's do some renaming to get "swap" out of these names? It's sort of weird to say swap when there are no starting red and blue values. E.g. enum Format { kRGB1, kBGR1 }; template <Format format> static void inverted_cmyk_to(...) { } (called as inverted_cmyk_to<kRGB1>, inverted_cmyk_to<kBGR1>). msarett 2016/02/08 18:09:47 (in reply to mtklein's comment of 2016/02/08 17:31:26): Done.
	440 static void cmyk_should_swapRB(uint32_t* dst, const void* vsrc, int count) {

	441 auto src = (const uint32_t*)vsrc;

	442 while (count >= 8) {

	443 // Load 8 cmyk pixels.

	444 uint8x8x4_t pixels = vld4_u8((const uint8_t*) src);

	445

	446 uint8x8_t k = pixels.val[3],

	447 y = pixels.val[2],

	448 m = pixels.val[1],

	449 c = pixels.val[0];

	450

	451 // Scale to r, g, b.

	452 uint8x8_t b = scale(y, k);

	453 uint8x8_t g = scale(m, k);

	454 uint8x8_t r = scale(c, k);

	455

	456 // Store 8 rgba pixels.

	457 if (kSwapRB) {

	458 pixels.val[3] = vdup_n_u8(0xFF);

	459 pixels.val[2] = r;

	460 pixels.val[1] = g;

	461 pixels.val[0] = b;

	462 } else {

	463 pixels.val[3] = vdup_n_u8(0xFF);

	464 pixels.val[2] = b;

	465 pixels.val[1] = g;

	466 pixels.val[0] = r;

	467 }

	468 vst4_u8((uint8_t*) dst, pixels);

	469 src += 8;

	470 dst += 8;

	471 count -= 8;

	472 }

	473

	474 auto proc = kSwapRB ? inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable;

	475 proc(dst, src, count);

	476 }

	477

	478 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {

	479 cmyk_should_swapRB<false>(dst, src, count);

	480 }

	481

	482 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {

	483 cmyk_should_swapRB<true>(dst, src, count);

	484 }

	485

404 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3	486 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3

405	487

406 // Scale a byte by another.	488 // Scale a byte by another.

407 // Inputs are stored in 16-bit lanes, but are not larger than 8-bits.	489 // Inputs are stored in 16-bit lanes, but are not larger than 8-bits.

408 static __m128i scale(__m128i x, __m128i y) {	490 static __m128i scale(__m128i x, __m128i y) {

409 const __m128i _128 = _mm_set1_epi16(128);	491 const __m128i _128 = _mm_set1_epi16(128);

410 const __m128i _257 = _mm_set1_epi16(257);	492 const __m128i _257 = _mm_set1_epi16(257);

411	493

412 // (x+127)/255 == ((x+128)*257)>>16 for 0 <= x <= 255*255.	494 // (x+127)/255 == ((x+128)*257)>>16 for 0 <= x <= 255*255.

413 return _mm_mulhi_epu16(_mm_add_epi16(_mm_mullo_epi16(x, y), _128), _257);	495 return _mm_mulhi_epu16(_mm_add_epi16(_mm_mullo_epi16(x, y), _128), _257);

(...skipping 210 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
624 _mm_storeu_si128((__m128i*) (dst + 4), ggga_hi);	706 _mm_storeu_si128((__m128i*) (dst + 4), ggga_hi);

625	707

626 src += 8*2;	708 src += 8*2;

627 dst += 8;	709 dst += 8;

628 count -= 8;	710 count -= 8;

629 }	711 }

630	712

631 grayA_to_rgbA_portable(dst, src, count);	713 grayA_to_rgbA_portable(dst, src, count);

632 }	714 }

633	715

	716 template <bool kSwapRB>

	717 static void cmyk_should_swapRB(uint32_t* dst, const void* vsrc, int count) {

	718 auto src = (const uint32_t*)vsrc;

	719

	720 auto convert8 = [](__m128i* lo, __m128i* hi) {

	721 const __m128i zeros = _mm_setzero_si128();

	722 __m128i planar;

	723 if (kSwapRB) {

	724 planar = _mm_setr_epi8(2,6,10,14, 1,5,9,13, 0,4,8,12, 3,7,11,15);

	725 } else {

	726 planar = _mm_setr_epi8(0,4,8,12, 1,5,9,13, 2,6,10,14, 3,7,11,15);

	727 }

	728

	729 // Swizzle the pixels to 8-bit planar.

	730 *lo = _mm_shuffle_epi8(*lo, planar); // ccccmmmm yyyykkkk

	731 *hi = _mm_shuffle_epi8(*hi, planar); // CCCCMMMM YYYYKKKK

	732 __m128i cm = _mm_unpacklo_epi32(*lo, *hi), // ccccCCCC mmmmMMMM

	733 yk = _mm_unpackhi_epi32(*lo, *hi); // yyyyYYYY kkkkKKKK

	734

	735 // Unpack to 16-bit planar.

	736 __m128i c = _mm_unpacklo_epi8(cm, zeros), // c_c_c_c_ C_C_C_C_

	737 m = _mm_unpackhi_epi8(cm, zeros), // m_m_m_m_ M_M_M_M_

	738 y = _mm_unpacklo_epi8(yk, zeros), // y_y_y_y_ Y_Y_Y_Y_

	739 k = _mm_unpackhi_epi8(yk, zeros); // k_k_k_k_ K_K_K_K_

	740

	741 // Scale to r, g, b.

	742 __m128i r = scale(c, k),

	743 g = scale(m, k),

	744 b = scale(y, k);

	745

	746 // Repack into interlaced pixels.

	747 __m128i rg = _mm_or_si128(r, _mm_slli_epi16(g, 8)), // rgrgrgrg RGRGRGRG

	748 ba = _mm_or_si128(b, _mm_set1_epi16(0xFF00)); // b1b1b1b1 B1B1B1B1

	749 *lo = _mm_unpacklo_epi16(rg, ba); // rgbargba rgbargba

	750 *hi = _mm_unpackhi_epi16(rg, ba); // RGB1RGB1 RGB1RGB1

	751 };

	752

	753 while (count >= 8) {

	754 __m128i lo = _mm_loadu_si128((const __m128i*) (src + 0)),

	755 hi = _mm_loadu_si128((const __m128i*) (src + 4));

	756

	757 convert8(&lo, &hi);

	758

	759 _mm_storeu_si128((__m128i*) (dst + 0), lo);

	760 _mm_storeu_si128((__m128i*) (dst + 4), hi);

	761

	762 src += 8;

	763 dst += 8;

	764 count -= 8;

	765 }

	766

	767 if (count >= 4) {

	768 __m128i lo = _mm_loadu_si128((const __m128i*) src),

	769 hi = _mm_setzero_si128();

	770

	771 convert8(&lo, &hi);

	772

	773 _mm_storeu_si128((__m128i*) dst, lo);

	774

	775 src += 4;

	776 dst += 4;

	777 count -= 4;

	778 }

	779

	780 auto proc = kSwapRB ? inverted_CMYK_to_BGR1_portable : inverted_CMYK_to_RGB1_portable;

	781 proc(dst, src, count);

	782 }

	783

	784 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {

	785 cmyk_should_swapRB<false>(dst, src, count);

	786 }

	787

	788 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {

	789 cmyk_should_swapRB<true>(dst, src, count);

	790 }

	791

634 #else	792 #else

635	793

636 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) {	794 static void RGBA_to_rgbA(uint32_t* dst, const void* src, int count) {

637 RGBA_to_rgbA_portable(dst, src, count);	795 RGBA_to_rgbA_portable(dst, src, count);

638 }	796 }

639	797

640 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) {	798 static void RGBA_to_bgrA(uint32_t* dst, const void* src, int count) {

641 RGBA_to_bgrA_portable(dst, src, count);	799 RGBA_to_bgrA_portable(dst, src, count);

642 }	800 }

643	801

(...skipping 14 matching lines...) Expand all Loading...
658 }	816 }

659	817

660 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) {	818 static void grayA_to_RGBA(uint32_t dst[], const void* src, int count) {

661 grayA_to_RGBA_portable(dst, src, count);	819 grayA_to_RGBA_portable(dst, src, count);

662 }	820 }

663	821

664 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {	822 static void grayA_to_rgbA(uint32_t dst[], const void* src, int count) {

665 grayA_to_rgbA_portable(dst, src, count);	823 grayA_to_rgbA_portable(dst, src, count);

666 }	824 }

667	825

	826 static void inverted_CMYK_to_RGB1(uint32_t dst[], const void* src, int count) {

	827 inverted_CMYK_to_RGB1_portable(dst, src, count);

	828 }

	829

	830 static void inverted_CMYK_to_BGR1(uint32_t dst[], const void* src, int count) {

	831 inverted_CMYK_to_BGR1_portable(dst, src, count);

	832 }

	833

668 #endif	834 #endif

669	835

670 }	836 }

671	837

672 #endif // SkSwizzler_opts_DEFINED	838 #endif // SkSwizzler_opts_DEFINED

OLD	NEW

« no previous file with comments | « src/opts/SkOpts_ssse3.cpp ('k') | no next file » | no next file with comments »