src/opts/SkColor_opts_SSE2.h - Issue 232793002: Xfermode: SSE2 implementation of a number of simple transfer modes

Side by Side Diff: src/opts/SkColor_opts_SSE2.h

Issue 232793002: Xfermode: SSE2 implementation of a number of simple transfer modes (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: Created 6 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2014 The Android Open Source Project	2 * Copyright 2014 The Android Open Source Project

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #ifndef SkColor_opts_SSE2_DEFINED	8 #ifndef SkColor_opts_SSE2_DEFINED

9 #define SkColor_opts_SSE2_DEFINED	9 #define SkColor_opts_SSE2_DEFINED

10	10

11 #include <emmintrin.h>	11 #include <emmintrin.h>

12	12

	13 static inline __m128i SkAlpha255To256_SSE2(const __m128i& alpha) {

	14 return _mm_add_epi32(alpha, _mm_set1_epi32(1));

	15 }

	16

13 // See #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) in SkXfermode.cpp.	17 // See #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) in SkXfermode.cpp.

14 static inline __m128i SkAlphaMulAlpha_SSE2(const __m128i& a,	18 static inline __m128i SkAlphaMulAlpha_SSE2(const __m128i& a,

15 const __m128i& b) {	19 const __m128i& b) {

16 __m128i prod = _mm_mullo_epi16(a, b);	20 __m128i prod = _mm_mullo_epi16(a, b);

17 prod = _mm_add_epi32(prod, _mm_set1_epi32(128));	21 prod = _mm_add_epi32(prod, _mm_set1_epi32(128));

18 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8));	22 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8));

19 prod = _mm_srli_epi32(prod, 8);	23 prod = _mm_srli_epi32(prod, 8);

20	24

21 return prod;	25 return prod;

22 }	26 }

23	27

	28 // Portable version SkAlphaMulQ is in SkColorPriv.h.

	29 static inline __m128i SkAlphaMulQ_SSE2(const __m128i& c, const __m128i& scale) {
	mtklein 2014/04/11 18:41:54 Is this still the best way to do this with SSE2? You see we're doing a sort of 4x8-bit SIMD in 32-bit ints, but we have to split them in two because the 8*8 bit multiply results in 16 bits. Any way to perhaps just do this directly in one SSE register instead of splitting into rb and ag? Or I guess that would need a non-existent _mm_mullo_epi8? qiankun* 2014/04/14 02:33:01 On 2014/04/11 18:41:54, mtklein wrote: > Is this still the best way to do this with SSE2? You see we're doing a sort of > 4x8-bit SIMD in 32-bit ints, but we have to split them in two because the 8*8 > bit multiply results in 16 bits. Any way to perhaps just do this directly in one > SSE register instead of splitting into rb and ag? Or I guess that would need a > non-existent _mm_mullo_epi8? Unfortunately, there is no 8-bit multiply function in SSE2 intrinsics. So keep the current SSE2 implementation for now.
	30 __m128i mask = _mm_set1_epi32(gMask_00FF00FF);

	31 __m128i s = _mm_or_si128(_mm_slli_epi32(scale, 16), scale);

	32

	33 // uint32_t rb = ((c & mask) * scale) >> 8

	34 __m128i rb = _mm_and_si128(mask, c);

	35 rb = _mm_mullo_epi16(rb, s);

	36 rb = _mm_srli_epi16(rb, 8);

	37

	38 // uint32_t ag = ((c >> 8) & mask) * scale

	39 __m128i ag = _mm_srli_epi16(c, 8);

	40 ag = _mm_and_si128(ag, mask);

	41 ag = _mm_mullo_epi16(ag, s);

	42

	43 // (rb & mask) | (ag & ~mask)

	44 rb = _mm_and_si128(mask, rb);

	45 ag = _mm_andnot_si128(mask, ag);

	46 return _mm_or_si128(rb, ag);

	47 }

	48

24 static inline __m128i SkGetPackedA32_SSE2(const __m128i& src) {	49 static inline __m128i SkGetPackedA32_SSE2(const __m128i& src) {

25 __m128i a = _mm_slli_epi32(src, (24 - SK_A32_SHIFT));	50 __m128i a = _mm_slli_epi32(src, (24 - SK_A32_SHIFT));

26 return _mm_srli_epi32(a, 24);	51 return _mm_srli_epi32(a, 24);

27 }	52 }

28	53

29 static inline __m128i SkGetPackedR32_SSE2(const __m128i& src) {	54 static inline __m128i SkGetPackedR32_SSE2(const __m128i& src) {

30 __m128i r = _mm_slli_epi32(src, (24 - SK_R32_SHIFT));	55 __m128i r = _mm_slli_epi32(src, (24 - SK_R32_SHIFT));

31 return _mm_srli_epi32(r, 24);	56 return _mm_srli_epi32(r, 24);

32 }	57 }

33	58

(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
137 b2 = _mm_and_si128(b2, _mm_set1_epi32(SK_B16_MASK));	162 b2 = _mm_and_si128(b2, _mm_set1_epi32(SK_B16_MASK));

138 __m128i b = _mm_packs_epi32(b1, b2);	163 __m128i b = _mm_packs_epi32(b1, b2);

139	164

140 // Store 8 16-bit colors in dst.	165 // Store 8 16-bit colors in dst.

141 __m128i d_pixel = SkPackRGB16_SSE2(r, g, b);	166 __m128i d_pixel = SkPackRGB16_SSE2(r, g, b);

142	167

143 return d_pixel;	168 return d_pixel;

144 }	169 }

145	170

146 #endif // SkColor_opts_SSE2_DEFINED	171 #endif // SkColor_opts_SSE2_DEFINED

OLD	NEW

« no previous file with comments | « no previous file | src/opts/SkXfermode_opts_SSE2.cpp » ('j') | src/opts/SkXfermode_opts_SSE2.cpp » ('J')