OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2014 The Android Open Source Project | 2 * Copyright 2014 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkColor_opts_SSE2_DEFINED | 8 #ifndef SkColor_opts_SSE2_DEFINED |
9 #define SkColor_opts_SSE2_DEFINED | 9 #define SkColor_opts_SSE2_DEFINED |
10 | 10 |
11 #include <emmintrin.h> | 11 #include <emmintrin.h> |
12 | 12 |
13 static inline __m128i SkAlpha255To256_SSE2(const __m128i& alpha) { | |
14 return _mm_add_epi32(alpha, _mm_set1_epi32(1)); | |
15 } | |
16 | |
13 // See #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) in SkXfermode.cpp. | 17 // See #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) in SkXfermode.cpp. |
14 static inline __m128i SkAlphaMulAlpha_SSE2(const __m128i& a, | 18 static inline __m128i SkAlphaMulAlpha_SSE2(const __m128i& a, |
15 const __m128i& b) { | 19 const __m128i& b) { |
16 __m128i prod = _mm_mullo_epi16(a, b); | 20 __m128i prod = _mm_mullo_epi16(a, b); |
17 prod = _mm_add_epi32(prod, _mm_set1_epi32(128)); | 21 prod = _mm_add_epi32(prod, _mm_set1_epi32(128)); |
18 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); | 22 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); |
19 prod = _mm_srli_epi32(prod, 8); | 23 prod = _mm_srli_epi32(prod, 8); |
20 | 24 |
21 return prod; | 25 return prod; |
22 } | 26 } |
23 | 27 |
28 // Portable version SkAlphaMulQ is in SkColorPriv.h. | |
29 static inline __m128i SkAlphaMulQ_SSE2(const __m128i& c, const __m128i& scale) { | |
mtklein
2014/04/11 18:41:54
Is this still the best way to do this with SSE2?
qiankun
2014/04/14 02:33:01
Unfortunately, there is no 8-bit multiply function
| |
30 __m128i mask = _mm_set1_epi32(gMask_00FF00FF); | |
31 __m128i s = _mm_or_si128(_mm_slli_epi32(scale, 16), scale); | |
32 | |
33 // uint32_t rb = ((c & mask) * scale) >> 8 | |
34 __m128i rb = _mm_and_si128(mask, c); | |
35 rb = _mm_mullo_epi16(rb, s); | |
36 rb = _mm_srli_epi16(rb, 8); | |
37 | |
38 // uint32_t ag = ((c >> 8) & mask) * scale | |
39 __m128i ag = _mm_srli_epi16(c, 8); | |
40 ag = _mm_and_si128(ag, mask); | |
41 ag = _mm_mullo_epi16(ag, s); | |
42 | |
43 // (rb & mask) | (ag & ~mask) | |
44 rb = _mm_and_si128(mask, rb); | |
45 ag = _mm_andnot_si128(mask, ag); | |
46 return _mm_or_si128(rb, ag); | |
47 } | |
48 | |
24 static inline __m128i SkGetPackedA32_SSE2(const __m128i& src) { | 49 static inline __m128i SkGetPackedA32_SSE2(const __m128i& src) { |
25 __m128i a = _mm_slli_epi32(src, (24 - SK_A32_SHIFT)); | 50 __m128i a = _mm_slli_epi32(src, (24 - SK_A32_SHIFT)); |
26 return _mm_srli_epi32(a, 24); | 51 return _mm_srli_epi32(a, 24); |
27 } | 52 } |
28 | 53 |
29 static inline __m128i SkGetPackedR32_SSE2(const __m128i& src) { | 54 static inline __m128i SkGetPackedR32_SSE2(const __m128i& src) { |
30 __m128i r = _mm_slli_epi32(src, (24 - SK_R32_SHIFT)); | 55 __m128i r = _mm_slli_epi32(src, (24 - SK_R32_SHIFT)); |
31 return _mm_srli_epi32(r, 24); | 56 return _mm_srli_epi32(r, 24); |
32 } | 57 } |
33 | 58 |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
137 b2 = _mm_and_si128(b2, _mm_set1_epi32(SK_B16_MASK)); | 162 b2 = _mm_and_si128(b2, _mm_set1_epi32(SK_B16_MASK)); |
138 __m128i b = _mm_packs_epi32(b1, b2); | 163 __m128i b = _mm_packs_epi32(b1, b2); |
139 | 164 |
140 // Store 8 16-bit colors in dst. | 165 // Store 8 16-bit colors in dst. |
141 __m128i d_pixel = SkPackRGB16_SSE2(r, g, b); | 166 __m128i d_pixel = SkPackRGB16_SSE2(r, g, b); |
142 | 167 |
143 return d_pixel; | 168 return d_pixel; |
144 } | 169 } |
145 | 170 |
146 #endif // SkColor_opts_SSE2_DEFINED | 171 #endif // SkColor_opts_SSE2_DEFINED |
OLD | NEW |