Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2014 The Android Open Source Project | 2 * Copyright 2014 The Android Open Source Project |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkColor_opts_SSE2_DEFINED | 8 #ifndef SkColor_opts_SSE2_DEFINED |
| 9 #define SkColor_opts_SSE2_DEFINED | 9 #define SkColor_opts_SSE2_DEFINED |
| 10 | 10 |
| 11 #include <emmintrin.h> | 11 #include <emmintrin.h> |
| 12 | 12 |
| 13 static inline __m128i SkAlpha255To256_SSE2(const __m128i& alpha) { | |
| 14 return _mm_add_epi32(alpha, _mm_set1_epi32(1)); | |
| 15 } | |
| 16 | |
| 13 // See #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) in SkXfermode.cpp. | 17 // See #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) in SkXfermode.cpp. |
| 14 static inline __m128i SkAlphaMulAlpha_SSE2(const __m128i& a, | 18 static inline __m128i SkAlphaMulAlpha_SSE2(const __m128i& a, |
| 15 const __m128i& b) { | 19 const __m128i& b) { |
| 16 __m128i prod = _mm_mullo_epi16(a, b); | 20 __m128i prod = _mm_mullo_epi16(a, b); |
| 17 prod = _mm_add_epi32(prod, _mm_set1_epi32(128)); | 21 prod = _mm_add_epi32(prod, _mm_set1_epi32(128)); |
| 18 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); | 22 prod = _mm_add_epi32(prod, _mm_srli_epi32(prod, 8)); |
| 19 prod = _mm_srli_epi32(prod, 8); | 23 prod = _mm_srli_epi32(prod, 8); |
| 20 | 24 |
| 21 return prod; | 25 return prod; |
| 22 } | 26 } |
| 23 | 27 |
| 28 // Portable version SkAlphaMulQ is in SkColorPriv.h. | |
| 29 static inline __m128i SkAlphaMulQ_SSE2(const __m128i& c, const __m128i& scale) { | |
|
mtklein
2014/04/11 18:41:54
Is this still the best way to do this with SSE2?
qiankun
2014/04/14 02:33:01
Unfortunately, there is no 8-bit multiply function...
| |
| 30 __m128i mask = _mm_set1_epi32(gMask_00FF00FF); | |
| 31 __m128i s = _mm_or_si128(_mm_slli_epi32(scale, 16), scale); | |
| 32 | |
| 33 // uint32_t rb = ((c & mask) * scale) >> 8 | |
| 34 __m128i rb = _mm_and_si128(mask, c); | |
| 35 rb = _mm_mullo_epi16(rb, s); | |
| 36 rb = _mm_srli_epi16(rb, 8); | |
| 37 | |
| 38 // uint32_t ag = ((c >> 8) & mask) * scale | |
| 39 __m128i ag = _mm_srli_epi16(c, 8); | |
| 40 ag = _mm_and_si128(ag, mask); | |
| 41 ag = _mm_mullo_epi16(ag, s); | |
| 42 | |
| 43 // (rb & mask) | (ag & ~mask) | |
| 44 rb = _mm_and_si128(mask, rb); | |
| 45 ag = _mm_andnot_si128(mask, ag); | |
| 46 return _mm_or_si128(rb, ag); | |
| 47 } | |
| 48 | |
| 24 static inline __m128i SkGetPackedA32_SSE2(const __m128i& src) { | 49 static inline __m128i SkGetPackedA32_SSE2(const __m128i& src) { |
| 25 __m128i a = _mm_slli_epi32(src, (24 - SK_A32_SHIFT)); | 50 __m128i a = _mm_slli_epi32(src, (24 - SK_A32_SHIFT)); |
| 26 return _mm_srli_epi32(a, 24); | 51 return _mm_srli_epi32(a, 24); |
| 27 } | 52 } |
| 28 | 53 |
| 29 static inline __m128i SkGetPackedR32_SSE2(const __m128i& src) { | 54 static inline __m128i SkGetPackedR32_SSE2(const __m128i& src) { |
| 30 __m128i r = _mm_slli_epi32(src, (24 - SK_R32_SHIFT)); | 55 __m128i r = _mm_slli_epi32(src, (24 - SK_R32_SHIFT)); |
| 31 return _mm_srli_epi32(r, 24); | 56 return _mm_srli_epi32(r, 24); |
| 32 } | 57 } |
| 33 | 58 |
| (...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 137 b2 = _mm_and_si128(b2, _mm_set1_epi32(SK_B16_MASK)); | 162 b2 = _mm_and_si128(b2, _mm_set1_epi32(SK_B16_MASK)); |
| 138 __m128i b = _mm_packs_epi32(b1, b2); | 163 __m128i b = _mm_packs_epi32(b1, b2); |
| 139 | 164 |
| 140 // Store 8 16-bit colors in dst. | 165 // Store 8 16-bit colors in dst. |
| 141 __m128i d_pixel = SkPackRGB16_SSE2(r, g, b); | 166 __m128i d_pixel = SkPackRGB16_SSE2(r, g, b); |
| 142 | 167 |
| 143 return d_pixel; | 168 return d_pixel; |
| 144 } | 169 } |
| 145 | 170 |
| 146 #endif // SkColor_opts_SSE2_DEFINED | 171 #endif // SkColor_opts_SSE2_DEFINED |
| OLD | NEW |