OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 namespace { // See Sk4px.h | 8 namespace { // See Sk4px.h |
9 | 9 |
10 inline Sk4px Sk4px::DupPMColor(SkPMColor px) { return Sk16b(_mm_set1_epi32(px)); } | 10 inline Sk4px Sk4px::DupPMColor(SkPMColor px) { return Sk16b(_mm_set1_epi32(px)); } |
(...skipping 45 matching lines...) | (...skipping 45 matching lines...) |
56 } | 56 } |
57 | 57 |
58 inline Sk4px Sk4px::Load4Alphas(const SkAlpha a[4]) { | 58 inline Sk4px Sk4px::Load4Alphas(const SkAlpha a[4]) { |
59 uint32_t as = *(const uint32_t*)a; | 59 uint32_t as = *(const uint32_t*)a; |
60 __m128i splat = _mm_set_epi8(3,3,3,3, 2,2,2,2, 1,1,1,1, 0,0,0,0); | 60 __m128i splat = _mm_set_epi8(3,3,3,3, 2,2,2,2, 1,1,1,1, 0,0,0,0); |
61 return Sk16b(_mm_shuffle_epi8(_mm_cvtsi32_si128(as), splat)); | 61 return Sk16b(_mm_shuffle_epi8(_mm_cvtsi32_si128(as), splat)); |
62 } | 62 } |
63 #else | 63 #else |
64 inline Sk4px Sk4px::alphas() const { | 64 inline Sk4px Sk4px::alphas() const { |
65 static_assert(SK_A32_SHIFT == 24, "Intel's always little-endian."); | 65 static_assert(SK_A32_SHIFT == 24, "Intel's always little-endian."); |
66 __m128i as = _mm_srli_epi32(this->fVec, 24); // ___3 ___2 ___1 ___0 | 66 // We exploit that A >= rgb for any premul pixel. |
Review comment by msarett (2015/11/17 19:59:57): Clever :) | |
67 as = _mm_or_si128(as, _mm_slli_si128(as, 1)); // __33 __22 __11 __00 | 67 __m128i as = fVec; // 3xxx 2xxx 1xxx 0xxx |
68 as = _mm_or_si128(as, _mm_slli_si128(as, 2)); // 3333 2222 1111 0000 | 68 as = _mm_max_epu8(as, _mm_srli_epi32(as, 8)); // 33xx 22xx 11xx 00xx |
69 as = _mm_max_epu8(as, _mm_srli_epi32(as, 16)); // 3333 2222 1111 0000 | |
69 return Sk16b(as); | 70 return Sk16b(as); |
70 } | 71 } |
71 | 72 |
72 inline Sk4px Sk4px::Load4Alphas(const SkAlpha a[4]) { | 73 inline Sk4px Sk4px::Load4Alphas(const SkAlpha a[4]) { |
73 __m128i as = _mm_cvtsi32_si128(*(const uint32_t*)a); // ____ ____ ____ 3210 | 74 __m128i as = _mm_cvtsi32_si128(*(const uint32_t*)a); // ____ ____ ____ 3210 |
74 as = _mm_unpacklo_epi8 (as, _mm_setzero_si128()); // ____ ____ _3_2 _1_0 | 75 as = _mm_unpacklo_epi8 (as, as); // ____ ____ 3322 1100 |
75 as = _mm_unpacklo_epi16(as, _mm_setzero_si128()); // ___3 ___2 ___1 ___0 | 76 as = _mm_unpacklo_epi16(as, as); // 3333 2222 1111 0000 |
76 as = _mm_or_si128(as, _mm_slli_si128(as, 1)); // __33 __22 __11 __00 | |
77 as = _mm_or_si128(as, _mm_slli_si128(as, 2)); // 3333 2222 1111 0000 | |
78 return Sk16b(as); | 77 return Sk16b(as); |
79 } | 78 } |
80 #endif | 79 #endif |
81 | 80 |
82 inline Sk4px Sk4px::Load2Alphas(const SkAlpha a[2]) { | 81 inline Sk4px Sk4px::Load2Alphas(const SkAlpha a[2]) { |
83 uint32_t as = *(const uint16_t*)a; // Aa -> Aa00 | 82 uint32_t as = *(const uint16_t*)a; // Aa -> Aa00 |
84 return Load4Alphas((const SkAlpha*)&as); | 83 return Load4Alphas((const SkAlpha*)&as); |
85 } | 84 } |
86 | 85 |
87 inline Sk4px Sk4px::zeroColors() const { | 86 inline Sk4px Sk4px::zeroColors() const { |
(...skipping 74 matching lines...) | (...skipping 74 matching lines...) |
162 uint32_t dst2 = _mm_cvtsi128_si32(narrow_to_565(this->fVec)); | 161 uint32_t dst2 = _mm_cvtsi128_si32(narrow_to_565(this->fVec)); |
163 dst[0] = dst2; | 162 dst[0] = dst2; |
164 dst[1] = dst2 >> 16; | 163 dst[1] = dst2 >> 16; |
165 } | 164 } |
166 inline void Sk4px::store1(SkPMColor16 dst[1]) const { | 165 inline void Sk4px::store1(SkPMColor16 dst[1]) const { |
167 uint32_t dst2 = _mm_cvtsi128_si32(narrow_to_565(this->fVec)); | 166 uint32_t dst2 = _mm_cvtsi128_si32(narrow_to_565(this->fVec)); |
168 dst[0] = dst2; | 167 dst[0] = dst2; |
169 } | 168 } |
170 | 169 |
171 } // namespace | 170 } // namespace |
OLD | NEW |