OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkPx_sse_DEFINED | 8 #ifndef SkPx_sse_DEFINED |
9 #define SkPx_sse_DEFINED | 9 #define SkPx_sse_DEFINED |
10 | 10 |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
54 struct Alpha { | 54 struct Alpha { |
55 __m128i fVec; | 55 __m128i fVec; |
56 Alpha(__m128i vec) : fVec(vec) {} | 56 Alpha(__m128i vec) : fVec(vec) {} |
57 | 57 |
58 static Alpha Dup(uint8_t a) { return _mm_set1_epi8(a); } | 58 static Alpha Dup(uint8_t a) { return _mm_set1_epi8(a); } |
59 static Alpha Load(const uint8_t* a) { | 59 static Alpha Load(const uint8_t* a) { |
60 __m128i as = _mm_cvtsi32_si128(*(const uint32_t*)a); // ____ ____
____ 3210 | 60 __m128i as = _mm_cvtsi32_si128(*(const uint32_t*)a); // ____ ____
____ 3210 |
61 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | 61 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
62 return _mm_shuffle_epi8(as, _mm_set_epi8(3,3,3,3, 2,2,2,2, 1,1,1,1,
0,0,0,0)); | 62 return _mm_shuffle_epi8(as, _mm_set_epi8(3,3,3,3, 2,2,2,2, 1,1,1,1,
0,0,0,0)); |
63 #else | 63 #else |
64 as = _mm_unpacklo_epi8 (as, _mm_setzero_si128()); // ____ ____
_3_2 _1_0 | 64 as = _mm_unpacklo_epi8 (as, as); // ____ ____
3322 1100 |
65 as = _mm_unpacklo_epi16(as, _mm_setzero_si128()); // ___3 ___2
___1 ___0 | 65 as = _mm_unpacklo_epi16(as, as); // 3333 2222
1111 0000 |
66 as = _mm_or_si128(as, _mm_slli_si128(as, 1)); // __33 __22
__11 __00 | 66 return as; |
67 return _mm_or_si128(as, _mm_slli_si128(as, 2)); // 3333 2222
1111 0000 | |
68 #endif | 67 #endif |
69 } | 68 } |
70 static Alpha Load(const uint8_t* a, int n) { | 69 static Alpha Load(const uint8_t* a, int n) { |
71 SkASSERT(n > 0 && n < 4); | 70 SkASSERT(n > 0 && n < 4); |
72 uint8_t a4[] = { 0,0,0,0 }; | 71 uint8_t a4[] = { 0,0,0,0 }; |
73 switch (n) { | 72 switch (n) { |
74 case 3: a4[2] = a[2]; // fall through | 73 case 3: a4[2] = a[2]; // fall through |
75 case 2: a4[1] = a[1]; // fall through | 74 case 2: a4[1] = a[1]; // fall through |
76 case 1: a4[0] = a[0]; | 75 case 1: a4[0] = a[0]; |
77 } | 76 } |
(...skipping 23 matching lines...) Expand all Loading... |
101 SkPx addNarrowHi(const SkPx& o) const { | 100 SkPx addNarrowHi(const SkPx& o) const { |
102 Wide sum = (*this + o.widenLo()).shr<8>(); | 101 Wide sum = (*this + o.widenLo()).shr<8>(); |
103 return _mm_packus_epi16(sum.fLo, sum.fHi); | 102 return _mm_packus_epi16(sum.fLo, sum.fHi); |
104 } | 103 } |
105 }; | 104 }; |
106 | 105 |
107 Alpha alpha() const { | 106 Alpha alpha() const { |
108 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | 107 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
109 return _mm_shuffle_epi8(fVec, _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7
,7,7, 3,3,3,3)); | 108 return _mm_shuffle_epi8(fVec, _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7
,7,7, 3,3,3,3)); |
110 #else | 109 #else |
111 __m128i as = _mm_srli_epi32(fVec, 24); // ___3 ___2 ___1 ___0 | 110 // We exploit that A >= rgb for any premul pixel. |
112 as = _mm_or_si128(as, _mm_slli_si128(as, 1)); // __33 __22 __11 __00 | 111 __m128i as = fVec; // 3xxx 2xxx 1xxx 0xxx |
113 return _mm_or_si128(as, _mm_slli_si128(as, 2)); // 3333 2222 1111 0000 | 112 as = _mm_max_epu8(as, _mm_srli_epi32(as, 8)); // 33xx 22xx 11xx 00xx |
| 113 as = _mm_max_epu8(as, _mm_srli_epi32(as, 16)); // 3333 2222 1111 0000 |
| 114 return as; |
114 #endif | 115 #endif |
115 } | 116 } |
116 | 117 |
117 Wide widenLo() const { | 118 Wide widenLo() const { |
118 return Wide(_mm_unpacklo_epi8(fVec, _mm_setzero_si128()), | 119 return Wide(_mm_unpacklo_epi8(fVec, _mm_setzero_si128()), |
119 _mm_unpackhi_epi8(fVec, _mm_setzero_si128())); | 120 _mm_unpackhi_epi8(fVec, _mm_setzero_si128())); |
120 } | 121 } |
121 Wide widenHi() const { | 122 Wide widenHi() const { |
122 return Wide(_mm_unpacklo_epi8(_mm_setzero_si128(), fVec), | 123 return Wide(_mm_unpacklo_epi8(_mm_setzero_si128(), fVec), |
123 _mm_unpackhi_epi8(_mm_setzero_si128(), fVec)); | 124 _mm_unpackhi_epi8(_mm_setzero_si128(), fVec)); |
(...skipping 21 matching lines...) Expand all Loading... |
145 SkPx addAlpha(const Alpha& a) const { | 146 SkPx addAlpha(const Alpha& a) const { |
146 return _mm_add_epi8(fVec, _mm_and_si128(a.fVec, _mm_set1_epi32(0xFF00000
0))); | 147 return _mm_add_epi8(fVec, _mm_and_si128(a.fVec, _mm_set1_epi32(0xFF00000
0))); |
147 } | 148 } |
148 }; | 149 }; |
149 | 150 |
150 } // namespace sse | 151 } // namespace sse |
151 | 152 |
152 typedef sse::SkPx SkPx; | 153 typedef sse::SkPx SkPx; |
153 | 154 |
154 #endif//SkPx_sse_DEFINED | 155 #endif//SkPx_sse_DEFINED |
OLD | NEW |