| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkPx_sse_DEFINED | 8 #ifndef SkPx_sse_DEFINED |
| 9 #define SkPx_sse_DEFINED | 9 #define SkPx_sse_DEFINED |
| 10 | 10 |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 54 struct Alpha { | 54 struct Alpha { |
| 55 __m128i fVec; | 55 __m128i fVec; |
| 56 Alpha(__m128i vec) : fVec(vec) {} | 56 Alpha(__m128i vec) : fVec(vec) {} |
| 57 | 57 |
| 58 static Alpha Dup(uint8_t a) { return _mm_set1_epi8(a); } | 58 static Alpha Dup(uint8_t a) { return _mm_set1_epi8(a); } |
| 59 static Alpha Load(const uint8_t* a) { | 59 static Alpha Load(const uint8_t* a) { |
| 60 __m128i as = _mm_cvtsi32_si128(*(const uint32_t*)a); // ____ ____ ____ 3210 | 60 __m128i as = _mm_cvtsi32_si128(*(const uint32_t*)a); // ____ ____ ____ 3210 |
| 61 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | 61 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
| 62 return _mm_shuffle_epi8(as, _mm_set_epi8(3,3,3,3, 2,2,2,2, 1,1,1,1, 0,0,0,0)); | 62 return _mm_shuffle_epi8(as, _mm_set_epi8(3,3,3,3, 2,2,2,2, 1,1,1,1, 0,0,0,0)); |
| 63 #else | 63 #else |
| 64 as = _mm_unpacklo_epi8 (as, _mm_setzero_si128()); // ____ ____ _3_2 _1_0 | 64 as = _mm_unpacklo_epi8 (as, as); // ____ ____ 3322 1100 |
| 65 as = _mm_unpacklo_epi16(as, _mm_setzero_si128()); // ___3 ___2 ___1 ___0 | 65 as = _mm_unpacklo_epi16(as, as); // 3333 2222 1111 0000 |
| 66 as = _mm_or_si128(as, _mm_slli_si128(as, 1)); // __33 __22 __11 __00 | 66 return as; |
| 67 return _mm_or_si128(as, _mm_slli_si128(as, 2)); // 3333 2222 1111 0000 | |
| 68 #endif | 67 #endif |
| 69 } | 68 } |
| 70 static Alpha Load(const uint8_t* a, int n) { | 69 static Alpha Load(const uint8_t* a, int n) { |
| 71 SkASSERT(n > 0 && n < 4); | 70 SkASSERT(n > 0 && n < 4); |
| 72 uint8_t a4[] = { 0,0,0,0 }; | 71 uint8_t a4[] = { 0,0,0,0 }; |
| 73 switch (n) { | 72 switch (n) { |
| 74 case 3: a4[2] = a[2]; // fall through | 73 case 3: a4[2] = a[2]; // fall through |
| 75 case 2: a4[1] = a[1]; // fall through | 74 case 2: a4[1] = a[1]; // fall through |
| 76 case 1: a4[0] = a[0]; | 75 case 1: a4[0] = a[0]; |
| 77 } | 76 } |
| (...skipping 23 matching lines...) Expand all Loading... |
| 101 SkPx addNarrowHi(const SkPx& o) const { | 100 SkPx addNarrowHi(const SkPx& o) const { |
| 102 Wide sum = (*this + o.widenLo()).shr<8>(); | 101 Wide sum = (*this + o.widenLo()).shr<8>(); |
| 103 return _mm_packus_epi16(sum.fLo, sum.fHi); | 102 return _mm_packus_epi16(sum.fLo, sum.fHi); |
| 104 } | 103 } |
| 105 }; | 104 }; |
| 106 | 105 |
| 107 Alpha alpha() const { | 106 Alpha alpha() const { |
| 108 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | 107 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
| 109 return _mm_shuffle_epi8(fVec, _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7,7,7, 3,3,3,3)); | 108 return _mm_shuffle_epi8(fVec, _mm_set_epi8(15,15,15,15, 11,11,11,11, 7,7,7,7, 3,3,3,3)); |
| 110 #else | 109 #else |
| 111 __m128i as = _mm_srli_epi32(fVec, 24); // ___3 ___2 ___1 ___0 | 110 // We exploit that A >= rgb for any premul pixel. |
| 112 as = _mm_or_si128(as, _mm_slli_si128(as, 1)); // __33 __22 __11 __00 | 111 __m128i as = fVec; // 3xxx 2xxx 1xxx 0xxx |
| 113 return _mm_or_si128(as, _mm_slli_si128(as, 2)); // 3333 2222 1111 0000 | 112 as = _mm_max_epu8(as, _mm_srli_epi32(as, 8)); // 33xx 22xx 11xx 00xx |
| 113 as = _mm_max_epu8(as, _mm_srli_epi32(as, 16)); // 3333 2222 1111 0000 |
| 114 return as; |
| 114 #endif | 115 #endif |
| 115 } | 116 } |
| 116 | 117 |
| 117 Wide widenLo() const { | 118 Wide widenLo() const { |
| 118 return Wide(_mm_unpacklo_epi8(fVec, _mm_setzero_si128()), | 119 return Wide(_mm_unpacklo_epi8(fVec, _mm_setzero_si128()), |
| 119 _mm_unpackhi_epi8(fVec, _mm_setzero_si128())); | 120 _mm_unpackhi_epi8(fVec, _mm_setzero_si128())); |
| 120 } | 121 } |
| 121 Wide widenHi() const { | 122 Wide widenHi() const { |
| 122 return Wide(_mm_unpacklo_epi8(_mm_setzero_si128(), fVec), | 123 return Wide(_mm_unpacklo_epi8(_mm_setzero_si128(), fVec), |
| 123 _mm_unpackhi_epi8(_mm_setzero_si128(), fVec)); | 124 _mm_unpackhi_epi8(_mm_setzero_si128(), fVec)); |
| (...skipping 21 matching lines...) Expand all Loading... |
| 145 SkPx addAlpha(const Alpha& a) const { | 146 SkPx addAlpha(const Alpha& a) const { |
| 146 return _mm_add_epi8(fVec, _mm_and_si128(a.fVec, _mm_set1_epi32(0xFF000000))); | 147 return _mm_add_epi8(fVec, _mm_and_si128(a.fVec, _mm_set1_epi32(0xFF000000))); |
| 147 } | 148 } |
| 148 }; | 149 }; |
| 149 | 150 |
| 150 } // namespace sse | 151 } // namespace sse |
| 151 | 152 |
| 152 typedef sse::SkPx SkPx; | 153 typedef sse::SkPx SkPx; |
| 153 | 154 |
| 154 #endif//SkPx_sse_DEFINED | 155 #endif//SkPx_sse_DEFINED |
| OLD | NEW |