OLD | NEW |
1 #include "SkColorPriv.h" | 1 #include "SkColorPriv.h" |
2 #include <emmintrin.h> | 2 #include <emmintrin.h> |
3 | 3 |
4 // For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit compo
nents in 16 bits | 4 // For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit compo
nents in 16 bits |
5 // (fix8_16), then widen those to 8-bit-in-32-bits (fix8_32), and finally conver
t those to floats. | 5 // (fix8_16), then widen those to 8-bit-in-32-bits (fix8_32), and finally conver
t those to floats. |
6 | 6 |
7 // get() and clamped() do the opposite, working from floats to 8-bit-in-32-bit, | 7 // get() and clamped() do the opposite, working from floats to 8-bit-in-32-bit, |
8 // to 8-bit-in-16-bit, back down to 8-bit components. | 8 // to 8-bit-in-16-bit, back down to 8-bit components. |
9 // _mm_packus_epi16() gives us clamping for free while narrowing. | 9 // _mm_packus_epi16() gives us clamping for free while narrowing. |
10 | 10 |
11 inline SkPMFloat::SkPMFloat(SkPMColor c) { | 11 inline SkPMFloat::SkPMFloat(SkPMColor c) { |
12 SkPMColorAssert(c); | 12 SkPMColorAssert(c); |
13 __m128i fix8 = _mm_set_epi32(0,0,0,c), | 13 __m128i fix8 = _mm_set_epi32(0,0,0,c), |
14 fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()), | 14 fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()), |
15 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128()); | 15 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128()); |
16 _mm_store_ps(fColor, _mm_cvtepi32_ps(fix8_32)); | 16 _mm_store_ps(fColor, _mm_cvtepi32_ps(fix8_32)); |
17 SkASSERT(this->isValid()); | 17 SkASSERT(this->isValid()); |
18 } | 18 } |
19 | 19 |
20 inline SkPMColor SkPMFloat::get() const { | 20 inline SkPMColor SkPMFloat::get() const { |
21 SkASSERT(this->isValid()); | 21 SkASSERT(this->isValid()); |
22 return this->clamped(); // At the moment, we don't know anything faster. | 22 return this->clamped(); // Haven't beaten this yet. |
23 } | 23 } |
24 | 24 |
25 inline SkPMColor SkPMFloat::clamped() const { | 25 inline SkPMColor SkPMFloat::clamped() const { |
26 __m128i fix8_32 = _mm_cvtps_epi32(_mm_load_ps(fColor)), // _mm_cvtps_epi32
rounds for us! | 26 __m128i fix8_32 = _mm_cvtps_epi32(_mm_load_ps(fColor)), // _mm_cvtps_epi32
rounds for us! |
27 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), | 27 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), |
28 fix8 = _mm_packus_epi16(fix8_16, fix8_16); | 28 fix8 = _mm_packus_epi16(fix8_16, fix8_16); |
29 SkPMColor c = _mm_cvtsi128_si32(fix8); | 29 SkPMColor c = _mm_cvtsi128_si32(fix8); |
30 SkPMColorAssert(c); | 30 SkPMColorAssert(c); |
31 return c; | 31 return c; |
32 } | 32 } |
| 33 |
| 34 inline void SkPMFloat::From4PMColors(SkPMFloat floats[4], const SkPMColor colors
[4]) { |
| 35 // Haven't beaten this yet. |
| 36 for (int i = 0; i < 4; i++) { floats[i] = FromPMColor(colors[i]); } |
| 37 } |
| 38 |
| 39 inline void SkPMFloat::To4PMColors(SkPMColor colors[4], const SkPMFloat floats[4
]) { |
| 40 SkASSERT(floats[0].isValid() && floats[1].isValid() |
| 41 && floats[2].isValid() && floats[3].isValid()); |
| 42 // Haven't beaten this yet. |
| 43 ClampTo4PMColors(colors, floats); |
| 44 } |
| 45 |
| 46 inline void SkPMFloat::ClampTo4PMColors(SkPMColor colors[4], const SkPMFloat flo
ats[4]) { |
| 47 // Same as _SSSE3.h's. We use 3 _mm_packus_epi16() where the naive loop use
s 8. |
| 48 __m128i c0 = _mm_cvtps_epi32(_mm_load_ps(floats[0].fColor)), // _mm_cvtps_e
pi32 rounds for us! |
| 49 c1 = _mm_cvtps_epi32(_mm_load_ps(floats[1].fColor)), |
| 50 c2 = _mm_cvtps_epi32(_mm_load_ps(floats[2].fColor)), |
| 51 c3 = _mm_cvtps_epi32(_mm_load_ps(floats[3].fColor)); |
| 52 __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1), |
| 53 _mm_packus_epi16(c2, c3)); |
| 54 _mm_storeu_si128((__m128i*)colors, c3210); |
| 55 SkPMColorAssert(colors[0]); |
| 56 SkPMColorAssert(colors[1]); |
| 57 SkPMColorAssert(colors[2]); |
| 58 SkPMColorAssert(colors[3]); |
| 59 } |
OLD | NEW |