OLD | NEW |
1 #include "SkColorPriv.h" | 1 #include "SkColorPriv.h" |
2 #include <tmmintrin.h> | 2 #include <tmmintrin.h> |
3 | 3 |
// For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit components in 32 bits
// (fix8_32), then convert those to floats.
6 | 6 |
// get() does the opposite, working from floats to 8-bit-in-32-bits, then back to packed 8 bit.
8 | 8 |
// clamped() is the same as _SSE2: floats to 8-in-32, to 8-in-16, to packed 8 bit, with
// _mm_packus_epi16() both clamping and narrowing.
(...skipping 18 matching lines...) Expand all Loading... |
29 } | 29 } |
30 | 30 |
31 inline SkPMColor SkPMFloat::clamped() const { | 31 inline SkPMColor SkPMFloat::clamped() const { |
32 __m128i fix8_32 = _mm_cvtps_epi32(_mm_load_ps(fColor)), // _mm_cvtps_epi32
rounds for us! | 32 __m128i fix8_32 = _mm_cvtps_epi32(_mm_load_ps(fColor)), // _mm_cvtps_epi32
rounds for us! |
33 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), | 33 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), |
34 fix8 = _mm_packus_epi16(fix8_16, fix8_16); | 34 fix8 = _mm_packus_epi16(fix8_16, fix8_16); |
35 SkPMColor c = _mm_cvtsi128_si32(fix8); | 35 SkPMColor c = _mm_cvtsi128_si32(fix8); |
36 SkPMColorAssert(c); | 36 SkPMColorAssert(c); |
37 return c; | 37 return c; |
38 } | 38 } |
| 39 |
| 40 inline void SkPMFloat::From4PMColors(SkPMFloat floats[4], const SkPMColor colors
[4]) { |
| 41 // Haven't beaten this yet. |
| 42 for (int i = 0; i < 4; i++) { floats[i] = FromPMColor(colors[i]); } |
| 43 } |
| 44 |
| 45 inline void SkPMFloat::To4PMColors(SkPMColor colors[4], const SkPMFloat floats[4
]) { |
| 46 // Haven't beaten this yet. Still faster than ClampTo4PMColors too. |
| 47 for (int i = 0; i < 4; i++) { colors[i] = floats[i].get(); } |
| 48 } |
| 49 |
| 50 inline void SkPMFloat::ClampTo4PMColors(SkPMColor colors[4], const SkPMFloat flo
ats[4]) { |
| 51 // Same as _SSE2.h's. We use 3 _mm_packus_epi16() where the naive loop uses
8. |
| 52 __m128i c0 = _mm_cvtps_epi32(_mm_load_ps(floats[0].fColor)), // _mm_cvtps_e
pi32 rounds for us! |
| 53 c1 = _mm_cvtps_epi32(_mm_load_ps(floats[1].fColor)), |
| 54 c2 = _mm_cvtps_epi32(_mm_load_ps(floats[2].fColor)), |
| 55 c3 = _mm_cvtps_epi32(_mm_load_ps(floats[3].fColor)); |
| 56 __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1), |
| 57 _mm_packus_epi16(c2, c3)); |
| 58 _mm_storeu_si128((__m128i*)colors, c3210); |
| 59 SkPMColorAssert(colors[0]); |
| 60 SkPMColorAssert(colors[1]); |
| 61 SkPMColorAssert(colors[2]); |
| 62 SkPMColorAssert(colors[3]); |
| 63 } |
OLD | NEW |