OLD | NEW |
1 #include "SkColorPriv.h" | 1 inline SkPMFloat& SkPMFloat::operator=(const SkPMFloat& that) { |
2 #include <tmmintrin.h> | 2 fColors = that.fColors; |
| 3 return *this; |
| 4 } |
3 | 5 |
4 // For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit compo
nents in 32 bits | 6 // For SkPMFloat(SkPMColor), we widen our 8 bit components (fix8) to 8-bit compo
nents in 32 bits |
5 // (fix8_32), then convert those to floats. | 7 // (fix8_32), then convert those to floats. |
6 | 8 |
7 // get() does the opposite, working from floats to 8-bit-in-32-bits, then back t
o packed 8 bit. | 9 // get() does the opposite, working from floats to 8-bit-in-32-bits, then back t
o packed 8 bit. |
8 | 10 |
9 // clamped() is the same as _SSE2: floats to 8-in-32, to 8-in-16, to packed 8 bi
t, with | 11 // clamped() is the same as _SSE2: floats to 8-in-32, to 8-in-16, to packed 8 bi
t, with |
10 // _mm_packus_epi16() both clamping and narrowing. | 12 // _mm_packus_epi16() both clamping and narrowing. |
11 | 13 |
12 inline SkPMFloat::SkPMFloat(SkPMColor c) { | 14 inline SkPMFloat::SkPMFloat(SkPMColor c) { |
13 SkPMColorAssert(c); | 15 SkPMColorAssert(c); |
14 const int _ = 255; // _ means to zero that byte. | 16 const int _ = 255; // _ means to zero that byte. |
15 __m128i fix8 = _mm_set_epi32(0,0,0,c), | 17 __m128i fix8 = _mm_set_epi32(0,0,0,c), |
16 fix8_32 = _mm_shuffle_epi8(fix8, _mm_set_epi8(_,_,_,3, _,_,_,2, _,_,
_,1, _,_,_,0)); | 18 fix8_32 = _mm_shuffle_epi8(fix8, _mm_set_epi8(_,_,_,3, _,_,_,2, _,_,
_,1, _,_,_,0)); |
17 _mm_store_ps(fColor, _mm_cvtepi32_ps(fix8_32)); | 19 fColors = _mm_cvtepi32_ps(fix8_32); |
18 SkASSERT(this->isValid()); | 20 SkASSERT(this->isValid()); |
19 } | 21 } |
20 | 22 |
21 inline SkPMColor SkPMFloat::get() const { | 23 inline SkPMColor SkPMFloat::get() const { |
22 SkASSERT(this->isValid()); | 24 SkASSERT(this->isValid()); |
23 const int _ = 255; // _ means to zero that byte. | 25 const int _ = 255; // _ means to zero that byte. |
24 __m128i fix8_32 = _mm_cvtps_epi32(_mm_load_ps(fColor)), // _mm_cvtps_epi32
rounds for us! | 26 __m128i fix8_32 = _mm_cvtps_epi32(fColors), // _mm_cvtps_epi32 rounds for u
s! |
25 fix8 = _mm_shuffle_epi8(fix8_32, _mm_set_epi8(_,_,_,_, _,_,_,_, _
,_,_,_, 12,8,4,0)); | 27 fix8 = _mm_shuffle_epi8(fix8_32, _mm_set_epi8(_,_,_,_, _,_,_,_, _
,_,_,_, 12,8,4,0)); |
26 SkPMColor c = _mm_cvtsi128_si32(fix8); | 28 SkPMColor c = _mm_cvtsi128_si32(fix8); |
27 SkPMColorAssert(c); | 29 SkPMColorAssert(c); |
28 return c; | 30 return c; |
29 } | 31 } |
30 | 32 |
31 inline SkPMColor SkPMFloat::clamped() const { | 33 inline SkPMColor SkPMFloat::clamped() const { |
32 __m128i fix8_32 = _mm_cvtps_epi32(_mm_load_ps(fColor)), // _mm_cvtps_epi32
rounds for us! | 34 __m128i fix8_32 = _mm_cvtps_epi32(fColors), // _mm_cvtps_epi32 rounds for u
s! |
33 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), | 35 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), |
34 fix8 = _mm_packus_epi16(fix8_16, fix8_16); | 36 fix8 = _mm_packus_epi16(fix8_16, fix8_16); |
35 SkPMColor c = _mm_cvtsi128_si32(fix8); | 37 SkPMColor c = _mm_cvtsi128_si32(fix8); |
36 SkPMColorAssert(c); | 38 SkPMColorAssert(c); |
37 return c; | 39 return c; |
38 } | 40 } |
39 | 41 |
40 inline void SkPMFloat::From4PMColors(SkPMFloat floats[4], const SkPMColor colors
[4]) { | 42 inline void SkPMFloat::From4PMColors(SkPMFloat floats[4], const SkPMColor colors
[4]) { |
41 // Haven't beaten this yet. | 43 // Haven't beaten this yet. |
42 for (int i = 0; i < 4; i++) { floats[i] = FromPMColor(colors[i]); } | 44 for (int i = 0; i < 4; i++) { floats[i] = FromPMColor(colors[i]); } |
43 } | 45 } |
44 | 46 |
45 inline void SkPMFloat::To4PMColors(SkPMColor colors[4], const SkPMFloat floats[4
]) { | 47 inline void SkPMFloat::To4PMColors(SkPMColor colors[4], const SkPMFloat floats[4
]) { |
46 // Haven't beaten this yet. Still faster than ClampTo4PMColors too. | 48 // Haven't beaten this yet. Still faster than ClampTo4PMColors too. |
47 for (int i = 0; i < 4; i++) { colors[i] = floats[i].get(); } | 49 for (int i = 0; i < 4; i++) { colors[i] = floats[i].get(); } |
48 } | 50 } |
49 | 51 |
50 inline void SkPMFloat::ClampTo4PMColors(SkPMColor colors[4], const SkPMFloat flo
ats[4]) { | 52 inline void SkPMFloat::ClampTo4PMColors(SkPMColor colors[4], const SkPMFloat flo
ats[4]) { |
51 // Same as _SSE2.h's. We use 3 _mm_packus_epi16() where the naive loop uses
8. | 53 // Same as _SSE2.h's. We use 3 _mm_packus_epi16() where the naive loop uses
8. |
52 __m128i c0 = _mm_cvtps_epi32(_mm_load_ps(floats[0].fColor)), // _mm_cvtps_e
pi32 rounds for us! | 54 __m128i c0 = _mm_cvtps_epi32(floats[0].fColors), // _mm_cvtps_epi32 rounds
for us! |
53 c1 = _mm_cvtps_epi32(_mm_load_ps(floats[1].fColor)), | 55 c1 = _mm_cvtps_epi32(floats[1].fColors), |
54 c2 = _mm_cvtps_epi32(_mm_load_ps(floats[2].fColor)), | 56 c2 = _mm_cvtps_epi32(floats[2].fColors), |
55 c3 = _mm_cvtps_epi32(_mm_load_ps(floats[3].fColor)); | 57 c3 = _mm_cvtps_epi32(floats[3].fColors); |
56 __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1), | 58 __m128i c3210 = _mm_packus_epi16(_mm_packus_epi16(c0, c1), |
57 _mm_packus_epi16(c2, c3)); | 59 _mm_packus_epi16(c2, c3)); |
58 _mm_storeu_si128((__m128i*)colors, c3210); | 60 _mm_storeu_si128((__m128i*)colors, c3210); |
59 SkPMColorAssert(colors[0]); | 61 SkPMColorAssert(colors[0]); |
60 SkPMColorAssert(colors[1]); | 62 SkPMColorAssert(colors[1]); |
61 SkPMColorAssert(colors[2]); | 63 SkPMColorAssert(colors[2]); |
62 SkPMColorAssert(colors[3]); | 64 SkPMColorAssert(colors[3]); |
63 } | 65 } |
OLD | NEW |