| Index: src/opts/SkPMFloat_SSSE3.h
|
| diff --git a/src/opts/SkPMFloat_SSSE3.h b/src/opts/SkPMFloat_SSSE3.h
|
| index c4fba400dc36863ce4ada1dba377082cce6b8a90..ff296178cc781fe4b0c031b37c0cddf29c5b2b44 100644
|
| --- a/src/opts/SkPMFloat_SSSE3.h
|
| +++ b/src/opts/SkPMFloat_SSSE3.h
|
| @@ -36,3 +36,28 @@ inline SkPMColor SkPMFloat::clamped() const {
|
| SkPMColorAssert(c);
|
| return c;
|
| }
|
| +
|
| +inline void SkPMFloat::From4PMColors(SkPMFloat floats[4], const SkPMColor colors[4]) {
|
| + // Converts each of the 4 colors with FromPMColor(), one at a time; no faster variant has beaten this loop yet.
|
| + for (int px = 0; px != 4; ++px) { floats[px] = FromPMColor(colors[px]); }
|
| +}
|
| +
|
| +inline void SkPMFloat::To4PMColors(SkPMColor colors[4], const SkPMFloat floats[4]) {
|
| + // Calls get() on each of the 4 floats in turn; nothing has beaten this loop yet, and it is still faster than ClampTo4PMColors.
|
| + for (int px = 0; px != 4; ++px) { colors[px] = floats[px].get(); }
|
| +}
|
| +
|
| +inline void SkPMFloat::ClampTo4PMColors(SkPMColor colors[4], const SkPMFloat floats[4]) {
|
| + // Rounds 4 SkPMFloats to int32 lanes, then saturates 32->16 (signed) and 16->8 (unsigned): 3 packs where the naive loop uses 8.
|
| + __m128i c0 = _mm_cvtps_epi32(_mm_load_ps(floats[0].fColor)), // _mm_cvtps_epi32 rounds for us!
|
| + c1 = _mm_cvtps_epi32(_mm_load_ps(floats[1].fColor)), // NOTE(review): _mm_load_ps requires 16-byte-aligned fColor -- confirm in SkPMFloat.h.
|
| + c2 = _mm_cvtps_epi32(_mm_load_ps(floats[2].fColor)),
|
| + c3 = _mm_cvtps_epi32(_mm_load_ps(floats[3].fColor));
|
| + __m128i c3210 = _mm_packus_epi16(_mm_packs_epi32(c0, c1), // _mm_packs_epi32 here: _mm_packus_epi16 would read each int32 lane as two int16s, so 40000 -> 0 and -40000 -> 255.
|
| + _mm_packs_epi32(c2, c3)); // NOTE(review): if _SSE2.h still packs with _mm_packus_epi16 twice, it needs the same fix.
|
| + _mm_storeu_si128((__m128i*)colors, c3210); // Unaligned store: colors is not required to be 16-byte aligned.
|
| + SkPMColorAssert(colors[0]);
|
| + SkPMColorAssert(colors[1]);
|
| + SkPMColorAssert(colors[2]);
|
| + SkPMColorAssert(colors[3]);
|
| +}
|
|
|