Index: src/opts/SkPMFloat_SSE2.h |
diff --git a/src/opts/SkPMFloat_SSE2.h b/src/opts/SkPMFloat_SSE2.h |
index e14f94fe135682cffdf8d1a3f28cab6bf6aeb4a6..7298b4da3c6b67ddd9611b73ef3ef0ab91bc3a92 100644 |
--- a/src/opts/SkPMFloat_SSE2.h |
+++ b/src/opts/SkPMFloat_SSE2.h |
@@ -19,7 +19,7 @@ inline SkPMFloat::SkPMFloat(SkPMColor c) { |
inline SkPMColor SkPMFloat::get() const { |
SkASSERT(this->isValid()); |
- return this->clamped(); // At the moment, we don't know anything faster. |
+ return this->clamped(); // Nothing faster than clamped() is known yet, so valid values take the same path. |
} |
inline SkPMColor SkPMFloat::clamped() const { |
@@ -30,3 +30,30 @@ inline SkPMColor SkPMFloat::clamped() const { |
SkPMColorAssert(c); |
return c; |
} |
+ |
+inline void SkPMFloat::From4PMColors(SkPMFloat floats[4], const SkPMColor colors[4]) { |
+ // Converts the 4 pixels one at a time via FromPMColor(); no faster batched form has been found yet. |
+ for (int px = 0; px != 4; ++px) { floats[px] = FromPMColor(colors[px]); } |
+} |
+ |
+inline void SkPMFloat::To4PMColors(SkPMColor colors[4], const SkPMFloat floats[4]) { |
+ SkASSERT(floats[0].isValid() && floats[1].isValid() |
+ && floats[2].isValid() && floats[3].isValid()); |
+ // Nothing faster than the clamping conversion is known yet, so the (asserted-valid) inputs reuse it. |
+ ClampTo4PMColors(colors, floats); |
+} |
+ |
+inline void SkPMFloat::ClampTo4PMColors(SkPMColor colors[4], const SkPMFloat floats[4]) { |
+ // Converts 4 SkPMFloats to 4 SkPMColors, saturating every channel to [0,255]. Uses 3 packs where the naive loop uses 8. |
+ __m128i c0 = _mm_cvtps_epi32(_mm_load_ps(floats[0].fColor)), // _mm_cvtps_epi32 rounds for us! |
+ c1 = _mm_cvtps_epi32(_mm_load_ps(floats[1].fColor)), |
+ c2 = _mm_cvtps_epi32(_mm_load_ps(floats[2].fColor)), |
+ c3 = _mm_cvtps_epi32(_mm_load_ps(floats[3].fColor)); |
+ // 32->16 must be _mm_packs_epi32 (signed saturation of whole lanes); _mm_packus_epi16 here would clamp each 16-bit half separately. |
+ __m128i c3210 = _mm_packus_epi16(_mm_packs_epi32(c0, c1), _mm_packs_epi32(c2, c3)); |
+ _mm_storeu_si128((__m128i*)colors, c3210); |
+ SkPMColorAssert(colors[0]); |
+ SkPMColorAssert(colors[1]); |
+ SkPMColorAssert(colors[2]); |
+ SkPMColorAssert(colors[3]); |
+} |