Index: src/opts/SkPMFloat_neon.h |
diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h |
index cabb29a2a80ee8b399878a21e26aa723f0c90e31..57f613751d1e657264325be1ad620f9d5b911c34 100644 |
--- a/src/opts/SkPMFloat_neon.h |
+++ b/src/opts/SkPMFloat_neon.h |
@@ -7,70 +7,24 @@ |
namespace { // See SkPMFloat.h |
-// For SkPMFloat(SkPMFColor), we widen our 8 bit components (fix8) to 8-bit components in 16 bits |
-// (fix8_16), then widen those to 8-bit-in-32-bits (fix8_32), and finally convert those to floats. |
- |
-// round() and roundClamp() do the opposite, working from floats to 8-bit-in-32-bit, |
-// to 8-bit-in-16-bit, back down to 8-bit components. |
-// roundClamp() uses vqmovn to clamp while narrowing instead of just narrowing with vmovn. |
- |
inline SkPMFloat::SkPMFloat(SkPMColor c) { |
SkPMColorAssert(c); |
uint8x8_t fix8 = (uint8x8_t)vdup_n_u32(c); |
uint16x8_t fix8_16 = vmovl_u8(fix8); |
uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16)); |
- fVec = vcvtq_f32_u32(fix8_32); |
+ fVec = vcvtq_n_f32_u32(fix8_32, 8); |
SkASSERT(this->isValid()); |
} |
-inline SkPMColor SkPMFloat::trunc() const { |
- uint32x4_t fix8_32 = vcvtq_u32_f32(fVec); // vcvtq_u32_f32 truncates |
- uint16x4_t fix8_16 = vmovn_u32(fix8_32); |
- uint8x8_t fix8 = vmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); |
- SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0); |
- SkPMColorAssert(c); |
- return c; |
-} |
- |
inline SkPMColor SkPMFloat::round() const { |
- return SkPMFloat(Sk4f(0.5f) + *this).trunc(); |
-} |
- |
-inline SkPMColor SkPMFloat::roundClamp() const { |
- float32x4_t add_half = vaddq_f32(fVec, vdupq_n_f32(0.5f)); |
- uint32x4_t fix8_32 = vcvtq_u32_f32(add_half); // vcvtq_u32_f32 truncates, so round manually |
- uint16x4_t fix8_16 = vqmovn_u32(fix8_32); |
- uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); |
+ // vcvtq_n_u32_f32 truncates, so we round manually by adding half a step (0.5/255) before converting. |
+ float32x4_t rounded = vaddq_f32(fVec, vdupq_n_f32(0.5f/255)); |
+ uint32x4_t fix8_32 = vcvtq_n_u32_f32(rounded, 8); |
+ uint16x4_t fix8_16 = vqmovn_u32(fix8_32); |
+ uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); |
SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0); |
SkPMColorAssert(c); |
return c; |
} |
-// TODO: we should be able to beat these loops on all three methods. |
-inline void SkPMFloat::From4PMColors(const SkPMColor colors[4], |
- SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) { |
- *a = FromPMColor(colors[0]); |
- *b = FromPMColor(colors[1]); |
- *c = FromPMColor(colors[2]); |
- *d = FromPMColor(colors[3]); |
-} |
- |
-inline void SkPMFloat::RoundTo4PMColors( |
- const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d, |
- SkPMColor colors[4]) { |
- colors[0] = a.round(); |
- colors[1] = b.round(); |
- colors[2] = c.round(); |
- colors[3] = d.round(); |
-} |
- |
-inline void SkPMFloat::RoundClampTo4PMColors( |
- const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d, |
- SkPMColor colors[4]) { |
- colors[0] = a.roundClamp(); |
- colors[1] = b.roundClamp(); |
- colors[2] = c.roundClamp(); |
- colors[3] = d.roundClamp(); |
-} |
- |
} // namespace |