| Index: src/opts/SkPMFloat_neon.h
|
| diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h
|
| index cabb29a2a80ee8b399878a21e26aa723f0c90e31..57f613751d1e657264325be1ad620f9d5b911c34 100644
|
| --- a/src/opts/SkPMFloat_neon.h
|
| +++ b/src/opts/SkPMFloat_neon.h
|
| @@ -7,70 +7,24 @@
|
|
|
| namespace { // See SkPMFloat.h
|
|
|
| -// For SkPMFloat(SkPMFColor), we widen our 8 bit components (fix8) to 8-bit components in 16 bits
|
| -// (fix8_16), then widen those to 8-bit-in-32-bits (fix8_32), and finally convert those to floats.
|
| -
|
| -// round() and roundClamp() do the opposite, working from floats to 8-bit-in-32-bit,
|
| -// to 8-bit-in-16-bit, back down to 8-bit components.
|
| -// roundClamp() uses vqmovn to clamp while narrowing instead of just narrowing with vmovn.
|
| -
|
| inline SkPMFloat::SkPMFloat(SkPMColor c) {
|
| SkPMColorAssert(c);
|
| uint8x8_t fix8 = (uint8x8_t)vdup_n_u32(c);
|
| uint16x8_t fix8_16 = vmovl_u8(fix8);
|
| uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16));
|
| - fVec = vcvtq_f32_u32(fix8_32);
|
| + fVec = vcvtq_n_f32_u32(fix8_32, 8);
|
| SkASSERT(this->isValid());
|
| }
|
|
|
| -inline SkPMColor SkPMFloat::trunc() const {
|
| - uint32x4_t fix8_32 = vcvtq_u32_f32(fVec); // vcvtq_u32_f32 truncates
|
| - uint16x4_t fix8_16 = vmovn_u32(fix8_32);
|
| - uint8x8_t fix8 = vmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));
|
| - SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0);
|
| - SkPMColorAssert(c);
|
| - return c;
|
| -}
|
| -
|
| inline SkPMColor SkPMFloat::round() const {
|
| - return SkPMFloat(Sk4f(0.5f) + *this).trunc();
|
| -}
|
| -
|
| -inline SkPMColor SkPMFloat::roundClamp() const {
|
| - float32x4_t add_half = vaddq_f32(fVec, vdupq_n_f32(0.5f));
|
| - uint32x4_t fix8_32 = vcvtq_u32_f32(add_half); // vcvtq_u32_f32 truncates, so round manually
|
| - uint16x4_t fix8_16 = vqmovn_u32(fix8_32);
|
| - uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));
|
| + // vcvtq_n_u32_f32 truncates, so we round manually by adding a half before converting.
|
| + float32x4_t rounded = vaddq_f32(fVec, vdupq_n_f32(0.5f/255));
|
| + uint32x4_t fix8_32 = vcvtq_n_u32_f32(rounded, 8);
|
| + uint16x4_t fix8_16 = vqmovn_u32(fix8_32);
|
| + uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));
|
| SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0);
|
| SkPMColorAssert(c);
|
| return c;
|
| }
|
|
|
| -// TODO: we should be able to beat these loops on all three methods.
|
| -inline void SkPMFloat::From4PMColors(const SkPMColor colors[4],
|
| - SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) {
|
| - *a = FromPMColor(colors[0]);
|
| - *b = FromPMColor(colors[1]);
|
| - *c = FromPMColor(colors[2]);
|
| - *d = FromPMColor(colors[3]);
|
| -}
|
| -
|
| -inline void SkPMFloat::RoundTo4PMColors(
|
| - const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,
|
| - SkPMColor colors[4]) {
|
| - colors[0] = a.round();
|
| - colors[1] = b.round();
|
| - colors[2] = c.round();
|
| - colors[3] = d.round();
|
| -}
|
| -
|
| -inline void SkPMFloat::RoundClampTo4PMColors(
|
| - const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,
|
| - SkPMColor colors[4]) {
|
| - colors[0] = a.roundClamp();
|
| - colors[1] = b.roundClamp();
|
| - colors[2] = c.roundClamp();
|
| - colors[3] = d.roundClamp();
|
| -}
|
| -
|
| } // namespace
|
|
|