Index: src/opts/SkPMFloat_neon.h |
diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h |
index 57f613751d1e657264325be1ad620f9d5b911c34..c8976860b1805ed29ad3b0592c3c123c916ee6a2 100644 |
--- a/src/opts/SkPMFloat_neon.h |
+++ b/src/opts/SkPMFloat_neon.h |
@@ -12,14 +12,14 @@ inline SkPMFloat::SkPMFloat(SkPMColor c) { |
uint8x8_t fix8 = (uint8x8_t)vdup_n_u32(c); |
uint16x8_t fix8_16 = vmovl_u8(fix8); |
uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16)); |
- fVec = vcvtq_n_f32_u32(fix8_32, 8); |
+ fVec = vmulq_f32(vcvtq_f32_u32(fix8_32), vdupq_n_f32(1.0f/255)); |
SkASSERT(this->isValid()); |
} |
inline SkPMColor SkPMFloat::round() const { |
- // vcvtq_n_u32_f32 truncates, so we round manually by adding a half before converting. |
- float32x4_t rounded = vaddq_f32(fVec, vdupq_n_f32(0.5f/255)); |
- uint32x4_t fix8_32 = vcvtq_n_u32_f32(rounded, 8); |
+ // vcvtq_u32_f32 truncates, so we round manually: scale by 255 and add a half before converting. |
+ float32x4_t rounded = vmlaq_f32(vdupq_n_f32(0.5f), fVec, vdupq_n_f32(255)); |
+ uint32x4_t fix8_32 = vcvtq_u32_f32(rounded); |
uint16x4_t fix8_16 = vqmovn_u32(fix8_32); |
uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); |
SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0); |