src/opts/SkPMFloat_neon.h - Issue 1201343004: Convert SkPMFloat to [0,1] range and prune its API.

Unified Diff: src/opts/SkPMFloat_neon.h

Issue 1201343004: Convert SkPMFloat to [0,1] range and prune its API. (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: tweaks Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/opts/SkPMFloat_neon.h

diff --git a/src/opts/SkPMFloat_neon.h b/src/opts/SkPMFloat_neon.h

index cabb29a2a80ee8b399878a21e26aa723f0c90e31..57f613751d1e657264325be1ad620f9d5b911c34 100644

--- a/src/opts/SkPMFloat_neon.h

+++ b/src/opts/SkPMFloat_neon.h

@@ -7,70 +7,24 @@

namespace { // See SkPMFloat.h

-// For SkPMFloat(SkPMFColor), we widen our 8 bit components (fix8) to 8-bit components in 16 bits

-// (fix8_16), then widen those to 8-bit-in-32-bits (fix8_32), and finally convert those to floats.

-// round() and roundClamp() do the opposite, working from floats to 8-bit-in-32-bit,

-// to 8-bit-in-16-bit, back down to 8-bit components.

-// roundClamp() uses vqmovn to clamp while narrowing instead of just narrowing with vmovn.

+// Converts a packed SkPMColor into four float lanes, one per 8-bit channel.

+// Each channel is widened to 32 bits, converted to float and scaled by 1/255,

+// so a channel value of 0 becomes 0.0 and a channel value of 255 becomes 1.0.

inline SkPMFloat::SkPMFloat(SkPMColor c) {

SkPMColorAssert(c);

uint8x8_t fix8 = (uint8x8_t)vdup_n_u32(c);

uint16x8_t fix8_16 = vmovl_u8(fix8);

uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16));

- fVec = vcvtq_f32_u32(fix8_32);

+ // Scale by 1/255 rather than using an 8-fractional-bit fixed-point conversion (which

+ // divides by 256): with /256 a full channel would become 255/256 instead of 1.0.

+ fVec = vmulq_f32(vcvtq_f32_u32(fix8_32), vdupq_n_f32(1.0f/255));

SkASSERT(this->isValid());

}

-inline SkPMColor SkPMFloat::trunc() const {

- uint32x4_t fix8_32 = vcvtq_u32_f32(fVec); // vcvtq_u32_f32 truncates

- uint16x4_t fix8_16 = vmovn_u32(fix8_32);

- uint8x8_t fix8 = vmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));

- SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0);

- SkPMColorAssert(c);

- return c;

+// Converts the four float lanes back to a packed SkPMColor, rounding each lane to the

+// nearest 8-bit value. This is the inverse of the SkPMColor constructor above: lanes are

+// scaled by 255, and the saturating narrows clamp anything above 255 down to 255.

inline SkPMColor SkPMFloat::round() const {

- return SkPMFloat(Sk4f(0.5f) + *this).trunc();

-inline SkPMColor SkPMFloat::roundClamp() const {

- float32x4_t add_half = vaddq_f32(fVec, vdupq_n_f32(0.5f));

- uint32x4_t fix8_32 = vcvtq_u32_f32(add_half); // vcvtq_u32_f32 truncates, so round manually

- uint16x4_t fix8_16 = vqmovn_u32(fix8_32);

- uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));

+ // vcvtq_u32_f32 truncates, so we round manually: compute fVec*255 + 0.5 before converting.

+ // The half is added after scaling so it is exactly half of one 8-bit step.

+ float32x4_t rounded = vmlaq_f32(vdupq_n_f32(0.5f), fVec, vdupq_n_f32(255.0f));

+ uint32x4_t fix8_32 = vcvtq_u32_f32(rounded);

+ uint16x4_t fix8_16 = vqmovn_u32(fix8_32);

+ uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));

SkPMColor c = vget_lane_u32((uint32x2_t)fix8, 0);

SkPMColorAssert(c);

return c;

}

-// TODO: we should be able to beat these loops on all three methods.

-inline void SkPMFloat::From4PMColors(const SkPMColor colors[4],

- SkPMFloat* a, SkPMFloat* b, SkPMFloat* c, SkPMFloat* d) {

- *a = FromPMColor(colors[0]);

- *b = FromPMColor(colors[1]);

- *c = FromPMColor(colors[2]);

- *d = FromPMColor(colors[3]);

-inline void SkPMFloat::RoundTo4PMColors(

- const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,

- SkPMColor colors[4]) {

- colors[0] = a.round();

- colors[1] = b.round();

- colors[2] = c.round();

- colors[3] = d.round();

-inline void SkPMFloat::RoundClampTo4PMColors(

- const SkPMFloat& a, const SkPMFloat& b, const SkPMFloat&c, const SkPMFloat& d,

- SkPMColor colors[4]) {

- colors[0] = a.roundClamp();

- colors[1] = b.roundClamp();

- colors[2] = c.roundClamp();

- colors[3] = d.roundClamp();

} // namespace

« no previous file with comments | « src/opts/SkPMFloat_SSSE3.h ('k') | src/opts/SkPMFloat_none.h » ('j') | no next file with comments »