Index: src/opts/SkNx_neon.h
diff --git a/src/opts/SkNx_neon.h b/src/opts/SkNx_neon.h
index a4b7cd1a731045380619052d9e5b945f3779fdd1..2cb8eb348d448e1aa9af4685c9739fb9123f46e7 100644
--- a/src/opts/SkNx_neon.h
+++ b/src/opts/SkNx_neon.h
@@ -10,8 +10,6 @@
 #define SKNX_IS_FAST
-namespace { // See SkNx.h
-
 // Well, this is absurd. The shifts require compile-time constant arguments.
 #define SHIFT8(op, v, bits) switch(bits) { \
@@ -98,10 +96,12 @@ public:
 #endif
     }
-    template <int k> float kth() const {
+    float operator[](int k) const {
         SkASSERT(0 <= k && k < 2);
-        return vget_lane_f32(fVec, k&1);
+        union { float32x2_t v; float fs[2]; } pun = {fVec};
+        return pun.fs[k&1];
     }
+    template <int k> float kth() const { return (*this)[k]; }
     bool allTrue() const {
         auto v = vreinterpret_u32_f32(fVec);
@@ -116,33 +116,6 @@ public:
 };
 
 template <>
-class SkNx<4, int> {
-public:
-    SkNx(const int32x4_t& vec) : fVec(vec) {}
-
-    SkNx() {}
-    SkNx(int val) : fVec(vdupq_n_s32(val)) {}
-    static SkNx Load(const void* ptr) { return vld1q_s32((const int*)ptr); }
-    SkNx(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; }
-
-    void store(void* ptr) const { vst1q_s32((int*)ptr, fVec); }
-
-    SkNx operator + (const SkNx& o) const { return vaddq_s32(fVec, o.fVec); }
-    SkNx operator - (const SkNx& o) const { return vsubq_s32(fVec, o.fVec); }
-    SkNx operator * (const SkNx& o) const { return vmulq_s32(fVec, o.fVec); }
-
-    SkNx operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); }
-    SkNx operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); }
-
-    template <int k> int kth() const {
-        SkASSERT(0 <= k && k < 4);
-        return vgetq_lane_s32(fVec, k&3);
-    }
-
-    int32x4_t fVec;
-};
-
-template <>
 class SkNx<4, float> {
 public:
     SkNx(float32x4_t vec) : fVec(vec) {}
@@ -207,10 +180,12 @@ public:
 #endif
     }
-    template <int k> float kth() const {
+    float operator[](int k) const {
         SkASSERT(0 <= k && k < 4);
-        return vgetq_lane_f32(fVec, k&3);
+        union { float32x4_t v; float fs[4]; } pun = {fVec};
+        return pun.fs[k&3];
     }
+    template <int k> float kth() const { return (*this)[k]; }
     bool allTrue() const {
         auto v = vreinterpretq_u32_f32(fVec);
@@ -257,10 +232,12 @@ public:
     static SkNx Min(const SkNx& a, const SkNx& b) { return vmin_u16(a.fVec, b.fVec); }
-    template <int k> uint16_t kth() const {
+    uint16_t operator[](int k) const {
         SkASSERT(0 <= k && k < 4);
-        return vget_lane_u16(fVec, k&3);
+        union { uint16x4_t v; uint16_t us[4]; } pun = {fVec};
+        return pun.us[k&3];
     }
+    template <int k> uint16_t kth() const { return (*this)[k]; }
     SkNx thenElse(const SkNx& t, const SkNx& e) const {
         return vbsl_u16(fVec, t.fVec, e.fVec);
@@ -294,10 +271,12 @@ public:
     static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u16(a.fVec, b.fVec); }
-    template <int k> uint16_t kth() const {
+    uint16_t operator[](int k) const {
         SkASSERT(0 <= k && k < 8);
-        return vgetq_lane_u16(fVec, k&7);
+        union { uint16x8_t v; uint16_t us[8]; } pun = {fVec};
+        return pun.us[k&7];
     }
+    template <int k> uint16_t kth() const { return (*this)[k]; }
     SkNx thenElse(const SkNx& t, const SkNx& e) const {
         return vbslq_u16(fVec, t.fVec, e.fVec);
@@ -350,10 +329,12 @@ public:
     static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u8(a.fVec, b.fVec); }
     SkNx operator < (const SkNx& o) const { return vcltq_u8(fVec, o.fVec); }
-    template <int k> uint8_t kth() const {
-        SkASSERT(0 <= k && k < 15);
-        return vgetq_lane_u8(fVec, k&16);
+    uint8_t operator[](int k) const {
+        SkASSERT(0 <= k && k < 16);
+        union { uint8x16_t v; uint8_t us[16]; } pun = {fVec};
+        return pun.us[k&15];
     }
+    template <int k> uint8_t kth() const { return (*this)[k]; }
     SkNx thenElse(const SkNx& t, const SkNx& e) const {
         return vbslq_u8(fVec, t.fVec, e.fVec);
@@ -366,17 +347,13 @@ public:
 #undef SHIFT16
 #undef SHIFT8
-template<> inline Sk4i SkNx_cast<int, float, 4>(const Sk4f& src) {
-    return vcvtq_s32_f32(src.fVec);
-}
-
-template<> inline Sk4b SkNx_cast<uint8_t, float, 4>(const Sk4f& src) {
+template<> inline Sk4b SkNx_cast<uint8_t, float>(const Sk4f& src) {
     uint32x4_t _32 = vcvtq_u32_f32(src.fVec);
     uint16x4_t _16 = vqmovn_u32(_32);
     return vqmovn_u16(vcombine_u16(_16, _16));
 }
-template<> inline Sk4f SkNx_cast<float, uint8_t, 4>(const Sk4b& src) {
+template<> inline Sk4f SkNx_cast<float, uint8_t>(const Sk4b& src) {
     uint16x8_t _16 = vmovl_u8 (src.fVec) ;
     uint32x4_t _32 = vmovl_u16(vget_low_u16(_16));
     return vcvtq_f32_u32(_32);
@@ -390,14 +367,12 @@ static inline void Sk4f_ToBytes(uint8_t bytes[16],
                                          (uint8x16_t)vcvtq_u32_f32(d.fVec)).val[0]).val[0]);
 }
-template<> inline Sk4h SkNx_cast<uint16_t, uint8_t, 4>(const Sk4b& src) {
+template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) {
     return vget_low_u16(vmovl_u8(src.fVec));
 }
-template<> inline Sk4b SkNx_cast<uint8_t, uint16_t, 4>(const Sk4h& src) {
+template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) {
     return vmovn_u16(vcombine_u16(src.fVec, src.fVec));
 }
-
-} // namespace
-
 #endif//SkNx_neon_DEFINED
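
Note (not part of the patch): NEON's vget_lane/vgetq_lane intrinsics, like the
shift intrinsics wrapped by the SHIFT* macros above, require a compile-time
constant lane argument, which is why the new runtime operator[](int) reads the
vector back through a union instead of calling the lane intrinsics. A minimal
standalone sketch of that type pun, assuming a GCC/Clang ARM target with
<arm_neon.h>; the helper name lane_f32 is mine, for illustration only:

    #include <arm_neon.h>
    #include <cassert>

    // Read lane k of a float32x4_t at runtime. vgetq_lane_f32(v, k) would not
    // compile here because k is not a constant expression; storing the vector
    // into a union and indexing the array member sidesteps that restriction
    // (GCC and Clang define the behavior of this union read).
    static float lane_f32(float32x4_t v, int k) {
        assert(0 <= k && k < 4);
        union { float32x4_t vec; float fs[4]; } pun = {v};
        return pun.fs[k & 3];  // mask keeps the index in range, as in the patch
    }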