| Index: src/opts/SkNx_sse.h
|
| diff --git a/src/opts/SkNx_sse.h b/src/opts/SkNx_sse.h
|
| index a17d988ee7f29e4b2c6460ad8e4f3a96b151f257..71ecbfd89d9664ae6a4bded0cc44dac4dcd18618 100644
|
| --- a/src/opts/SkNx_sse.h
|
| +++ b/src/opts/SkNx_sse.h
|
| @@ -22,12 +22,12 @@ public:
|
|
|
| SkNx() {}
|
| SkNx(float val) : fVec(_mm_set1_ps(val)) {}
|
| - static SkNx Load(const float vals[2]) {
|
| - return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals));
|
| + static SkNx Load(const void* ptr) {
|
| + return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)ptr));
|
| }
|
| SkNx(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {}
|
|
|
| - void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); }
|
| + void store(void* ptr) const { _mm_storel_pi((__m64*)ptr, fVec); }
|
|
|
| SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }
|
| SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); }
|
| @@ -71,10 +71,10 @@ public:
|
|
|
| SkNx() {}
|
| SkNx(double val) : fVec(_mm_set1_pd(val)) {}
|
| - static SkNx Load(const double vals[2]) { return _mm_loadu_pd(vals); }
|
| + static SkNx Load(const void* ptr) { return _mm_loadu_pd((const double*)ptr); }
|
| SkNx(double a, double b) : fVec(_mm_setr_pd(a,b)) {}
|
|
|
| - void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); }
|
| + void store(void* ptr) const { _mm_storeu_pd((double*)ptr, fVec); }
|
|
|
| SkNx operator + (const SkNx& o) const { return _mm_add_pd(fVec, o.fVec); }
|
| SkNx operator - (const SkNx& o) const { return _mm_sub_pd(fVec, o.fVec); }
|
| @@ -117,10 +117,10 @@ public:
|
|
|
| SkNx() {}
|
| SkNx(int val) : fVec(_mm_set1_epi32(val)) {}
|
| - static SkNx Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*)vals); }
|
| + static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); }
|
| SkNx(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {}
|
|
|
| - void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); }
|
| + void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); }
|
|
|
| SkNx operator + (const SkNx& o) const { return _mm_add_epi32(fVec, o.fVec); }
|
| SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); }
|
| @@ -155,11 +155,11 @@ public:
|
|
|
| SkNx() {}
|
| SkNx(float val) : fVec( _mm_set1_ps(val) ) {}
|
| - static SkNx Load(const float vals[4]) { return _mm_loadu_ps(vals); }
|
| + static SkNx Load(const void* ptr) { return _mm_loadu_ps((const float*)ptr); }
|
|
|
| SkNx(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {}
|
|
|
| - void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); }
|
| + void store(void* ptr) const { _mm_storeu_ps((float*)ptr, fVec); }
|
|
|
| SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }
|
| SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); }
|
| @@ -210,10 +210,10 @@ public:
|
|
|
| SkNx() {}
|
| SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
|
| - static SkNx Load(const uint16_t vals[4]) { return _mm_loadl_epi64((const __m128i*)vals); }
|
| + static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)ptr); }
|
| SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec(_mm_setr_epi16(a,b,c,d,0,0,0,0)) {}
|
|
|
| - void store(uint16_t vals[4]) const { _mm_storel_epi64((__m128i*)vals, fVec); }
|
| + void store(void* ptr) const { _mm_storel_epi64((__m128i*)ptr, fVec); }
|
|
|
| SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec); }
|
| SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec); }
|
| @@ -237,11 +237,11 @@ public:
|
|
|
| SkNx() {}
|
| SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
|
| - static SkNx Load(const uint16_t vals[8]) { return _mm_loadu_si128((const __m128i*)vals); }
|
| + static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); }
|
| SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
|
| uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec(_mm_setr_epi16(a,b,c,d,e,f,g,h)) {}
|
|
|
| - void store(uint16_t vals[8]) const { _mm_storeu_si128((__m128i*)vals, fVec); }
|
| + void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); }
|
|
|
| SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec); }
|
| SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec); }
|
| @@ -278,8 +278,8 @@ public:
|
| SkNx(const __m128i& vec) : fVec(vec) {}
|
|
|
| SkNx() {}
|
| - static SkNx Load(const uint8_t vals[4]) { return _mm_cvtsi32_si128(*(const int*)vals); }
|
| - void store(uint8_t vals[4]) const { *(int*)vals = _mm_cvtsi128_si32(fVec); }
|
| + static SkNx Load(const void* ptr) { return _mm_cvtsi32_si128(*(const int*)ptr); }
|
| + void store(void* ptr) const { *(int*)ptr = _mm_cvtsi128_si32(fVec); }
|
|
|
| // TODO as needed
|
|
|
| @@ -292,8 +292,8 @@ public:
|
| SkNx(const __m128i& vec) : fVec(vec) {}
|
|
|
| SkNx() {}
|
| - static SkNx Load(const uint8_t vals[8]) { return _mm_loadl_epi64((const __m128i*)vals); }
|
| - void store(uint8_t vals[8]) const { _mm_storel_epi64((__m128i*)vals, fVec); }
|
| + static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)ptr); }
|
| + void store(void* ptr) const { _mm_storel_epi64((__m128i*)ptr, fVec); }
|
|
|
| // TODO as needed
|
|
|
| @@ -307,14 +307,14 @@ public:
|
|
|
| SkNx() {}
|
| SkNx(uint8_t val) : fVec(_mm_set1_epi8(val)) {}
|
| - static SkNx Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m128i*)vals); }
|
| + static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); }
|
| SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
|
| uint8_t e, uint8_t f, uint8_t g, uint8_t h,
|
| uint8_t i, uint8_t j, uint8_t k, uint8_t l,
|
| uint8_t m, uint8_t n, uint8_t o, uint8_t p)
|
| : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {}
|
|
|
| - void store(uint8_t vals[16]) const { _mm_storeu_si128((__m128i*)vals, fVec); }
|
| + void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); }
|
|
|
| SkNx saturatedAdd(const SkNx& o) const { return _mm_adds_epu8(fVec, o.fVec); }
|
|
|
|
|