| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_DEFINED | 8 #ifndef SkNx_DEFINED |
| 9 #define SkNx_DEFINED | 9 #define SkNx_DEFINED |
| 10 | 10 |
| 11 | 11 |
| 12 #define SKNX_NO_SIMDx // Remove the x to disable SIMD for all SkNx types. | 12 //#define SKNX_NO_SIMD |
| 13 | |
| 14 | 13 |
| 15 #include "SkScalar.h" | 14 #include "SkScalar.h" |
| 16 #include "SkTypes.h" | 15 #include "SkTypes.h" |
| 17 #include <math.h> | 16 #include <math.h> |
| 18 #define REQUIRE(x) static_assert(x, #x) | 17 #define REQUIRE(x) static_assert(x, #x) |
| 19 | 18 |
| 20 // This file may be included multiple times by .cpp files with different flags,
leading | 19 // This file may be included multiple times by .cpp files with different flags,
leading |
| 21 // to different definitions. Usually that doesn't matter because it's all inlin
ed, but | 20 // to different definitions. Usually that doesn't matter because it's all inlin
ed, but |
| 22 // in Debug modes the compilers may not inline everything. So wrap everything i
n an | 21 // in Debug modes the compilers may not inline everything. So wrap everything i
n an |
| 23 // anonymous namespace to give each includer their own silo of this code (or the
linker | 22 // anonymous namespace to give each includer their own silo of this code (or the
linker |
| 24 // will probably pick one randomly for us, which is rarely correct). | 23 // will probably pick one randomly for us, which is rarely correct). |
| 25 namespace { | 24 namespace { |
| 26 | 25 |
| 27 // The default implementations just fall back on a pair of size N/2. | 26 // The default implementations just fall back on a pair of size N/2. |
| 28 | 27 // These support the union of operations we might do to ints and floats, but |
| 28 // platform specializations might support fewer (e.g. no float <<, no int /). |
| 29 template <int N, typename T> | 29 template <int N, typename T> |
| 30 class SkNx { | 30 class SkNx { |
| 31 public: | 31 public: |
| 32 SkNx() {} | 32 SkNx() {} |
| 33 SkNx(const SkNx<N/2, T>& lo, const SkNx<N/2, T>& hi) : fLo(lo), fHi(hi) {} | 33 SkNx(const SkNx<N/2, T>& lo, const SkNx<N/2, T>& hi) : fLo(lo), fHi(hi) {} |
| 34 SkNx(T val) : fLo(val), fHi(val) {} | 34 SkNx(T val) : fLo(val), fHi(val) {} |
| 35 static SkNx Load(const T vals[N]) { | 35 static SkNx Load(const T vals[N]) { |
| 36 return SkNx(SkNx<N/2,T>::Load(vals), SkNx<N/2,T>::Load(vals+N/2)); | 36 return SkNx(SkNx<N/2,T>::Load(vals), SkNx<N/2,T>::Load(vals+N/2)); |
| 37 } | 37 } |
| 38 | 38 |
| 39 SkNx(T a, T b) : fLo(a), fHi(b) {
REQUIRE(N==2); } | 39 SkNx(T a, T b) : fLo(a), fHi(b) {
REQUIRE(N==2); } |
| 40 SkNx(T a, T b, T c, T d) : fLo(a,b), fHi(c,d) {
REQUIRE(N==4); } | 40 SkNx(T a, T b, T c, T d) : fLo(a,b), fHi(c,d) {
REQUIRE(N==4); } |
| 41 SkNx(T a, T b, T c, T d, T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) {
REQUIRE(N==8); } | 41 SkNx(T a, T b, T c, T d, T e, T f, T g, T h) : fLo(a,b,c,d), fHi(e,f,g,h) {
REQUIRE(N==8); } |
| 42 SkNx(T a, T b, T c, T d, T e, T f, T g, T h, | 42 SkNx(T a, T b, T c, T d, T e, T f, T g, T h, |
| 43 T i, T j, T k, T l, T m, T n, T o, T p) | 43 T i, T j, T k, T l, T m, T n, T o, T p) |
| 44 : fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) { REQUIRE(N==16); } | 44 : fLo(a,b,c,d, e,f,g,h), fHi(i,j,k,l, m,n,o,p) { REQUIRE(N==16); } |
| 45 | 45 |
| 46 void store(T vals[N]) const { | 46 void store(T vals[N]) const { |
| 47 fLo.store(vals); | 47 fLo.store(vals); |
| 48 fHi.store(vals+N/2); | 48 fHi.store(vals+N/2); |
| 49 } | 49 } |
| 50 | 50 |
| 51 SkNx saturatedAdd(const SkNx& o) const { | 51 SkNx saturatedAdd(const SkNx& o) const { |
| 52 return SkNx(fLo.saturatedAdd(o.fLo), fHi.saturatedAdd(o.fHi)); | 52 return SkNx(fLo.saturatedAdd(o.fLo), fHi.saturatedAdd(o.fHi)); |
| 53 } | 53 } |
| 54 | 54 |
| 55 SkNx operator + (const SkNx& o) const { return SkNx(fLo + o.fLo, fHi + o.fHi
); } | 55 SkNx operator + (const SkNx& o) const { return SkNx(fLo + o.fLo, fHi + o.fHi
); } |
| 56 SkNx operator - (const SkNx& o) const { return SkNx(fLo - o.fLo, fHi - o.fHi
); } | 56 SkNx operator - (const SkNx& o) const { return SkNx(fLo - o.fLo, fHi - o.fHi
); } |
| 57 SkNx operator * (const SkNx& o) const { return SkNx(fLo * o.fLo, fHi * o.fHi
); } | 57 SkNx operator * (const SkNx& o) const { return SkNx(fLo * o.fLo, fHi * o.fHi
); } |
| 58 | |
| 59 SkNx operator << (int bits) const { return SkNx(fLo << bits, fHi << bits); } | |
| 60 SkNx operator >> (int bits) const { return SkNx(fLo >> bits, fHi >> bits); } | |
| 61 | |
| 62 static SkNx Min(const SkNx& a, const SkNx& b) { | |
| 63 return SkNx(SkNx<N/2, T>::Min(a.fLo, b.fLo), SkNx<N/2, T>::Min(a.fHi, b.
fHi)); | |
| 64 } | |
| 65 SkNx operator < (const SkNx& o) const { return SkNx(fLo < o.fLo, fHi < o.fHi
); } | |
| 66 | |
| 67 template <int k> T kth() const { | |
| 68 SkASSERT(0 <= k && k < N); | |
| 69 return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>(); | |
| 70 } | |
| 71 | |
| 72 bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); } | |
| 73 bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); } | |
| 74 SkNx thenElse(const SkNx& t, const SkNx& e) const { | |
| 75 return SkNx(fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi)); | |
| 76 } | |
| 77 | |
| 78 protected: | |
| 79 REQUIRE(0 == (N & (N-1))); | |
| 80 | |
| 81 SkNx<N/2, T> fLo, fHi; | |
| 82 }; | |
| 83 | |
| 84 template <int N> | |
| 85 class SkNx<N,float> { | |
| 86 public: | |
| 87 SkNx() {} | |
| 88 SkNx(float val) : fLo(val), fHi(val) {} | |
| 89 static SkNx Load(const float vals[N]) { | |
| 90 return SkNx(SkNx<N/2, float>::Load(vals), SkNx<N/2, float>::Load(vals+N/
2)); | |
| 91 } | |
| 92 // FromBytes() and toBytes() specializations may assume their argument is N-
byte aligned. | |
| 93 // E.g. Sk4f::FromBytes() may assume it's reading from a 4-byte-aligned poin
ter. | |
| 94 // Converts [0,255] bytes to [0.0, 255.0] floats. | |
| 95 static SkNx FromBytes(const uint8_t bytes[N]) { | |
| 96 return SkNx(SkNx<N/2, float>::FromBytes(bytes), SkNx<N/2, float>::FromBy
tes(bytes+N/2)); | |
| 97 } | |
| 98 | |
| 99 SkNx(float a, float b) : fLo(a), fHi(b) { REQUIRE(N==2
); } | |
| 100 SkNx(float a, float b, float c, float d) : fLo(a,b), fHi(c,d) { REQUIRE(N==4
); } | |
| 101 SkNx(float a, float b, float c, float d, float e, float f, float g, float h) | |
| 102 : fLo(a,b,c,d) | |
| 103 , fHi(e,f,g,h) { REQUIRE(N==8); } | |
| 104 | |
| 105 void store(float vals[N]) const { | |
| 106 fLo.store(vals); | |
| 107 fHi.store(vals+N/2); | |
| 108 } | |
| 109 // Please see note on FromBytes(). | |
| 110 // Clamps to [0.0,255.0] floats and truncates to [0,255] bytes. | |
| 111 void toBytes(uint8_t bytes[N]) const { | |
| 112 fLo.toBytes(bytes); | |
| 113 fHi.toBytes(bytes+N/2); | |
| 114 } | |
| 115 | |
| 116 // Some implementations can do this faster. | |
| 117 static void ToBytes(uint8_t bytes[4*N], | |
| 118 const SkNx& a, const SkNx& b, const SkNx& c, const SkNx&
d) { | |
| 119 a.toBytes(bytes+0*N); | |
| 120 b.toBytes(bytes+1*N); | |
| 121 c.toBytes(bytes+2*N); | |
| 122 d.toBytes(bytes+3*N); | |
| 123 } | |
| 124 | |
| 125 SkNx operator + (const SkNx& o) const { return SkNx(fLo + o.fLo, fHi + o.fHi
); } | |
| 126 SkNx operator - (const SkNx& o) const { return SkNx(fLo - o.fLo, fHi - o.fHi
); } | |
| 127 SkNx operator * (const SkNx& o) const { return SkNx(fLo * o.fLo, fHi * o.fHi
); } | |
| 128 SkNx operator / (const SkNx& o) const { return SkNx(fLo / o.fLo, fHi / o.fHi
); } | 58 SkNx operator / (const SkNx& o) const { return SkNx(fLo / o.fLo, fHi / o.fHi
); } |
| 129 | 59 |
| 60 SkNx operator << (int bits) const { return SkNx(fLo << bits, fHi << bits); } |
| 61 SkNx operator >> (int bits) const { return SkNx(fLo >> bits, fHi >> bits); } |
| 62 |
| 130 SkNx operator == (const SkNx& o) const { return SkNx(fLo == o.fLo, fHi == o.
fHi); } | 63 SkNx operator == (const SkNx& o) const { return SkNx(fLo == o.fLo, fHi == o.
fHi); } |
| 131 SkNx operator != (const SkNx& o) const { return SkNx(fLo != o.fLo, fHi != o.
fHi); } | 64 SkNx operator != (const SkNx& o) const { return SkNx(fLo != o.fLo, fHi != o.
fHi); } |
| 132 SkNx operator < (const SkNx& o) const { return SkNx(fLo < o.fLo, fHi < o.
fHi); } | 65 SkNx operator < (const SkNx& o) const { return SkNx(fLo < o.fLo, fHi < o.
fHi); } |
| 133 SkNx operator > (const SkNx& o) const { return SkNx(fLo > o.fLo, fHi > o.
fHi); } | 66 SkNx operator > (const SkNx& o) const { return SkNx(fLo > o.fLo, fHi > o.
fHi); } |
| 134 SkNx operator <= (const SkNx& o) const { return SkNx(fLo <= o.fLo, fHi <= o.
fHi); } | 67 SkNx operator <= (const SkNx& o) const { return SkNx(fLo <= o.fLo, fHi <= o.
fHi); } |
| 135 SkNx operator >= (const SkNx& o) const { return SkNx(fLo >= o.fLo, fHi >= o.
fHi); } | 68 SkNx operator >= (const SkNx& o) const { return SkNx(fLo >= o.fLo, fHi >= o.
fHi); } |
| 136 | 69 |
| 137 static SkNx Min(const SkNx& l, const SkNx& r) { | 70 static SkNx Min(const SkNx& a, const SkNx& b) { |
| 138 return SkNx(SkNx<N/2, float>::Min(l.fLo, r.fLo), SkNx<N/2, float>::Min(l
.fHi, r.fHi)); | 71 return SkNx(SkNx<N/2, T>::Min(a.fLo, b.fLo), SkNx<N/2, T>::Min(a.fHi, b.
fHi)); |
| 139 } | 72 } |
| 140 static SkNx Max(const SkNx& l, const SkNx& r) { | 73 static SkNx Max(const SkNx& a, const SkNx& b) { |
| 141 return SkNx(SkNx<N/2, float>::Max(l.fLo, r.fLo), SkNx<N/2, float>::Max(l
.fHi, r.fHi)); | 74 return SkNx(SkNx<N/2, T>::Max(a.fLo, b.fLo), SkNx<N/2, T>::Max(a.fHi, b.
fHi)); |
| 142 } | 75 } |
| 143 | 76 |
| 144 SkNx sqrt() const { return SkNx(fLo. sqrt(), fHi. sqrt()); } | 77 SkNx sqrt() const { return SkNx(fLo.sqrt(), fHi.sqrt()); } |
| 145 | |
| 146 // Generally, increasing precision, increasing cost. | 78 // Generally, increasing precision, increasing cost. |
| 147 SkNx rsqrt0() const { return SkNx(fLo.rsqrt0(), fHi.rsqrt0()); } | 79 SkNx rsqrt0() const { return SkNx(fLo.rsqrt0(), fHi.rsqrt0()); } |
| 148 SkNx rsqrt1() const { return SkNx(fLo.rsqrt1(), fHi.rsqrt1()); } | 80 SkNx rsqrt1() const { return SkNx(fLo.rsqrt1(), fHi.rsqrt1()); } |
| 149 SkNx rsqrt2() const { return SkNx(fLo.rsqrt2(), fHi.rsqrt2()); } | 81 SkNx rsqrt2() const { return SkNx(fLo.rsqrt2(), fHi.rsqrt2()); } |
| 150 | 82 |
| 151 SkNx invert() const { return SkNx(fLo. invert(), fHi. invert
()); } | 83 SkNx invert() const { return SkNx(fLo. invert(), fHi. invert
()); } |
| 152 SkNx approxInvert() const { return SkNx(fLo.approxInvert(), fHi.approxInvert
()); } | 84 SkNx approxInvert() const { return SkNx(fLo.approxInvert(), fHi.approxInvert
()); } |
| 153 | 85 |
| 154 template <int k> float kth() const { | 86 template <int k> T kth() const { |
| 155 SkASSERT(0 <= k && k < N); | 87 SkASSERT(0 <= k && k < N); |
| 156 return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>(); | 88 return k < N/2 ? fLo.template kth<k>() : fHi.template kth<k-N/2>(); |
| 157 } | 89 } |
| 158 | 90 |
| 159 bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); } | 91 bool allTrue() const { return fLo.allTrue() && fHi.allTrue(); } |
| 160 bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); } | 92 bool anyTrue() const { return fLo.anyTrue() || fHi.anyTrue(); } |
| 161 SkNx thenElse(const SkNx& t, const SkNx& e) const { | 93 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
| 162 return SkNx(fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi)); | 94 return SkNx(fLo.thenElse(t.fLo, e.fLo), fHi.thenElse(t.fHi, e.fHi)); |
| 163 } | 95 } |
| 164 | 96 |
| 165 protected: | 97 protected: |
| 166 REQUIRE(0 == (N & (N-1))); | 98 REQUIRE(0 == (N & (N-1))); |
| 167 SkNx(const SkNx<N/2, float>& lo, const SkNx<N/2, float>& hi) : fLo(lo), fHi(
hi) {} | |
| 168 | 99 |
| 169 SkNx<N/2, float> fLo, fHi; | 100 SkNx<N/2, T> fLo, fHi; |
| 170 }; | 101 }; |
| 171 | 102 |
| 172 | |
| 173 // Bottom out the default implementations with scalars when nothing's been speci
alized. | 103 // Bottom out the default implementations with scalars when nothing's been speci
alized. |
| 174 | |
| 175 template <typename T> | 104 template <typename T> |
| 176 class SkNx<1,T> { | 105 class SkNx<1,T> { |
| 177 public: | 106 public: |
| 178 SkNx() {} | 107 SkNx() {} |
| 179 SkNx(T val) : fVal(val) {} | 108 SkNx(T val) : fVal(val) {} |
| 180 static SkNx Load(const T vals[1]) { return SkNx(vals[0]); } | 109 static SkNx Load(const T vals[1]) { return SkNx(vals[0]); } |
| 181 | 110 |
| 182 void store(T vals[1]) const { vals[0] = fVal; } | 111 void store(T vals[1]) const { vals[0] = fVal; } |
| 183 | 112 |
| 184 SkNx saturatedAdd(const SkNx& o) const { | 113 SkNx saturatedAdd(const SkNx& o) const { |
| 185 SkASSERT((T)(~0) > 0); // TODO: support signed T | 114 SkASSERT((T)(~0) > 0); // TODO: support signed T |
| 186 T sum = fVal + o.fVal; | 115 T sum = fVal + o.fVal; |
| 187 return SkNx(sum < fVal ? (T)(~0) : sum); | 116 return SkNx(sum < fVal ? (T)(~0) : sum); |
| 188 } | 117 } |
| 189 | 118 |
| 190 SkNx operator + (const SkNx& o) const { return SkNx(fVal + o.fVal); } | 119 SkNx operator + (const SkNx& o) const { return SkNx(fVal + o.fVal); } |
| 191 SkNx operator - (const SkNx& o) const { return SkNx(fVal - o.fVal); } | 120 SkNx operator - (const SkNx& o) const { return SkNx(fVal - o.fVal); } |
| 192 SkNx operator * (const SkNx& o) const { return SkNx(fVal * o.fVal); } | 121 SkNx operator * (const SkNx& o) const { return SkNx(fVal * o.fVal); } |
| 193 | |
| 194 SkNx operator << (int bits) const { return SkNx(fVal << bits); } | |
| 195 SkNx operator >> (int bits) const { return SkNx(fVal >> bits); } | |
| 196 | |
| 197 static SkNx Min(const SkNx& a, const SkNx& b) { return SkNx(SkTMin(a.fVal, b
.fVal)); } | |
| 198 SkNx operator <(const SkNx& o) const { return SkNx(fVal < o.fVal); } | |
| 199 | |
| 200 template <int k> T kth() const { | |
| 201 SkASSERT(0 == k); | |
| 202 return fVal; | |
| 203 } | |
| 204 | |
| 205 bool allTrue() const { return fVal; } | |
| 206 bool anyTrue() const { return fVal; } | |
| 207 SkNx thenElse(const SkNx& t, const SkNx& e) const { return fVal ? t : e; } | |
| 208 | |
| 209 protected: | |
| 210 T fVal; | |
| 211 }; | |
| 212 | |
| 213 template <> | |
| 214 class SkNx<1,float> { | |
| 215 public: | |
| 216 SkNx() {} | |
| 217 SkNx(float val) : fVal(val) {} | |
| 218 static SkNx Load(const float vals[1]) { return SkNx(vals[0]); } | |
| 219 static SkNx FromBytes(const uint8_t bytes[1]) { return SkNx((float)bytes[0])
; } | |
| 220 | |
| 221 void store(float vals[1]) const { vals[0] = fVal; } | |
| 222 void toBytes(uint8_t bytes[1]) const { bytes[0] = (uint8_t)(SkTMin(fVal, 255
.0f)); } | |
| 223 | |
| 224 SkNx operator + (const SkNx& o) const { return SkNx(fVal + o.fVal); } | |
| 225 SkNx operator - (const SkNx& o) const { return SkNx(fVal - o.fVal); } | |
| 226 SkNx operator * (const SkNx& o) const { return SkNx(fVal * o.fVal); } | |
| 227 SkNx operator / (const SkNx& o) const { return SkNx(fVal / o.fVal); } | 122 SkNx operator / (const SkNx& o) const { return SkNx(fVal / o.fVal); } |
| 228 | 123 |
| 124 SkNx operator << (int bits) const { return SkNx(fVal << bits); } |
| 125 SkNx operator >> (int bits) const { return SkNx(fVal >> bits); } |
| 126 |
| 229 SkNx operator == (const SkNx& o) const { return SkNx(fVal == o.fVal); } | 127 SkNx operator == (const SkNx& o) const { return SkNx(fVal == o.fVal); } |
| 230 SkNx operator != (const SkNx& o) const { return SkNx(fVal != o.fVal); } | 128 SkNx operator != (const SkNx& o) const { return SkNx(fVal != o.fVal); } |
| 231 SkNx operator < (const SkNx& o) const { return SkNx(fVal < o.fVal); } | 129 SkNx operator < (const SkNx& o) const { return SkNx(fVal < o.fVal); } |
| 232 SkNx operator > (const SkNx& o) const { return SkNx(fVal > o.fVal); } | 130 SkNx operator > (const SkNx& o) const { return SkNx(fVal > o.fVal); } |
| 233 SkNx operator <= (const SkNx& o) const { return SkNx(fVal <= o.fVal); } | 131 SkNx operator <= (const SkNx& o) const { return SkNx(fVal <= o.fVal); } |
| 234 SkNx operator >= (const SkNx& o) const { return SkNx(fVal >= o.fVal); } | 132 SkNx operator >= (const SkNx& o) const { return SkNx(fVal >= o.fVal); } |
| 235 | 133 |
| 236 static SkNx Min(const SkNx& l, const SkNx& r) { return SkNx(SkTMin(l.fVal, r
.fVal)); } | 134 static SkNx Min(const SkNx& a, const SkNx& b) { return SkNx(SkTMin(a.fVal, b
.fVal)); } |
| 237 static SkNx Max(const SkNx& l, const SkNx& r) { return SkNx(SkTMax(l.fVal, r
.fVal)); } | 135 static SkNx Max(const SkNx& a, const SkNx& b) { return SkNx(SkTMax(a.fVal, b
.fVal)); } |
| 238 | 136 |
| 239 SkNx sqrt() const { return SkNx(sqrtf(fVal)); } | 137 SkNx sqrt () const { return SkNx(Sqrt(fVal)); } |
| 240 SkNx rsqrt0() const { return SkNx(1.0f / sqrtf(fVal)); } | 138 SkNx rsqrt0() const { return this->sqrt().invert(); } |
| 241 SkNx rsqrt1() const { return this->rsqrt0(); } | 139 SkNx rsqrt1() const { return this->rsqrt0(); } |
| 242 SkNx rsqrt2() const { return this->rsqrt1(); } | 140 SkNx rsqrt2() const { return this->rsqrt1(); } |
| 243 | 141 |
| 244 SkNx invert() const { return SkNx(1.0f / fVal); } | 142 SkNx invert() const { return SkNx(1) / SkNx(fVal); } |
| 245 SkNx approxInvert() const { return this->invert(); } | 143 SkNx approxInvert() const { return this->invert(); } |
| 246 | 144 |
| 247 template <int k> float kth() const { | 145 template <int k> T kth() const { |
| 248 SkASSERT(k == 0); | 146 SkASSERT(0 == k); |
| 249 return fVal; | 147 return fVal; |
| 250 } | 148 } |
| 251 | 149 |
| 252 bool allTrue() const { return this->pun() != 0; } | 150 bool allTrue() const { return fVal != 0; } |
| 253 bool anyTrue() const { return this->pun() != 0; } | 151 bool anyTrue() const { return fVal != 0; } |
| 254 SkNx thenElse(const SkNx& t, const SkNx& e) const { return this->pun() ? t :
e; } | 152 SkNx thenElse(const SkNx& t, const SkNx& e) const { return fVal != 0 ? t : e
; } |
| 255 | 153 |
| 256 protected: | 154 protected: |
| 257 uint32_t pun() const { | 155 static double Sqrt(double val) { return ::sqrt (val); } |
| 258 union { float f; uint32_t i; } pun = { fVal }; | 156 static float Sqrt(float val) { return ::sqrtf(val); } |
| 259 return pun.i; | |
| 260 } | |
| 261 | 157 |
| 262 float fVal; | 158 T fVal; |
| 263 }; | 159 }; |
| 264 | 160 |
| 265 // This default implementation can be specialized by ../opts/SkNx_foo.h | 161 // This default implementation can be specialized by ../opts/SkNx_foo.h |
| 266 // if there's a better platform-specific shuffle strategy. | 162 // if there's a better platform-specific shuffle strategy. |
| 267 template <typename Nx, int... Ix> | 163 template <typename Nx, int... Ix> |
| 268 inline Nx SkNx_shuffle_impl(const Nx& src) { return Nx( src.template kth<Ix>()..
. ); } | 164 inline Nx SkNx_shuffle_impl(const Nx& src) { return Nx( src.template kth<Ix>()..
. ); } |
| 269 | 165 |
| 270 // This generic shuffle can be called with 1 or N indices: | 166 // This generic shuffle can be called with 1 or N indices: |
| 271 // Sk4f f(a,b,c,d); | 167 // Sk4f f(a,b,c,d); |
| 272 // SkNx_shuffle<3>(f); // ~~~> Sk4f(d,d,d,d) | 168 // SkNx_shuffle<3>(f); // ~~~> Sk4f(d,d,d,d) |
| (...skipping 16 matching lines...) Expand all Loading... |
| 289 template <> struct MakeSkIntSequence<16> : SkIntSequence<0,1,2,3,4,5,6,7,8,9,10,
11,12,13,14,15>{}; | 185 template <> struct MakeSkIntSequence<16> : SkIntSequence<0,1,2,3,4,5,6,7,8,9,10,
11,12,13,14,15>{}; |
| 290 | 186 |
| 291 // This is the default/fallback implementation for SkNx_cast. Best to specializ
e SkNx_cast! | 187 // This is the default/fallback implementation for SkNx_cast. Best to specializ
e SkNx_cast! |
| 292 template <typename D, typename S, int N, int... Ix> | 188 template <typename D, typename S, int N, int... Ix> |
| 293 SkNx<N,D> SkNx_cast_fallback(const SkNx<N,S>& src, SkIntSequence<Ix...>) { | 189 SkNx<N,D> SkNx_cast_fallback(const SkNx<N,S>& src, SkIntSequence<Ix...>) { |
| 294 return SkNx<N,D>( (D)src.template kth<Ix>()... ); | 190 return SkNx<N,D>( (D)src.template kth<Ix>()... ); |
| 295 } | 191 } |
| 296 | 192 |
| 297 // This is a generic cast between two SkNx with the same number of elements N.
E.g. | 193 // This is a generic cast between two SkNx with the same number of elements N.
E.g. |
| 298 // Sk4b bs = ...; // Load 4 bytes. | 194 // Sk4b bs = ...; // Load 4 bytes. |
| 299 // Sk4f fs = SkNx_cast<float>(bs); // (This will replace SkNf::FromBytes() o
ne day.) | 195 // Sk4f fs = SkNx_cast<float>(bs); // Cast each byte to a float. |
| 300 // Sk4i is = SkNx_cast<int>(fs); // Cast each float to int. | 196 // Sk4i is = SkNx_cast<int>(fs); // Cast each float to int. |
| 301 // This can be specialized in ../opts/SkNx_foo.h if there's a better platform-sp
ecific cast. | 197 // This can be specialized in ../opts/SkNx_foo.h if there's a better platform-sp
ecific cast. |
| 302 template <typename D, typename S, int N> | 198 template <typename D, typename S, int N> |
| 303 SkNx<N,D> SkNx_cast(const SkNx<N,S>& src) { | 199 SkNx<N,D> SkNx_cast(const SkNx<N,S>& src) { |
| 304 return SkNx_cast_fallback<D,S,N>(src, MakeSkIntSequence<N>()); | 200 return SkNx_cast_fallback<D,S,N>(src, MakeSkIntSequence<N>()); |
| 305 } | 201 } |
| 306 | 202 |
| 307 } // namespace | 203 } // namespace |
| 308 | 204 |
| 309 | |
| 310 // Include platform specific specializations if available. | |
| 311 #ifndef SKNX_NO_SIMD | |
| 312 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX | |
| 313 #include "../opts/SkNx_avx.h" | |
| 314 #elif SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | |
| 315 #include "../opts/SkNx_sse.h" | |
| 316 #elif defined(SK_ARM_HAS_NEON) | |
| 317 #include "../opts/SkNx_neon.h" | |
| 318 #endif | |
| 319 #endif | |
| 320 | |
| 321 #undef REQUIRE | |
| 322 | |
| 323 typedef SkNx<2, float> Sk2f; | 205 typedef SkNx<2, float> Sk2f; |
| 324 typedef SkNx<2, float> Sk2s; | 206 typedef SkNx<2, float> Sk2s; |
| 325 typedef SkNx<4, float> Sk4f; | 207 typedef SkNx<4, float> Sk4f; |
| 326 typedef SkNx<4, float> Sk4s; | 208 typedef SkNx<4, float> Sk4s; |
| 327 typedef SkNx<8, float> Sk8f; | 209 typedef SkNx<8, float> Sk8f; |
| 328 typedef SkNx<8, float> Sk8s; | 210 typedef SkNx<8, float> Sk8s; |
| 329 | 211 |
| 330 typedef SkNx<8, uint16_t> Sk8h; | 212 typedef SkNx< 4, uint16_t> Sk4h; |
| 213 typedef SkNx< 8, uint16_t> Sk8h; |
| 331 typedef SkNx<16, uint16_t> Sk16h; | 214 typedef SkNx<16, uint16_t> Sk16h; |
| 215 |
| 216 typedef SkNx< 4, uint8_t> Sk4b; |
| 217 typedef SkNx< 8, uint8_t> Sk8b; |
| 332 typedef SkNx<16, uint8_t> Sk16b; | 218 typedef SkNx<16, uint8_t> Sk16b; |
| 333 | 219 |
| 334 typedef SkNx<4, int> Sk4i; | 220 typedef SkNx<4, int> Sk4i; |
| 335 | 221 |
| 222 // Include platform specific specializations if available. |
| 223 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX |
| 224 #include "../opts/SkNx_avx.h" |
| 225 #elif !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| 226 #include "../opts/SkNx_sse.h" |
| 227 #elif !defined(SKNX_NO_SIMD) && defined(SK_ARM_HAS_NEON) |
| 228 #include "../opts/SkNx_neon.h" |
| 229 #else |
| 230 static inline |
| 231 void Sk4f_ToBytes(uint8_t p[16], const Sk4f& a, const Sk4f& b, const Sk4f& c
, const Sk4f& d) { |
| 232 SkNx_cast<uint8_t>(a).store(p+ 0); |
| 233 SkNx_cast<uint8_t>(b).store(p+ 4); |
| 234 SkNx_cast<uint8_t>(c).store(p+ 8); |
| 235 SkNx_cast<uint8_t>(d).store(p+12); |
| 236 } |
| 237 #endif |
| 238 |
| 239 #undef REQUIRE |
| 240 |
| 241 |
| 336 #endif//SkNx_DEFINED | 242 #endif//SkNx_DEFINED |
| OLD | NEW |