OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
10 | 10 |
(...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
174 SkASSERT(0 <= k && k < 2); | 174 SkASSERT(0 <= k && k < 2); |
175 return vgetq_lane_f64(fVec, k&1); | 175 return vgetq_lane_f64(fVec, k&1); |
176 } | 176 } |
177 | 177 |
178 private: | 178 private: |
179 float64x2_t fVec; | 179 float64x2_t fVec; |
180 }; | 180 }; |
181 #endif//defined(SK_CPU_ARM64) | 181 #endif//defined(SK_CPU_ARM64) |
182 | 182 |
183 template <> | 183 template <> |
template <>
class SkNi<4, int> {
public:
    // Wrap a raw NEON vector.
    SkNi(const int32x4_t& vec) : fVec(vec) {}

    // Default ctor intentionally leaves fVec uninitialized, matching the SkNf
    // specializations in this file.
    SkNi() {}
    // Broadcast val into all four lanes.
    explicit SkNi(int val) : fVec(vdupq_n_s32(val)) {}
    // Load four ints from (4-byte aligned is not required) memory.
    static SkNi Load(const int vals[4]) { return vld1q_s32(vals); }
    SkNi(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; }

    // Store the four lanes back out to memory.
    void store(int vals[4]) const { vst1q_s32(vals, fVec); }

    SkNi operator + (const SkNi& o) const { return vaddq_s32(fVec, o.fVec); }
    SkNi operator - (const SkNi& o) const { return vsubq_s32(fVec, o.fVec); }
    SkNi operator * (const SkNi& o) const { return vmulq_s32(fVec, o.fVec); }

    // Well, this is absurd.  The NEON shift intrinsics require compile-time
    // constant shift amounts, so we dispatch the runtime value with a switch.
    // bits == 0 (or any value > 31) falls past the switch and returns fVec
    // unshifted.
#define SHIFT(op, v, bits) switch(bits) { \
    case  1: return op(v,  1);  case  2: return op(v,  2);  case  3: return op(v,  3); \
    case  4: return op(v,  4);  case  5: return op(v,  5);  case  6: return op(v,  6); \
    case  7: return op(v,  7);  case  8: return op(v,  8);  case  9: return op(v,  9); \
    case 10: return op(v, 10);  case 11: return op(v, 11);  case 12: return op(v, 12); \
    case 13: return op(v, 13);  case 14: return op(v, 14);  case 15: return op(v, 15); \
    case 16: return op(v, 16);  case 17: return op(v, 17);  case 18: return op(v, 18); \
    case 19: return op(v, 19);  case 20: return op(v, 20);  case 21: return op(v, 21); \
    case 22: return op(v, 22);  case 23: return op(v, 23);  case 24: return op(v, 24); \
    case 25: return op(v, 25);  case 26: return op(v, 26);  case 27: return op(v, 27); \
    case 28: return op(v, 28);  case 29: return op(v, 29);  case 30: return op(v, 30); \
    case 31: return op(v, 31); } return fVec

    SkNi operator << (int bits) const { SHIFT(vshlq_n_s32, fVec, bits); }
    // vshrq_n_s32 is an arithmetic (sign-extending) right shift.
    SkNi operator >> (int bits) const { SHIFT(vshrq_n_s32, fVec, bits); }
#undef SHIFT

    // Extract lane k as an int.  Mask with &3 so the lane constant handed to
    // vgetq_lane_s32 always stays in its valid 0..3 immediate range, matching
    // SkNf<4, float>::kth() (k&3) and SkNf<2, double>::kth() (k&1); the
    // SkASSERT still flags an out-of-range k in debug builds.
    template <int k> int kth() const {
        SkASSERT(0 <= k && k < 4);
        return vgetq_lane_s32(fVec, k&3);
    }
protected:
    int32x4_t fVec;
};
224 | |
225 template <> | |
226 class SkNf<4, float> { | 184 class SkNf<4, float> { |
227 typedef SkNb<4, 4> Nb; | 185 typedef SkNb<4, 4> Nb; |
228 public: | 186 public: |
229 SkNf(float32x4_t vec) : fVec(vec) {} | 187 SkNf(float32x4_t vec) : fVec(vec) {} |
230 | 188 |
231 SkNf() {} | 189 SkNf() {} |
232 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} | 190 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} |
233 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } | 191 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } |
234 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d
}; } | 192 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d
}; } |
235 | 193 |
236 void store(float vals[4]) const { vst1q_f32(vals, fVec); } | 194 void store(float vals[4]) const { vst1q_f32(vals, fVec); } |
237 | 195 |
238 SkNi<4, int> castTrunc() const { return vcvtq_s32_f32(fVec); } | |
239 | |
240 SkNf approxInvert() const { | 196 SkNf approxInvert() const { |
241 float32x4_t est0 = vrecpeq_f32(fVec), | 197 float32x4_t est0 = vrecpeq_f32(fVec), |
242 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); | 198 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); |
243 return est1; | 199 return est1; |
244 } | 200 } |
245 SkNf invert() const { | 201 SkNf invert() const { |
246 float32x4_t est1 = this->approxInvert().fVec, | 202 float32x4_t est1 = this->approxInvert().fVec, |
247 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1); | 203 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1); |
248 return est2; | 204 return est2; |
249 } | 205 } |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
289 template <int k> float kth() const { | 245 template <int k> float kth() const { |
290 SkASSERT(0 <= k && k < 4); | 246 SkASSERT(0 <= k && k < 4); |
291 return vgetq_lane_f32(fVec, k&3); | 247 return vgetq_lane_f32(fVec, k&3); |
292 } | 248 } |
293 | 249 |
294 protected: | 250 protected: |
295 float32x4_t fVec; | 251 float32x4_t fVec; |
296 }; | 252 }; |
297 | 253 |
298 #endif//SkNx_neon_DEFINED | 254 #endif//SkNx_neon_DEFINED |
OLD | NEW |