OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
10 | 10 |
(...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
223 || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3); | 223 || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3); |
224 } | 224 } |
225 | 225 |
226 SkNx thenElse(const SkNx& t, const SkNx& e) const { | 226 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
227 return vbslq_f32(vreinterpretq_u32_f32(fVec), t.fVec, e.fVec); | 227 return vbslq_f32(vreinterpretq_u32_f32(fVec), t.fVec, e.fVec); |
228 } | 228 } |
229 | 229 |
230 float32x4_t fVec; | 230 float32x4_t fVec; |
231 }; | 231 }; |
232 | 232 |
233 // It's possible that for our current use cases, representing this as | |
234 // half a uint16x8_t might be better than representing it as a uint16x4_t. | |
235 // It'd make conversion to Sk4b one step simpler. | |
236 template <> | |
237 class SkNx<4, uint16_t> { | |
238 public: | |
239 SkNx(const uint16x4_t& vec) : fVec(vec) {} | |
240 | |
241 SkNx() {} | |
242 SkNx(uint16_t val) : fVec(vdup_n_u16(val)) {} | |
243 static SkNx Load(const uint16_t vals[4]) { return vld1_u16(vals); } | |
244 | |
245 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) { | |
246 fVec = (uint16x4_t) { a,b,c,d }; | |
247 } | |
248 | |
249 void store(uint16_t vals[4]) const { vst1_u16(vals, fVec); } | |
250 | |
251 SkNx operator + (const SkNx& o) const { return vadd_u16(fVec, o.fVec); } | |
252 SkNx operator - (const SkNx& o) const { return vsub_u16(fVec, o.fVec); } | |
253 SkNx operator * (const SkNx& o) const { return vmul_u16(fVec, o.fVec); } | |
254 | |
255 SkNx operator << (int bits) const { SHIFT16(vshl_n_u16, fVec, bits); } | |
256 SkNx operator >> (int bits) const { SHIFT16(vshr_n_u16, fVec, bits); } | |
257 | |
258 static SkNx Min(const SkNx& a, const SkNx& b) { return vmin_u16(a.fVec, b.fV
ec); } | |
259 | |
260 template <int k> uint16_t kth() const { | |
261 SkASSERT(0 <= k && k < 4); | |
262 return vget_lane_u16(fVec, k&3); | |
263 } | |
264 | |
265 SkNx thenElse(const SkNx& t, const SkNx& e) const { | |
266 return vbsl_u16(fVec, t.fVec, e.fVec); | |
267 } | |
268 | |
269 uint16x4_t fVec; | |
270 }; | |
271 | |
272 template <> | 233 template <> |
273 class SkNx<8, uint16_t> { | 234 class SkNx<8, uint16_t> { |
274 public: | 235 public: |
275 SkNx(const uint16x8_t& vec) : fVec(vec) {} | 236 SkNx(const uint16x8_t& vec) : fVec(vec) {} |
276 | 237 |
277 SkNx() {} | 238 SkNx() {} |
278 SkNx(uint16_t val) : fVec(vdupq_n_u16(val)) {} | 239 SkNx(uint16_t val) : fVec(vdupq_n_u16(val)) {} |
279 static SkNx Load(const uint16_t vals[8]) { return vld1q_u16(vals); } | 240 static SkNx Load(const uint16_t vals[8]) { return vld1q_u16(vals); } |
280 | 241 |
281 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d, | 242 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d, |
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
383 } | 344 } |
384 | 345 |
385 static inline void Sk4f_ToBytes(uint8_t bytes[16], | 346 static inline void Sk4f_ToBytes(uint8_t bytes[16], |
386 const Sk4f& a, const Sk4f& b, const Sk4f& c, con
st Sk4f& d) { | 347 const Sk4f& a, const Sk4f& b, const Sk4f& c, con
st Sk4f& d) { |
387 vst1q_u8(bytes, vuzpq_u8(vuzpq_u8((uint8x16_t)vcvtq_u32_f32(a.fVec), | 348 vst1q_u8(bytes, vuzpq_u8(vuzpq_u8((uint8x16_t)vcvtq_u32_f32(a.fVec), |
388 (uint8x16_t)vcvtq_u32_f32(b.fVec)).val[0], | 349 (uint8x16_t)vcvtq_u32_f32(b.fVec)).val[0], |
389 vuzpq_u8((uint8x16_t)vcvtq_u32_f32(c.fVec), | 350 vuzpq_u8((uint8x16_t)vcvtq_u32_f32(c.fVec), |
390 (uint8x16_t)vcvtq_u32_f32(d.fVec)).val[0])
.val[0]); | 351 (uint8x16_t)vcvtq_u32_f32(d.fVec)).val[0])
.val[0]); |
391 } | 352 } |
392 | 353 |
393 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t, 4>(const Sk4b& src) { | |
394 return vget_low_u16(vmovl_u8(src.fVec)); | |
395 } | |
396 | |
397 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t, 4>(const Sk4h& src) { | |
398 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); | |
399 } | |
400 | |
401 } // namespace | 354 } // namespace |
402 | 355 |
403 #endif//SkNx_neon_DEFINED | 356 #endif//SkNx_neon_DEFINED |
OLD | NEW |