OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
10 | 10 |
(...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
223 || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3); | 223 || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3); |
224 } | 224 } |
225 | 225 |
226 SkNx thenElse(const SkNx& t, const SkNx& e) const { | 226 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
227 return vbslq_f32(vreinterpretq_u32_f32(fVec), t.fVec, e.fVec); | 227 return vbslq_f32(vreinterpretq_u32_f32(fVec), t.fVec, e.fVec); |
228 } | 228 } |
229 | 229 |
230 float32x4_t fVec; | 230 float32x4_t fVec; |
231 }; | 231 }; |
232 | 232 |
| 233 // It's possible that for our current use cases, representing this as |
| 234 // half a uint16x8_t might be better than representing it as a uint16x4_t. |
| 235 // It'd make conversion to Sk4b one step simpler. |
| 236 template <> |
| 237 class SkNx<4, uint16_t> { |
| 238 public: |
| 239 SkNx(const uint16x4_t& vec) : fVec(vec) {} |
| 240 |
| 241 SkNx() {} |
| 242 SkNx(uint16_t val) : fVec(vdup_n_u16(val)) {} |
| 243 static SkNx Load(const uint16_t vals[4]) { return vld1_u16(vals); } |
| 244 |
| 245 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) { |
| 246 fVec = (uint16x4_t) { a,b,c,d }; |
| 247 } |
| 248 |
| 249 void store(uint16_t vals[4]) const { vst1_u16(vals, fVec); } |
| 250 |
| 251 SkNx operator + (const SkNx& o) const { return vadd_u16(fVec, o.fVec); } |
| 252 SkNx operator - (const SkNx& o) const { return vsub_u16(fVec, o.fVec); } |
| 253 SkNx operator * (const SkNx& o) const { return vmul_u16(fVec, o.fVec); } |
| 254 |
| 255 SkNx operator << (int bits) const { SHIFT16(vshl_n_u16, fVec, bits); } |
| 256 SkNx operator >> (int bits) const { SHIFT16(vshr_n_u16, fVec, bits); } |
| 257 |
| 258 static SkNx Min(const SkNx& a, const SkNx& b) { return vmin_u16(a.fVec, b.fV
ec); } |
| 259 |
| 260 template <int k> uint16_t kth() const { |
| 261 SkASSERT(0 <= k && k < 4); |
| 262 return vget_lane_u16(fVec, k&3); |
| 263 } |
| 264 |
| 265 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
| 266 return vbsl_u16(fVec, t.fVec, e.fVec); |
| 267 } |
| 268 |
| 269 uint16x4_t fVec; |
| 270 }; |
| 271 |
233 template <> | 272 template <> |
234 class SkNx<8, uint16_t> { | 273 class SkNx<8, uint16_t> { |
235 public: | 274 public: |
236 SkNx(const uint16x8_t& vec) : fVec(vec) {} | 275 SkNx(const uint16x8_t& vec) : fVec(vec) {} |
237 | 276 |
238 SkNx() {} | 277 SkNx() {} |
239 SkNx(uint16_t val) : fVec(vdupq_n_u16(val)) {} | 278 SkNx(uint16_t val) : fVec(vdupq_n_u16(val)) {} |
240 static SkNx Load(const uint16_t vals[8]) { return vld1q_u16(vals); } | 279 static SkNx Load(const uint16_t vals[8]) { return vld1q_u16(vals); } |
241 | 280 |
242 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d, | 281 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d, |
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
344 } | 383 } |
345 | 384 |
346 static inline void Sk4f_ToBytes(uint8_t bytes[16], | 385 static inline void Sk4f_ToBytes(uint8_t bytes[16], |
347 const Sk4f& a, const Sk4f& b, const Sk4f& c, con
st Sk4f& d) { | 386 const Sk4f& a, const Sk4f& b, const Sk4f& c, con
st Sk4f& d) { |
348 vst1q_u8(bytes, vuzpq_u8(vuzpq_u8((uint8x16_t)vcvtq_u32_f32(a.fVec), | 387 vst1q_u8(bytes, vuzpq_u8(vuzpq_u8((uint8x16_t)vcvtq_u32_f32(a.fVec), |
349 (uint8x16_t)vcvtq_u32_f32(b.fVec)).val[0], | 388 (uint8x16_t)vcvtq_u32_f32(b.fVec)).val[0], |
350 vuzpq_u8((uint8x16_t)vcvtq_u32_f32(c.fVec), | 389 vuzpq_u8((uint8x16_t)vcvtq_u32_f32(c.fVec), |
351 (uint8x16_t)vcvtq_u32_f32(d.fVec)).val[0])
.val[0]); | 390 (uint8x16_t)vcvtq_u32_f32(d.fVec)).val[0])
.val[0]); |
352 } | 391 } |
353 | 392 |
| 393 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t, 4>(const Sk4b& src) { |
| 394 return vget_low_u16(vmovl_u8(src.fVec)); |
| 395 } |
| 396 |
| 397 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t, 4>(const Sk4h& src) { |
| 398 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); |
| 399 } |
| 400 |
354 } // namespace | 401 } // namespace |
355 | 402 |
356 #endif//SkNx_neon_DEFINED | 403 #endif//SkNx_neon_DEFINED |
OLD | NEW |