| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
| 9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
| 10 | 10 |
| (...skipping 212 matching lines...) |
| 223 || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3); | 223 || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3); |
| 224 } | 224 } |
| 225 | 225 |
| 226 SkNx thenElse(const SkNx& t, const SkNx& e) const { | 226 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
| 227 return vbslq_f32(vreinterpretq_u32_f32(fVec), t.fVec, e.fVec); | 227 return vbslq_f32(vreinterpretq_u32_f32(fVec), t.fVec, e.fVec); |
| 228 } | 228 } |
| 229 | 229 |
| 230 float32x4_t fVec; | 230 float32x4_t fVec; |
| 231 }; | 231 }; |
| 232 | 232 |
| 233 // It's possible that for our current use cases, representing this as | |
| 234 // half a uint16x8_t might be better than representing it as a uint16x4_t. | |
| 235 // It'd make conversion to Sk4b one step simpler. | |
| 236 template <> | |
| 237 class SkNx<4, uint16_t> { | |
| 238 public: | |
| 239 SkNx(const uint16x4_t& vec) : fVec(vec) {} | |
| 240 | |
| 241 SkNx() {} | |
| 242 SkNx(uint16_t val) : fVec(vdup_n_u16(val)) {} | |
| 243 static SkNx Load(const uint16_t vals[4]) { return vld1_u16(vals); } | |
| 244 | |
| 245 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) { | |
| 246 fVec = (uint16x4_t) { a,b,c,d }; | |
| 247 } | |
| 248 | |
| 249 void store(uint16_t vals[4]) const { vst1_u16(vals, fVec); } | |
| 250 | |
| 251 SkNx operator + (const SkNx& o) const { return vadd_u16(fVec, o.fVec); } | |
| 252 SkNx operator - (const SkNx& o) const { return vsub_u16(fVec, o.fVec); } | |
| 253 SkNx operator * (const SkNx& o) const { return vmul_u16(fVec, o.fVec); } | |
| 254 | |
| 255 SkNx operator << (int bits) const { SHIFT16(vshl_n_u16, fVec, bits); } | |
| 256 SkNx operator >> (int bits) const { SHIFT16(vshr_n_u16, fVec, bits); } | |
| 257 | |
| 258 static SkNx Min(const SkNx& a, const SkNx& b) { return vmin_u16(a.fVec, b.fVec); } | |
| 259 | |
| 260 template <int k> uint16_t kth() const { | |
| 261 SkASSERT(0 <= k && k < 4); | |
| 262 return vget_lane_u16(fVec, k&3); | |
| 263 } | |
| 264 | |
| 265 SkNx thenElse(const SkNx& t, const SkNx& e) const { | |
| 266 return vbsl_u16(fVec, t.fVec, e.fVec); | |
| 267 } | |
| 268 | |
| 269 uint16x4_t fVec; | |
| 270 }; | |
| 271 | |
| 272 template <> | 233 template <> |
| 273 class SkNx<8, uint16_t> { | 234 class SkNx<8, uint16_t> { |
| 274 public: | 235 public: |
| 275 SkNx(const uint16x8_t& vec) : fVec(vec) {} | 236 SkNx(const uint16x8_t& vec) : fVec(vec) {} |
| 276 | 237 |
| 277 SkNx() {} | 238 SkNx() {} |
| 278 SkNx(uint16_t val) : fVec(vdupq_n_u16(val)) {} | 239 SkNx(uint16_t val) : fVec(vdupq_n_u16(val)) {} |
| 279 static SkNx Load(const uint16_t vals[8]) { return vld1q_u16(vals); } | 240 static SkNx Load(const uint16_t vals[8]) { return vld1q_u16(vals); } |
| 280 | 241 |
| 281 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d, | 242 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d, |
| (...skipping 101 matching lines...) |
| 383 } | 344 } |
| 384 | 345 |
| 385 static inline void Sk4f_ToBytes(uint8_t bytes[16], | 346 static inline void Sk4f_ToBytes(uint8_t bytes[16], |
| 386 const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) { | 347 const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) { |
| 387 vst1q_u8(bytes, vuzpq_u8(vuzpq_u8((uint8x16_t)vcvtq_u32_f32(a.fVec), | 348 vst1q_u8(bytes, vuzpq_u8(vuzpq_u8((uint8x16_t)vcvtq_u32_f32(a.fVec), |
| 388 (uint8x16_t)vcvtq_u32_f32(b.fVec)).val[0], | 349 (uint8x16_t)vcvtq_u32_f32(b.fVec)).val[0], |
| 389 vuzpq_u8((uint8x16_t)vcvtq_u32_f32(c.fVec), | 350 vuzpq_u8((uint8x16_t)vcvtq_u32_f32(c.fVec), |
| 390 (uint8x16_t)vcvtq_u32_f32(d.fVec)).val[0]).val[0]); | 351 (uint8x16_t)vcvtq_u32_f32(d.fVec)).val[0]).val[0]); |
| 391 } | 352 } |
| 392 | 353 |
| 393 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t, 4>(const Sk4b& src) { | |
| 394 return vget_low_u16(vmovl_u8(src.fVec)); | |
| 395 } | |
| 396 | |
| 397 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t, 4>(const Sk4h& src) { | |
| 398 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); | |
| 399 } | |
| 400 | |
| 401 } // namespace | 354 } // namespace |
| 402 | 355 |
| 403 #endif//SkNx_neon_DEFINED | 356 #endif//SkNx_neon_DEFINED |
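
Reviewer note: the comment removed along with the SkNx<4, uint16_t> specialization suggests that carrying the four 16-bit lanes in the low half of a uint16x8_t would make the narrowing conversion to Sk4b a single step. A minimal sketch of that alternative, not part of this CL; Sk4hWide is an invented name for illustration only:

// Hypothetical sketch of the alternative representation the removed comment
// describes: keep 4 x uint16 in the low half of a q register so narrowing to
// four bytes needs no vcombine_u16 first.
#include <arm_neon.h>

struct Sk4hWide {  // invented name, illustration only
    uint16x8_t fVec;  // only lanes 0..3 are meaningful

    static Sk4hWide Load(const uint16_t vals[4]) {
        // Load the four lanes and zero the unused upper half.
        return { vcombine_u16(vld1_u16(vals), vdup_n_u16(0)) };
    }

    // One-step narrow to four bytes (compare the removed Sk4b cast, which had
    // to vcombine_u16 the d register with itself before vmovn_u16).
    uint8x8_t toBytes() const { return vmovn_u16(fVec); }
};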