| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
| 9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
| 10 | 10 |
| (...skipping 343 matching lines...) |
| 354 } | 354 } |
| 355 | 355 |
| 356 SkNx thenElse(const SkNx& t, const SkNx& e) const { | 356 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
| 357 return vbslq_u8(fVec, t.fVec, e.fVec); | 357 return vbslq_u8(fVec, t.fVec, e.fVec); |
| 358 } | 358 } |
| 359 | 359 |
| 360 uint8x16_t fVec; | 360 uint8x16_t fVec; |
| 361 }; | 361 }; |
| 362 | 362 |
| 363 template <> | 363 template <> |
| 364 class SkNx<4, int> { | 364 class SkNx<4, int32_t> { |
| 365 public: | 365 public: |
| 366 SkNx(const int32x4_t& vec) : fVec(vec) {} | 366 SkNx(const int32x4_t& vec) : fVec(vec) {} |
| 367 | 367 |
| 368 SkNx() {} | 368 SkNx() {} |
| 369 SkNx(int v) { | 369 SkNx(int32_t v) { |
| 370 fVec = vdupq_n_s32(v); | 370 fVec = vdupq_n_s32(v); |
| 371 } | 371 } |
| 372 SkNx(int a, int b, int c, int d) { | 372 SkNx(int32_t a, int32_t b, int32_t c, int32_t d) { |
| 373 fVec = (int32x4_t){a,b,c,d}; | 373 fVec = (int32x4_t){a,b,c,d}; |
| 374 } | 374 } |
| 375 static SkNx Load(const void* ptr) { | 375 static SkNx Load(const void* ptr) { |
| 376 return vld1q_s32((const int32_t*)ptr); | 376 return vld1q_s32((const int32_t*)ptr); |
| 377 } | 377 } |
| 378 void store(void* ptr) const { | 378 void store(void* ptr) const { |
| 379 return vst1q_s32((int32_t*)ptr, fVec); | 379 return vst1q_s32((int32_t*)ptr, fVec); |
| 380 } | 380 } |
| 381 int operator[](int k) const { | 381 int32_t operator[](int k) const { |
| 382 SkASSERT(0 <= k && k < 4); | 382 SkASSERT(0 <= k && k < 4); |
| 383 union { int32x4_t v; int is[4]; } pun = {fVec}; | 383 union { int32x4_t v; int32_t is[4]; } pun = {fVec}; |
| 384 return pun.is[k&3]; | 384 return pun.is[k&3]; |
| 385 } | 385 } |
| 386 | 386 |
| 387 SkNx operator + (const SkNx& o) const { return vaddq_s32(fVec, o.fVec); } | 387 SkNx operator + (const SkNx& o) const { return vaddq_s32(fVec, o.fVec); } |
| 388 SkNx operator - (const SkNx& o) const { return vsubq_s32(fVec, o.fVec); } | 388 SkNx operator - (const SkNx& o) const { return vsubq_s32(fVec, o.fVec); } |
| 389 SkNx operator * (const SkNx& o) const { return vmulq_s32(fVec, o.fVec); } | 389 SkNx operator * (const SkNx& o) const { return vmulq_s32(fVec, o.fVec); } |
| 390 | 390 |
| 391 SkNx operator & (const SkNx& o) const { return vandq_s32(fVec, o.fVec); } | 391 SkNx operator & (const SkNx& o) const { return vandq_s32(fVec, o.fVec); } |
| 392 SkNx operator | (const SkNx& o) const { return vorrq_s32(fVec, o.fVec); } | 392 SkNx operator | (const SkNx& o) const { return vorrq_s32(fVec, o.fVec); } |
| 393 SkNx operator ^ (const SkNx& o) const { return veorq_s32(fVec, o.fVec); } | 393 SkNx operator ^ (const SkNx& o) const { return veorq_s32(fVec, o.fVec); } |
| (...skipping 14 matching lines...) |
| 408 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_s32(a.fVec, b.fVec); } | 408 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_s32(a.fVec, b.fVec); } |
| 409 // TODO as needed | 409 // TODO as needed |
| 410 | 410 |
| 411 SkNx thenElse(const SkNx& t, const SkNx& e) const { | 411 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
| 412 return vbslq_s32(vreinterpretq_u32_s32(fVec), t.fVec, e.fVec); | 412 return vbslq_s32(vreinterpretq_u32_s32(fVec), t.fVec, e.fVec); |
| 413 } | 413 } |
| 414 | 414 |
| 415 int32x4_t fVec; | 415 int32x4_t fVec; |
| 416 }; | 416 }; |
| 417 | 417 |
| 418 template <> |
| 419 class SkNx<4, uint32_t> { |
| 420 public: |
| 421 SkNx(const uint32x4_t& vec) : fVec(vec) {} |
| 422 |
| 423 SkNx() {} |
| 424 SkNx(uint32_t v) { |
| 425 fVec = vdupq_n_u32(v); |
| 426 } |
| 427 SkNx(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { |
| 428 fVec = (uint32x4_t){a,b,c,d}; |
| 429 } |
| 430 static SkNx Load(const void* ptr) { |
| 431 return vld1q_u32((const uint32_t*)ptr); |
| 432 } |
| 433 void store(void* ptr) const { |
| 434 return vst1q_u32((uint32_t*)ptr, fVec); |
| 435 } |
| 436 uint32_t operator[](int k) const { |
| 437 SkASSERT(0 <= k && k < 4); |
| 438 union { uint32x4_t v; uint32_t us[4]; } pun = {fVec}; |
| 439 return pun.us[k&3]; |
| 440 } |
| 441 |
| 442 SkNx operator + (const SkNx& o) const { return vaddq_u32(fVec, o.fVec); } |
| 443 SkNx operator - (const SkNx& o) const { return vsubq_u32(fVec, o.fVec); } |
| 444 SkNx operator * (const SkNx& o) const { return vmulq_u32(fVec, o.fVec); } |
| 445 |
| 446 SkNx operator & (const SkNx& o) const { return vandq_u32(fVec, o.fVec); } |
| 447 SkNx operator | (const SkNx& o) const { return vorrq_u32(fVec, o.fVec); } |
| 448 SkNx operator ^ (const SkNx& o) const { return veorq_u32(fVec, o.fVec); } |
| 449 |
| 450 SkNx operator << (int bits) const { SHIFT32(vshlq_n_u32, fVec, bits); } |
| 451 SkNx operator >> (int bits) const { SHIFT32(vshrq_n_u32, fVec, bits); } |
| 452 |
| 453 SkNx operator == (const SkNx& o) const { return vceqq_u32(fVec, o.fVec); } |
| 454 SkNx operator < (const SkNx& o) const { return vcltq_u32(fVec, o.fVec); } |
| 455 SkNx operator > (const SkNx& o) const { return vcgtq_u32(fVec, o.fVec); } |
| 456 |
| 457 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u32(a.fVec, b.fVec); } |
| 458 // TODO as needed |
| 459 |
| 460 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
| 461 return vbslq_u32(fVec, t.fVec, e.fVec); |
| 462 } |
| 463 |
| 464 uint32x4_t fVec; |
| 465 }; |
| 466 |
| 418 #undef SHIFT32 | 467 #undef SHIFT32 |
| 419 #undef SHIFT16 | 468 #undef SHIFT16 |
| 420 #undef SHIFT8 | 469 #undef SHIFT8 |
| 421 | 470 |
| 422 template<> inline Sk4i SkNx_cast<int, float>(const Sk4f& src) { | 471 template<> inline Sk4i SkNx_cast<int32_t, float>(const Sk4f& src) { |
| 423 return vcvtq_s32_f32(src.fVec); | 472 return vcvtq_s32_f32(src.fVec); |
| 424 | 473 |
| 425 } | 474 } |
| 426 template<> inline Sk4f SkNx_cast<float, int>(const Sk4i& src) { | 475 template<> inline Sk4f SkNx_cast<float, int32_t>(const Sk4i& src) { |
| 427 return vcvtq_f32_s32(src.fVec); | 476 return vcvtq_f32_s32(src.fVec); |
| 428 } | 477 } |
| 478 template<> inline Sk4f SkNx_cast<float, uint32_t>(const Sk4u& src) { |
| 479 return SkNx_cast<float>(Sk4i::Load(&src)); |
| 480 } |
| 429 | 481 |
| 430 template<> inline Sk4h SkNx_cast<uint16_t, float>(const Sk4f& src) { | 482 template<> inline Sk4h SkNx_cast<uint16_t, float>(const Sk4f& src) { |
| 431 return vqmovn_u32(vcvtq_u32_f32(src.fVec)); | 483 return vqmovn_u32(vcvtq_u32_f32(src.fVec)); |
| 432 } | 484 } |
| 433 | 485 |
| 434 template<> inline Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) { | 486 template<> inline Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) { |
| 435 return vcvtq_f32_u32(vmovl_u16(src.fVec)); | 487 return vcvtq_f32_u32(vmovl_u16(src.fVec)); |
| 436 } | 488 } |
| 437 | 489 |
| 438 template<> inline Sk4b SkNx_cast<uint8_t, float>(const Sk4f& src) { | 490 template<> inline Sk4b SkNx_cast<uint8_t, float>(const Sk4f& src) { |
| (...skipping 22 matching lines...) |
| 461 } | 513 } |
| 462 | 514 |
| 463 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { | 515 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { |
| 464 return vget_low_u16(vmovl_u8(src.fVec)); | 516 return vget_low_u16(vmovl_u8(src.fVec)); |
| 465 } | 517 } |
| 466 | 518 |
| 467 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { | 519 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { |
| 468 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); | 520 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); |
| 469 } | 521 } |
| 470 | 522 |
| 471 template<> inline Sk4b SkNx_cast<uint8_t, int>(const Sk4i& src) { | 523 template<> inline Sk4b SkNx_cast<uint8_t, int32_t>(const Sk4i& src) { |
| 472 uint16x4_t _16 = vqmovun_s32(src.fVec); | 524 uint16x4_t _16 = vqmovun_s32(src.fVec); |
| 473 return vqmovn_u16(vcombine_u16(_16, _16)); | 525 return vqmovn_u16(vcombine_u16(_16, _16)); |
| 474 } | 526 } |
| 475 | 527 |
| 476 template<> inline Sk4i SkNx_cast<int, uint16_t>(const Sk4h& src) { | 528 template<> inline Sk4i SkNx_cast<int32_t, uint16_t>(const Sk4h& src) { |
| 477 return vreinterpretq_s32_u32(vmovl_u16(src.fVec)); | 529 return vreinterpretq_s32_u32(vmovl_u16(src.fVec)); |
| 478 } | 530 } |
| 479 | 531 |
| 480 template<> inline Sk4h SkNx_cast<uint16_t, int>(const Sk4i& src) { | 532 template<> inline Sk4h SkNx_cast<uint16_t, int32_t>(const Sk4i& src) { |
| 481 return vmovn_u32(vreinterpretq_u32_s32(src.fVec)); | 533 return vmovn_u32(vreinterpretq_u32_s32(src.fVec)); |
| 482 } | 534 } |
| 483 | 535 |
| 484 static inline Sk4i Sk4f_round(const Sk4f& x) { | 536 static inline Sk4i Sk4f_round(const Sk4f& x) { |
| 485 return vcvtq_s32_f32((x + 0.5f).fVec); | 537 return vcvtq_s32_f32((x + 0.5f).fVec); |
| 486 } | 538 } |
| 487 | 539 |
| 488 static inline void Sk4h_load4(const void* ptr, Sk4h* r, Sk4h* g, Sk4h* b, Sk4h* a) { | 540 static inline void Sk4h_load4(const void* ptr, Sk4h* r, Sk4h* g, Sk4h* b, Sk4h* a) { |
| 489 uint16x4x4_t rgba = vld4_u16((const uint16_t*)ptr); | 541 uint16x4x4_t rgba = vld4_u16((const uint16_t*)ptr); |
| 490 *r = rgba.val[0]; | 542 *r = rgba.val[0]; |
| 491 *g = rgba.val[1]; | 543 *g = rgba.val[1]; |
| 492 *b = rgba.val[2]; | 544 *b = rgba.val[2]; |
| 493 *a = rgba.val[3]; | 545 *a = rgba.val[3]; |
| 494 } | 546 } |
| 495 | 547 |
| 496 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b, | 548 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b, |
| 497 const Sk4h& a) { | 549 const Sk4h& a) { |
| 498 uint16x4x4_t rgba = {{ | 550 uint16x4x4_t rgba = {{ |
| 499 r.fVec, | 551 r.fVec, |
| 500 g.fVec, | 552 g.fVec, |
| 501 b.fVec, | 553 b.fVec, |
| 502 a.fVec, | 554 a.fVec, |
| 503 }}; | 555 }}; |
| 504 vst4_u16((uint16_t*) dst, rgba); | 556 vst4_u16((uint16_t*) dst, rgba); |
| 505 } | 557 } |
| 506 | 558 |
| 507 #endif//SkNx_neon_DEFINED | 559 #endif//SkNx_neon_DEFINED |
| OLD | NEW |