OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
10 | 10 |
(...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
354 } | 354 } |
355 | 355 |
356 SkNx thenElse(const SkNx& t, const SkNx& e) const { | 356 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
357 return vbslq_u8(fVec, t.fVec, e.fVec); | 357 return vbslq_u8(fVec, t.fVec, e.fVec); |
358 } | 358 } |
359 | 359 |
360 uint8x16_t fVec; | 360 uint8x16_t fVec; |
361 }; | 361 }; |
362 | 362 |
363 template <> | 363 template <> |
364 class SkNx<4, int> { | 364 class SkNx<4, int32_t> { |
365 public: | 365 public: |
366 SkNx(const int32x4_t& vec) : fVec(vec) {} | 366 SkNx(const int32x4_t& vec) : fVec(vec) {} |
367 | 367 |
368 SkNx() {} | 368 SkNx() {} |
369 SkNx(int v) { | 369 SkNx(int32_t v) { |
370 fVec = vdupq_n_s32(v); | 370 fVec = vdupq_n_s32(v); |
371 } | 371 } |
372 SkNx(int a, int b, int c, int d) { | 372 SkNx(int32_t a, int32_t b, int32_t c, int32_t d) { |
373 fVec = (int32x4_t){a,b,c,d}; | 373 fVec = (int32x4_t){a,b,c,d}; |
374 } | 374 } |
375 static SkNx Load(const void* ptr) { | 375 static SkNx Load(const void* ptr) { |
376 return vld1q_s32((const int32_t*)ptr); | 376 return vld1q_s32((const int32_t*)ptr); |
377 } | 377 } |
378 void store(void* ptr) const { | 378 void store(void* ptr) const { |
379 return vst1q_s32((int32_t*)ptr, fVec); | 379 return vst1q_s32((int32_t*)ptr, fVec); |
380 } | 380 } |
381 int operator[](int k) const { | 381 int32_t operator[](int k) const { |
382 SkASSERT(0 <= k && k < 4); | 382 SkASSERT(0 <= k && k < 4); |
383 union { int32x4_t v; int is[4]; } pun = {fVec}; | 383 union { int32x4_t v; int32_t is[4]; } pun = {fVec}; |
384 return pun.is[k&3]; | 384 return pun.is[k&3]; |
385 } | 385 } |
386 | 386 |
387 SkNx operator + (const SkNx& o) const { return vaddq_s32(fVec, o.fVec); } | 387 SkNx operator + (const SkNx& o) const { return vaddq_s32(fVec, o.fVec); } |
388 SkNx operator - (const SkNx& o) const { return vsubq_s32(fVec, o.fVec); } | 388 SkNx operator - (const SkNx& o) const { return vsubq_s32(fVec, o.fVec); } |
389 SkNx operator * (const SkNx& o) const { return vmulq_s32(fVec, o.fVec); } | 389 SkNx operator * (const SkNx& o) const { return vmulq_s32(fVec, o.fVec); } |
390 | 390 |
391 SkNx operator & (const SkNx& o) const { return vandq_s32(fVec, o.fVec); } | 391 SkNx operator & (const SkNx& o) const { return vandq_s32(fVec, o.fVec); } |
392 SkNx operator | (const SkNx& o) const { return vorrq_s32(fVec, o.fVec); } | 392 SkNx operator | (const SkNx& o) const { return vorrq_s32(fVec, o.fVec); } |
393 SkNx operator ^ (const SkNx& o) const { return veorq_s32(fVec, o.fVec); } | 393 SkNx operator ^ (const SkNx& o) const { return veorq_s32(fVec, o.fVec); } |
(...skipping 14 matching lines...) Expand all Loading... |
408 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_s32(a.fVec, b.f
Vec); } | 408 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_s32(a.fVec, b.f
Vec); } |
409 // TODO as needed | 409 // TODO as needed |
410 | 410 |
411 SkNx thenElse(const SkNx& t, const SkNx& e) const { | 411 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
412 return vbslq_s32(vreinterpretq_u32_s32(fVec), t.fVec, e.fVec); | 412 return vbslq_s32(vreinterpretq_u32_s32(fVec), t.fVec, e.fVec); |
413 } | 413 } |
414 | 414 |
415 int32x4_t fVec; | 415 int32x4_t fVec; |
416 }; | 416 }; |
417 | 417 |
| 418 template <> |
| 419 class SkNx<4, uint32_t> { |
| 420 public: |
| 421 SkNx(const uint32x4_t& vec) : fVec(vec) {} |
| 422 |
| 423 SkNx() {} |
| 424 SkNx(uint32_t v) { |
| 425 fVec = vdupq_n_u32(v); |
| 426 } |
| 427 SkNx(uint32_t a, uint32_t b, uint32_t c, uint32_t d) { |
| 428 fVec = (uint32x4_t){a,b,c,d}; |
| 429 } |
| 430 static SkNx Load(const void* ptr) { |
| 431 return vld1q_u32((const uint32_t*)ptr); |
| 432 } |
| 433 void store(void* ptr) const { |
| 434 return vst1q_u32((uint32_t*)ptr, fVec); |
| 435 } |
| 436 uint32_t operator[](int k) const { |
| 437 SkASSERT(0 <= k && k < 4); |
| 438 union { uint32x4_t v; uint32_t us[4]; } pun = {fVec}; |
| 439 return pun.us[k&3]; |
| 440 } |
| 441 |
| 442 SkNx operator + (const SkNx& o) const { return vaddq_u32(fVec, o.fVec); } |
| 443 SkNx operator - (const SkNx& o) const { return vsubq_u32(fVec, o.fVec); } |
| 444 SkNx operator * (const SkNx& o) const { return vmulq_u32(fVec, o.fVec); } |
| 445 |
| 446 SkNx operator & (const SkNx& o) const { return vandq_u32(fVec, o.fVec); } |
| 447 SkNx operator | (const SkNx& o) const { return vorrq_u32(fVec, o.fVec); } |
| 448 SkNx operator ^ (const SkNx& o) const { return veorq_u32(fVec, o.fVec); } |
| 449 |
| 450 SkNx operator << (int bits) const { SHIFT32(vshlq_n_u32, fVec, bits); } |
| 451 SkNx operator >> (int bits) const { SHIFT32(vshrq_n_u32, fVec, bits); } |
| 452 |
| 453 SkNx operator == (const SkNx& o) const { return vceqq_u32(fVec, o.fVec); } |
| 454 SkNx operator < (const SkNx& o) const { return vcltq_u32(fVec, o.fVec); } |
| 455 SkNx operator > (const SkNx& o) const { return vcgtq_u32(fVec, o.fVec); } |
| 456 |
| 457 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u32(a.fVec, b.f
Vec); } |
| 458 // TODO as needed |
| 459 |
| 460 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
| 461 return vbslq_u32(fVec, t.fVec, e.fVec); |
| 462 } |
| 463 |
| 464 uint32x4_t fVec; |
| 465 }; |
| 466 |
418 #undef SHIFT32 | 467 #undef SHIFT32 |
419 #undef SHIFT16 | 468 #undef SHIFT16 |
420 #undef SHIFT8 | 469 #undef SHIFT8 |
421 | 470 |
422 template<> inline Sk4i SkNx_cast<int, float>(const Sk4f& src) { | 471 template<> inline Sk4i SkNx_cast<int32_t, float>(const Sk4f& src) { |
423 return vcvtq_s32_f32(src.fVec); | 472 return vcvtq_s32_f32(src.fVec); |
424 | 473 |
425 } | 474 } |
426 template<> inline Sk4f SkNx_cast<float, int>(const Sk4i& src) { | 475 template<> inline Sk4f SkNx_cast<float, int32_t>(const Sk4i& src) { |
427 return vcvtq_f32_s32(src.fVec); | 476 return vcvtq_f32_s32(src.fVec); |
428 } | 477 } |
| 478 template<> inline Sk4f SkNx_cast<float, uint32_t>(const Sk4u& src) { |
| 479 return SkNx_cast<float>(Sk4i::Load(&src)); |
| 480 } |
429 | 481 |
430 template<> inline Sk4h SkNx_cast<uint16_t, float>(const Sk4f& src) { | 482 template<> inline Sk4h SkNx_cast<uint16_t, float>(const Sk4f& src) { |
431 return vqmovn_u32(vcvtq_u32_f32(src.fVec)); | 483 return vqmovn_u32(vcvtq_u32_f32(src.fVec)); |
432 } | 484 } |
433 | 485 |
434 template<> inline Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) { | 486 template<> inline Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) { |
435 return vcvtq_f32_u32(vmovl_u16(src.fVec)); | 487 return vcvtq_f32_u32(vmovl_u16(src.fVec)); |
436 } | 488 } |
437 | 489 |
438 template<> inline Sk4b SkNx_cast<uint8_t, float>(const Sk4f& src) { | 490 template<> inline Sk4b SkNx_cast<uint8_t, float>(const Sk4f& src) { |
(...skipping 22 matching lines...) Expand all Loading... |
461 } | 513 } |
462 | 514 |
463 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { | 515 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { |
464 return vget_low_u16(vmovl_u8(src.fVec)); | 516 return vget_low_u16(vmovl_u8(src.fVec)); |
465 } | 517 } |
466 | 518 |
467 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { | 519 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { |
468 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); | 520 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); |
469 } | 521 } |
470 | 522 |
471 template<> inline Sk4b SkNx_cast<uint8_t, int>(const Sk4i& src) { | 523 template<> inline Sk4b SkNx_cast<uint8_t, int32_t>(const Sk4i& src) { |
472 uint16x4_t _16 = vqmovun_s32(src.fVec); | 524 uint16x4_t _16 = vqmovun_s32(src.fVec); |
473 return vqmovn_u16(vcombine_u16(_16, _16)); | 525 return vqmovn_u16(vcombine_u16(_16, _16)); |
474 } | 526 } |
475 | 527 |
476 template<> inline Sk4i SkNx_cast<int, uint16_t>(const Sk4h& src) { | 528 template<> inline Sk4i SkNx_cast<int32_t, uint16_t>(const Sk4h& src) { |
477 return vreinterpretq_s32_u32(vmovl_u16(src.fVec)); | 529 return vreinterpretq_s32_u32(vmovl_u16(src.fVec)); |
478 } | 530 } |
479 | 531 |
480 template<> inline Sk4h SkNx_cast<uint16_t, int>(const Sk4i& src) { | 532 template<> inline Sk4h SkNx_cast<uint16_t, int32_t>(const Sk4i& src) { |
481 return vmovn_u32(vreinterpretq_u32_s32(src.fVec)); | 533 return vmovn_u32(vreinterpretq_u32_s32(src.fVec)); |
482 } | 534 } |
483 | 535 |
484 static inline Sk4i Sk4f_round(const Sk4f& x) { | 536 static inline Sk4i Sk4f_round(const Sk4f& x) { |
485 return vcvtq_s32_f32((x + 0.5f).fVec); | 537 return vcvtq_s32_f32((x + 0.5f).fVec); |
486 } | 538 } |
487 | 539 |
488 static inline void Sk4h_load4(const void* ptr, Sk4h* r, Sk4h* g, Sk4h* b, Sk4h*
a) { | 540 static inline void Sk4h_load4(const void* ptr, Sk4h* r, Sk4h* g, Sk4h* b, Sk4h*
a) { |
489 uint16x4x4_t rgba = vld4_u16((const uint16_t*)ptr); | 541 uint16x4x4_t rgba = vld4_u16((const uint16_t*)ptr); |
490 *r = rgba.val[0]; | 542 *r = rgba.val[0]; |
491 *g = rgba.val[1]; | 543 *g = rgba.val[1]; |
492 *b = rgba.val[2]; | 544 *b = rgba.val[2]; |
493 *a = rgba.val[3]; | 545 *a = rgba.val[3]; |
494 } | 546 } |
495 | 547 |
496 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk
4h& b, | 548 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk
4h& b, |
497 const Sk4h& a) { | 549 const Sk4h& a) { |
498 uint16x4x4_t rgba = {{ | 550 uint16x4x4_t rgba = {{ |
499 r.fVec, | 551 r.fVec, |
500 g.fVec, | 552 g.fVec, |
501 b.fVec, | 553 b.fVec, |
502 a.fVec, | 554 a.fVec, |
503 }}; | 555 }}; |
504 vst4_u16((uint16_t*) dst, rgba); | 556 vst4_u16((uint16_t*) dst, rgba); |
505 } | 557 } |
506 | 558 |
507 #endif//SkNx_neon_DEFINED | 559 #endif//SkNx_neon_DEFINED |
OLD | NEW |