Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(50)

Side by Side Diff: src/opts/SkNx_neon.h

Issue 2197683002: SkNx: add Sk4u (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/core/SkRasterPipelineBlitter.cpp ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_neon_DEFINED 8 #ifndef SkNx_neon_DEFINED
9 #define SkNx_neon_DEFINED 9 #define SkNx_neon_DEFINED
10 10
(...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after
354 } 354 }
355 355
356 SkNx thenElse(const SkNx& t, const SkNx& e) const { 356 SkNx thenElse(const SkNx& t, const SkNx& e) const {
357 return vbslq_u8(fVec, t.fVec, e.fVec); 357 return vbslq_u8(fVec, t.fVec, e.fVec);
358 } 358 }
359 359
360 uint8x16_t fVec; 360 uint8x16_t fVec;
361 }; 361 };
362 362
363 template <> 363 template <>
364 class SkNx<4, int> { 364 class SkNx<4, int32_t> {
365 public: 365 public:
366 SkNx(const int32x4_t& vec) : fVec(vec) {} 366 SkNx(const int32x4_t& vec) : fVec(vec) {}
367 367
368 SkNx() {} 368 SkNx() {}
369 SkNx(int v) { 369 SkNx(int32_t v) {
370 fVec = vdupq_n_s32(v); 370 fVec = vdupq_n_s32(v);
371 } 371 }
372 SkNx(int a, int b, int c, int d) { 372 SkNx(int32_t a, int32_t b, int32_t c, int32_t d) {
373 fVec = (int32x4_t){a,b,c,d}; 373 fVec = (int32x4_t){a,b,c,d};
374 } 374 }
375 static SkNx Load(const void* ptr) { 375 static SkNx Load(const void* ptr) {
376 return vld1q_s32((const int32_t*)ptr); 376 return vld1q_s32((const int32_t*)ptr);
377 } 377 }
378 void store(void* ptr) const { 378 void store(void* ptr) const {
379 return vst1q_s32((int32_t*)ptr, fVec); 379 return vst1q_s32((int32_t*)ptr, fVec);
380 } 380 }
381 int operator[](int k) const { 381 int32_t operator[](int k) const {
382 SkASSERT(0 <= k && k < 4); 382 SkASSERT(0 <= k && k < 4);
383 union { int32x4_t v; int is[4]; } pun = {fVec}; 383 union { int32x4_t v; int32_t is[4]; } pun = {fVec};
384 return pun.is[k&3]; 384 return pun.is[k&3];
385 } 385 }
386 386
387 SkNx operator + (const SkNx& o) const { return vaddq_s32(fVec, o.fVec); } 387 SkNx operator + (const SkNx& o) const { return vaddq_s32(fVec, o.fVec); }
388 SkNx operator - (const SkNx& o) const { return vsubq_s32(fVec, o.fVec); } 388 SkNx operator - (const SkNx& o) const { return vsubq_s32(fVec, o.fVec); }
389 SkNx operator * (const SkNx& o) const { return vmulq_s32(fVec, o.fVec); } 389 SkNx operator * (const SkNx& o) const { return vmulq_s32(fVec, o.fVec); }
390 390
391 SkNx operator & (const SkNx& o) const { return vandq_s32(fVec, o.fVec); } 391 SkNx operator & (const SkNx& o) const { return vandq_s32(fVec, o.fVec); }
392 SkNx operator | (const SkNx& o) const { return vorrq_s32(fVec, o.fVec); } 392 SkNx operator | (const SkNx& o) const { return vorrq_s32(fVec, o.fVec); }
393 SkNx operator ^ (const SkNx& o) const { return veorq_s32(fVec, o.fVec); } 393 SkNx operator ^ (const SkNx& o) const { return veorq_s32(fVec, o.fVec); }
(...skipping 14 matching lines...) Expand all
408 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_s32(a.fVec, b.fVec); } 408 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_s32(a.fVec, b.fVec); }
409 // TODO as needed 409 // TODO as needed
410 410
411 SkNx thenElse(const SkNx& t, const SkNx& e) const { 411 SkNx thenElse(const SkNx& t, const SkNx& e) const {
412 return vbslq_s32(vreinterpretq_u32_s32(fVec), t.fVec, e.fVec); 412 return vbslq_s32(vreinterpretq_u32_s32(fVec), t.fVec, e.fVec);
413 } 413 }
414 414
415 int32x4_t fVec; 415 int32x4_t fVec;
416 }; 416 };
417 417
418 template <>
419 class SkNx<4, uint32_t> {
420 public:
421 SkNx(const uint32x4_t& vec) : fVec(vec) {}
422
423 SkNx() {}
424 SkNx(uint32_t v) {
425 fVec = vdupq_n_u32(v);
426 }
427 SkNx(uint32_t a, uint32_t b, uint32_t c, uint32_t d) {
428 fVec = (uint32x4_t){a,b,c,d};
429 }
430 static SkNx Load(const void* ptr) {
431 return vld1q_u32((const uint32_t*)ptr);
432 }
433 void store(void* ptr) const {
434 return vst1q_u32((uint32_t*)ptr, fVec);
435 }
436 uint32_t operator[](int k) const {
437 SkASSERT(0 <= k && k < 4);
438 union { uint32x4_t v; uint32_t us[4]; } pun = {fVec};
439 return pun.us[k&3];
440 }
441
442 SkNx operator + (const SkNx& o) const { return vaddq_u32(fVec, o.fVec); }
443 SkNx operator - (const SkNx& o) const { return vsubq_u32(fVec, o.fVec); }
444 SkNx operator * (const SkNx& o) const { return vmulq_u32(fVec, o.fVec); }
445
446 SkNx operator & (const SkNx& o) const { return vandq_u32(fVec, o.fVec); }
447 SkNx operator | (const SkNx& o) const { return vorrq_u32(fVec, o.fVec); }
448 SkNx operator ^ (const SkNx& o) const { return veorq_u32(fVec, o.fVec); }
449
450 SkNx operator << (int bits) const { SHIFT32(vshlq_n_u32, fVec, bits); }
451 SkNx operator >> (int bits) const { SHIFT32(vshrq_n_u32, fVec, bits); }
452
453 SkNx operator == (const SkNx& o) const { return vceqq_u32(fVec, o.fVec); }
454 SkNx operator < (const SkNx& o) const { return vcltq_u32(fVec, o.fVec); }
455 SkNx operator > (const SkNx& o) const { return vcgtq_u32(fVec, o.fVec); }
456
457 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u32(a.fVec, b.fVec); }
458 // TODO as needed
459
460 SkNx thenElse(const SkNx& t, const SkNx& e) const {
461 return vbslq_u32(fVec, t.fVec, e.fVec);
462 }
463
464 uint32x4_t fVec;
465 };
466
418 #undef SHIFT32 467 #undef SHIFT32
419 #undef SHIFT16 468 #undef SHIFT16
420 #undef SHIFT8 469 #undef SHIFT8
421 470
422 template<> inline Sk4i SkNx_cast<int, float>(const Sk4f& src) { 471 template<> inline Sk4i SkNx_cast<int32_t, float>(const Sk4f& src) {
423 return vcvtq_s32_f32(src.fVec); 472 return vcvtq_s32_f32(src.fVec);
424 473
425 } 474 }
426 template<> inline Sk4f SkNx_cast<float, int>(const Sk4i& src) { 475 template<> inline Sk4f SkNx_cast<float, int32_t>(const Sk4i& src) {
427 return vcvtq_f32_s32(src.fVec); 476 return vcvtq_f32_s32(src.fVec);
428 } 477 }
478 template<> inline Sk4f SkNx_cast<float, uint32_t>(const Sk4u& src) {
479 return SkNx_cast<float>(Sk4i::Load(&src));
480 }
429 481
430 template<> inline Sk4h SkNx_cast<uint16_t, float>(const Sk4f& src) { 482 template<> inline Sk4h SkNx_cast<uint16_t, float>(const Sk4f& src) {
431 return vqmovn_u32(vcvtq_u32_f32(src.fVec)); 483 return vqmovn_u32(vcvtq_u32_f32(src.fVec));
432 } 484 }
433 485
434 template<> inline Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) { 486 template<> inline Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) {
435 return vcvtq_f32_u32(vmovl_u16(src.fVec)); 487 return vcvtq_f32_u32(vmovl_u16(src.fVec));
436 } 488 }
437 489
438 template<> inline Sk4b SkNx_cast<uint8_t, float>(const Sk4f& src) { 490 template<> inline Sk4b SkNx_cast<uint8_t, float>(const Sk4f& src) {
(...skipping 22 matching lines...) Expand all
461 } 513 }
462 514
463 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { 515 template<> inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) {
464 return vget_low_u16(vmovl_u8(src.fVec)); 516 return vget_low_u16(vmovl_u8(src.fVec));
465 } 517 }
466 518
467 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { 519 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) {
468 return vmovn_u16(vcombine_u16(src.fVec, src.fVec)); 520 return vmovn_u16(vcombine_u16(src.fVec, src.fVec));
469 } 521 }
470 522
471 template<> inline Sk4b SkNx_cast<uint8_t, int>(const Sk4i& src) { 523 template<> inline Sk4b SkNx_cast<uint8_t, int32_t>(const Sk4i& src) {
472 uint16x4_t _16 = vqmovun_s32(src.fVec); 524 uint16x4_t _16 = vqmovun_s32(src.fVec);
473 return vqmovn_u16(vcombine_u16(_16, _16)); 525 return vqmovn_u16(vcombine_u16(_16, _16));
474 } 526 }
475 527
476 template<> inline Sk4i SkNx_cast<int, uint16_t>(const Sk4h& src) { 528 template<> inline Sk4i SkNx_cast<int32_t, uint16_t>(const Sk4h& src) {
477 return vreinterpretq_s32_u32(vmovl_u16(src.fVec)); 529 return vreinterpretq_s32_u32(vmovl_u16(src.fVec));
478 } 530 }
479 531
480 template<> inline Sk4h SkNx_cast<uint16_t, int>(const Sk4i& src) { 532 template<> inline Sk4h SkNx_cast<uint16_t, int32_t>(const Sk4i& src) {
481 return vmovn_u32(vreinterpretq_u32_s32(src.fVec)); 533 return vmovn_u32(vreinterpretq_u32_s32(src.fVec));
482 } 534 }
483 535
484 static inline Sk4i Sk4f_round(const Sk4f& x) { 536 static inline Sk4i Sk4f_round(const Sk4f& x) {
485 return vcvtq_s32_f32((x + 0.5f).fVec); 537 return vcvtq_s32_f32((x + 0.5f).fVec);
486 } 538 }
487 539
488 static inline void Sk4h_load4(const void* ptr, Sk4h* r, Sk4h* g, Sk4h* b, Sk4h* a) { 540 static inline void Sk4h_load4(const void* ptr, Sk4h* r, Sk4h* g, Sk4h* b, Sk4h* a) {
489 uint16x4x4_t rgba = vld4_u16((const uint16_t*)ptr); 541 uint16x4x4_t rgba = vld4_u16((const uint16_t*)ptr);
490 *r = rgba.val[0]; 542 *r = rgba.val[0];
491 *g = rgba.val[1]; 543 *g = rgba.val[1];
492 *b = rgba.val[2]; 544 *b = rgba.val[2];
493 *a = rgba.val[3]; 545 *a = rgba.val[3];
494 } 546 }
495 547
496 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b, 548 static inline void Sk4h_store4(void* dst, const Sk4h& r, const Sk4h& g, const Sk4h& b,
497 const Sk4h& a) { 549 const Sk4h& a) {
498 uint16x4x4_t rgba = {{ 550 uint16x4x4_t rgba = {{
499 r.fVec, 551 r.fVec,
500 g.fVec, 552 g.fVec,
501 b.fVec, 553 b.fVec,
502 a.fVec, 554 a.fVec,
503 }}; 555 }};
504 vst4_u16((uint16_t*) dst, rgba); 556 vst4_u16((uint16_t*) dst, rgba);
505 } 557 }
506 558
507 #endif//SkNx_neon_DEFINED 559 #endif//SkNx_neon_DEFINED
OLD NEW
« no previous file with comments | « src/core/SkRasterPipelineBlitter.cpp ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698