Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(266)

Side by Side Diff: src/core/SkXfermode.cpp

Issue 23644006: ARM Skia NEON patches - 28 - Xfermode: SIMD modeprocs (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 1
2 /* 2 /*
3 * Copyright 2006 The Android Open Source Project 3 * Copyright 2006 The Android Open Source Project
4 * 4 *
5 * Use of this source code is governed by a BSD-style license that can be 5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file. 6 * found in the LICENSE file.
7 */ 7 */
8 8
9 9
10 #include "SkXfermode.h" 10 #include "SkXfermode.h"
11 #include "SkColorPriv.h" 11 #include "SkColorPriv.h"
12 #include "SkFlattenableBuffers.h" 12 #include "SkFlattenableBuffers.h"
13 #include "SkMathPriv.h" 13 #include "SkMathPriv.h"
14 #include "SkString.h" 14 #include "SkString.h"
15 #include "SkXfermode_opts.h"
16 #include "SkUtilsArm.h"
15 17
16 SK_DEFINE_INST_COUNT(SkXfermode) 18 SK_DEFINE_INST_COUNT(SkXfermode)
17 19
18 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) 20 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b)
19 21
20 #if 0 22 #if 0
21 // idea for higher precision blends in xfer procs (and slightly faster) 23 // idea for higher precision blends in xfer procs (and slightly faster)
22 // see DstATop as a probable caller 24 // see DstATop as a probable caller
23 static U8CPU mulmuldiv255round(U8CPU a, U8CPU b, U8CPU c, U8CPU d) { 25 static U8CPU mulmuldiv255round(U8CPU a, U8CPU b, U8CPU c, U8CPU d) {
24 SkASSERT(a <= 255); 26 SkASSERT(a <= 255);
(...skipping 602 matching lines...) Expand 10 before | Expand all | Expand 10 after
627 int a = srcover_byte(sa, da); 629 int a = srcover_byte(sa, da);
628 int r = blendfunc_nonsep_byte(sr, dr, sa, da, Dr); 630 int r = blendfunc_nonsep_byte(sr, dr, sa, da, Dr);
629 int g = blendfunc_nonsep_byte(sg, dg, sa, da, Dg); 631 int g = blendfunc_nonsep_byte(sg, dg, sa, da, Dg);
630 int b = blendfunc_nonsep_byte(sb, db, sa, da, Db); 632 int b = blendfunc_nonsep_byte(sb, db, sa, da, Db);
631 return SkPackARGB32(a, r, g, b); 633 return SkPackARGB32(a, r, g, b);
632 } 634 }
633 635
634 636
// ProcCoeff: record type used for the rows of gProcCoeffs.
//   fProc     - scalar per-pixel blend function (present in OLD and NEW).
//   fProcSIMD - SIMD blend function; exists only in the NEW column (new line 639),
//               i.e. it is the field this patch adds. The table stores NULL here
//               for some modes.
//   fSC, fDC  - source / destination SkXfermode::Coeff; either may hold the
//               CANNOT_USE_COEFF sentinel.
// NOTE(review): inserting fProcSIMD as the second member shifts the positional
// initializers, which is why every gProcCoeffs row changes in the NEW column.
635 struct ProcCoeff { 637 struct ProcCoeff {
636 SkXfermodeProc fProc; 638 SkXfermodeProc fProc;
639 SkXfermodeProcSIMD fProcSIMD;
djsollen 2013/10/03 16:38:29 instead of defining this at compile time why not h
637 SkXfermode::Coeff fSC; 640 SkXfermode::Coeff fSC;
638 SkXfermode::Coeff fDC; 641 SkXfermode::Coeff fDC;
639 }; 642 };
640 643
// Sentinel: -1 converted to SkXfermode::Coeff. gProcCoeffs uses it for both
// coefficients of rows that have no coefficient-pair form.
641 #define CANNOT_USE_COEFF SkXfermode::Coeff(-1) 644 #define CANNOT_USE_COEFF SkXfermode::Coeff(-1)
642 645
// gProcCoeffs: one ProcCoeff row per blend mode.
//   OLD column row layout: { scalar proc, src coeff, dst coeff }
//   NEW column row layout: { scalar proc, SIMD proc, src coeff, dst coeff }
// Rows from overlay_modeproc onward use CANNOT_USE_COEFF for both coefficients.
// The last four rows (hue, saturation, color, luminosity) pass NULL as the SIMD
// proc; every other row names an SK_XMPSIMD_* symbol, which is not defined in
// this view (presumably supplied by SkXfermode_opts.h - confirm).
// The unflattening SkProcCoeffXfermode constructor indexes this table directly
// with fMode, so the row order presumably has to match SkXfermode::Mode - confirm.
// NOTE(review): "SkXf ermode" and "CANN OT_USE_COEFF" in the NEW column appear to
// be line-wrap artifacts of the page capture (a space inserted where the column
// wrapped), not part of the patch; read them as SkXfermode / CANNOT_USE_COEFF.
643 static const ProcCoeff gProcCoeffs[] = { 646 static const ProcCoeff gProcCoeffs[] = {
644 { clear_modeproc, SkXfermode::kZero_Coeff, SkXfermode::kZero_Coeff }, 647 { clear_modeproc, SK_XMPSIMD_CLEAR, SkXfermode::kZero_Coeff, SkXf ermode::kZero_Coeff },
645 { src_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kZero_Coeff }, 648 { src_modeproc, SK_XMPSIMD_SRC, SkXfermode::kOne_Coeff, SkXf ermode::kZero_Coeff },
646 { dst_modeproc, SkXfermode::kZero_Coeff, SkXfermode::kOne_Coeff }, 649 { dst_modeproc, SK_XMPSIMD_DST, SkXfermode::kZero_Coeff, SkXf ermode::kOne_Coeff },
647 { srcover_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kISA_Coeff }, 650 { srcover_modeproc, SK_XMPSIMD_SRCOVER, SkXfermode::kOne_Coeff, SkXf ermode::kISA_Coeff },
648 { dstover_modeproc, SkXfermode::kIDA_Coeff, SkXfermode::kOne_Coeff }, 651 { dstover_modeproc, SK_XMPSIMD_DSTOVER, SkXfermode::kIDA_Coeff, SkXf ermode::kOne_Coeff },
649 { srcin_modeproc, SkXfermode::kDA_Coeff, SkXfermode::kZero_Coeff }, 652 { srcin_modeproc, SK_XMPSIMD_SRCIN, SkXfermode::kDA_Coeff, SkXf ermode::kZero_Coeff },
650 { dstin_modeproc, SkXfermode::kZero_Coeff, SkXfermode::kSA_Coeff }, 653 { dstin_modeproc, SK_XMPSIMD_DSTIN, SkXfermode::kZero_Coeff, SkXf ermode::kSA_Coeff },
651 { srcout_modeproc, SkXfermode::kIDA_Coeff, SkXfermode::kZero_Coeff }, 654 { srcout_modeproc, SK_XMPSIMD_SRCOUT, SkXfermode::kIDA_Coeff, SkXf ermode::kZero_Coeff },
652 { dstout_modeproc, SkXfermode::kZero_Coeff, SkXfermode::kISA_Coeff }, 655 { dstout_modeproc, SK_XMPSIMD_DSTOUT, SkXfermode::kZero_Coeff, SkXf ermode::kISA_Coeff },
653 { srcatop_modeproc, SkXfermode::kDA_Coeff, SkXfermode::kISA_Coeff }, 656 { srcatop_modeproc, SK_XMPSIMD_SRCATOP, SkXfermode::kDA_Coeff, SkXf ermode::kISA_Coeff },
654 { dstatop_modeproc, SkXfermode::kIDA_Coeff, SkXfermode::kSA_Coeff }, 657 { dstatop_modeproc, SK_XMPSIMD_DSTATOP, SkXfermode::kIDA_Coeff, SkXf ermode::kSA_Coeff },
655 { xor_modeproc, SkXfermode::kIDA_Coeff, SkXfermode::kISA_Coeff }, 658 { xor_modeproc, SK_XMPSIMD_XOR, SkXfermode::kIDA_Coeff, SkXf ermode::kISA_Coeff },
656 659
657 { plus_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kOne_Coeff }, 660 { plus_modeproc, SK_XMPSIMD_PLUS, SkXfermode::kOne_Coeff, SkXf ermode::kOne_Coeff },
658 { modulate_modeproc,SkXfermode::kZero_Coeff, SkXfermode::kSC_Coeff }, 661 { modulate_modeproc, SK_XMPSIMD_MODULATE, SkXfermode::kZero_Coeff, SkXf ermode::kSC_Coeff },
659 { screen_modeproc, SkXfermode::kOne_Coeff, SkXfermode::kISC_Coeff }, 662 { screen_modeproc, SK_XMPSIMD_SCREEN, SkXfermode::kOne_Coeff, SkXf ermode::kISC_Coeff },
660 { overlay_modeproc, CANNOT_USE_COEFF, CANNOT_USE_COEFF }, 663 { overlay_modeproc, SK_XMPSIMD_OVERLAY, CANNOT_USE_COEFF, CANN OT_USE_COEFF },
661 { darken_modeproc, CANNOT_USE_COEFF, CANNOT_USE_COEFF }, 664 { darken_modeproc, SK_XMPSIMD_DARKEN, CANNOT_USE_COEFF, CANN OT_USE_COEFF },
662 { lighten_modeproc, CANNOT_USE_COEFF, CANNOT_USE_COEFF }, 665 { lighten_modeproc, SK_XMPSIMD_LIGHTEN, CANNOT_USE_COEFF, CANN OT_USE_COEFF },
663 { colordodge_modeproc, CANNOT_USE_COEFF, CANNOT_USE_COEFF }, 666 { colordodge_modeproc,SK_XMPSIMD_COLORDODGE,CANNOT_USE_COEFF, CANN OT_USE_COEFF },
664 { colorburn_modeproc, CANNOT_USE_COEFF, CANNOT_USE_COEFF }, 667 { colorburn_modeproc, SK_XMPSIMD_COLORBURN, CANNOT_USE_COEFF, CANN OT_USE_COEFF },
665 { hardlight_modeproc, CANNOT_USE_COEFF, CANNOT_USE_COEFF }, 668 { hardlight_modeproc, SK_XMPSIMD_HARDLIGHT, CANNOT_USE_COEFF, CANN OT_USE_COEFF },
666 { softlight_modeproc, CANNOT_USE_COEFF, CANNOT_USE_COEFF }, 669 { softlight_modeproc, SK_XMPSIMD_SOFTLIGHT, CANNOT_USE_COEFF, CANN OT_USE_COEFF },
667 { difference_modeproc, CANNOT_USE_COEFF, CANNOT_USE_COEFF }, 670 { difference_modeproc,SK_XMPSIMD_DIFFERENCE,CANNOT_USE_COEFF, CANN OT_USE_COEFF },
668 { exclusion_modeproc, CANNOT_USE_COEFF, CANNOT_USE_COEFF }, 671 { exclusion_modeproc, SK_XMPSIMD_EXCLUSION, CANNOT_USE_COEFF, CANN OT_USE_COEFF },
669 { multiply_modeproc, CANNOT_USE_COEFF, CANNOT_USE_COEFF }, 672 { multiply_modeproc, SK_XMPSIMD_MULTIPLY, CANNOT_USE_COEFF, CANN OT_USE_COEFF },
670 { hue_modeproc, CANNOT_USE_COEFF, CANNOT_USE_COEFF }, 673 { hue_modeproc, NULL, CANNOT_USE_COEFF, CANN OT_USE_COEFF },
671 { saturation_modeproc, CANNOT_USE_COEFF, CANNOT_USE_COEFF }, 674 { saturation_modeproc,NULL, CANNOT_USE_COEFF, CANN OT_USE_COEFF },
672 { color_modeproc, CANNOT_USE_COEFF, CANNOT_USE_COEFF }, 675 { color_modeproc, NULL, CANNOT_USE_COEFF, CANN OT_USE_COEFF },
673 { luminosity_modeproc, CANNOT_USE_COEFF, CANNOT_USE_COEFF }, 676 { luminosity_modeproc,NULL, CANNOT_USE_COEFF, CANN OT_USE_COEFF },
674 }; 677 };
675 678
676 /////////////////////////////////////////////////////////////////////////////// 679 ///////////////////////////////////////////////////////////////////////////////
677 680
// SkXfermode's own asCoeff: unconditionally returns false and writes nothing
// through src or dst. Unchanged between the OLD and NEW columns.
678 bool SkXfermode::asCoeff(Coeff* src, Coeff* dst) const { 681 bool SkXfermode::asCoeff(Coeff* src, Coeff* dst) const {
679 return false; 682 return false;
680 } 683 }
681 684
682 bool SkXfermode::asMode(Mode* mode) const { 685 bool SkXfermode::asMode(Mode* mode) const {
683 return false; 686 return false;
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after
784 787
// xfer32 (only the head of the function is visible; the rest is elided by the
// diff viewer, so nothing is stated about it here).
// Applies fProc to `count` SkPMColor pixels, writing results into dst. Does
// nothing in the visible part when fProc is NULL.
//   aa == NULL : dst[i] = proc(src[i], dst[i]) for every pixel.
//   aa != NULL : per pixel, a == 0 skips the pixel; otherwise the proc result is
//                computed and, when a != 0xFF, passed through
//                SkFourByteInterp(result, original dst, a).
// NEW column only: when SK_ARM_NEON_IS_ALWAYS is set and fProcSIMD is non-NULL,
// the aa == NULL case runs 8 pixels per iteration through procSIMD and finishes
// the remaining (< 8) pixels with the scalar proc. Otherwise it falls back to
// the original scalar loop.
// The scalar loops walk from count-1 down to 0; the NEON loop and its leftover
// loop walk forward.
785 void SkProcXfermode::xfer32(SkPMColor* SK_RESTRICT dst, 788 void SkProcXfermode::xfer32(SkPMColor* SK_RESTRICT dst,
786 const SkPMColor* SK_RESTRICT src, int count, 789 const SkPMColor* SK_RESTRICT src, int count,
787 const SkAlpha* SK_RESTRICT aa) const { 790 const SkAlpha* SK_RESTRICT aa) const {
788 SkASSERT(dst && src && count >= 0); 791 SkASSERT(dst && src && count >= 0);
789 792
790 SkXfermodeProc proc = fProc; 793 SkXfermodeProc proc = fProc;
791 794
792 if (NULL != proc) { 795 if (NULL != proc) {
793 if (NULL == aa) { 796 if (NULL == aa) {
794 for (int i = count - 1; i >= 0; --i) { 797 #if SK_ARM_NEON_IS_ALWAYS
djsollen 2013/10/03 16:38:29 if you do the above approach this code could be pu
reed1 2013/10/04 13:39:19 Looks like we could also return a different subcla
795 dst[i] = proc(src[i], dst[i]); 798 SkXfermodeProcSIMD procSIMD = fProcSIMD;
799 if (procSIMD != NULL) {
800 // Unrolled NEON code
801 while (count >= 8) {
802 uint8x8x4_t vsrc, vdst, vres;
803
// Two vld4.u8 loads fill vsrc from [src] and vdst from [dst]. The first one
// uses the "[...]!" writeback form, so the register holding src is advanced
// by the instruction itself; dst is advanced explicitly by "dst += 8" below.
// NOTE(review): src is declared in the INPUT operand list with constraint "r",
// yet the asm modifies it via writeback. GCC's extended-asm rules do not allow
// modifying an input-only operand; the loop and the leftover code after it
// depend on src having advanced. This should be a read-write operand ("+r")
// or be replaced by vld4_u8 intrinsics plus "src += 8".
// NOTE(review): the asm reads memory through src/dst but declares no "memory"
// clobber (the clobber list is empty) - confirm this is intended.
804 asm volatile (
805 "vld4.u8 %h[vsrc], [%[src]]! \t\n"
806 "vld4.u8 %h[vdst], [%[dst]] \t\n"
807 : [vsrc] "=w" (vsrc), [vdst] "=w" (vdst)
808 : [src] "r" (src), [dst] "r" (dst)
809 :
810 );
811
812 vres = procSIMD(vsrc, vdst);
813
814 vst4_u8((uint8_t*)dst, vres);
815
816 count -= 8;
817 dst += 8;
818 }
819 // Leftovers
820 for (int i = 0; i < count; i++) {
821 dst[i] = proc(src[i], dst[i]);
822 }
823 } else
824 #endif
825 {
826 for (int i = count - 1; i >= 0; --i) {
827 dst[i] = proc(src[i], dst[i]);
828 }
796 } 829 }
797 } else { 830 } else {
798 for (int i = count - 1; i >= 0; --i) { 831 for (int i = count - 1; i >= 0; --i) {
799 unsigned a = aa[i]; 832 unsigned a = aa[i];
800 if (0 != a) { 833 if (0 != a) {
801 SkPMColor dstC = dst[i]; 834 SkPMColor dstC = dst[i];
802 SkPMColor C = proc(src[i], dstC); 835 SkPMColor C = proc(src[i], dstC);
803 if (a != 0xFF) { 836 if (a != 0xFF) {
804 C = SkFourByteInterp(C, dstC, a); 837 C = SkFourByteInterp(C, dstC, a);
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
865 dst[i] = SkToU8(A); 898 dst[i] = SkToU8(A);
866 } 899 }
867 } 900 }
868 } 901 }
869 } 902 }
870 } 903 }
871 904
// Unflattening constructor. Starts with the proc pointer(s) NULL and reads them
// from the buffer only when buffer.isCrossProcess() is false.
// OLD column: reads one function pointer (fProc).
// NEW column: reads two, fProc then fProcSIMD - the same order in which the NEW
// flatten() writes them.
// NOTE(review): the NEW side consumes one more function pointer than the OLD
// side wrote, so buffers produced by OLD-side flatten() presumably cannot be
// read by this version - confirm whether such buffers are ever kept around.
872 SkProcXfermode::SkProcXfermode(SkFlattenableReadBuffer& buffer) 905 SkProcXfermode::SkProcXfermode(SkFlattenableReadBuffer& buffer)
873 : SkXfermode(buffer) { 906 : SkXfermode(buffer) {
874 fProc = NULL; 907 fProc = NULL;
908 fProcSIMD = NULL;
875 if (!buffer.isCrossProcess()) { 909 if (!buffer.isCrossProcess()) {
876 fProc = (SkXfermodeProc)buffer.readFunctionPtr(); 910 fProc = (SkXfermodeProc)buffer.readFunctionPtr();
911 fProcSIMD = (SkXfermodeProcSIMD)buffer.readFunctionPtr();
877 } 912 }
878 } 913 }
879 914
// Serializes this object: first the INHERITED part, then - only when the buffer
// is not cross-process - the raw proc pointer(s) cast to void*.
// OLD column writes fProc; NEW column writes fProc followed by fProcSIMD, which
// must stay in step with the read order in the unflattening constructor.
880 void SkProcXfermode::flatten(SkFlattenableWriteBuffer& buffer) const { 915 void SkProcXfermode::flatten(SkFlattenableWriteBuffer& buffer) const {
881 this->INHERITED::flatten(buffer); 916 this->INHERITED::flatten(buffer);
882 if (!buffer.isCrossProcess()) { 917 if (!buffer.isCrossProcess()) {
883 buffer.writeFunctionPtr((void*)fProc); 918 buffer.writeFunctionPtr((void*)fProc);
919 buffer.writeFunctionPtr((void*)fProcSIMD);
884 } 920 }
885 } 921 }
886 922
887 #ifdef SK_DEVELOPER 923 #ifdef SK_DEVELOPER
// Developer-only (guarded by SK_DEVELOPER) description appended to `str`.
// OLD column prints fProc with %p; NEW column prints both fProc and fProcSIMD.
// The ") ;" at the end of the NEW column looks like a wrap artifact of the page
// capture rather than patch content.
888 void SkProcXfermode::toString(SkString* str) const { 924 void SkProcXfermode::toString(SkString* str) const {
889 str->appendf("SkProcXfermode: %p", fProc); 925 str->appendf("SkProcXfermode: fProc = %p, fProcSIMD = %p", fProc, fProcSIMD) ;
890 } 926 }
891 #endif 927 #endif
892 928
893 ////////////////////////////////////////////////////////////////////////////// 929 //////////////////////////////////////////////////////////////////////////////
894 930
895 #if SK_SUPPORT_GPU 931 #if SK_SUPPORT_GPU
896 932
897 #include "GrEffect.h" 933 #include "GrEffect.h"
898 #include "GrEffectUnitTest.h" 934 #include "GrEffectUnitTest.h"
899 #include "GrTBackendEffectFactory.h" 935 #include "GrTBackendEffectFactory.h"
(...skipping 470 matching lines...) Expand 10 before | Expand all | Expand 10 after
1370 } 1406 }
1371 1407
1372 #endif 1408 #endif
1373 1409
1374 /////////////////////////////////////////////////////////////////////////////// 1410 ///////////////////////////////////////////////////////////////////////////////
1375 /////////////////////////////////////////////////////////////////////////////// 1411 ///////////////////////////////////////////////////////////////////////////////
1376 1412
1377 class SkProcCoeffXfermode : public SkProcXfermode { 1413 class SkProcCoeffXfermode : public SkProcXfermode {
1378 public: 1414 public:
// Builds the xfermode from one ProcCoeff record plus its Mode.
// OLD column hands rec.fProc to the base class; NEW column hands it both
// rec.fProc and rec.fProcSIMD. The mode and the two coefficients are copied
// into fMode, fSrcCoeff and fDstCoeff.
1379 SkProcCoeffXfermode(const ProcCoeff& rec, Mode mode) 1415 SkProcCoeffXfermode(const ProcCoeff& rec, Mode mode)
1380 : INHERITED(rec.fProc) { 1416 : INHERITED(rec.fProc, rec.fProcSIMD) {
1381 fMode = mode; 1417 fMode = mode;
1382 // these may be valid, or may be CANNOT_USE_COEFF 1418 // these may be valid, or may be CANNOT_USE_COEFF
1383 fSrcCoeff = rec.fSC; 1419 fSrcCoeff = rec.fSC;
1384 fDstCoeff = rec.fDC; 1420 fDstCoeff = rec.fDC;
1385 } 1421 }
1386 1422
1387 virtual bool asMode(Mode* mode) const SK_OVERRIDE { 1423 virtual bool asMode(Mode* mode) const SK_OVERRIDE {
1388 if (mode) { 1424 if (mode) {
1389 *mode = fMode; 1425 *mode = fMode;
1390 } 1426 }
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
1430 1466
1431 protected: 1467 protected:
// Unflattening constructor. After the base class has consumed its part of the
// buffer, reads fMode as a 32-bit value, looks up gProcCoeffs[fMode], copies the
// row's coefficients, and then replaces the base-class proc(s) with the ones
// from the table (setProc in OLD, setProcs with both pointers in NEW).
// NOTE(review): fMode comes straight from buffer.read32() and is used as the
// index into gProcCoeffs with no range check visible here. A value outside the
// table would read out of bounds; unless the buffer is validated elsewhere,
// the value should be checked against the table size before indexing.
1432 SkProcCoeffXfermode(SkFlattenableReadBuffer& buffer) : INHERITED(buffer) { 1468 SkProcCoeffXfermode(SkFlattenableReadBuffer& buffer) : INHERITED(buffer) {
1433 fMode = (SkXfermode::Mode)buffer.read32(); 1469 fMode = (SkXfermode::Mode)buffer.read32();
1434 1470
1435 const ProcCoeff& rec = gProcCoeffs[fMode]; 1471 const ProcCoeff& rec = gProcCoeffs[fMode];
1436 // these may be valid, or may be CANNOT_USE_COEFF 1472 // these may be valid, or may be CANNOT_USE_COEFF
1437 fSrcCoeff = rec.fSC; 1473 fSrcCoeff = rec.fSC;
1438 fDstCoeff = rec.fDC; 1474 fDstCoeff = rec.fDC;
1439 // now update our function-ptr in the super class 1475 // now update our function-ptr in the super class
1440 this->INHERITED::setProc(rec.fProc); 1476 this->INHERITED::setProcs(rec.fProc, rec.fProcSIMD);
1441 } 1477 }
1442 1478
// Serializes the INHERITED state first, then fMode as a 32-bit value - the
// value the unflattening constructor reads back with read32(). Unchanged
// between the OLD and NEW columns.
1443 virtual void flatten(SkFlattenableWriteBuffer& buffer) const SK_OVERRIDE { 1479 virtual void flatten(SkFlattenableWriteBuffer& buffer) const SK_OVERRIDE {
1444 this->INHERITED::flatten(buffer); 1480 this->INHERITED::flatten(buffer);
1445 buffer.write32(fMode); 1481 buffer.write32(fMode);
1446 } 1482 }
1447 1483
1448 private: 1484 private:
1449 Mode fMode; 1485 Mode fMode;
1450 Coeff fSrcCoeff, fDstCoeff; 1486 Coeff fSrcCoeff, fDstCoeff;
(...skipping 512 matching lines...) Expand 10 before | Expand all | Expand 10 after
1963 return proc16; 1999 return proc16;
1964 } 2000 }
1965 2001
// Flattenable registrar group for SkXfermode with five entries:
// SkProcCoeffXfermode, SkClearXfermode, SkSrcXfermode, SkDstInXfermode and
// SkDstOutXfermode. Identical in the OLD and NEW columns.
1966 SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_START(SkXfermode) 2002 SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_START(SkXfermode)
1967 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkProcCoeffXfermode) 2003 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkProcCoeffXfermode)
1968 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkClearXfermode) 2004 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkClearXfermode)
1969 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkSrcXfermode) 2005 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkSrcXfermode)
1970 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkDstInXfermode) 2006 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkDstInXfermode)
1971 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkDstOutXfermode) 2007 SK_DEFINE_FLATTENABLE_REGISTRAR_ENTRY(SkDstOutXfermode)
1972 SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_END 2008 SK_DEFINE_FLATTENABLE_REGISTRAR_GROUP_END
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698