Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(48)

Side by Side Diff: src/utils/SkTextureCompressor.cpp

Issue 406693002: First pass at a blitter for R11 EAC alpha masks. This shaves 10ms off (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright 2014 Google Inc. 2 * Copyright 2014 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkTextureCompressor.h" 8 #include "SkTextureCompressor.h"
9 9
10 #include "SkBitmap.h" 10 #include "SkBitmap.h"
(...skipping 714 matching lines...) Expand 10 before | Expand all | Expand 10 after
725 // Compress it 725 // Compress it
726 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes); 726 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes);
727 ++encPtr; 727 ++encPtr;
728 } 728 }
729 src += 4 * rowBytes; 729 src += 4 * rowBytes;
730 } 730 }
731 return true; 731 return true;
732 } 732 }
733 #endif // COMPRESS_R11_EAC_FASTEST 733 #endif // COMPRESS_R11_EAC_FASTEST
734 734
// The R11 EAC format expects that indices are given in column-major order.
// Since we receive alpha values in raster order, this usually means that we
// have to use pack6 above to properly pack our indices. However, if our
// indices come from the blitter, then each integer will be a column of
// indices, and hence can be efficiently packed.
//
// This function takes the bottom three bits of each byte of x and places them
// in the least significant 12 bits of the result. The byte order used depends
// on the endianness of the CPU: on little-endian builds the least significant
// byte of x ends up in bits 9..11 of the result, on big-endian builds it ends
// up in bits 0..2.
static inline uint32_t pack_indices_vertical(uint32_t x) {
    // Low three bits of every byte, numbered from the least significant byte.
    const uint32_t idx0 = x & 7;
    const uint32_t idx1 = (x >> 8) & 7;
    const uint32_t idx2 = (x >> 16) & 7;
    const uint32_t idx3 = (x >> 24) & 7;

#if defined (SK_CPU_BENDIAN)
    return idx0 | (idx1 << 3) | (idx2 << 6) | (idx3 << 9);
#else
    return idx3 | (idx2 << 3) | (idx1 << 6) | (idx0 << 9);
#endif
}
756
757 // This function returns the compressed format of a block given as four columns of
758 // alpha values. Each column is assumed to be loaded from top to bottom, and hen ce
759 // must first be converted to indices and then packed into the resulting 64-bit
760 // integer.
761 static inline uint64_t compress_block_vertical(const uint32_t alphaColumn0,
762 const uint32_t alphaColumn1,
763 const uint32_t alphaColumn2,
764 const uint32_t alphaColumn3) {
765
766 if (alphaColumn0 == alphaColumn1 &&
767 alphaColumn2 == alphaColumn3 &&
768 alphaColumn0 == alphaColumn2) {
769
770 if (0 == alphaColumn0) {
771 // Transparent
772 return 0x0020000000002000ULL;
773 }
774 else if (0xFFFFFFFF == alphaColumn0) {
775 // Opaque
776 return 0xFFFFFFFFFFFFFFFFULL;
777 }
778 }
779
780 const uint32_t indexColumn0 = convert_indices(alphaColumn0);
781 const uint32_t indexColumn1 = convert_indices(alphaColumn1);
782 const uint32_t indexColumn2 = convert_indices(alphaColumn2);
783 const uint32_t indexColumn3 = convert_indices(alphaColumn3);
784
785 const uint32_t packedIndexColumn0 = pack_indices_vertical(indexColumn0);
786 const uint32_t packedIndexColumn1 = pack_indices_vertical(indexColumn1);
787 const uint32_t packedIndexColumn2 = pack_indices_vertical(indexColumn2);
788 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3);
789
790 return SkEndian_SwapBE64(0x8490000000000000ULL |
791 (static_cast<uint64_t>(packedIndexColumn0) << 36) |
792 (static_cast<uint64_t>(packedIndexColumn1) << 24) |
793 static_cast<uint64_t>(packedIndexColumn2 << 12) |
794 static_cast<uint64_t>(packedIndexColumn3));
795
796 }
797
735 static inline bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src, 798 static inline bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src,
736 int width, int height, int rowBytes) { 799 int width, int height, int rowBytes) {
737 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) 800 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST)
738 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_ r11eac_block); 801 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_ r11eac_block);
739 #elif COMPRESS_R11_EAC_FASTEST 802 #elif COMPRESS_R11_EAC_FASTEST
740 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes); 803 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes);
741 #else 804 #else
742 #error "Must choose R11 EAC algorithm" 805 #error "Must choose R11 EAC algorithm"
743 #endif 806 #endif
744 } 807 }
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
813 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize )); 876 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize ));
814 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bit map.height(), 877 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bit map.height(),
815 bitmap.rowBytes(), format)) { 878 bitmap.rowBytes(), format)) {
816 return SkData::NewFromMalloc(dst, compressedDataSize); 879 return SkData::NewFromMalloc(dst, compressedDataSize);
817 } 880 }
818 881
819 sk_free(dst); 882 sk_free(dst);
820 return NULL; 883 return NULL;
821 } 884 }
822 885
886 R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer)
robertphillips 2014/07/21 14:28:06 Where is 0x7FFE coming from?
krajcevski 2014/07/21 17:35:26 Done.
887 : kLongestRun(0x7FFE), kZeroAlpha(0)
888 , fNextRun(0)
889 , fWidth(width)
890 , fHeight(height)
891 , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer))
892 {
893 SkASSERT((width % kR11_EACBlockSz) == 0);
894 SkASSERT((height % kR11_EACBlockSz) == 0);
895 }
896
897 void R11_EACBlitter::blitAntiH(int x, int y,
898 const SkAlpha* antialias,
899 const int16_t* runs) SK_OVERRIDE {
900 if (fNextRun > 0 &&
901 ((x != fBufferedRuns[fNextRun-1].fX) ||
902 (y-1 != fBufferedRuns[fNextRun-1].fY))) {
robertphillips 2014/07/21 14:28:06 // comment ?
krajcevski 2014/07/21 17:35:26 Done.
903 this->flushRuns();
904 }
905
906 const int row = y & ~3;
907 while ((row + fNextRun) < y) {
robertphillips 2014/07/21 14:28:06 // comment?
krajcevski 2014/07/21 17:35:26 Done.
908 fBufferedRuns[fNextRun].fAlphas = &kZeroAlpha;
909 fBufferedRuns[fNextRun].fRuns = &kLongestRun;
910 fBufferedRuns[fNextRun].fX = 0;
911 fBufferedRuns[fNextRun].fY = row + fNextRun;
912 ++fNextRun;
913 }
914
915 SkASSERT(fNextRun == (y & 3));
916 SkASSERT(fNextRun == 0 || fBufferedRuns[fNextRun - 1].fY < y);
917
918 fBufferedRuns[fNextRun].fAlphas = antialias;
919 fBufferedRuns[fNextRun].fRuns = runs;
920 fBufferedRuns[fNextRun].fX = x;
921 fBufferedRuns[fNextRun].fY = y;
922
923 if (4 == ++fNextRun) {
924 this->flushRuns();
925 }
926 }
927
928 void R11_EACBlitter::flushRuns() {
929
930 if (0 == fNextRun) {
931 return;
932 }
933
robertphillips 2014/07/21 14:28:06 left justify # lines ?
krajcevski 2014/07/21 17:35:26 Done.
934 #ifndef NDEBUG
935 for (int i = 1; i < fNextRun; ++i) {
936 SkASSERT(fBufferedRuns[i].fY == fBufferedRuns[i-1].fY + 1);
937 }
938 #endif
939
940 for (int i = fNextRun; i < kR11_EACBlockSz; ++i) {
941 fBufferedRuns[i].fY = fBufferedRuns[0].fY + i;
942 fBufferedRuns[i].fX = fBufferedRuns[0].fX;
943 fBufferedRuns[i].fAlphas = &kZeroAlpha;
944 fBufferedRuns[i].fRuns = &kLongestRun;
945 }
946
947 SkASSERT(fNextRun > 0 && fNextRun <= 4);
948 SkASSERT((fBufferedRuns[0].fY & 3) == 0);
949
950 // Setup
951 uint32_t blockCol1 = 0;
952 uint32_t blockCol2 = 0;
953 uint32_t blockCol3 = 0;
954 uint32_t blockCol4 = 0;
955
956 uint32_t curAlphai = 0;
957 SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphai);
958
959 int nextX[kR11_EACBlockSz];
960 for (int i = 0; i < kR11_EACBlockSz; ++i) {
961 nextX[i] = 0x7FFFFF;
962 }
963
964 uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY);
965
966 // Populate the first set of runs and figure out how far we need to
967 // advance on the first leg
968 int curX = 0;
969 int finalX = 0xFFFFF;
970 for (int i = 0; i < kR11_EACBlockSz; ++i) {
971 nextX[i] = *(fBufferedRuns[i].fRuns);
972 curAlpha[i] = *(fBufferedRuns[i].fAlphas);
973
974 finalX = SkMin32(nextX[i], finalX);
975 }
976
robertphillips 2014/07/21 14:28:06 Would making these two inlined functions kill perf
krajcevski 2014/07/21 17:35:26 Done.
977 #define UPDATE_BLOCK do { \
978 SkASSERT(col + colsLeft <= 4); \
979 for (int i = col; i < (col + colsLeft); ++i) { \
980 switch(i) { \
981 case 0: \
982 blockCol1 = curAlphai; \
983 break; \
984 case 1: \
985 blockCol2 = curAlphai; \
986 break; \
987 case 2: \
988 blockCol3 = curAlphai; \
989 break; \
990 case 3: \
991 blockCol4 = curAlphai; \
992 break; \
993 } \
994 } \
995 } while(0)
996
997 #define COMPRESS_BLOCK \
998 compress_block_vertical(blockCol1, blockCol2, blockCol3, blockCol4)
999
1000 // Run the blitter...
1001 while (curX != finalX) {
1002 SkASSERT(finalX >= curX);
1003
1004 // Do we need to populate the rest of the block?
1005 if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) {
1006 const int col = curX & 3;
1007 const int colsLeft = 4 - col;
1008 SkASSERT(curX + colsLeft <= finalX);
1009
1010 UPDATE_BLOCK;
1011
1012 // Write this block
1013 *outPtr = COMPRESS_BLOCK;
1014 ++outPtr;
1015 curX += colsLeft;
1016 }
1017
1018 // If we can advance even further, then memset the block and do
1019 // your thing...
1020 if ((finalX - curX) >= kR11_EACBlockSz) {
1021 SkASSERT((curX & 3) == 0);
1022
1023 const int col = 0;
1024 const int colsLeft = kR11_EACBlockSz;
1025
1026 UPDATE_BLOCK;
1027
1028 // While we can keep advancing, just keep writing the block.
1029 uint64_t lastBlock = COMPRESS_BLOCK;
1030 while((finalX - curX) >= kR11_EACBlockSz) {
1031 *outPtr = lastBlock;
1032 ++outPtr;
1033 curX += kR11_EACBlockSz;
1034 }
1035 }
1036
1037 // If we haven't advanced within the block then do so.
1038 if (curX < finalX) {
1039 const int col = curX & 3;
1040 const int colsLeft = finalX - curX;
1041
1042 UPDATE_BLOCK;
1043
1044 curX += colsLeft;
1045 }
1046
1047 SkASSERT(curX == finalX);
1048
1049 // Figure out what the next advancement is...
1050 for (int i = 0; i < kR11_EACBlockSz; ++i) {
1051 if (nextX[i] == finalX) {
1052 const int16_t run = *(fBufferedRuns[i].fRuns);
1053 fBufferedRuns[i].fRuns += run;
1054 fBufferedRuns[i].fAlphas += run;
1055 curAlpha[i] = *(fBufferedRuns[i].fAlphas);
1056 nextX[i] += *(fBufferedRuns[i].fRuns);
1057 }
1058 }
1059
1060 finalX = 0xFFFFF;
1061 for (int i = 0; i < kR11_EACBlockSz; ++i) {
1062 finalX = SkMin32(nextX[i], finalX);
1063 }
1064 }
1065
1066 // If we didn't land on a block boundary, output the block...
1067 if ((curX & 3) > 1) {
1068 *outPtr = COMPRESS_BLOCK;
1069 }
1070
1071 #undef COMPRESS_BLOCK
1072 #undef UPDATE_BLOCK
1073
1074 fNextRun = 0;
1075 }
1076
823 } // namespace SkTextureCompressor 1077 } // namespace SkTextureCompressor
OLDNEW
« src/utils/SkTextureCompressor.h ('K') | « src/utils/SkTextureCompressor.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698