Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(10)

Side by Side Diff: src/utils/SkTextureCompressor.cpp

Issue 406693002: First pass at a blitter for R11 EAC alpha masks. This shaves 10ms off (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Actually check in buildable code Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright 2014 Google Inc. 2 * Copyright 2014 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkTextureCompressor.h" 8 #include "SkTextureCompressor.h"
9 9
10 #include "SkBitmap.h" 10 #include "SkBitmap.h"
(...skipping 714 matching lines...) Expand 10 before | Expand all | Expand 10 after
725 // Compress it 725 // Compress it
726 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes); 726 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes);
727 ++encPtr; 727 ++encPtr;
728 } 728 }
729 src += 4 * rowBytes; 729 src += 4 * rowBytes;
730 } 730 }
731 return true; 731 return true;
732 } 732 }
733 #endif // COMPRESS_R11_EAC_FASTEST 733 #endif // COMPRESS_R11_EAC_FASTEST
734 734
735 // The R11 EAC format expects that indices are given in column-major order. Since
736 // we receive alpha values in raster order, this usually means that we have to use
737 // pack6 above to properly pack our indices. However, if our indices come from the
738 // blitter, then each integer will be a column of indices, and hence can be efficiently
739 // packed. This function takes the bottom three bits of each byte and places them in
740 // the least significant 12 bits of the resulting integer.
741 static inline uint32_t pack_indices_vertical(uint32_t x) {
742 #if defined (SK_CPU_BENDIAN)
743 return
744 (x & 7) |
745 ((x >> 5) & (7 << 3)) |
746 ((x >> 10) & (7 << 6)) |
747 ((x >> 15) & (7 << 9));
748 #else
749 return
750 ((x >> 24) & 7) |
751 ((x >> 13) & (7 << 3)) |
752 ((x >> 2) & (7 << 6)) |
753 ((x << 9) & (7 << 9));
754 #endif
755 }
756
757 // This function returns the compressed format of a block given as four columns of
758 // alpha values. Each column is assumed to be loaded from top to bottom, and hence
759 // must first be converted to indices and then packed into the resulting 64-bit
760 // integer.
761 static inline uint64_t compress_block_vertical(const uint32_t alphaColumn0,
762 const uint32_t alphaColumn1,
763 const uint32_t alphaColumn2,
764 const uint32_t alphaColumn3) {
765
766 if (alphaColumn0 == alphaColumn1 &&
767 alphaColumn2 == alphaColumn3 &&
768 alphaColumn0 == alphaColumn2) {
769
770 if (0 == alphaColumn0) {
771 // Transparent
772 return 0x0020000000002000ULL;
773 }
774 else if (0xFFFFFFFF == alphaColumn0) {
775 // Opaque
776 return 0xFFFFFFFFFFFFFFFFULL;
777 }
778 }
779
780 const uint32_t indexColumn0 = convert_indices(alphaColumn0);
781 const uint32_t indexColumn1 = convert_indices(alphaColumn1);
782 const uint32_t indexColumn2 = convert_indices(alphaColumn2);
783 const uint32_t indexColumn3 = convert_indices(alphaColumn3);
784
785 const uint32_t packedIndexColumn0 = pack_indices_vertical(indexColumn0);
786 const uint32_t packedIndexColumn1 = pack_indices_vertical(indexColumn1);
787 const uint32_t packedIndexColumn2 = pack_indices_vertical(indexColumn2);
788 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3);
789
790 return SkEndian_SwapBE64(0x8490000000000000ULL |
791 (static_cast<uint64_t>(packedIndexColumn0) << 36) |
792 (static_cast<uint64_t>(packedIndexColumn1) << 24) |
793 static_cast<uint64_t>(packedIndexColumn2 << 12) |
794 static_cast<uint64_t>(packedIndexColumn3));
795
796 }
797
735 static inline bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src, 798 static inline bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src,
736 int width, int height, int rowBytes) { 799 int width, int height, int rowBytes) {
737 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) 800 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST)
738 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_ r11eac_block); 801 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_ r11eac_block);
739 #elif COMPRESS_R11_EAC_FASTEST 802 #elif COMPRESS_R11_EAC_FASTEST
740 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes); 803 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes);
741 #else 804 #else
742 #error "Must choose R11 EAC algorithm" 805 #error "Must choose R11 EAC algorithm"
743 #endif 806 #endif
744 } 807 }
745 808
robertphillips 2014/07/21 18:08:25 // comment ?
krajcevski 2014/07/21 18:24:20 Done.
809 static inline void update_block_columns(
810 uint32_t* blockCol1, uint32_t* blockCol2, uint32_t* blockCol3, uint32_t* blo ckCol4,
811 const uint32_t curAlphai, const int col, const int colsLeft) {
812 SkASSERT(NULL != blockCol1);
813 SkASSERT(NULL != blockCol2);
814 SkASSERT(NULL != blockCol3);
815 SkASSERT(NULL != blockCol4);
816 SkASSERT(col + colsLeft <= 4);
817 for (int i = col; i < (col + colsLeft); ++i) {
818 switch(i) {
819 case 0:
820 *blockCol1 = curAlphai;
821 break;
822 case 1:
823 *blockCol2 = curAlphai;
824 break;
825 case 2:
826 *blockCol3 = curAlphai;
827 break;
828 case 3:
829 *blockCol4 = curAlphai;
830 break;
831 }
832 }
833 }
834
746 //////////////////////////////////////////////////////////////////////////////// 835 ////////////////////////////////////////////////////////////////////////////////
747 836
748 namespace SkTextureCompressor { 837 namespace SkTextureCompressor {
749 838
750 static inline size_t get_compressed_data_size(Format fmt, int width, int height) { 839 static inline size_t get_compressed_data_size(Format fmt, int width, int height) {
751 switch (fmt) { 840 switch (fmt) {
752 // These formats are 64 bits per 4x4 block. 841 // These formats are 64 bits per 4x4 block.
753 case kR11_EAC_Format: 842 case kR11_EAC_Format:
754 case kLATC_Format: 843 case kLATC_Format:
755 { 844 {
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
813 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize )); 902 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize ));
814 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bit map.height(), 903 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bit map.height(),
815 bitmap.rowBytes(), format)) { 904 bitmap.rowBytes(), format)) {
816 return SkData::NewFromMalloc(dst, compressedDataSize); 905 return SkData::NewFromMalloc(dst, compressedDataSize);
817 } 906 }
818 907
819 sk_free(dst); 908 sk_free(dst);
820 return NULL; 909 return NULL;
821 } 910 }
822 911
912 R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer)
913 // 0x7FFE is one less than the largest positive 16-bit int. We use it for
914 // debugging to make sure that we're properly setting the nextX distance
915 // in flushRuns().
916 : kLongestRun(0x7FFE), kZeroAlpha(0)
917 , fNextRun(0)
918 , fWidth(width)
919 , fHeight(height)
920 , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer))
921 {
922 SkASSERT((width % kR11_EACBlockSz) == 0);
923 SkASSERT((height % kR11_EACBlockSz) == 0);
924 }
925
926 void R11_EACBlitter::blitAntiH(int x, int y,
927 const SkAlpha* antialias,
928 const int16_t* runs) SK_OVERRIDE {
929 // Make sure that the new row to blit is either the first
930 // row that we're blitting, or it's exactly the next scan row
931 // since the last row that we blit. This is to ensure that when
932 // we go to flush the runs, that they are all the same four
933 // runs.
934 if (fNextRun > 0 &&
935 ((x != fBufferedRuns[fNextRun-1].fX) ||
936 (y-1 != fBufferedRuns[fNextRun-1].fY))) {
937 this->flushRuns();
938 }
939
940 // Align the rows to a block boundary. If we receive rows that
941 // are not on a block boundary, then fill in the preceding runs
942 // with zeros. We do this by producing a single RLE that says
robertphillips 2014/07/21 18:08:25 that we _have_ ?
krajcevski 2014/07/21 18:24:20 Done.
943 // that we 0x7FFE pixels of zero (0x7FFE = 32766).
944 const int row = y & ~3;
945 while ((row + fNextRun) < y) {
946 fBufferedRuns[fNextRun].fAlphas = &kZeroAlpha;
947 fBufferedRuns[fNextRun].fRuns = &kLongestRun;
948 fBufferedRuns[fNextRun].fX = 0;
949 fBufferedRuns[fNextRun].fY = row + fNextRun;
950 ++fNextRun;
951 }
952
953 // Make sure that our assumptions aren't violated...
954 SkASSERT(fNextRun == (y & 3));
955 SkASSERT(fNextRun == 0 || fBufferedRuns[fNextRun - 1].fY < y);
956
957 // Set the values of the next run
958 fBufferedRuns[fNextRun].fAlphas = antialias;
959 fBufferedRuns[fNextRun].fRuns = runs;
960 fBufferedRuns[fNextRun].fX = x;
961 fBufferedRuns[fNextRun].fY = y;
962
963 // If we've output four scanlines in a row that don't violate our
964 // assumptions, then it's time to flush them...
965 if (4 == ++fNextRun) {
966 this->flushRuns();
967 }
968 }
969
970 void R11_EACBlitter::flushRuns() {
971
972 // If we don't have any runs, then just return.
973 if (0 == fNextRun) {
974 return;
975 }
976
977 #ifndef NDEBUG
978 // Make sure that if we have any runs, they all match
979 for (int i = 1; i < fNextRun; ++i) {
980 SkASSERT(fBufferedRuns[i].fY == fBufferedRuns[i-1].fY + 1);
981 SkASSERT(fBufferedRuns[i].fX == fBufferedRuns[i-1].fX);
982 }
983 #endif
984
985 // If we don't have as many runs as we have rows, fill in the remaining
986 // runs with constant zeros.
987 for (int i = fNextRun; i < kR11_EACBlockSz; ++i) {
988 fBufferedRuns[i].fY = fBufferedRuns[0].fY + i;
989 fBufferedRuns[i].fX = fBufferedRuns[0].fX;
990 fBufferedRuns[i].fAlphas = &kZeroAlpha;
991 fBufferedRuns[i].fRuns = &kLongestRun;
992 }
993
994 // Make sure that our assumptions aren't violated.
995 SkASSERT(fNextRun > 0 && fNextRun <= 4);
996 SkASSERT((fBufferedRuns[0].fY & 3) == 0);
997
998 // The following logic walks four rows at a time and outputs compressed
999 // blocks to the buffer passed into the constructor.
1000 // We do the following:
1001 //
1002 // c1 c2 c3 c4
1003 // -----------------------------------------------------------------------
1004 // ... | | | | | ----> fBufferedRuns[0]
1005 // -----------------------------------------------------------------------
1006 // ... | | | | | ----> fBufferedRuns[1]
1007 // -----------------------------------------------------------------------
1008 // ... | | | | | ----> fBufferedRuns[2]
1009 // -----------------------------------------------------------------------
1010 // ... | | | | | ----> fBufferedRuns[3]
1011 // -----------------------------------------------------------------------
1012 //
1013 // curX -- the macro X value that we've gotten to.
1014 // c1, c2, c3, c4 -- the integers that represent the columns of the current block
1015 // that we're operating on
robertphillips 2014/07/21 18:08:25 curAlphai -> curAlphaColumn ?
krajcevski 2014/07/21 18:24:20 Done.
1016 // curAlphai -- integer containing the column of alpha values from fBufferedRuns.
1017 // nextX -- the next point at which we need to update curAlphai after the value of curX.
1018 // finalX -- the minimum of all the nextX values.
1019 //
1020 // curX advances to finalX outputting any blocks that it passes along
1021 // the way. Since finalX will not change when we reach the end of a
1022 // run, the termination criteria will be whenever curX == finalX at the
1023 // end of a loop.
1024
1025 // Setup:
1026 uint32_t c1 = 0;
1027 uint32_t c2 = 0;
1028 uint32_t c3 = 0;
1029 uint32_t c4 = 0;
1030
1031 uint32_t curAlphai = 0;
1032 SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphai);
1033
1034 int nextX[kR11_EACBlockSz];
1035 for (int i = 0; i < kR11_EACBlockSz; ++i) {
1036 nextX[i] = 0x7FFFFF;
1037 }
1038
1039 uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY);
1040
1041 // Populate the first set of runs and figure out how far we need to
1042 // advance on the first step
1043 int curX = 0;
1044 int finalX = 0xFFFFF;
1045 for (int i = 0; i < kR11_EACBlockSz; ++i) {
1046 nextX[i] = *(fBufferedRuns[i].fRuns);
1047 curAlpha[i] = *(fBufferedRuns[i].fAlphas);
1048
1049 finalX = SkMin32(nextX[i], finalX);
1050 }
1051
1052 // Make sure that we have a valid right-bound X value
1053 SkASSERT(finalX < 0xFFFFF);
1054
1055 // Run the blitter...
1056 while (curX != finalX) {
1057 SkASSERT(finalX >= curX);
1058
1059 // Do we need to populate the rest of the block?
1060 if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) {
1061 const int col = curX & 3;
1062 const int colsLeft = 4 - col;
1063 SkASSERT(curX + colsLeft <= finalX);
1064
1065 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphai);
1066
1067 // Write this block
1068 *outPtr = compress_block_vertical(c1, c2, c3, c4);
1069 ++outPtr;
1070 curX += colsLeft;
1071 }
1072
1073 // If we can advance even further, then just keep memsetting the block
1074 if ((finalX - curX) >= kR11_EACBlockSz) {
1075 SkASSERT((curX & 3) == 0);
1076
1077 const int col = 0;
1078 const int colsLeft = kR11_EACBlockSz;
1079
1080 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphai);
1081
1082 // While we can keep advancing, just keep writing the block.
1083 uint64_t lastBlock = compress_block_vertical(c1, c2, c3, c4);
1084 while((finalX - curX) >= kR11_EACBlockSz) {
1085 *outPtr = lastBlock;
1086 ++outPtr;
1087 curX += kR11_EACBlockSz;
1088 }
1089 }
1090
1091 // If we haven't advanced within the block then do so.
1092 if (curX < finalX) {
1093 const int col = curX & 3;
1094 const int colsLeft = finalX - curX;
1095
1096 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphai);
1097
1098 curX += colsLeft;
1099 }
1100
1101 SkASSERT(curX == finalX);
1102
1103 // Figure out what the next advancement is...
1104 for (int i = 0; i < kR11_EACBlockSz; ++i) {
1105 if (nextX[i] == finalX) {
1106 const int16_t run = *(fBufferedRuns[i].fRuns);
1107 fBufferedRuns[i].fRuns += run;
1108 fBufferedRuns[i].fAlphas += run;
1109 curAlpha[i] = *(fBufferedRuns[i].fAlphas);
1110 nextX[i] += *(fBufferedRuns[i].fRuns);
1111 }
1112 }
1113
1114 finalX = 0xFFFFF;
1115 for (int i = 0; i < kR11_EACBlockSz; ++i) {
1116 finalX = SkMin32(nextX[i], finalX);
1117 }
1118 }
1119
1120 // If we didn't land on a block boundary, output the block...
1121 if ((curX & 3) > 1) {
1122 *outPtr = compress_block_vertical(c1, c2, c3, c4);
1123 }
1124
1125 fNextRun = 0;
1126 }
1127
823 } // namespace SkTextureCompressor 1128 } // namespace SkTextureCompressor
OLDNEW
« src/utils/SkTextureCompressor.h ('K') | « src/utils/SkTextureCompressor.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698