Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(70)

Side by Side Diff: src/utils/SkTextureCompressor.cpp

Issue 406693002: First pass at a blitter for R11 EAC alpha masks. This shaves 10ms off (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Get rid of SK_OVERRIDE in module Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/utils/SkTextureCompressor.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2014 Google Inc. 2 * Copyright 2014 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkTextureCompressor.h" 8 #include "SkTextureCompressor.h"
9 9
10 #include "SkBitmap.h" 10 #include "SkBitmap.h"
(...skipping 714 matching lines...) Expand 10 before | Expand all | Expand 10 after
725 // Compress it 725 // Compress it
726 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes); 726 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes);
727 ++encPtr; 727 ++encPtr;
728 } 728 }
729 src += 4 * rowBytes; 729 src += 4 * rowBytes;
730 } 730 }
731 return true; 731 return true;
732 } 732 }
733 #endif // COMPRESS_R11_EAC_FASTEST 733 #endif // COMPRESS_R11_EAC_FASTEST
734 734
// The R11 EAC format expects that indices are given in column-major order. Since
// we receive alpha values in raster order, this usually means that we have to use
// pack6 above to properly pack our indices. However, if our indices come from the
// blitter, then each integer will be a column of indices, and hence can be
// efficiently packed.
//
// This function takes the bottom three bits of each byte of x and places them in
// the least significant 12 bits of the result. The byte that sits first in memory
// ends up in result bits [0, 3), the next one in bits [3, 6), and so on, which is
// why the byte selection differs between big- and little-endian builds.
static inline uint32_t pack_indices_vertical(uint32_t x) {
    const uint32_t kIndexMask = 7;
#if defined (SK_CPU_BENDIAN)
    const uint32_t idx0 = x & kIndexMask;
    const uint32_t idx1 = (x >> 8) & kIndexMask;
    const uint32_t idx2 = (x >> 16) & kIndexMask;
    const uint32_t idx3 = (x >> 24) & kIndexMask;
#else
    const uint32_t idx0 = (x >> 24) & kIndexMask;
    const uint32_t idx1 = (x >> 16) & kIndexMask;
    const uint32_t idx2 = (x >> 8) & kIndexMask;
    const uint32_t idx3 = x & kIndexMask;
#endif
    return idx0 | (idx1 << 3) | (idx2 << 6) | (idx3 << 9);
}
756
757 // This function returns the compressed format of a block given as four columns of
758 // alpha values. Each column is assumed to be loaded from top to bottom, and hen ce
759 // must first be converted to indices and then packed into the resulting 64-bit
760 // integer.
761 static inline uint64_t compress_block_vertical(const uint32_t alphaColumn0,
762 const uint32_t alphaColumn1,
763 const uint32_t alphaColumn2,
764 const uint32_t alphaColumn3) {
765
766 if (alphaColumn0 == alphaColumn1 &&
767 alphaColumn2 == alphaColumn3 &&
768 alphaColumn0 == alphaColumn2) {
769
770 if (0 == alphaColumn0) {
771 // Transparent
772 return 0x0020000000002000ULL;
773 }
774 else if (0xFFFFFFFF == alphaColumn0) {
775 // Opaque
776 return 0xFFFFFFFFFFFFFFFFULL;
777 }
778 }
779
780 const uint32_t indexColumn0 = convert_indices(alphaColumn0);
781 const uint32_t indexColumn1 = convert_indices(alphaColumn1);
782 const uint32_t indexColumn2 = convert_indices(alphaColumn2);
783 const uint32_t indexColumn3 = convert_indices(alphaColumn3);
784
785 const uint32_t packedIndexColumn0 = pack_indices_vertical(indexColumn0);
786 const uint32_t packedIndexColumn1 = pack_indices_vertical(indexColumn1);
787 const uint32_t packedIndexColumn2 = pack_indices_vertical(indexColumn2);
788 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3);
789
790 return SkEndian_SwapBE64(0x8490000000000000ULL |
791 (static_cast<uint64_t>(packedIndexColumn0) << 36) |
792 (static_cast<uint64_t>(packedIndexColumn1) << 24) |
793 static_cast<uint64_t>(packedIndexColumn2 << 12) |
794 static_cast<uint64_t>(packedIndexColumn3));
795
796 }
797
735 static inline bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src, 798 static inline bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src,
736 int width, int height, int rowBytes) { 799 int width, int height, int rowBytes) {
737 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) 800 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST)
738 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_ r11eac_block); 801 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_ r11eac_block);
739 #elif COMPRESS_R11_EAC_FASTEST 802 #elif COMPRESS_R11_EAC_FASTEST
740 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes); 803 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes);
741 #else 804 #else
742 #error "Must choose R11 EAC algorithm" 805 #error "Must choose R11 EAC algorithm"
743 #endif 806 #endif
744 } 807 }
745 808
809 // Updates the block whose columns are stored in blockColN. curAlphai is expecte d
810 // to store, as an integer, the four alpha values that will be placed within eac h
811 // of the columns in the range [col, col+colsLeft).
812 static inline void update_block_columns(
813 uint32_t* blockCol1, uint32_t* blockCol2, uint32_t* blockCol3, uint32_t* blo ckCol4,
814 const int col, const int colsLeft, const uint32_t curAlphai) {
815 SkASSERT(NULL != blockCol1);
816 SkASSERT(NULL != blockCol2);
817 SkASSERT(NULL != blockCol3);
818 SkASSERT(NULL != blockCol4);
819 SkASSERT(col + colsLeft <= 4);
820 for (int i = col; i < (col + colsLeft); ++i) {
821 switch(i) {
822 case 0:
823 *blockCol1 = curAlphai;
824 break;
825 case 1:
826 *blockCol2 = curAlphai;
827 break;
828 case 2:
829 *blockCol3 = curAlphai;
830 break;
831 case 3:
832 *blockCol4 = curAlphai;
833 break;
834 }
835 }
836 }
837
746 //////////////////////////////////////////////////////////////////////////////// 838 ////////////////////////////////////////////////////////////////////////////////
747 839
748 namespace SkTextureCompressor { 840 namespace SkTextureCompressor {
749 841
750 static inline size_t get_compressed_data_size(Format fmt, int width, int height) { 842 static inline size_t get_compressed_data_size(Format fmt, int width, int height) {
751 switch (fmt) { 843 switch (fmt) {
752 // These formats are 64 bits per 4x4 block. 844 // These formats are 64 bits per 4x4 block.
753 case kR11_EAC_Format: 845 case kR11_EAC_Format:
754 case kLATC_Format: 846 case kLATC_Format:
755 { 847 {
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
813 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize )); 905 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize ));
814 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bit map.height(), 906 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bit map.height(),
815 bitmap.rowBytes(), format)) { 907 bitmap.rowBytes(), format)) {
816 return SkData::NewFromMalloc(dst, compressedDataSize); 908 return SkData::NewFromMalloc(dst, compressedDataSize);
817 } 909 }
818 910
819 sk_free(dst); 911 sk_free(dst);
820 return NULL; 912 return NULL;
821 } 913 }
822 914
915 R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer)
916 // 0x7FFE is one minus the largest positive 16-bit int. We use it for
917 // debugging to make sure that we're properly setting the nextX distance
918 // in flushRuns().
919 : kLongestRun(0x7FFE), kZeroAlpha(0)
920 , fNextRun(0)
921 , fWidth(width)
922 , fHeight(height)
923 , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer))
924 {
925 SkASSERT((width % kR11_EACBlockSz) == 0);
926 SkASSERT((height % kR11_EACBlockSz) == 0);
927 }
928
929 void R11_EACBlitter::blitAntiH(int x, int y,
930 const SkAlpha* antialias,
931 const int16_t* runs) {
932 // Make sure that the new row to blit is either the first
933 // row that we're blitting, or it's exactly the next scan row
934 // since the last row that we blit. This is to ensure that when
935 // we go to flush the runs, that they are all the same four
936 // runs.
937 if (fNextRun > 0 &&
938 ((x != fBufferedRuns[fNextRun-1].fX) ||
939 (y-1 != fBufferedRuns[fNextRun-1].fY))) {
940 this->flushRuns();
941 }
942
943 // Align the rows to a block boundary. If we receive rows that
944 // are not on a block boundary, then fill in the preceding runs
945 // with zeros. We do this by producing a single RLE that says
946 // that we have 0x7FFE pixels of zero (0x7FFE = 32766).
947 const int row = y & ~3;
948 while ((row + fNextRun) < y) {
949 fBufferedRuns[fNextRun].fAlphas = &kZeroAlpha;
950 fBufferedRuns[fNextRun].fRuns = &kLongestRun;
951 fBufferedRuns[fNextRun].fX = 0;
952 fBufferedRuns[fNextRun].fY = row + fNextRun;
953 ++fNextRun;
954 }
955
956 // Make sure that our assumptions aren't violated...
957 SkASSERT(fNextRun == (y & 3));
958 SkASSERT(fNextRun == 0 || fBufferedRuns[fNextRun - 1].fY < y);
959
960 // Set the values of the next run
961 fBufferedRuns[fNextRun].fAlphas = antialias;
962 fBufferedRuns[fNextRun].fRuns = runs;
963 fBufferedRuns[fNextRun].fX = x;
964 fBufferedRuns[fNextRun].fY = y;
965
966 // If we've output four scanlines in a row that don't violate our
967 // assumptions, then it's time to flush them...
968 if (4 == ++fNextRun) {
969 this->flushRuns();
970 }
971 }
972
973 void R11_EACBlitter::flushRuns() {
974
975 // If we don't have any runs, then just return.
976 if (0 == fNextRun) {
977 return;
978 }
979
980 #ifndef NDEBUG
981 // Make sure that if we have any runs, they all match
982 for (int i = 1; i < fNextRun; ++i) {
983 SkASSERT(fBufferedRuns[i].fY == fBufferedRuns[i-1].fY + 1);
984 SkASSERT(fBufferedRuns[i].fX == fBufferedRuns[i-1].fX);
985 }
986 #endif
987
988 // If we dont have as many runs as we have rows, fill in the remaining
989 // runs with constant zeros.
990 for (int i = fNextRun; i < kR11_EACBlockSz; ++i) {
991 fBufferedRuns[i].fY = fBufferedRuns[0].fY + i;
992 fBufferedRuns[i].fX = fBufferedRuns[0].fX;
993 fBufferedRuns[i].fAlphas = &kZeroAlpha;
994 fBufferedRuns[i].fRuns = &kLongestRun;
995 }
996
997 // Make sure that our assumptions aren't violated.
998 SkASSERT(fNextRun > 0 && fNextRun <= 4);
999 SkASSERT((fBufferedRuns[0].fY & 3) == 0);
1000
1001 // The following logic walks four rows at a time and outputs compressed
1002 // blocks to the buffer passed into the constructor.
1003 // We do the following:
1004 //
1005 // c1 c2 c3 c4
1006 // -----------------------------------------------------------------------
1007 // ... | | | | | ----> fBufferedRuns[0]
1008 // -----------------------------------------------------------------------
1009 // ... | | | | | ----> fBufferedRuns[1]
1010 // -----------------------------------------------------------------------
1011 // ... | | | | | ----> fBufferedRuns[2]
1012 // -----------------------------------------------------------------------
1013 // ... | | | | | ----> fBufferedRuns[3]
1014 // -----------------------------------------------------------------------
1015 //
1016 // curX -- the macro X value that we've gotten to.
1017 // c1, c2, c3, c4 -- the integers that represent the columns of the current block
1018 // that we're operating on
1019 // curAlphaColumn -- integer containing the column of alpha values from fBuf feredRuns.
1020 // nextX -- for each run, the next point at which we need to update curAlpha Column
1021 // after the value of curX.
1022 // finalX -- the minimum of all the nextX values.
1023 //
1024 // curX advances to finalX outputting any blocks that it passes along
1025 // the way. Since finalX will not change when we reach the end of a
1026 // run, the termination criteria will be whenever curX == finalX at the
1027 // end of a loop.
1028
1029 // Setup:
1030 uint32_t c1 = 0;
1031 uint32_t c2 = 0;
1032 uint32_t c3 = 0;
1033 uint32_t c4 = 0;
1034
1035 uint32_t curAlphaColumn = 0;
1036 SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphaColumn);
1037
1038 int nextX[kR11_EACBlockSz];
1039 for (int i = 0; i < kR11_EACBlockSz; ++i) {
1040 nextX[i] = 0x7FFFFF;
1041 }
1042
1043 uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY);
1044
1045 // Populate the first set of runs and figure out how far we need to
1046 // advance on the first step
1047 int curX = 0;
1048 int finalX = 0xFFFFF;
1049 for (int i = 0; i < kR11_EACBlockSz; ++i) {
1050 nextX[i] = *(fBufferedRuns[i].fRuns);
1051 curAlpha[i] = *(fBufferedRuns[i].fAlphas);
1052
1053 finalX = SkMin32(nextX[i], finalX);
1054 }
1055
1056 // Make sure that we have a valid right-bound X value
1057 SkASSERT(finalX < 0xFFFFF);
1058
1059 // Run the blitter...
1060 while (curX != finalX) {
1061 SkASSERT(finalX >= curX);
1062
1063 // Do we need to populate the rest of the block?
1064 if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) {
1065 const int col = curX & 3;
1066 const int colsLeft = 4 - col;
1067 SkASSERT(curX + colsLeft <= finalX);
1068
1069 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColu mn);
1070
1071 // Write this block
1072 *outPtr = compress_block_vertical(c1, c2, c3, c4);
1073 ++outPtr;
1074 curX += colsLeft;
1075 }
1076
1077 // If we can advance even further, then just keep memsetting the block
1078 if ((finalX - curX) >= kR11_EACBlockSz) {
1079 SkASSERT((curX & 3) == 0);
1080
1081 const int col = 0;
1082 const int colsLeft = kR11_EACBlockSz;
1083
1084 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColu mn);
1085
1086 // While we can keep advancing, just keep writing the block.
1087 uint64_t lastBlock = compress_block_vertical(c1, c2, c3, c4);
1088 while((finalX - curX) >= kR11_EACBlockSz) {
1089 *outPtr = lastBlock;
1090 ++outPtr;
1091 curX += kR11_EACBlockSz;
1092 }
1093 }
1094
1095 // If we haven't advanced within the block then do so.
1096 if (curX < finalX) {
1097 const int col = curX & 3;
1098 const int colsLeft = finalX - curX;
1099
1100 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColu mn);
1101
1102 curX += colsLeft;
1103 }
1104
1105 SkASSERT(curX == finalX);
1106
1107 // Figure out what the next advancement is...
1108 for (int i = 0; i < kR11_EACBlockSz; ++i) {
1109 if (nextX[i] == finalX) {
1110 const int16_t run = *(fBufferedRuns[i].fRuns);
1111 fBufferedRuns[i].fRuns += run;
1112 fBufferedRuns[i].fAlphas += run;
1113 curAlpha[i] = *(fBufferedRuns[i].fAlphas);
1114 nextX[i] += *(fBufferedRuns[i].fRuns);
1115 }
1116 }
1117
1118 finalX = 0xFFFFF;
1119 for (int i = 0; i < kR11_EACBlockSz; ++i) {
1120 finalX = SkMin32(nextX[i], finalX);
1121 }
1122 }
1123
1124 // If we didn't land on a block boundary, output the block...
1125 if ((curX & 3) > 1) {
1126 *outPtr = compress_block_vertical(c1, c2, c3, c4);
1127 }
1128
1129 fNextRun = 0;
1130 }
1131
823 } // namespace SkTextureCompressor 1132 } // namespace SkTextureCompressor
OLDNEW
« no previous file with comments | « src/utils/SkTextureCompressor.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698