OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkTextureCompressor.h" | 8 #include "SkTextureCompressor.h" |
9 | 9 |
10 #include "SkBitmap.h" | 10 #include "SkBitmap.h" |
(...skipping 714 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
725 // Compress it | 725 // Compress it |
726 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes); | 726 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes); |
727 ++encPtr; | 727 ++encPtr; |
728 } | 728 } |
729 src += 4 * rowBytes; | 729 src += 4 * rowBytes; |
730 } | 730 } |
731 return true; | 731 return true; |
732 } | 732 } |
733 #endif // COMPRESS_R11_EAC_FASTEST | 733 #endif // COMPRESS_R11_EAC_FASTEST |
734 | 734 |
// The R11 EAC format expects that indices are given in column-major order. Since
// we receive alpha values in raster order, this usually means that we have to use
// pack6 above to properly pack our indices. However, if our indices come from the
// blitter, then each integer will be a column of indices, and hence can be efficiently
// packed. This function takes the bottom three bits of each byte and places them in
// the least significant 12 bits of the resulting integer.
static inline uint32_t pack_indices_vertical(uint32_t x) {
    // Gather the low three bits of each of the four bytes of x into
    // bits [0, 12). The byte order differs with endianness so that the
    // topmost pixel of the column always lands in the lowest index slot.
    uint32_t packed = 0;
    for (int i = 0; i < 4; ++i) {
#if defined (SK_CPU_BENDIAN)
        const uint32_t idx = (x >> (8 * i)) & 7;
#else
        const uint32_t idx = (x >> (8 * (3 - i))) & 7;
#endif
        packed |= idx << (3 * i);
    }
    return packed;
}
756 | |
757 // This function returns the compressed format of a block given as four columns of | |
758 // alpha values. Each column is assumed to be loaded from top to bottom, and hen ce | |
759 // must first be converted to indices and then packed into the resulting 64-bit | |
760 // integer. | |
761 static inline uint64_t compress_block_vertical(const uint32_t alphaColumn0, | |
762 const uint32_t alphaColumn1, | |
763 const uint32_t alphaColumn2, | |
764 const uint32_t alphaColumn3) { | |
765 | |
766 if (alphaColumn0 == alphaColumn1 && | |
767 alphaColumn2 == alphaColumn3 && | |
768 alphaColumn0 == alphaColumn2) { | |
769 | |
770 if (0 == alphaColumn0) { | |
771 // Transparent | |
772 return 0x0020000000002000ULL; | |
773 } | |
774 else if (0xFFFFFFFF == alphaColumn0) { | |
775 // Opaque | |
776 return 0xFFFFFFFFFFFFFFFFULL; | |
777 } | |
778 } | |
779 | |
780 const uint32_t indexColumn0 = convert_indices(alphaColumn0); | |
781 const uint32_t indexColumn1 = convert_indices(alphaColumn1); | |
782 const uint32_t indexColumn2 = convert_indices(alphaColumn2); | |
783 const uint32_t indexColumn3 = convert_indices(alphaColumn3); | |
784 | |
785 const uint32_t packedIndexColumn0 = pack_indices_vertical(indexColumn0); | |
786 const uint32_t packedIndexColumn1 = pack_indices_vertical(indexColumn1); | |
787 const uint32_t packedIndexColumn2 = pack_indices_vertical(indexColumn2); | |
788 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3); | |
789 | |
790 return SkEndian_SwapBE64(0x8490000000000000ULL | | |
791 (static_cast<uint64_t>(packedIndexColumn0) << 36) | | |
792 (static_cast<uint64_t>(packedIndexColumn1) << 24) | | |
793 static_cast<uint64_t>(packedIndexColumn2 << 12) | | |
794 static_cast<uint64_t>(packedIndexColumn3)); | |
795 | |
796 } | |
797 | |
// Compresses a width x height A8 buffer (rowBytes stride) at src into
// R11 EAC blocks at dst, dispatching to whichever implementation was
// selected by the COMPRESS_R11_EAC_* compile-time flags. Returns true on
// success. Exactly one algorithm must be enabled or compilation fails.
static inline bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src,
                                         int width, int height, int rowBytes) {
#if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST)
    // Generic 4x4 -> 64-bit driver parameterized by the per-block compressor.
    return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block);
#elif COMPRESS_R11_EAC_FASTEST
    // Specialized fast path that processes whole block rows at once.
    return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes);
#else
#error "Must choose R11 EAC algorithm"
#endif
}
745 | 808 |
robertphillips
2014/07/21 18:08:25
// comment ?
krajcevski
2014/07/21 18:24:20
Done.
| |
809 static inline void update_block_columns( | |
810 uint32_t* blockCol1, uint32_t* blockCol2, uint32_t* blockCol3, uint32_t* blo ckCol4, | |
811 const uint32_t curAlphai, const int col, const int colsLeft) { | |
812 SkASSERT(NULL != blockCol1); | |
813 SkASSERT(NULL != blockCol2); | |
814 SkASSERT(NULL != blockCol3); | |
815 SkASSERT(NULL != blockCol4); | |
816 SkASSERT(col + colsLeft <= 4); | |
817 for (int i = col; i < (col + colsLeft); ++i) { | |
818 switch(i) { | |
819 case 0: | |
820 *blockCol1 = curAlphai; | |
821 break; | |
822 case 1: | |
823 *blockCol2 = curAlphai; | |
824 break; | |
825 case 2: | |
826 *blockCol3 = curAlphai; | |
827 break; | |
828 case 3: | |
829 *blockCol4 = curAlphai; | |
830 break; | |
831 } | |
832 } | |
833 } | |
834 | |
746 //////////////////////////////////////////////////////////////////////////////// | 835 //////////////////////////////////////////////////////////////////////////////// |
747 | 836 |
748 namespace SkTextureCompressor { | 837 namespace SkTextureCompressor { |
749 | 838 |
750 static inline size_t get_compressed_data_size(Format fmt, int width, int height) { | 839 static inline size_t get_compressed_data_size(Format fmt, int width, int height) { |
751 switch (fmt) { | 840 switch (fmt) { |
752 // These formats are 64 bits per 4x4 block. | 841 // These formats are 64 bits per 4x4 block. |
753 case kR11_EAC_Format: | 842 case kR11_EAC_Format: |
754 case kLATC_Format: | 843 case kLATC_Format: |
755 { | 844 { |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
813 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize )); | 902 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize )); |
814 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bit map.height(), | 903 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bit map.height(), |
815 bitmap.rowBytes(), format)) { | 904 bitmap.rowBytes(), format)) { |
816 return SkData::NewFromMalloc(dst, compressedDataSize); | 905 return SkData::NewFromMalloc(dst, compressedDataSize); |
817 } | 906 } |
818 | 907 |
819 sk_free(dst); | 908 sk_free(dst); |
820 return NULL; | 909 return NULL; |
821 } | 910 } |
822 | 911 |
// Constructs a blitter that writes compressed R11 EAC blocks into
// latcBuffer. width and height must be multiples of the block size
// (kR11_EACBlockSz, i.e. 4), since output is produced one 4x4 block at a
// time. The buffer is assumed to be large enough for
// (width/4)*(height/4) 64-bit blocks — not verifiable from here.
R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer)
    // 0x7FFE is one less than the largest positive 16-bit int. We use it for
    // debugging to make sure that we're properly setting the nextX distance
    // in flushRuns().
    : kLongestRun(0x7FFE), kZeroAlpha(0)
    , fNextRun(0)
    , fWidth(width)
    , fHeight(height)
    , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer))
{
    SkASSERT((width % kR11_EACBlockSz) == 0);
    SkASSERT((height % kR11_EACBlockSz) == 0);
}
925 | |
926 void R11_EACBlitter::blitAntiH(int x, int y, | |
927 const SkAlpha* antialias, | |
928 const int16_t* runs) SK_OVERRIDE { | |
929 // Make sure that the new row to blit is either the first | |
930 // row that we're blitting, or it's exactly the next scan row | |
931 // since the last row that we blit. This is to ensure that when | |
932 // we go to flush the runs, that they are all the same four | |
933 // runs. | |
934 if (fNextRun > 0 && | |
935 ((x != fBufferedRuns[fNextRun-1].fX) || | |
936 (y-1 != fBufferedRuns[fNextRun-1].fY))) { | |
937 this->flushRuns(); | |
938 } | |
939 | |
940 // Align the rows to a block boundary. If we receive rows that | |
941 // are not on a block boundary, then fill in the preceding runs | |
942 // with zeros. We do this by producing a single RLE that says | |
robertphillips
2014/07/21 18:08:25
that we _have_ ?
krajcevski
2014/07/21 18:24:20
Done.
| |
943 // that we 0x7FFE pixels of zero (0x7FFE = 32766). | |
944 const int row = y & ~3; | |
945 while ((row + fNextRun) < y) { | |
946 fBufferedRuns[fNextRun].fAlphas = &kZeroAlpha; | |
947 fBufferedRuns[fNextRun].fRuns = &kLongestRun; | |
948 fBufferedRuns[fNextRun].fX = 0; | |
949 fBufferedRuns[fNextRun].fY = row + fNextRun; | |
950 ++fNextRun; | |
951 } | |
952 | |
953 // Make sure that our assumptions aren't violated... | |
954 SkASSERT(fNextRun == (y & 3)); | |
955 SkASSERT(fNextRun == 0 || fBufferedRuns[fNextRun - 1].fY < y); | |
956 | |
957 // Set the values of the next run | |
958 fBufferedRuns[fNextRun].fAlphas = antialias; | |
959 fBufferedRuns[fNextRun].fRuns = runs; | |
960 fBufferedRuns[fNextRun].fX = x; | |
961 fBufferedRuns[fNextRun].fY = y; | |
962 | |
963 // If we've output four scanlines in a row that don't violate our | |
964 // assumptions, then it's time to flush them... | |
965 if (4 == ++fNextRun) { | |
966 this->flushRuns(); | |
967 } | |
968 } | |
969 | |
// Compresses the (up to) four buffered scanlines into a row of 4x4 R11 EAC
// blocks and writes them into fBuffer. Missing rows are padded with a
// constant zero run so a full block row is always emitted.
void R11_EACBlitter::flushRuns() {

    // If we don't have any runs, then just return.
    if (0 == fNextRun) {
        return;
    }

#ifndef NDEBUG
    // Make sure that if we have any runs, they all match: consecutive Y
    // values and the same starting X.
    for (int i = 1; i < fNextRun; ++i) {
        SkASSERT(fBufferedRuns[i].fY == fBufferedRuns[i-1].fY + 1);
        SkASSERT(fBufferedRuns[i].fX == fBufferedRuns[i-1].fX);
    }
#endif

    // If we don't have as many runs as we have rows, fill in the remaining
    // runs with constant zeros.
    for (int i = fNextRun; i < kR11_EACBlockSz; ++i) {
        fBufferedRuns[i].fY = fBufferedRuns[0].fY + i;
        fBufferedRuns[i].fX = fBufferedRuns[0].fX;
        fBufferedRuns[i].fAlphas = &kZeroAlpha;
        fBufferedRuns[i].fRuns = &kLongestRun;
    }

    // Make sure that our assumptions aren't violated.
    SkASSERT(fNextRun > 0 && fNextRun <= 4);
    SkASSERT((fBufferedRuns[0].fY & 3) == 0);

    // The following logic walks four rows at a time and outputs compressed
    // blocks to the buffer passed into the constructor.
    // We do the following:
    //
    //      c1 c2 c3 c4
    // -----------------------------------------------------------------------
    // ... |  |  |  |  |  ----> fBufferedRuns[0]
    // -----------------------------------------------------------------------
    // ... |  |  |  |  |  ----> fBufferedRuns[1]
    // -----------------------------------------------------------------------
    // ... |  |  |  |  |  ----> fBufferedRuns[2]
    // -----------------------------------------------------------------------
    // ... |  |  |  |  |  ----> fBufferedRuns[3]
    // -----------------------------------------------------------------------
    //
    // curX -- the macro X value that we've gotten to.
    // c1, c2, c3, c4 -- the integers that represent the columns of the current block
    //                   that we're operating on
    // curAlphai -- integer containing the column of alpha values from fBufferedRuns.
    //              (the review suggested renaming this curAlphaColumn)
    // nextX -- the next point at which we need to update curAlphai after the value of curX.
    // finalX -- the minimum of all the nextX values.
    //
    // curX advances to finalX outputting any blocks that it passes along
    // the way. Since finalX will not change when we reach the end of a
    // run, the termination criteria will be whenever curX == finalX at the
    // end of a loop.

    // Setup: the four columns of the block currently being assembled.
    uint32_t c1 = 0;
    uint32_t c2 = 0;
    uint32_t c3 = 0;
    uint32_t c4 = 0;

    // curAlphai holds the current alpha value of each of the four rows,
    // one byte per row; curAlpha aliases those bytes for per-row writes.
    // NOTE(review): this byte-order aliasing must agree with the
    // endian-dependent mapping in pack_indices_vertical — confirm.
    uint32_t curAlphai = 0;
    SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphai);

    int nextX[kR11_EACBlockSz];
    for (int i = 0; i < kR11_EACBlockSz; ++i) {
        nextX[i] = 0x7FFFFF;
    }

    uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY);

    // Populate the first set of runs and figure out how far we need to
    // advance on the first step
    int curX = 0;
    int finalX = 0xFFFFF;
    for (int i = 0; i < kR11_EACBlockSz; ++i) {
        nextX[i] = *(fBufferedRuns[i].fRuns);
        curAlpha[i] = *(fBufferedRuns[i].fAlphas);

        finalX = SkMin32(nextX[i], finalX);
    }

    // Make sure that we have a valid right-bound X value
    SkASSERT(finalX < 0xFFFFF);

    // Run the blitter...
    while (curX != finalX) {
        SkASSERT(finalX >= curX);

        // Do we need to populate the rest of the block?
        if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) {
            const int col = curX & 3;
            const int colsLeft = 4 - col;
            SkASSERT(curX + colsLeft <= finalX);

            update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphai);

            // Write this block
            *outPtr = compress_block_vertical(c1, c2, c3, c4);
            ++outPtr;
            curX += colsLeft;
        }

        // If we can advance even further, then just keep memsetting the block
        if ((finalX - curX) >= kR11_EACBlockSz) {
            SkASSERT((curX & 3) == 0);

            const int col = 0;
            const int colsLeft = kR11_EACBlockSz;

            update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphai);

            // While we can keep advancing, just keep writing the block.
            // Compress once and replicate, since the columns don't change.
            uint64_t lastBlock = compress_block_vertical(c1, c2, c3, c4);
            while((finalX - curX) >= kR11_EACBlockSz) {
                *outPtr = lastBlock;
                ++outPtr;
                curX += kR11_EACBlockSz;
            }
        }

        // If we haven't advanced within the block then do so.
        if (curX < finalX) {
            const int col = curX & 3;
            const int colsLeft = finalX - curX;

            update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphai);

            curX += colsLeft;
        }

        SkASSERT(curX == finalX);

        // Figure out what the next advancement is: any run that ends at
        // finalX is stepped forward (runs/alphas advance by the run
        // length), its new alpha latched, and its nextX extended.
        for (int i = 0; i < kR11_EACBlockSz; ++i) {
            if (nextX[i] == finalX) {
                const int16_t run = *(fBufferedRuns[i].fRuns);
                fBufferedRuns[i].fRuns += run;
                fBufferedRuns[i].fAlphas += run;
                curAlpha[i] = *(fBufferedRuns[i].fAlphas);
                nextX[i] += *(fBufferedRuns[i].fRuns);
            }
        }

        finalX = 0xFFFFF;
        for (int i = 0; i < kR11_EACBlockSz; ++i) {
            finalX = SkMin32(nextX[i], finalX);
        }
    }

    // If we didn't land on a block boundary, output the block...
    // NOTE(review): this skips emission when (curX & 3) == 1, which looks
    // inconsistent with "> 0"; in practice fWidth is asserted to be a
    // multiple of 4 so curX should end block-aligned — confirm intent.
    if ((curX & 3) > 1) {
        *outPtr = compress_block_vertical(c1, c2, c3, c4);
    }

    fNextRun = 0;
}
1127 | |
823 } // namespace SkTextureCompressor | 1128 } // namespace SkTextureCompressor |
OLD | NEW |