OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 Google Inc. | 2 * Copyright 2014 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkTextureCompressor.h" | 8 #include "SkTextureCompressor.h" |
9 | 9 |
10 #include "SkBitmap.h" | 10 #include "SkBitmap.h" |
(...skipping 714 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
725 // Compress it | 725 // Compress it |
726 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes); | 726 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes); |
727 ++encPtr; | 727 ++encPtr; |
728 } | 728 } |
729 src += 4 * rowBytes; | 729 src += 4 * rowBytes; |
730 } | 730 } |
731 return true; | 731 return true; |
732 } | 732 } |
733 #endif // COMPRESS_R11_EAC_FASTEST | 733 #endif // COMPRESS_R11_EAC_FASTEST |
734 | 734 |
// The R11 EAC format expects that indices are given in column-major order. Since
// we receive alpha values in raster order, this usually means that we have to use
// pack6 above to properly pack our indices. However, if our indices come from the
// blitter, then each integer will be a column of indices, and hence can be efficiently
// packed. This function takes the bottom three bits of each byte of x and places
// them in the least significant 12 bits of the result, with the index from the
// lowest memory address ending up in the most significant 3-bit slot.
static inline uint32_t pack_indices_vertical(uint32_t x) {
#if defined (SK_CPU_BENDIAN)
    // Big endian: the byte at the lowest address is the most significant byte.
    const uint32_t i0 = x & 7;
    const uint32_t i1 = (x >> 8) & 7;
    const uint32_t i2 = (x >> 16) & 7;
    const uint32_t i3 = (x >> 24) & 7;
    return i0 | (i1 << 3) | (i2 << 6) | (i3 << 9);
#else
    // Little endian: the byte at the lowest address is the least significant
    // byte, so byte order is reversed relative to the big-endian case.
    const uint32_t i0 = (x >> 24) & 7;
    const uint32_t i1 = (x >> 16) & 7;
    const uint32_t i2 = (x >> 8) & 7;
    const uint32_t i3 = x & 7;
    return i0 | (i1 << 3) | (i2 << 6) | (i3 << 9);
#endif
}
| 756 |
| 757 // This function returns the compressed format of a block given as four columns
of |
| 758 // alpha values. Each column is assumed to be loaded from top to bottom, and hen
ce |
| 759 // must first be converted to indices and then packed into the resulting 64-bit |
| 760 // integer. |
| 761 static inline uint64_t compress_block_vertical(const uint32_t alphaColumn0, |
| 762 const uint32_t alphaColumn1, |
| 763 const uint32_t alphaColumn2, |
| 764 const uint32_t alphaColumn3) { |
| 765 |
| 766 if (alphaColumn0 == alphaColumn1 && |
| 767 alphaColumn2 == alphaColumn3 && |
| 768 alphaColumn0 == alphaColumn2) { |
| 769 |
| 770 if (0 == alphaColumn0) { |
| 771 // Transparent |
| 772 return 0x0020000000002000ULL; |
| 773 } |
| 774 else if (0xFFFFFFFF == alphaColumn0) { |
| 775 // Opaque |
| 776 return 0xFFFFFFFFFFFFFFFFULL; |
| 777 } |
| 778 } |
| 779 |
| 780 const uint32_t indexColumn0 = convert_indices(alphaColumn0); |
| 781 const uint32_t indexColumn1 = convert_indices(alphaColumn1); |
| 782 const uint32_t indexColumn2 = convert_indices(alphaColumn2); |
| 783 const uint32_t indexColumn3 = convert_indices(alphaColumn3); |
| 784 |
| 785 const uint32_t packedIndexColumn0 = pack_indices_vertical(indexColumn0); |
| 786 const uint32_t packedIndexColumn1 = pack_indices_vertical(indexColumn1); |
| 787 const uint32_t packedIndexColumn2 = pack_indices_vertical(indexColumn2); |
| 788 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3); |
| 789 |
| 790 return SkEndian_SwapBE64(0x8490000000000000ULL | |
| 791 (static_cast<uint64_t>(packedIndexColumn0) << 36) | |
| 792 (static_cast<uint64_t>(packedIndexColumn1) << 24) | |
| 793 static_cast<uint64_t>(packedIndexColumn2 << 12) | |
| 794 static_cast<uint64_t>(packedIndexColumn3)); |
| 795 |
| 796 } |
| 797 |
// Dispatches A8 -> R11 EAC compression to whichever implementation was selected
// at compile time via the COMPRESS_R11_EAC_* configuration macros. Compresses
// a width x height 8-bit alpha image (with the given source rowBytes stride)
// into 64-bit R11 EAC blocks written to dst. Returns true on success; the
// selected implementation determines the failure conditions.
static inline bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src,
                                         int width, int height, int rowBytes) {
#if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST)
    // Slow and fast paths share the generic per-4x4-block driver, differing
    // only inside compress_r11eac_block.
    return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block);
#elif COMPRESS_R11_EAC_FASTEST
    // The "fastest" path uses a dedicated whole-image routine.
    return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes);
#else
#error "Must choose R11 EAC algorithm"
#endif
}
745 | 808 |
| 809 // Updates the block whose columns are stored in blockColN. curAlphai is expecte
d |
| 810 // to store, as an integer, the four alpha values that will be placed within eac
h |
| 811 // of the columns in the range [col, col+colsLeft). |
| 812 static inline void update_block_columns( |
| 813 uint32_t* blockCol1, uint32_t* blockCol2, uint32_t* blockCol3, uint32_t* blo
ckCol4, |
| 814 const int col, const int colsLeft, const uint32_t curAlphai) { |
| 815 SkASSERT(NULL != blockCol1); |
| 816 SkASSERT(NULL != blockCol2); |
| 817 SkASSERT(NULL != blockCol3); |
| 818 SkASSERT(NULL != blockCol4); |
| 819 SkASSERT(col + colsLeft <= 4); |
| 820 for (int i = col; i < (col + colsLeft); ++i) { |
| 821 switch(i) { |
| 822 case 0: |
| 823 *blockCol1 = curAlphai; |
| 824 break; |
| 825 case 1: |
| 826 *blockCol2 = curAlphai; |
| 827 break; |
| 828 case 2: |
| 829 *blockCol3 = curAlphai; |
| 830 break; |
| 831 case 3: |
| 832 *blockCol4 = curAlphai; |
| 833 break; |
| 834 } |
| 835 } |
| 836 } |
| 837 |
746 //////////////////////////////////////////////////////////////////////////////// | 838 //////////////////////////////////////////////////////////////////////////////// |
747 | 839 |
748 namespace SkTextureCompressor { | 840 namespace SkTextureCompressor { |
749 | 841 |
750 static inline size_t get_compressed_data_size(Format fmt, int width, int height)
{ | 842 static inline size_t get_compressed_data_size(Format fmt, int width, int height)
{ |
751 switch (fmt) { | 843 switch (fmt) { |
752 // These formats are 64 bits per 4x4 block. | 844 // These formats are 64 bits per 4x4 block. |
753 case kR11_EAC_Format: | 845 case kR11_EAC_Format: |
754 case kLATC_Format: | 846 case kLATC_Format: |
755 { | 847 { |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
813 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize
)); | 905 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize
)); |
814 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bit
map.height(), | 906 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bit
map.height(), |
815 bitmap.rowBytes(), format)) { | 907 bitmap.rowBytes(), format)) { |
816 return SkData::NewFromMalloc(dst, compressedDataSize); | 908 return SkData::NewFromMalloc(dst, compressedDataSize); |
817 } | 909 } |
818 | 910 |
819 sk_free(dst); | 911 sk_free(dst); |
820 return NULL; | 912 return NULL; |
821 } | 913 } |
822 | 914 |
// Constructs a blitter that emits R11 EAC compressed blocks into latcBuffer.
// width/height are the dimensions of the target in pixels; both must be
// multiples of the 4x4 block size. latcBuffer must be large enough to hold
// (width/4)*(height/4) 64-bit blocks -- TODO(review): confirm the caller's
// sizing contract, it is not checked here.
R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer)
    // 0x7FFE is one minus the largest positive 16-bit int. We use it for
    // debugging to make sure that we're properly setting the nextX distance
    // in flushRuns().
    : kLongestRun(0x7FFE), kZeroAlpha(0)
    , fNextRun(0)
    , fWidth(width)
    , fHeight(height)
    , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer))
{
    // The blitter operates on whole 4x4 blocks, so partial blocks are not
    // supported.
    SkASSERT((width % kR11_EACBlockSz) == 0);
    SkASSERT((height % kR11_EACBlockSz) == 0);
}
| 928 |
// Buffers one antialiased scanline (given as an RLE of alpha values in
// antialias/runs, starting at pixel x on row y). Rows are accumulated four at
// a time so that each flush compresses complete 4x4 blocks; rows missing at
// the start of a block are padded with an all-zero run. The antialias/runs
// pointers are retained until flushRuns() consumes them, so they must stay
// valid until then -- TODO(review): confirm the SkBlitter contract guarantees
// this.
void R11_EACBlitter::blitAntiH(int x, int y,
                               const SkAlpha* antialias,
                               const int16_t* runs) {
    // Make sure that the new row to blit is either the first
    // row that we're blitting, or it's exactly the next scan row
    // since the last row that we blit. This is to ensure that when
    // we go to flush the runs, that they are all the same four
    // runs.
    if (fNextRun > 0 &&
        ((x != fBufferedRuns[fNextRun-1].fX) ||
         (y-1 != fBufferedRuns[fNextRun-1].fY))) {
        this->flushRuns();
    }

    // Align the rows to a block boundary. If we receive rows that
    // are not on a block boundary, then fill in the preceding runs
    // with zeros. We do this by producing a single RLE that says
    // that we have 0x7FFE pixels of zero (0x7FFE = 32766).
    const int row = y & ~3;
    while ((row + fNextRun) < y) {
        fBufferedRuns[fNextRun].fAlphas = &kZeroAlpha;
        fBufferedRuns[fNextRun].fRuns = &kLongestRun;
        fBufferedRuns[fNextRun].fX = 0;
        fBufferedRuns[fNextRun].fY = row + fNextRun;
        ++fNextRun;
    }

    // Make sure that our assumptions aren't violated...
    SkASSERT(fNextRun == (y & 3));
    SkASSERT(fNextRun == 0 || fBufferedRuns[fNextRun - 1].fY < y);

    // Set the values of the next run
    fBufferedRuns[fNextRun].fAlphas = antialias;
    fBufferedRuns[fNextRun].fRuns = runs;
    fBufferedRuns[fNextRun].fX = x;
    fBufferedRuns[fNextRun].fY = y;

    // If we've output four scanlines in a row that don't violate our
    // assumptions, then it's time to flush them...
    if (4 == ++fNextRun) {
        this->flushRuns();
    }
}
| 972 |
// Compresses the (up to) four buffered scanline RLEs into 4x4 R11 EAC blocks
// and writes them to fBuffer via getBlock(). Missing rows at the bottom of the
// block are treated as all-zero alpha. Resets fNextRun to 0 when done.
void R11_EACBlitter::flushRuns() {

    // If we don't have any runs, then just return.
    if (0 == fNextRun) {
        return;
    }

#ifndef NDEBUG
    // Make sure that if we have any runs, they all match
    for (int i = 1; i < fNextRun; ++i) {
        SkASSERT(fBufferedRuns[i].fY == fBufferedRuns[i-1].fY + 1);
        SkASSERT(fBufferedRuns[i].fX == fBufferedRuns[i-1].fX);
    }
#endif

    // If we dont have as many runs as we have rows, fill in the remaining
    // runs with constant zeros.
    for (int i = fNextRun; i < kR11_EACBlockSz; ++i) {
        fBufferedRuns[i].fY = fBufferedRuns[0].fY + i;
        fBufferedRuns[i].fX = fBufferedRuns[0].fX;
        fBufferedRuns[i].fAlphas = &kZeroAlpha;
        fBufferedRuns[i].fRuns = &kLongestRun;
    }

    // Make sure that our assumptions aren't violated.
    SkASSERT(fNextRun > 0 && fNextRun <= 4);
    SkASSERT((fBufferedRuns[0].fY & 3) == 0);

    // The following logic walks four rows at a time and outputs compressed
    // blocks to the buffer passed into the constructor.
    // We do the following:
    //
    //      c1 c2 c3 c4
    // -----------------------------------------------------------------------
    // ... |  |  |  |  |  ----> fBufferedRuns[0]
    // -----------------------------------------------------------------------
    // ... |  |  |  |  |  ----> fBufferedRuns[1]
    // -----------------------------------------------------------------------
    // ... |  |  |  |  |  ----> fBufferedRuns[2]
    // -----------------------------------------------------------------------
    // ... |  |  |  |  |  ----> fBufferedRuns[3]
    // -----------------------------------------------------------------------
    //
    // curX -- the macro X value that we've gotten to.
    // c1, c2, c3, c4 -- the integers that represent the columns of the current block
    //                   that we're operating on
    // curAlphaColumn -- integer containing the column of alpha values from fBufferedRuns.
    // nextX -- for each run, the next point at which we need to update curAlphaColumn
    //          after the value of curX.
    // finalX -- the minimum of all the nextX values.
    //
    // curX advances to finalX outputting any blocks that it passes along
    // the way. Since finalX will not change when we reach the end of a
    // run, the termination criteria will be whenever curX == finalX at the
    // end of a loop.

    // Setup:
    uint32_t c1 = 0;
    uint32_t c2 = 0;
    uint32_t c3 = 0;
    uint32_t c4 = 0;

    uint32_t curAlphaColumn = 0;
    // Byte i of curAlphaColumn holds the current alpha for row i. Aliasing
    // through SkAlpha* is presumably safe because SkAlpha is a byte type --
    // TODO(review): confirm SkAlpha is uint8_t/char-compatible.
    SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphaColumn);

    int nextX[kR11_EACBlockSz];
    for (int i = 0; i < kR11_EACBlockSz; ++i) {
        nextX[i] = 0x7FFFFF;
    }

    uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY);

    // Populate the first set of runs and figure out how far we need to
    // advance on the first step
    int curX = 0;
    int finalX = 0xFFFFF;
    for (int i = 0; i < kR11_EACBlockSz; ++i) {
        nextX[i] = *(fBufferedRuns[i].fRuns);
        curAlpha[i] = *(fBufferedRuns[i].fAlphas);

        finalX = SkMin32(nextX[i], finalX);
    }

    // Make sure that we have a valid right-bound X value
    SkASSERT(finalX < 0xFFFFF);

    // Run the blitter...
    while (curX != finalX) {
        SkASSERT(finalX >= curX);

        // Do we need to populate the rest of the block?
        if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) {
            const int col = curX & 3;
            const int colsLeft = 4 - col;
            SkASSERT(curX + colsLeft <= finalX);

            update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn);

            // Write this block
            *outPtr = compress_block_vertical(c1, c2, c3, c4);
            ++outPtr;
            curX += colsLeft;
        }

        // If we can advance even further, then just keep memsetting the block
        if ((finalX - curX) >= kR11_EACBlockSz) {
            SkASSERT((curX & 3) == 0);

            const int col = 0;
            const int colsLeft = kR11_EACBlockSz;

            update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn);

            // While we can keep advancing, just keep writing the block.
            // The alpha column is constant across this span, so compress once
            // and replicate.
            uint64_t lastBlock = compress_block_vertical(c1, c2, c3, c4);
            while((finalX - curX) >= kR11_EACBlockSz) {
                *outPtr = lastBlock;
                ++outPtr;
                curX += kR11_EACBlockSz;
            }
        }

        // If we haven't advanced within the block then do so.
        if (curX < finalX) {
            const int col = curX & 3;
            const int colsLeft = finalX - curX;

            update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn);

            curX += colsLeft;
        }

        SkASSERT(curX == finalX);

        // Figure out what the next advancement is...
        // For every run that ends exactly at finalX, skip ahead to its next
        // RLE segment and pick up that segment's alpha value.
        for (int i = 0; i < kR11_EACBlockSz; ++i) {
            if (nextX[i] == finalX) {
                const int16_t run = *(fBufferedRuns[i].fRuns);
                fBufferedRuns[i].fRuns += run;
                fBufferedRuns[i].fAlphas += run;
                curAlpha[i] = *(fBufferedRuns[i].fAlphas);
                nextX[i] += *(fBufferedRuns[i].fRuns);
            }
        }

        finalX = 0xFFFFF;
        for (int i = 0; i < kR11_EACBlockSz; ++i) {
            finalX = SkMin32(nextX[i], finalX);
        }
    }

    // If we didn't land on a block boundary, output the block...
    // NOTE(review): "not on a block boundary" would be (curX & 3) != 0, i.e.
    // '> 0'. As written, a final partial block of exactly one column
    // ((curX & 3) == 1) is never written out -- confirm whether '> 1' is
    // intentional or an off-by-one.
    if ((curX & 3) > 1) {
        *outPtr = compress_block_vertical(c1, c2, c3, c4);
    }

    fNextRun = 0;
}
| 1131 |
823 } // namespace SkTextureCompressor | 1132 } // namespace SkTextureCompressor |
OLD | NEW |