Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: src/utils/SkTextureCompressor_R11EAC.cpp

Issue 403383003: Refactor texture compressors into separate files (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Handle improper dimensions Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/utils/SkTextureCompressor_R11EAC.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2014 Google Inc. 2 * Copyright 2014 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #include "SkTextureCompressor.h" 8 #include "SkTextureCompressor.h"
9 9
10 #include "SkBitmap.h"
11 #include "SkData.h"
12 #include "SkEndian.h" 10 #include "SkEndian.h"
13 11
14 #include "SkTextureCompression_opts.h"
15
16 ////////////////////////////////////////////////////////////////////////////////
17 //
18 // Utility Functions
19 //
20 ////////////////////////////////////////////////////////////////////////////////
21
// Returns the magnitude of the difference between a and b. The subtraction is
// always performed larger-minus-smaller, so unlike SkTAbs(a - b) the result is
// also correct for unsigned types.
template <typename T> inline T abs_diff(const T &a, const T &b) {
    if (a > b) {
        return a - b;
    }
    return b - a;
}
27
// True when the value lies at either end of the 8-bit range (0 or 255).
static bool is_extremal(uint8_t pixel) {
    const bool isLowest = (pixel == 0);
    const bool isHighest = (pixel == 255);
    return isLowest || isHighest;
}
31
// Encodes one 4x4 block of 8-bit values (16 bytes, stored row by row) into a
// single 64-bit compressed word.
typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);

// This function is used by both R11 EAC and LATC to compress 4x4 blocks
// of 8-bit alpha into 64-bit values that comprise the compressed data.
//
//   dst       receives (width/4) * (height/4) encoded blocks of 8 bytes each,
//             one block after another in raster order of blocks. No
//             particular alignment is required.
//   src       top-left pixel of the source; rows are rowBytes apart.
//   width,
//   height    dimensions in pixels; each must be a positive multiple of 4.
//   rowBytes  distance in bytes between the starts of consecutive source rows.
//   proc      encoder invoked once per 4x4 block.
//
// Returns false, without touching dst, when the dimensions are not positive
// multiples of four; returns true otherwise.
static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
                                     int width, int height, int rowBytes,
                                     A84x4To64BitProc proc) {
    // Make sure that our data is well-formed enough to be considered for
    // compression. Negative sizes have to be rejected explicitly: -4 % 4 == 0,
    // so a modulus-only test would report success without encoding anything.
    if (width <= 0 || height <= 0 || (width % 4) != 0 || (height % 4) != 0) {
        return false;
    }

    const int blocksX = width >> 2;
    const int blocksY = height >> 2;

    uint8_t block[16];
    for (int y = 0; y < blocksY; ++y) {
        for (int x = 0; x < blocksX; ++x) {
            // Gather the four source rows of this block into one contiguous buffer
            for (int k = 0; k < 4; ++k) {
                memcpy(block + k*4, src + k*rowBytes + 4*x, 4);
            }

            // Compress it. The result is copied out bytewise because dst is a
            // byte buffer: writing through a uint64_t* would be an unaligned,
            // type-punned store.
            const uint64_t encoded = proc(block);
            memcpy(dst, &encoded, sizeof(encoded));
            dst += sizeof(encoded);
        }
        src += 4 * rowBytes;
    }

    return true;
}
68
69 ////////////////////////////////////////////////////////////////////////////////
70 //
71 // LATC compressor
72 //
73 ////////////////////////////////////////////////////////////////////////////////
74
// LATC compresses texels down into square 4x4 blocks, each of which selects
// its values from an eight-entry palette.
static const int kLATCBlockSize = 4;
static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize;
static const int kLATCPaletteSize = 8;
79
80 // Generates an LATC palette. LATC constructs
81 // a palette of eight colors from LUM0 and LUM1 using the algorithm:
82 //
83 // LUM0, if lum0 > lum1 and code(x,y) == 0
84 // LUM1, if lum0 > lum1 and code(x,y) == 1
85 // (6*LUM0+ LUM1)/7, if lum0 > lum1 and code(x,y) == 2
86 // (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3
87 // (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4
88 // (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5
89 // (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6
90 // ( LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7
91 //
92 // LUM0, if lum0 <= lum1 and code(x,y) == 0
93 // LUM1, if lum0 <= lum1 and code(x,y) == 1
94 // (4*LUM0+ LUM1)/5, if lum0 <= lum1 and code(x,y) == 2
95 // (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3
96 // (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4
97 // ( LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5
98 // 0, if lum0 <= lum1 and code(x,y) == 6
99 // 255, if lum0 <= lum1 and code(x,y) == 7
100
// Fills the eight entries of 'palette' from the two endpoint values.
//
// The endpoints themselves always occupy entries 0 and 1. When lum0 > lum1 the
// remaining six entries are interpolated in sevenths between the endpoints.
// Otherwise entries 2-5 are interpolated in fifths and entries 6 and 7 are
// fixed at 0 and 255.
static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) {
    palette[0] = lum0;
    palette[1] = lum1;

    if (lum0 <= lum1) {
        // Four interpolants followed by the two explicit extremes
        for (int slot = 2; slot < 6; ++slot) {
            const int weight = slot - 1;
            palette[slot] = ((5 - weight)*lum0 + weight*lum1) / 5;
        }
        palette[6] = 0;
        palette[7] = 255;
        return;
    }

    // Six interpolants, no explicit extremes
    for (int slot = 2; slot < 8; ++slot) {
        const int weight = slot - 1;
        palette[slot] = ((7 - weight)*lum0 + weight*lum1) / 7;
    }
}
116
117 // Compress a block by using the bounding box of the pixels. It is assumed that
118 // there are no extremal pixels in this block otherwise we would have used
119 // compressBlockBBIgnoreExtremal.
120 static uint64_t compress_latc_block_bb(const uint8_t pixels[]) {
121 uint8_t minVal = 255;
122 uint8_t maxVal = 0;
123 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
124 minVal = SkTMin(pixels[i], minVal);
125 maxVal = SkTMax(pixels[i], maxVal);
126 }
127
128 SkASSERT(!is_extremal(minVal));
129 SkASSERT(!is_extremal(maxVal));
130
131 uint8_t palette[kLATCPaletteSize];
132 generate_latc_palette(palette, maxVal, minVal);
133
134 uint64_t indices = 0;
135 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
136
137 // Find the best palette index
138 uint8_t bestError = abs_diff(pixels[i], palette[0]);
139 uint8_t idx = 0;
140 for (int j = 1; j < kLATCPaletteSize; ++j) {
141 uint8_t error = abs_diff(pixels[i], palette[j]);
142 if (error < bestError) {
143 bestError = error;
144 idx = j;
145 }
146 }
147
148 indices <<= 3;
149 indices |= idx;
150 }
151
152 return
153 SkEndian_SwapLE64(
154 static_cast<uint64_t>(maxVal) |
155 (static_cast<uint64_t>(minVal) << 8) |
156 (indices << 16));
157 }
158
159 // Compress a block by using the bounding box of the pixels without taking into
160 // account the extremal values. The generated palette will contain extremal valu es
161 // and fewer points along the line segment to interpolate.
162 static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) {
163 uint8_t minVal = 255;
164 uint8_t maxVal = 0;
165 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
166 if (is_extremal(pixels[i])) {
167 continue;
168 }
169
170 minVal = SkTMin(pixels[i], minVal);
171 maxVal = SkTMax(pixels[i], maxVal);
172 }
173
174 SkASSERT(!is_extremal(minVal));
175 SkASSERT(!is_extremal(maxVal));
176
177 uint8_t palette[kLATCPaletteSize];
178 generate_latc_palette(palette, minVal, maxVal);
179
180 uint64_t indices = 0;
181 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
182
183 // Find the best palette index
184 uint8_t idx = 0;
185 if (is_extremal(pixels[i])) {
186 if (0xFF == pixels[i]) {
187 idx = 7;
188 } else if (0 == pixels[i]) {
189 idx = 6;
190 } else {
191 SkFAIL("Pixel is extremal but not really?!");
192 }
193 } else {
194 uint8_t bestError = abs_diff(pixels[i], palette[0]);
195 for (int j = 1; j < kLATCPaletteSize - 2; ++j) {
196 uint8_t error = abs_diff(pixels[i], palette[j]);
197 if (error < bestError) {
198 bestError = error;
199 idx = j;
200 }
201 }
202 }
203
204 indices <<= 3;
205 indices |= idx;
206 }
207
208 return
209 SkEndian_SwapLE64(
210 static_cast<uint64_t>(minVal) |
211 (static_cast<uint64_t>(maxVal) << 8) |
212 (indices << 16));
213 }
214
215
216 // Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from tw o
217 // values LUM0 and LUM1, and an index into the generated palette. Details of how
218 // the palette is generated can be found in the comments of generatePalette abov e.
219 //
220 // We choose which palette type to use based on whether or not 'pixels' contains
221 // any extremal values (0 or 255). If there are extremal values, then we use the
222 // palette that has the extremal values built in. Otherwise, we use the full bou nding
223 // box.
224
225 static uint64_t compress_latc_block(const uint8_t pixels[]) {
226 // Collect unique pixels
227 int nUniquePixels = 0;
228 uint8_t uniquePixels[kLATCPixelsPerBlock];
229 for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
230 bool foundPixel = false;
231 for (int j = 0; j < nUniquePixels; ++j) {
232 foundPixel = foundPixel || uniquePixels[j] == pixels[i];
233 }
234
235 if (!foundPixel) {
236 uniquePixels[nUniquePixels] = pixels[i];
237 ++nUniquePixels;
238 }
239 }
240
241 // If there's only one unique pixel, then our compression is easy.
242 if (1 == nUniquePixels) {
243 return SkEndian_SwapLE64(pixels[0] | (pixels[0] << 8));
244
245 // Similarly, if there are only two unique pixels, then our compression is
246 // easy again: place the pixels in the block header, and assign the indices
247 // with one or zero depending on which pixel they belong to.
248 } else if (2 == nUniquePixels) {
249 uint64_t outBlock = 0;
250 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
251 int idx = 0;
252 if (pixels[i] == uniquePixels[1]) {
253 idx = 1;
254 }
255
256 outBlock <<= 3;
257 outBlock |= idx;
258 }
259 outBlock <<= 16;
260 outBlock |= (uniquePixels[0] | (uniquePixels[1] << 8));
261 return SkEndian_SwapLE64(outBlock);
262 }
263
264 // Count non-maximal pixel values
265 int nonExtremalPixels = 0;
266 for (int i = 0; i < nUniquePixels; ++i) {
267 if (!is_extremal(uniquePixels[i])) {
268 ++nonExtremalPixels;
269 }
270 }
271
272 // If all the pixels are nonmaximal then compute the palette using
273 // the bounding box of all the pixels.
274 if (nonExtremalPixels == nUniquePixels) {
275 // This is really just for correctness, in all of my tests we
276 // never take this step. We don't lose too much perf here because
277 // most of the processing in this function is worth it for the
278 // 1 == nUniquePixels optimization.
279 return compress_latc_block_bb(pixels);
280 } else {
281 return compress_latc_block_bb_ignore_extremal(pixels);
282 }
283 }
284
285 static inline bool compress_a8_to_latc(uint8_t* dst, const uint8_t* src,
286 int width, int height, int rowBytes) {
287 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_ latc_block);
288 }
289
290 ////////////////////////////////////////////////////////////////////////////////
291 //
292 // R11 EAC Compressor
293 //
294 ////////////////////////////////////////////////////////////////////////////////
295
296 // #define COMPRESS_R11_EAC_SLOW 1 12 // #define COMPRESS_R11_EAC_SLOW 1
297 // #define COMPRESS_R11_EAC_FAST 1 13 // #define COMPRESS_R11_EAC_FAST 1
298 #define COMPRESS_R11_EAC_FASTEST 1 14 #define COMPRESS_R11_EAC_FASTEST 1
299 15
300 // Blocks compressed into R11 EAC are represented as follows: 16 // Blocks compressed into R11 EAC are represented as follows:
301 // 0000000000000000000000000000000000000000000000000000000000000000 17 // 0000000000000000000000000000000000000000000000000000000000000000
302 // |base_cw|mod|mul| ----------------- indices ------------------- 18 // |base_cw|mod|mul| ----------------- indices -------------------
303 // 19 //
304 // To reconstruct the value of a given pixel, we use the formula: 20 // To reconstruct the value of a given pixel, we use the formula:
305 // clamp[0, 2047](base_cw * 8 + 4 + mod_val*mul*8) 21 // clamp[0, 2047](base_cw * 8 + 4 + mod_val*mul*8)
(...skipping 235 matching lines...) Expand 10 before | Expand all | Expand 10 after
541 // primarily for compressing alpha maps. Usually the only 257 // primarily for compressing alpha maps. Usually the only
542 // non-fully opaque or fully transparent blocks are not a solid 258 // non-fully opaque or fully transparent blocks are not a solid
543 // intermediate color. If we notice that they are, then we can 259 // intermediate color. If we notice that they are, then we can
544 // add another optimization... 260 // add another optimization...
545 break; 261 break;
546 } 262 }
547 } 263 }
548 264
549 return compress_heterogeneous_r11eac_block(block); 265 return compress_heterogeneous_r11eac_block(block);
550 } 266 }
267
// Encodes one 4x4 block of 8-bit values (16 bytes, stored row by row) into a
// single 64-bit compressed word.
typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);

// This function is used by R11 EAC to compress 4x4 blocks
// of 8-bit alpha into 64-bit values that comprise the compressed data.
//
//   dst       receives (width/4) * (height/4) encoded blocks of 8 bytes each,
//             one block after another in raster order of blocks. No
//             particular alignment is required.
//   src       top-left pixel of the source; rows are rowBytes apart.
//   width,
//   height    dimensions in pixels; each must be a positive multiple of 4.
//   rowBytes  distance in bytes between the starts of consecutive source rows.
//   proc      encoder invoked once per 4x4 block.
//
// Returns false, without touching dst, when the dimensions are not positive
// multiples of four; returns true otherwise.
static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
                                     int width, int height, int rowBytes,
                                     A84x4To64BitProc proc) {
    // Make sure that our data is well-formed enough to be considered for
    // compression. Negative sizes have to be rejected explicitly: -4 % 4 == 0,
    // so a modulus-only test would report success without encoding anything.
    if (width <= 0 || height <= 0 || (width % 4) != 0 || (height % 4) != 0) {
        return false;
    }

    const int blocksX = width >> 2;
    const int blocksY = height >> 2;

    uint8_t block[16];
    for (int y = 0; y < blocksY; ++y) {
        for (int x = 0; x < blocksX; ++x) {
            // Gather the four source rows of this block into one contiguous buffer
            for (int k = 0; k < 4; ++k) {
                memcpy(block + k*4, src + k*rowBytes + 4*x, 4);
            }

            // Compress it. The result is copied out bytewise because dst is a
            // byte buffer: writing through a uint64_t* would be an unaligned,
            // type-punned store.
            const uint64_t encoded = proc(block);
            memcpy(dst, &encoded, sizeof(encoded));
            dst += sizeof(encoded);
        }
        src += 4 * rowBytes;
    }

    return true;
}
551 #endif // (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST) 303 #endif // (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST)
552 304
553 #if COMPRESS_R11_EAC_FASTEST 305 #if COMPRESS_R11_EAC_FASTEST
// Exchanges two bit fields of x that lie 'shift' bits apart: the bits selected
// by 'mask' trade places with the bits selected by (mask << shift). All other
// bits of x are returned unchanged.
template<unsigned shift>
static inline uint64_t swap_shift(uint64_t x, uint64_t mask) {
    // Positions (within the low field) where the two fields disagree
    const uint64_t delta = ((x >> shift) ^ x) & mask;
    // Flipping those positions in both fields swaps their contents
    return (x ^ delta) ^ (delta << shift);
}
311
554 static inline uint64_t interleave6(uint64_t topRows, uint64_t bottomRows) { 312 static inline uint64_t interleave6(uint64_t topRows, uint64_t bottomRows) {
555 // If our 3-bit block indices are laid out as: 313 // If our 3-bit block indices are laid out as:
556 // a b c d 314 // a b c d
557 // e f g h 315 // e f g h
558 // i j k l 316 // i j k l
559 // m n o p 317 // m n o p
560 // 318 //
561 // This function expects topRows and bottomRows to contain the first two row s 319 // This function expects topRows and bottomRows to contain the first two row s
562 // of indices interleaved in the least significant bits of a and b. In other words... 320 // of indices interleaved in the least significant bits of a and b. In other words...
563 // 321 //
(...skipping 13 matching lines...) Expand all
577 // 335 //
578 // !SPEED! this function might be even faster if certain SIMD intrinsics are 336 // !SPEED! this function might be even faster if certain SIMD intrinsics are
579 // used.. 337 // used..
580 338
581 // For both architectures, we can figure out a packing of the bits by 339 // For both architectures, we can figure out a packing of the bits by
582 // using a shuffle and a few shift-rotates... 340 // using a shuffle and a few shift-rotates...
583 uint64_t x = (static_cast<uint64_t>(topRows) << 32) | static_cast<uint64_t>( bottomRows); 341 uint64_t x = (static_cast<uint64_t>(topRows) << 32) | static_cast<uint64_t>( bottomRows);
584 342
585 // x: 00 a e 00 b f 00 c g 00 d h 00 i m 00 j n 00 k o 00 l p 343 // x: 00 a e 00 b f 00 c g 00 d h 00 i m 00 j n 00 k o 00 l p
586 344
587 uint64_t t = (x ^ (x >> 10)) & 0x3FC0003FC00000ULL; 345 x = swap_shift<10>(x, 0x3FC0003FC00000ULL);
588 x = x ^ t ^ (t << 10);
589 346
590 // x: b f 00 00 00 a e c g i m 00 00 00 d h j n 00 k o 00 l p 347 // x: b f 00 00 00 a e c g i m 00 00 00 d h j n 00 k o 00 l p
591 348
592 x = (x | ((x << 52) & (0x3FULL << 52)) | ((x << 20) & (0x3FULL << 28))) >> 1 6; 349 x = (x | ((x << 52) & (0x3FULL << 52)) | ((x << 20) & (0x3FULL << 28))) >> 1 6;
593 350
594 // x: 00 00 00 00 00 00 00 00 b f l p a e c g i m k o d h j n 351 // x: 00 00 00 00 00 00 00 00 b f l p a e c g i m k o d h j n
595 352
596 t = (x ^ (x >> 6)) & 0xFC0000ULL; 353 x = swap_shift<6>(x, 0xFC0000ULL);
597 x = x ^ t ^ (t << 6);
598 354
599 #if defined (SK_CPU_BENDIAN) 355 #if defined (SK_CPU_BENDIAN)
600 // x: 00 00 00 00 00 00 00 00 b f l p a e i m c g k o d h j n 356 // x: 00 00 00 00 00 00 00 00 b f l p a e i m c g k o d h j n
601 357
602 t = (x ^ (x >> 36)) & 0x3FULL; 358 x = swap_shift<36>(x, 0x3FULL);
603 x = x ^ t ^ (t << 36);
604 359
605 // x: 00 00 00 00 00 00 00 00 b f j n a e i m c g k o d h l p 360 // x: 00 00 00 00 00 00 00 00 b f j n a e i m c g k o d h l p
606 361
607 t = (x ^ (x >> 12)) & 0xFFF000000ULL; 362 x = swap_shift<12>(x, 0xFFF000000ULL);
608 x = x ^ t ^ (t << 12);
609
610 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p
611 return x;
612 #else 363 #else
613 // If our CPU is little endian, then the above logic will 364 // If our CPU is little endian, then the above logic will
614 // produce the following indices: 365 // produce the following indices:
615 // x: 00 00 00 00 00 00 00 00 c g i m d h l p b f j n a e k o 366 // x: 00 00 00 00 00 00 00 00 c g i m d h l p b f j n a e k o
616 367
617 t = (x ^ (x >> 36)) & 0xFC0ULL; 368 x = swap_shift<36>(x, 0xFC0ULL);
618 x = x ^ t ^ (t << 36);
619 369
620 // x: 00 00 00 00 00 00 00 00 a e i m d h l p b f j n c g k o 370 // x: 00 00 00 00 00 00 00 00 a e i m d h l p b f j n c g k o
621 371
622 x = (x & (0xFFFULL << 36)) | ((x & 0xFFFFFFULL) << 12) | ((x >> 24) & 0xFFFU LL); 372 x = (x & (0xFFFULL << 36)) | ((x & 0xFFFFFFULL) << 12) | ((x >> 24) & 0xFFFU LL);
373 #endif
623 374
624 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p 375 // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p
625
626 return x; 376 return x;
627 #endif
628 } 377 }
629 378
630 // This function converts an integer containing four bytes of alpha 379 // This function converts an integer containing four bytes of alpha
631 // values into an integer containing four bytes of indices into R11 EAC. 380 // values into an integer containing four bytes of indices into R11 EAC.
632 // Note, there needs to be a mapping of indices: 381 // Note, there needs to be a mapping of indices:
633 // 0 1 2 3 4 5 6 7 382 // 0 1 2 3 4 5 6 7
634 // 3 2 1 0 4 5 6 7 383 // 3 2 1 0 4 5 6 7
635 // 384 //
636 // To compute this, we first negate each byte, and then add three, which 385 // To compute this, we first negate each byte, and then add three, which
637 // gives the mapping 386 // gives the mapping
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after
725 // Compress it 474 // Compress it
726 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes); 475 *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes);
727 ++encPtr; 476 ++encPtr;
728 } 477 }
729 src += 4 * rowBytes; 478 src += 4 * rowBytes;
730 } 479 }
731 return true; 480 return true;
732 } 481 }
733 #endif // COMPRESS_R11_EAC_FASTEST 482 #endif // COMPRESS_R11_EAC_FASTEST
734 483
484 ////////////////////////////////////////////////////////////////////////////////
485 //
486 // Utility functions used by the blitter
487 //
488 ////////////////////////////////////////////////////////////////////////////////
489
735 // The R11 EAC format expects that indices are given in column-major order. Sinc e 490 // The R11 EAC format expects that indices are given in column-major order. Sinc e
736 // we receive alpha values in raster order, this usually means that we have to u se 491 // we receive alpha values in raster order, this usually means that we have to u se
737 // pack6 above to properly pack our indices. However, if our indices come from t he 492 // pack6 above to properly pack our indices. However, if our indices come from t he
738 // blitter, then each integer will be a column of indices, and hence can be effi ciently 493 // blitter, then each integer will be a column of indices, and hence can be effi ciently
739 // packed. This function takes the bottom three bits of each byte and places the m in 494 // packed. This function takes the bottom three bits of each byte and places the m in
740 // the least significant 12 bits of the resulting integer. 495 // the least significant 12 bits of the resulting integer.
741 static inline uint32_t pack_indices_vertical(uint32_t x) { 496 static inline uint32_t pack_indices_vertical(uint32_t x) {
742 #if defined (SK_CPU_BENDIAN) 497 #if defined (SK_CPU_BENDIAN)
743 return 498 return
744 (x & 7) | 499 (x & 7) |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
788 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3); 543 const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3);
789 544
790 return SkEndian_SwapBE64(0x8490000000000000ULL | 545 return SkEndian_SwapBE64(0x8490000000000000ULL |
791 (static_cast<uint64_t>(packedIndexColumn0) << 36) | 546 (static_cast<uint64_t>(packedIndexColumn0) << 36) |
792 (static_cast<uint64_t>(packedIndexColumn1) << 24) | 547 (static_cast<uint64_t>(packedIndexColumn1) << 24) |
793 static_cast<uint64_t>(packedIndexColumn2 << 12) | 548 static_cast<uint64_t>(packedIndexColumn2 << 12) |
794 static_cast<uint64_t>(packedIndexColumn3)); 549 static_cast<uint64_t>(packedIndexColumn3));
795 550
796 } 551 }
797 552
798 static inline bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src,
799 int width, int height, int rowBytes) {
800 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST)
801 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_ r11eac_block);
802 #elif COMPRESS_R11_EAC_FASTEST
803 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes);
804 #else
805 #error "Must choose R11 EAC algorithm"
806 #endif
807 }
808
809 // Updates the block whose columns are stored in blockColN. curAlphai is expecte d 553 // Updates the block whose columns are stored in blockColN. curAlphai is expecte d
810 // to store, as an integer, the four alpha values that will be placed within eac h 554 // to store, as an integer, the four alpha values that will be placed within eac h
811 // of the columns in the range [col, col+colsLeft). 555 // of the columns in the range [col, col+colsLeft).
812 static inline void update_block_columns( 556 static inline void update_block_columns(uint32_t* block, const int col,
813 uint32_t* blockCol1, uint32_t* blockCol2, uint32_t* blockCol3, uint32_t* blo ckCol4, 557 const int colsLeft, const uint32_t curAl phai) {
814 const int col, const int colsLeft, const uint32_t curAlphai) { 558 SkASSERT(NULL != block);
815 SkASSERT(NULL != blockCol1);
816 SkASSERT(NULL != blockCol2);
817 SkASSERT(NULL != blockCol3);
818 SkASSERT(NULL != blockCol4);
819 SkASSERT(col + colsLeft <= 4); 559 SkASSERT(col + colsLeft <= 4);
560
820 for (int i = col; i < (col + colsLeft); ++i) { 561 for (int i = col; i < (col + colsLeft); ++i) {
821 switch(i) { 562 block[i] = curAlphai;
822 case 0:
823 *blockCol1 = curAlphai;
824 break;
825 case 1:
826 *blockCol2 = curAlphai;
827 break;
828 case 2:
829 *blockCol3 = curAlphai;
830 break;
831 case 3:
832 *blockCol4 = curAlphai;
833 break;
834 }
835 } 563 }
836 } 564 }
837 565
838 //////////////////////////////////////////////////////////////////////////////// 566 ////////////////////////////////////////////////////////////////////////////////
839 567
840 namespace SkTextureCompressor { 568 namespace SkTextureCompressor {
841 569
842 static inline size_t get_compressed_data_size(Format fmt, int width, int height) { 570 bool CompressA8ToR11EAC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) {
843 switch (fmt) {
844 // These formats are 64 bits per 4x4 block.
845 case kR11_EAC_Format:
846 case kLATC_Format:
847 {
848 static const int kLATCEncodedBlockSize = 8;
849 571
850 const int blocksX = width / kLATCBlockSize; 572 #if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST)
851 const int blocksY = height / kLATCBlockSize;
852 573
853 return blocksX * blocksY * kLATCEncodedBlockSize; 574 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_ r11eac_block);
854 }
855 575
856 default: 576 #elif COMPRESS_R11_EAC_FASTEST
857 SkFAIL("Unknown compressed format!"); 577
858 return 0; 578 return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes);
859 } 579
580 #else
581 #error "Must choose R11 EAC algorithm"
582 #endif
860 } 583 }
861 584
862 bool CompressBufferToFormat(uint8_t* dst, const uint8_t* src, SkColorType srcCol orType, 585 // This class implements a blitter that blits directly into a buffer that will
863 int width, int height, int rowBytes, Format format, bool opt) { 586 // be used as an R11 EAC compressed texture. We compute this buffer by
864 CompressionProc proc = NULL; 587 // buffering four scan lines and then outputting them all at once. This blitter
865 if (opt) { 588 // is only expected to be used with alpha masks, i.e. kAlpha8_SkColorType.
866 proc = SkTextureCompressorGetPlatformProc(srcColorType, format); 589 class R11_EACBlitter : public SkBlitter {
590 public:
591 R11_EACBlitter(int width, int height, void *compressedBuffer);
592 virtual ~R11_EACBlitter() { this->flushRuns(); }
593
594 // Blit a horizontal run of one or more pixels.
595 virtual void blitH(int x, int y, int width) SK_OVERRIDE {
596 // This function is intended to be called from any standard RGB
597 // buffer, so we should never encounter it. However, if some code
598 // path does end up here, then this needs to be investigated.
599 SkFAIL("Not implemented!");
600 }
601
602 // Blit a horizontal run of antialiased pixels; runs[] is a *sparse*
603 // zero-terminated run-length encoding of spans of constant alpha values.
604 virtual void blitAntiH(int x, int y,
605 const SkAlpha antialias[],
606 const int16_t runs[]) SK_OVERRIDE;
607
608 // Blit a vertical run of pixels with a constant alpha value.
609 virtual void blitV(int x, int y, int height, SkAlpha alpha) SK_OVERRIDE {
610 // This function is currently not implemented. It is not explicitly
611 // required by the contract, but if at some time a code path runs into
612 // this function (which is entirely possible), it needs to be implemente d.
613 //
614 // TODO (krajcevski):
615 // This function will be most easily implemented in one of two ways:
616 // 1. Buffer each vertical column value and then construct a list
617 // of alpha values and output all of the blocks at once. This only
618 // requires a write to the compressed buffer
619 // 2. Replace the indices of each block with the proper indices based
620 // on the alpha value. This requires a read and write of the compress ed
621 // buffer, but much less overhead.
622 SkFAIL("Not implemented!");
867 } 623 }
868 624
869 if (NULL == proc) { 625 // Blit a solid rectangle one or more pixels wide.
870 switch (srcColorType) { 626 virtual void blitRect(int x, int y, int width, int height) SK_OVERRIDE {
871 case kAlpha_8_SkColorType: 627 // Analogous to blitRow, this function is intended for RGB targets
872 { 628 // and should never be called by this blitter. Any calls to this functio n
873 switch (format) { 629 // are probably a bug and should be investigated.
874 case kLATC_Format: 630 SkFAIL("Not implemented!");
875 proc = compress_a8_to_latc;
876 break;
877 case kR11_EAC_Format:
878 proc = compress_a8_to_r11eac;
879 break;
880 default:
881 // Do nothing...
882 break;
883 }
884 }
885 break;
886
887 default:
888 // Do nothing...
889 break;
890 }
891 } 631 }
892 632
893 if (NULL != proc) { 633 // Blit a rectangle with one alpha-blended column on the left,
894 return proc(dst, src, width, height, rowBytes); 634 // width (zero or more) opaque pixels, and one alpha-blended column
635 // on the right. The result will always be at least two pixels wide.
636 virtual void blitAntiRect(int x, int y, int width, int height,
637 SkAlpha leftAlpha, SkAlpha rightAlpha) SK_OVERRIDE {
638 // This function is currently not implemented. It is not explicitly
639 // required by the contract, but if at some time a code path runs into
640 // this function (which is entirely possible), it needs to be implemente d.
641 //
642 // TODO (krajcevski):
643 // This function will be most easily implemented as follows:
644 // 1. If width/height are smaller than a block, then update the
645 // indices of the affected blocks.
646 // 2. If width/height are larger than a block, then construct a 9-patch
647 // of block encodings that represent the rectangle, and write them
648 // to the compressed buffer as necessary. Whether or not the blocks
649 // are overwritten by zeros or just their indices are updated is up
650 // to debate.
651 SkFAIL("Not implemented!");
895 } 652 }
896 653
897 return false; 654 // Blit a pattern of pixels defined by a rectangle-clipped mask;
898 } 655 // typically used for text.
899 656 virtual void blitMask(const SkMask&, const SkIRect& clip) SK_OVERRIDE {
900 SkData *CompressBitmapToFormat(const SkBitmap &bitmap, Format format) { 657 // This function is currently not implemented. It is not explicitly
901 SkAutoLockPixels alp(bitmap); 658 // required by the contract, but if at some time a code path runs into
902 659 // this function (which is entirely possible), it needs to be implemente d.
903 int compressedDataSize = get_compressed_data_size(format, bitmap.width(), bi tmap.height()); 660 //
904 const uint8_t* src = reinterpret_cast<const uint8_t*>(bitmap.getPixels()); 661 // TODO (krajcevski):
905 uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize )); 662 // This function will be most easily implemented in the same way as
906 if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bit map.height(), 663 // blitAntiRect above.
907 bitmap.rowBytes(), format)) { 664 SkFAIL("Not implemented!");
908 return SkData::NewFromMalloc(dst, compressedDataSize);
909 } 665 }
910 666
911 sk_free(dst); 667 // If the blitter just sets a single value for each pixel, return the
912 return NULL; 668 // bitmap it draws into, and assign value. If not, return NULL and ignore
913 } 669 // the value parameter.
670 virtual const SkBitmap* justAnOpaqueColor(uint32_t* value) SK_OVERRIDE {
671 return NULL;
672 }
673
674 /**
675 * Compressed texture blitters only really work correctly if they get
676 * four blocks at a time. That being said, this blitter tries it's best
677 * to preserve semantics if blitAntiH doesn't get called in too many
678 * weird ways...
679 */
680 virtual int requestRowsPreserved() const { return kR11_EACBlockSz; }
681
682 protected:
683 virtual void onNotifyFinished() { this->flushRuns(); }
684
685 private:
686 static const int kR11_EACBlockSz = 4;
687 static const int kPixelsPerBlock = kR11_EACBlockSz * kR11_EACBlockSz;
688
689 // The longest possible run of pixels that this blitter will receive.
690 // This is initialized in the constructor to 0x7FFE, which is one less
691 // than the largest positive 16-bit integer. We make sure that it's one
692 // less for debugging purposes. We also don't make this variable static
693 // in order to make sure that we can construct a valid pointer to it.
694 const int16_t kLongestRun;
695
696 // Usually used in conjunction with kLongestRun. This is initialized to
697 // zero.
698 const SkAlpha kZeroAlpha;
699
700 // This is the information that we buffer whenever we're asked to blit
701 // a row with this blitter.
702 struct BufferedRun {
703 const SkAlpha* fAlphas;
704 const int16_t* fRuns;
705 int fX, fY;
706 } fBufferedRuns[kR11_EACBlockSz];
707
708 // The next row (0-3) that we need to blit. This value should never exceed
709 // the number of rows that we have (kR11_EACBlockSz)
710 int fNextRun;
711
712 // The width and height of the image that we're blitting
713 const int fWidth;
714 const int fHeight;
715
716 // The R11 EAC buffer that we're blitting into. It is assumed that the buffe r
717 // is large enough to store a compressed image of size fWidth*fHeight.
718 uint64_t* const fBuffer;
719
720 // Various utility functions
721 int blocksWide() const { return fWidth / kR11_EACBlockSz; }
722 int blocksTall() const { return fHeight / kR11_EACBlockSz; }
723 int totalBlocks() const { return (fWidth * fHeight) / kPixelsPerBlock; }
724
725 // Returns the block index for the block containing pixel (x, y). Block
726 // indices start at zero and proceed in raster order.
727 int getBlockOffset(int x, int y) const {
728 SkASSERT(x < fWidth);
729 SkASSERT(y < fHeight);
730 const int blockCol = x / kR11_EACBlockSz;
731 const int blockRow = y / kR11_EACBlockSz;
732 return blockRow * this->blocksWide() + blockCol;
733 }
734
735 // Returns a pointer to the block containing pixel (x, y)
736 uint64_t *getBlock(int x, int y) const {
737 return fBuffer + this->getBlockOffset(x, y);
738 }
739
740 // The following function writes the buffered runs to compressed blocks.
741 // If fNextRun < 4, then we fill the runs that we haven't buffered with
742 // the constant zero buffer.
743 void flushRuns();
744 };
745
914 746
915 R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer) 747 R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer)
916 // 0x7FFE is one less than the largest positive 16-bit int. We use it for 748 // 0x7FFE is one less than the largest positive 16-bit int. We use it for
917 // debugging to make sure that we're properly setting the nextX distance 749 // debugging to make sure that we're properly setting the nextX distance
918 // in flushRuns(). 750 // in flushRuns().
919 : kLongestRun(0x7FFE), kZeroAlpha(0) 751 : kLongestRun(0x7FFE), kZeroAlpha(0)
920 , fNextRun(0) 752 , fNextRun(0)
921 , fWidth(width) 753 , fWidth(width)
922 , fHeight(height) 754 , fHeight(height)
923 , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer)) 755 , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer))
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
1020 // nextX -- for each run, the next point at which we need to update curAlphaColumn 852 // nextX -- for each run, the next point at which we need to update curAlphaColumn
1021 // after the value of curX. 853 // after the value of curX.
1022 // finalX -- the minimum of all the nextX values. 854 // finalX -- the minimum of all the nextX values.
1023 // 855 //
1024 // curX advances to finalX outputting any blocks that it passes along 856 // curX advances to finalX outputting any blocks that it passes along
1025 // the way. Since finalX will not change when we reach the end of a 857 // the way. Since finalX will not change when we reach the end of a
1026 // run, the termination criteria will be whenever curX == finalX at the 858 // run, the termination criteria will be whenever curX == finalX at the
1027 // end of a loop. 859 // end of a loop.
1028 860
1029 // Setup: 861 // Setup:
1030 uint32_t c1 = 0; 862 uint32_t c[4] = { 0, 0, 0, 0 };
1031 uint32_t c2 = 0;
1032 uint32_t c3 = 0;
1033 uint32_t c4 = 0;
1034
1035 uint32_t curAlphaColumn = 0; 863 uint32_t curAlphaColumn = 0;
1036 SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphaColumn); 864 SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphaColumn);
1037 865
1038 int nextX[kR11_EACBlockSz]; 866 int nextX[kR11_EACBlockSz];
1039 for (int i = 0; i < kR11_EACBlockSz; ++i) { 867 for (int i = 0; i < kR11_EACBlockSz; ++i) {
1040 nextX[i] = 0x7FFFFF; 868 nextX[i] = 0x7FFFFF;
1041 } 869 }
1042 870
1043 uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY); 871 uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY);
1044 872
(...skipping 14 matching lines...) Expand all
1059 // Run the blitter... 887 // Run the blitter...
1060 while (curX != finalX) { 888 while (curX != finalX) {
1061 SkASSERT(finalX >= curX); 889 SkASSERT(finalX >= curX);
1062 890
1063 // Do we need to populate the rest of the block? 891 // Do we need to populate the rest of the block?
1064 if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) { 892 if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) {
1065 const int col = curX & 3; 893 const int col = curX & 3;
1066 const int colsLeft = 4 - col; 894 const int colsLeft = 4 - col;
1067 SkASSERT(curX + colsLeft <= finalX); 895 SkASSERT(curX + colsLeft <= finalX);
1068 896
1069 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn); 897 update_block_columns(c, col, colsLeft, curAlphaColumn);
1070 898
1071 // Write this block 899 // Write this block
1072 *outPtr = compress_block_vertical(c1, c2, c3, c4); 900 *outPtr = compress_block_vertical(c[0], c[1], c[2], c[3]);
1073 ++outPtr; 901 ++outPtr;
1074 curX += colsLeft; 902 curX += colsLeft;
1075 } 903 }
1076 904
1077 // If we can advance even further, then just keep memsetting the block 905 // If we can advance even further, then just keep memsetting the block
1078 if ((finalX - curX) >= kR11_EACBlockSz) { 906 if ((finalX - curX) >= kR11_EACBlockSz) {
1079 SkASSERT((curX & 3) == 0); 907 SkASSERT((curX & 3) == 0);
1080 908
1081 const int col = 0; 909 const int col = 0;
1082 const int colsLeft = kR11_EACBlockSz; 910 const int colsLeft = kR11_EACBlockSz;
1083 911
1084 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn); 912 update_block_columns(c, col, colsLeft, curAlphaColumn);
1085 913
1086 // While we can keep advancing, just keep writing the block. 914 // While we can keep advancing, just keep writing the block.
1087 uint64_t lastBlock = compress_block_vertical(c1, c2, c3, c4); 915 uint64_t lastBlock = compress_block_vertical(c[0], c[1], c[2], c[3]);
1088 while((finalX - curX) >= kR11_EACBlockSz) { 916 while((finalX - curX) >= kR11_EACBlockSz) {
1089 *outPtr = lastBlock; 917 *outPtr = lastBlock;
1090 ++outPtr; 918 ++outPtr;
1091 curX += kR11_EACBlockSz; 919 curX += kR11_EACBlockSz;
1092 } 920 }
1093 } 921 }
1094 922
1095 // If we haven't advanced within the block then do so. 923 // If we haven't advanced within the block then do so.
1096 if (curX < finalX) { 924 if (curX < finalX) {
1097 const int col = curX & 3; 925 const int col = curX & 3;
1098 const int colsLeft = finalX - curX; 926 const int colsLeft = finalX - curX;
1099 927
1100 update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn); 928 update_block_columns(c, col, colsLeft, curAlphaColumn);
1101 929
1102 curX += colsLeft; 930 curX += colsLeft;
1103 } 931 }
1104 932
1105 SkASSERT(curX == finalX); 933 SkASSERT(curX == finalX);
1106 934
1107 // Figure out what the next advancement is... 935 // Figure out what the next advancement is...
1108 for (int i = 0; i < kR11_EACBlockSz; ++i) { 936 for (int i = 0; i < kR11_EACBlockSz; ++i) {
1109 if (nextX[i] == finalX) { 937 if (nextX[i] == finalX) {
1110 const int16_t run = *(fBufferedRuns[i].fRuns); 938 const int16_t run = *(fBufferedRuns[i].fRuns);
1111 fBufferedRuns[i].fRuns += run; 939 fBufferedRuns[i].fRuns += run;
1112 fBufferedRuns[i].fAlphas += run; 940 fBufferedRuns[i].fAlphas += run;
1113 curAlpha[i] = *(fBufferedRuns[i].fAlphas); 941 curAlpha[i] = *(fBufferedRuns[i].fAlphas);
1114 nextX[i] += *(fBufferedRuns[i].fRuns); 942 nextX[i] += *(fBufferedRuns[i].fRuns);
1115 } 943 }
1116 } 944 }
1117 945
1118 finalX = 0xFFFFF; 946 finalX = 0xFFFFF;
1119 for (int i = 0; i < kR11_EACBlockSz; ++i) { 947 for (int i = 0; i < kR11_EACBlockSz; ++i) {
1120 finalX = SkMin32(nextX[i], finalX); 948 finalX = SkMin32(nextX[i], finalX);
1121 } 949 }
1122 } 950 }
1123 951
1124 // If we didn't land on a block boundary, output the block... 952 // If we didn't land on a block boundary, output the block...
1125 if ((curX & 3) > 1) { 953 if ((curX & 3) > 1) {
1126 *outPtr = compress_block_vertical(c1, c2, c3, c4); 954 *outPtr = compress_block_vertical(c[0], c[1], c[2], c[3]);
1127 } 955 }
1128 956
1129 fNextRun = 0; 957 fNextRun = 0;
1130 } 958 }
1131 959
960 SkBlitter* CreateR11EACBlitter(int width, int height, void* outputBuffer) {
961 return new R11_EACBlitter(width, height, outputBuffer);
962 }
963
1132 } // namespace SkTextureCompressor 964 } // namespace SkTextureCompressor
OLDNEW
« no previous file with comments | « src/utils/SkTextureCompressor_R11EAC.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698