Chromium Code Reviews

Side by Side Diff: src/utils/SkTextureCompressor_R11EAC.cpp

Issue 403383003: Refactor texture compressors into separate files (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 6 years, 5 months ago
 /*
  * Copyright 2014 Google Inc.
  *
  * Use of this source code is governed by a BSD-style license that can be
  * found in the LICENSE file.
  */

 #include "SkTextureCompressor.h"

-#include "SkBitmap.h"
-#include "SkData.h"
 #include "SkEndian.h"

-#include "SkTextureCompression_opts.h"
-
-////////////////////////////////////////////////////////////////////////////////
-//
-// Utility Functions
-//
-////////////////////////////////////////////////////////////////////////////////
-
-// Absolute difference between two values. More correct than SkTAbs(a - b)
-// because it works on unsigned values.
-template <typename T> inline T abs_diff(const T &a, const T &b) {
-    return (a > b) ? (a - b) : (b - a);
-}
-
-static bool is_extremal(uint8_t pixel) {
-    return 0 == pixel || 255 == pixel;
-}
-
-typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);
-
-// This function is used by both R11 EAC and LATC to compress 4x4 blocks
-// of 8-bit alpha into 64-bit values that comprise the compressed data.
-// For both formats, we need to make sure that the dimensions of the
-// src pixels are divisible by 4, and copy 4x4 blocks one at a time
-// for compression.
-static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
-                                     int width, int height, int rowBytes,
-                                     A84x4To64BitProc proc) {
-    // Make sure that our data is well-formed enough to be considered for compression
-    if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) {
-        return false;
-    }
-
-    int blocksX = width >> 2;
-    int blocksY = height >> 2;
-
-    uint8_t block[16];
-    uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst);
-    for (int y = 0; y < blocksY; ++y) {
-        for (int x = 0; x < blocksX; ++x) {
-            // Load block
-            for (int k = 0; k < 4; ++k) {
-                memcpy(block + k*4, src + k*rowBytes + 4*x, 4);
-            }
-
-            // Compress it
-            *encPtr = proc(block);
-            ++encPtr;
-        }
-        src += 4 * rowBytes;
-    }
-
-    return true;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//
-// LATC compressor
-//
-////////////////////////////////////////////////////////////////////////////////
-
-// LATC compresses texels down into square 4x4 blocks
-static const int kLATCPaletteSize = 8;
-static const int kLATCBlockSize = 4;
-static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize;
-
-// Generates an LATC palette. LATC constructs
-// a palette of eight colors from LUM0 and LUM1 using the algorithm:
-//
-//                  LUM0,              if lum0 > lum1 and code(x,y) == 0
-//                  LUM1,              if lum0 > lum1 and code(x,y) == 1
-//                  (6*LUM0+  LUM1)/7, if lum0 > lum1 and code(x,y) == 2
-//                  (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3
-//                  (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4
-//                  (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5
-//                  (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6
-//                  (  LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7
-//
-//                  LUM0,              if lum0 <= lum1 and code(x,y) == 0
-//                  LUM1,              if lum0 <= lum1 and code(x,y) == 1
-//                  (4*LUM0+  LUM1)/5, if lum0 <= lum1 and code(x,y) == 2
-//                  (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3
-//                  (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4
-//                  (  LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5
-//                  0,                 if lum0 <= lum1 and code(x,y) == 6
-//                  255,               if lum0 <= lum1 and code(x,y) == 7
-
-static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) {
-    palette[0] = lum0;
-    palette[1] = lum1;
-    if (lum0 > lum1) {
-        for (int i = 1; i < 7; i++) {
-            palette[i+1] = ((7-i)*lum0 + i*lum1) / 7;
-        }
-    } else {
-        for (int i = 1; i < 5; i++) {
-            palette[i+1] = ((5-i)*lum0 + i*lum1) / 5;
-        }
-        palette[6] = 0;
-        palette[7] = 255;
-    }
-}
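
As a worked check of the palette math above (values chosen for illustration, not taken from the patch): with lum0 = 200 and lum1 = 40 we take the lum0 > lum1 branch, so all six intermediate entries are interpolants:

    uint8_t palette[kLATCPaletteSize];
    generate_latc_palette(palette, 200, 40);
    // palette == { 200, 40, 177, 154, 131, 108, 85, 62 }
    // e.g. palette[2] == (6*200 + 1*40) / 7 == 1240 / 7 == 177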
-
-// Compress a block by using the bounding box of the pixels. It is assumed that
-// there are no extremal pixels in this block, otherwise we would have used
-// compress_latc_block_bb_ignore_extremal.
-static uint64_t compress_latc_block_bb(const uint8_t pixels[]) {
-    uint8_t minVal = 255;
-    uint8_t maxVal = 0;
-    for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
-        minVal = SkTMin(pixels[i], minVal);
-        maxVal = SkTMax(pixels[i], maxVal);
-    }
-
-    SkASSERT(!is_extremal(minVal));
-    SkASSERT(!is_extremal(maxVal));
-
-    uint8_t palette[kLATCPaletteSize];
-    generate_latc_palette(palette, maxVal, minVal);
-
-    uint64_t indices = 0;
-    for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
-
-        // Find the best palette index
-        uint8_t bestError = abs_diff(pixels[i], palette[0]);
-        uint8_t idx = 0;
-        for (int j = 1; j < kLATCPaletteSize; ++j) {
-            uint8_t error = abs_diff(pixels[i], palette[j]);
-            if (error < bestError) {
-                bestError = error;
-                idx = j;
-            }
-        }
-
-        indices <<= 3;
-        indices |= idx;
-    }
-
-    return
-        SkEndian_SwapLE64(
-            static_cast<uint64_t>(maxVal) |
-            (static_cast<uint64_t>(minVal) << 8) |
-            (indices << 16));
-}
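
For reference, a sketch of how a decoder would invert the block layout produced above (decode_latc_texel is hypothetical, not a Skia function, and the SkEndian_SwapLE64 step is ignored): byte 0 holds LUM0 (maxVal here), byte 1 holds LUM1 (minVal here), and bits 16..63 hold sixteen 3-bit palette indices, with texel i at bit 16 + 3*i:

    static uint8_t decode_latc_texel(uint64_t block, int texel) {
        uint8_t palette[kLATCPaletteSize];
        generate_latc_palette(palette,
                              static_cast<uint8_t>(block & 0xFF),          // LUM0
                              static_cast<uint8_t>((block >> 8) & 0xFF));  // LUM1
        return palette[(block >> (16 + 3 * texel)) & 0x7];
    }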
-
-// Compress a block by using the bounding box of the pixels without taking into
-// account the extremal values. The generated palette will contain extremal values
-// and fewer points along the line segment to interpolate.
-static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) {
-    uint8_t minVal = 255;
-    uint8_t maxVal = 0;
-    for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
-        if (is_extremal(pixels[i])) {
-            continue;
-        }
-
-        minVal = SkTMin(pixels[i], minVal);
-        maxVal = SkTMax(pixels[i], maxVal);
-    }
-
-    SkASSERT(!is_extremal(minVal));
-    SkASSERT(!is_extremal(maxVal));
-
-    uint8_t palette[kLATCPaletteSize];
-    generate_latc_palette(palette, minVal, maxVal);
-
-    uint64_t indices = 0;
-    for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
-
-        // Find the best palette index
-        uint8_t idx = 0;
-        if (is_extremal(pixels[i])) {
-            if (0xFF == pixels[i]) {
-                idx = 7;
-            } else if (0 == pixels[i]) {
-                idx = 6;
-            } else {
-                SkFAIL("Pixel is extremal but not really?!");
-            }
-        } else {
-            uint8_t bestError = abs_diff(pixels[i], palette[0]);
-            for (int j = 1; j < kLATCPaletteSize - 2; ++j) {
-                uint8_t error = abs_diff(pixels[i], palette[j]);
-                if (error < bestError) {
-                    bestError = error;
-                    idx = j;
-                }
-            }
-        }
-
-        indices <<= 3;
-        indices |= idx;
-    }
-
-    return
-        SkEndian_SwapLE64(
-            static_cast<uint64_t>(minVal) |
-            (static_cast<uint64_t>(maxVal) << 8) |
-            (indices << 16));
-}
-
-
-// Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from two
-// values LUM0 and LUM1, and an index into the generated palette. Details of how
-// the palette is generated can be found in the comments of generate_latc_palette
-// above.
-//
-// We choose which palette type to use based on whether or not 'pixels' contains
-// any extremal values (0 or 255). If there are extremal values, then we use the
-// palette that has the extremal values built in. Otherwise, we use the full bounding
-// box.
-
-static uint64_t compress_latc_block(const uint8_t pixels[]) {
-    // Collect unique pixels
-    int nUniquePixels = 0;
-    uint8_t uniquePixels[kLATCPixelsPerBlock];
-    for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
-        bool foundPixel = false;
-        for (int j = 0; j < nUniquePixels; ++j) {
-            foundPixel = foundPixel || uniquePixels[j] == pixels[i];
-        }
-
-        if (!foundPixel) {
-            uniquePixels[nUniquePixels] = pixels[i];
-            ++nUniquePixels;
-        }
-    }
-
-    // If there's only one unique pixel, then our compression is easy.
-    if (1 == nUniquePixels) {
-        return SkEndian_SwapLE64(pixels[0] | (pixels[0] << 8));
-
-    // Similarly, if there are only two unique pixels, then our compression is
-    // easy again: place the pixels in the block header, and assign the indices
-    // with one or zero depending on which pixel they belong to.
-    } else if (2 == nUniquePixels) {
-        uint64_t outBlock = 0;
-        for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
-            int idx = 0;
-            if (pixels[i] == uniquePixels[1]) {
-                idx = 1;
-            }
-
-            outBlock <<= 3;
-            outBlock |= idx;
-        }
-        outBlock <<= 16;
-        outBlock |= (uniquePixels[0] | (uniquePixels[1] << 8));
-        return SkEndian_SwapLE64(outBlock);
-    }
-
-    // Count non-extremal pixel values
-    int nonExtremalPixels = 0;
-    for (int i = 0; i < nUniquePixels; ++i) {
-        if (!is_extremal(uniquePixels[i])) {
-            ++nonExtremalPixels;
-        }
-    }
-
-    // If all the pixels are non-extremal, then compute the palette using
-    // the bounding box of all the pixels.
-    if (nonExtremalPixels == nUniquePixels) {
-        // This is really just for correctness, in all of my tests we
-        // never take this step. We don't lose too much perf here because
-        // most of the processing in this function is worth it for the
-        // 1 == nUniquePixels optimization.
-        return compress_latc_block_bb(pixels);
-    } else {
-        return compress_latc_block_bb_ignore_extremal(pixels);
-    }
-}
-
-static inline bool compress_a8_to_latc(uint8_t* dst, const uint8_t* src,
-                                       int width, int height, int rowBytes) {
-    return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_latc_block);
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//
-// R11 EAC Compressor
-//
-////////////////////////////////////////////////////////////////////////////////
-
 // #define COMPRESS_R11_EAC_SLOW 1
 // #define COMPRESS_R11_EAC_FAST 1
 #define COMPRESS_R11_EAC_FASTEST 1

 // Blocks compressed into R11 EAC are represented as follows:
 // 0000000000000000000000000000000000000000000000000000000000000000
 // |base_cw|mod|mul|  ----------------- indices -------------------
 //
 // To reconstruct the value of a given pixel, we use the formula:
 // clamp[0, 2047](base_cw * 8 + 4 + mod_val*mul*8)
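
To make the reconstruction formula concrete, here is a minimal decoder-side sketch (not part of this patch; mod_val would come from the R11 EAC modifier table, which lives in the elided lines below):

    static inline int r11eac_reconstruct(int base_cw, int mod_val, int mul) {
        int val = base_cw * 8 + 4 + mod_val * mul * 8;
        if (val < 0) { val = 0; }
        if (val > 2047) { val = 2047; }
        return val;
    }
    // e.g. base_cw = 128, mul = 2, mod_val = -3:
    // clamp[0, 2047](128*8 + 4 - 48) == 980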
(...skipping 235 matching lines...)
             // primarily for compressing alpha maps. Usually the only
             // non-fully opaque or fully transparent blocks are not a solid
             // intermediate color. If we notice that they are, then we can
             // add another optimization...
             break;
         }
     }

     return compress_heterogeneous_r11eac_block(block);
 }
+
+// This function is used by R11 EAC to compress 4x4 blocks
+// of 8-bit alpha into 64-bit values that comprise the compressed data.

robertphillips 2014/07/21 21:56:57  both formats ?
krajcevski 2014/07/21 22:03:55  Done.

+// For both formats, we need to make sure that the dimensions of the
+// src pixels are divisible by 4, and copy 4x4 blocks one at a time
+// for compression.
+typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]);
+
+static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src,
+                                     int width, int height, int rowBytes,
+                                     A84x4To64BitProc proc) {
+    // Make sure that our data is well-formed enough to be considered for compression
+    if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) {
+        return false;
+    }
+
+    int blocksX = width >> 2;
+    int blocksY = height >> 2;
+
+    uint8_t block[16];
+    uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst);
+    for (int y = 0; y < blocksY; ++y) {
+        for (int x = 0; x < blocksX; ++x) {
+            // Load block
+            for (int k = 0; k < 4; ++k) {
+                memcpy(block + k*4, src + k*rowBytes + 4*x, 4);
+            }
+
+            // Compress it
+            *encPtr = proc(block);
+            ++encPtr;
+        }
+        src += 4 * rowBytes;
+    }
+
+    return true;
+}
 #endif // (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST)

 #if COMPRESS_R11_EAC_FASTEST
+template<unsigned shift>
+static inline uint64_t swap_shift(uint64_t x, uint64_t mask) {
+    const uint64_t t = (x ^ (x >> shift)) & mask;
+    return x ^ t ^ (t << shift);
+}
+
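swap_shift is the classic delta-swap bit permutation: for each set bit i in mask, it exchanges bit i with bit (i + shift) of x. A quick sanity check (illustrative only, not part of the patch):

    // x = 0b01, mask = 0b01, shift = 1:
    //   t = (x ^ (x >> 1)) & mask == 0b01
    //   x ^ t ^ (t << 1)          == 0b10  (bits 0 and 1 exchanged)
    SkASSERT(swap_shift<1>(0x1ULL, 0x1ULL) == 0x2ULL);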
 static inline uint64_t interleave6(uint64_t topRows, uint64_t bottomRows) {
     // If our 3-bit block indices are laid out as:
     // a b c d
     // e f g h
     // i j k l
     // m n o p
     //
     // This function expects topRows and bottomRows to contain the first two rows
     // of indices interleaved in the least significant bits of a and b. In other words...
     //
(...skipping 13 matching lines...)
     //
     // !SPEED! this function might be even faster if certain SIMD intrinsics are
     // used..

     // For both architectures, we can figure out a packing of the bits by
     // using a shuffle and a few shift-rotates...
     uint64_t x = (static_cast<uint64_t>(topRows) << 32) | static_cast<uint64_t>(bottomRows);

     // x: 00 a e 00 b f 00 c g 00 d h 00 i m 00 j n 00 k o 00 l p

-    uint64_t t = (x ^ (x >> 10)) & 0x3FC0003FC00000ULL;
-    x = x ^ t ^ (t << 10);
+    x = swap_shift<10>(x, 0x3FC0003FC00000ULL);

     // x: b f 00 00 00 a e c g i m 00 00 00 d h j n 00 k o 00 l p

     x = (x | ((x << 52) & (0x3FULL << 52)) | ((x << 20) & (0x3FULL << 28))) >> 16;

     // x: 00 00 00 00 00 00 00 00 b f l p a e c g i m k o d h j n

-    t = (x ^ (x >> 6)) & 0xFC0000ULL;
-    x = x ^ t ^ (t << 6);
+    x = swap_shift<6>(x, 0xFC0000ULL);

 #if defined (SK_CPU_BENDIAN)
     // x: 00 00 00 00 00 00 00 00 b f l p a e i m c g k o d h j n

-    t = (x ^ (x >> 36)) & 0x3FULL;
-    x = x ^ t ^ (t << 36);
+    x = swap_shift<36>(x, 0x3FULL);

     // x: 00 00 00 00 00 00 00 00 b f j n a e i m c g k o d h l p

-    t = (x ^ (x >> 12)) & 0xFFF000000ULL;
-    x = x ^ t ^ (t << 12);
-
-    // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p
-    return x;
+    x = swap_shift<12>(x, 0xFFF000000ULL);
 #else
     // If our CPU is little endian, then the above logic will
     // produce the following indices:
     // x: 00 00 00 00 00 00 00 00 c g i m d h l p b f j n a e k o

-    t = (x ^ (x >> 36)) & 0xFC0ULL;
-    x = x ^ t ^ (t << 36);
+    x = swap_shift<36>(x, 0xFC0ULL);

     // x: 00 00 00 00 00 00 00 00 a e i m d h l p b f j n c g k o

     x = (x & (0xFFFULL << 36)) | ((x & 0xFFFFFFULL) << 12) | ((x >> 24) & 0xFFFULL);
+#endif

     // x: 00 00 00 00 00 00 00 00 a e i m b f j n c g k o d h l p
-
     return x;
-#endif
 }

 // This function converts an integer containing four bytes of alpha
 // values into an integer containing four bytes of indices into R11 EAC.
 // Note, there needs to be a mapping of indices:
 //     0 1 2 3 4 5 6 7
 //     3 2 1 0 4 5 6 7
 //
 // To compute this, we first negate each byte, and then add three, which
 // gives the mapping
(...skipping 87 matching lines...)
             // Compress it
             *encPtr = compress_r11eac_block_fast(src + 4*x, rowBytes);
             ++encPtr;
         }
         src += 4 * rowBytes;
     }
     return true;
 }
 #endif // COMPRESS_R11_EAC_FASTEST

+////////////////////////////////////////////////////////////////////////////////
+//
+// Utility functions used by the blitter
+//
+////////////////////////////////////////////////////////////////////////////////
+
 // The R11 EAC format expects that indices are given in column-major order. Since
 // we receive alpha values in raster order, this usually means that we have to use
 // pack6 above to properly pack our indices. However, if our indices come from the
 // blitter, then each integer will be a column of indices, and hence can be efficiently
 // packed. This function takes the bottom three bits of each byte and places them in
 // the least significant 12 bits of the resulting integer.
 static inline uint32_t pack_indices_vertical(uint32_t x) {
 #if defined (SK_CPU_BENDIAN)
     return
         (x & 7) |
(...skipping 43 matching lines...)
     const uint32_t packedIndexColumn3 = pack_indices_vertical(indexColumn3);

     return SkEndian_SwapBE64(0x8490000000000000ULL |
                              (static_cast<uint64_t>(packedIndexColumn0) << 36) |
                              (static_cast<uint64_t>(packedIndexColumn1) << 24) |
                              static_cast<uint64_t>(packedIndexColumn2 << 12) |
                              static_cast<uint64_t>(packedIndexColumn3));

 }

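A scalar sketch of what pack_indices_vertical computes, per the comment above (the byte-to-slot order shown is one plausible little-endian layout; the real body branches on SK_CPU_BENDIAN and is mostly elided here):

    static inline uint32_t pack_indices_vertical_sketch(uint32_t x) {
        const uint32_t i0 = (x >>  0) & 7;  // bottom 3 bits of byte 0
        const uint32_t i1 = (x >>  8) & 7;  // bottom 3 bits of byte 1
        const uint32_t i2 = (x >> 16) & 7;  // bottom 3 bits of byte 2
        const uint32_t i3 = (x >> 24) & 7;  // bottom 3 bits of byte 3
        // Pack the four 3-bit indices into the low 12 bits.
        return (i0 << 9) | (i1 << 6) | (i2 << 3) | i3;
    }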
-static inline bool compress_a8_to_r11eac(uint8_t* dst, const uint8_t* src,
-                                         int width, int height, int rowBytes) {
-#if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST)
-    return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block);
-#elif COMPRESS_R11_EAC_FASTEST
-    return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes);
-#else
-#error "Must choose R11 EAC algorithm"
-#endif
-}
-
 // Updates the block whose columns are stored in blockColN. curAlphai is expected
 // to store, as an integer, the four alpha values that will be placed within each
 // of the columns in the range [col, col+colsLeft).
-static inline void update_block_columns(
-    uint32_t* blockCol1, uint32_t* blockCol2, uint32_t* blockCol3, uint32_t* blockCol4,
-    const int col, const int colsLeft, const uint32_t curAlphai) {
-    SkASSERT(NULL != blockCol1);
-    SkASSERT(NULL != blockCol2);
-    SkASSERT(NULL != blockCol3);
-    SkASSERT(NULL != blockCol4);
-    SkASSERT(col + colsLeft <= 4);
-
-    for (int i = col; i < (col + colsLeft); ++i) {
-        switch(i) {
-            case 0:
-                *blockCol1 = curAlphai;
-                break;
-            case 1:
-                *blockCol2 = curAlphai;
-                break;
-            case 2:
-                *blockCol3 = curAlphai;
-                break;
-            case 3:
-                *blockCol4 = curAlphai;
-                break;
-        }
-    }
-}
+static inline void update_block_columns(uint32_t* block, const int col,
+                                        const int colsLeft, const uint32_t curAlphai) {
+    SkASSERT(NULL != block);
+    SkASSERT(col + colsLeft <= 4);
+
+    for (int i = col; i < (col + colsLeft); ++i) {
+        block[i] = curAlphai;
+    }
+}

 ////////////////////////////////////////////////////////////////////////////////

 namespace SkTextureCompressor {

-static inline size_t get_compressed_data_size(Format fmt, int width, int height) {
-    switch (fmt) {
-        // These formats are 64 bits per 4x4 block.
-        case kR11_EAC_Format:
-        case kLATC_Format:
-        {
-            static const int kLATCEncodedBlockSize = 8;
-
-            const int blocksX = width / kLATCBlockSize;
-            const int blocksY = height / kLATCBlockSize;
-
-            return blocksX * blocksY * kLATCEncodedBlockSize;
-        }
-
-        default:
-            SkFAIL("Unknown compressed format!");
-            return 0;
-    }
-}
+bool CompressA8ToR11EAC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) {
+
+#if (COMPRESS_R11_EAC_SLOW) || (COMPRESS_R11_EAC_FAST)
+
+    return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, compress_r11eac_block);
+
+#elif COMPRESS_R11_EAC_FASTEST
+
+    return compress_a8_to_r11eac_fast(dst, src, width, height, rowBytes);
+
+#else
+#error "Must choose R11 EAC algorithm"
+#endif
+}

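A minimal sketch of calling the new entry point (the SkAutoMalloc buffers and dimensions are assumptions for illustration; width and height must be multiples of 4, and each 4x4 block compresses to 8 bytes):

    const int width = 64, height = 64;
    SkAutoMalloc src(width * height);                  // kAlpha_8 source pixels
    SkAutoMalloc dst((width / 4) * (height / 4) * 8);  // 64 bits per 4x4 block
    bool ok = SkTextureCompressor::CompressA8ToR11EAC(
        reinterpret_cast<uint8_t*>(dst.get()),
        reinterpret_cast<const uint8_t*>(src.get()),
        width, height, width /* rowBytes */);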
-bool CompressBufferToFormat(uint8_t* dst, const uint8_t* src, SkColorType srcColorType,
-                            int width, int height, int rowBytes, Format format, bool opt) {
-    CompressionProc proc = NULL;
-    if (opt) {
-        proc = SkTextureCompressorGetPlatformProc(srcColorType, format);
-    }
-
-    if (NULL == proc) {
-        switch (srcColorType) {
-            case kAlpha_8_SkColorType:
-            {
-                switch (format) {
-                    case kLATC_Format:
-                        proc = compress_a8_to_latc;
-                        break;
-                    case kR11_EAC_Format:
-                        proc = compress_a8_to_r11eac;
-                        break;
-                    default:
-                        // Do nothing...
-                        break;
-                }
-            }
-            break;
-
-            default:
-                // Do nothing...
-                break;
-        }
-    }
-
-    if (NULL != proc) {
-        return proc(dst, src, width, height, rowBytes);
-    }
-
-    return false;
-}
-
-SkData *CompressBitmapToFormat(const SkBitmap &bitmap, Format format) {
-    SkAutoLockPixels alp(bitmap);
-
-    int compressedDataSize = get_compressed_data_size(format, bitmap.width(), bitmap.height());
-    const uint8_t* src = reinterpret_cast<const uint8_t*>(bitmap.getPixels());
-    uint8_t* dst = reinterpret_cast<uint8_t*>(sk_malloc_throw(compressedDataSize));
-    if (CompressBufferToFormat(dst, src, bitmap.colorType(), bitmap.width(), bitmap.height(),
-                               bitmap.rowBytes(), format)) {
-        return SkData::NewFromMalloc(dst, compressedDataSize);
-    }
-
-    sk_free(dst);
-    return NULL;
-}
+// This class implements a blitter that blits directly into a buffer that will
+// be used as an R11 EAC compressed texture. We compute this buffer by
+// buffering four scan lines and then outputting them all at once. This blitter
+// is only expected to be used with alpha masks, i.e. kAlpha8_SkColorType.
+class R11_EACBlitter : public SkBlitter {
+public:
+    R11_EACBlitter(int width, int height, void *compressedBuffer);
+    virtual ~R11_EACBlitter() { this->flushRuns(); }
+
+    // Blit a horizontal run of one or more pixels.
+    virtual void blitH(int x, int y, int width) SK_OVERRIDE {
+        // This function is intended to be called from any standard RGB
+        // buffer, so we should never encounter it. However, if some code
+        // path does end up here, then this needs to be investigated.
+        SkFAIL("Not implemented!");
+    }
+
+    // Blit a horizontal run of antialiased pixels; runs[] is a *sparse*
+    // zero-terminated run-length encoding of spans of constant alpha values.
+    virtual void blitAntiH(int x, int y,
+                           const SkAlpha antialias[],
+                           const int16_t runs[]) SK_OVERRIDE;
+
+    // Blit a vertical run of pixels with a constant alpha value.
+    virtual void blitV(int x, int y, int height, SkAlpha alpha) SK_OVERRIDE {
+        // This function is currently not implemented. It is not explicitly
+        // required by the contract, but if at some time a code path runs into
+        // this function (which is entirely possible), it needs to be implemented.
+        //
+        // TODO (krajcevski):
+        // This function will be most easily implemented in one of two ways:
+        // 1. Buffer each vertical column value and then construct a list
+        //    of alpha values and output all of the blocks at once. This only
+        //    requires a write to the compressed buffer
+        // 2. Replace the indices of each block with the proper indices based
+        //    on the alpha value. This requires a read and write of the compressed
+        //    buffer, but much less overhead.
+        SkFAIL("Not implemented!");
+    }
+
+    // Blit a solid rectangle one or more pixels wide.
+    virtual void blitRect(int x, int y, int width, int height) SK_OVERRIDE {
+        // Analogous to blitRow, this function is intended for RGB targets
+        // and should never be called by this blitter. Any calls to this function
+        // are probably a bug and should be investigated.
+        SkFAIL("Not implemented!");
+    }
+
+    // Blit a rectangle with one alpha-blended column on the left,
+    // width (zero or more) opaque pixels, and one alpha-blended column
+    // on the right. The result will always be at least two pixels wide.
+    virtual void blitAntiRect(int x, int y, int width, int height,
+                              SkAlpha leftAlpha, SkAlpha rightAlpha) SK_OVERRIDE {
+        // This function is currently not implemented. It is not explicitly
+        // required by the contract, but if at some time a code path runs into
+        // this function (which is entirely possible), it needs to be implemented.
+        //
+        // TODO (krajcevski):
+        // This function will be most easily implemented as follows:
+        // 1. If width/height are smaller than a block, then update the
+        //    indices of the affected blocks.
+        // 2. If width/height are larger than a block, then construct a 9-patch
+        //    of block encodings that represent the rectangle, and write them
+        //    to the compressed buffer as necessary. Whether or not the blocks
+        //    are overwritten by zeros or just their indices are updated is up
+        //    to debate.
+        SkFAIL("Not implemented!");
+    }
+
+    // Blit a pattern of pixels defined by a rectangle-clipped mask;
+    // typically used for text.
+    virtual void blitMask(const SkMask&, const SkIRect& clip) SK_OVERRIDE {
+        // This function is currently not implemented. It is not explicitly
+        // required by the contract, but if at some time a code path runs into
+        // this function (which is entirely possible), it needs to be implemented.
+        //
+        // TODO (krajcevski):
+        // This function will be most easily implemented in the same way as
+        // blitAntiRect above.
+        SkFAIL("Not implemented!");
+    }
+
+    // If the blitter just sets a single value for each pixel, return the
+    // bitmap it draws into, and assign value. If not, return NULL and ignore
+    // the value parameter.
+    virtual const SkBitmap* justAnOpaqueColor(uint32_t* value) SK_OVERRIDE {
+        return NULL;
+    }
+
+    /**
+     * Compressed texture blitters only really work correctly if they get
+     * four blocks at a time. That being said, this blitter tries its best
+     * to preserve semantics if blitAntiH doesn't get called in too many
+     * weird ways...
+     */
+    virtual int requestRowsPreserved() const { return kR11_EACBlockSz; }
+
+protected:
+    virtual void onNotifyFinished() { this->flushRuns(); }
+
+private:
+    static const int kR11_EACBlockSz = 4;
+    static const int kPixelsPerBlock = kR11_EACBlockSz * kR11_EACBlockSz;
+
+    // The longest possible run of pixels that this blitter will receive.
+    // This is initialized in the constructor to 0x7FFE, which is one less
+    // than the largest positive 16-bit integer. We make sure that it's one
+    // less for debugging purposes. We also don't make this variable static
+    // in order to make sure that we can construct a valid pointer to it.
+    const int16_t kLongestRun;
+
+    // Usually used in conjunction with kLongestRun. This is initialized to
+    // zero.
+    const SkAlpha kZeroAlpha;
+
+    // This is the information that we buffer whenever we're asked to blit
+    // a row with this blitter.
+    struct BufferedRun {
+        const SkAlpha* fAlphas;
+        const int16_t* fRuns;
+        int fX, fY;
+    } fBufferedRuns[kR11_EACBlockSz];
+
+    // The next row (0-3) that we need to blit. This value should never exceed
+    // the number of rows that we have (kR11_EACBlockSz)
+    int fNextRun;
+
+    // The width and height of the image that we're blitting
+    const int fWidth;
+    const int fHeight;
+
+    // The R11 EAC buffer that we're blitting into. It is assumed that the buffer
+    // is large enough to store a compressed image of size fWidth*fHeight.
+    uint64_t* const fBuffer;
+
+    // Various utility functions
+    int blocksWide() const { return fWidth / kR11_EACBlockSz; }
+    int blocksTall() const { return fHeight / kR11_EACBlockSz; }
+    int totalBlocks() const { return (fWidth * fHeight) / kPixelsPerBlock; }
+
+    // Returns the block index for the block containing pixel (x, y). Block
+    // indices start at zero and proceed in raster order.
+    int getBlockOffset(int x, int y) const {
+        SkASSERT(x < fWidth);
+        SkASSERT(y < fHeight);
+        const int blockCol = x / kR11_EACBlockSz;
+        const int blockRow = y / kR11_EACBlockSz;
+        return blockRow * this->blocksWide() + blockCol;
+    }
+
+    // Returns a pointer to the block containing pixel (x, y)
+    uint64_t *getBlock(int x, int y) const {
+        return fBuffer + this->getBlockOffset(x, y);
+    }
+
+    // The following function writes the buffered runs to compressed blocks.
+    // If fNextRun < 4, then we fill the runs that we haven't buffered with
+    // the constant zero buffer.
+    void flushRuns();
+};
+

 R11_EACBlitter::R11_EACBlitter(int width, int height, void *latcBuffer)
     // 0x7FFE is one less than the largest positive 16-bit int. We use it for
     // debugging to make sure that we're properly setting the nextX distance
     // in flushRuns().
     : kLongestRun(0x7FFE), kZeroAlpha(0)
     , fNextRun(0)
     , fWidth(width)
     , fHeight(height)
     , fBuffer(reinterpret_cast<uint64_t*const>(latcBuffer))
(...skipping 96 matching lines...)
     // nextX  -- for each run, the next point at which we need to update curAlphaColumn
     //           after the value of curX.
     // finalX -- the minimum of all the nextX values.
     //
     // curX advances to finalX, outputting any blocks that it passes along
     // the way. Since finalX will not change when we reach the end of a
     // run, the termination criterion is curX == finalX at the
     // end of a loop.

     // Setup:
-    uint32_t c1 = 0;
-    uint32_t c2 = 0;
-    uint32_t c3 = 0;
-    uint32_t c4 = 0;
-
+    uint32_t c[4] = { 0, 0, 0, 0 };
     uint32_t curAlphaColumn = 0;
     SkAlpha *curAlpha = reinterpret_cast<SkAlpha*>(&curAlphaColumn);

     int nextX[kR11_EACBlockSz];
     for (int i = 0; i < kR11_EACBlockSz; ++i) {
         nextX[i] = 0x7FFFFF;
     }

     uint64_t* outPtr = this->getBlock(fBufferedRuns[0].fX, fBufferedRuns[0].fY);

(...skipping 14 matching lines...)
     // Run the blitter...
     while (curX != finalX) {
         SkASSERT(finalX >= curX);

         // Do we need to populate the rest of the block?
         if ((finalX - (curX & ~3)) >= kR11_EACBlockSz) {
             const int col = curX & 3;
             const int colsLeft = 4 - col;
             SkASSERT(curX + colsLeft <= finalX);

-            update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn);
+            update_block_columns(c, col, colsLeft, curAlphaColumn);

             // Write this block
-            *outPtr = compress_block_vertical(c1, c2, c3, c4);
+            *outPtr = compress_block_vertical(c[0], c[1], c[2], c[3]);

robertphillips 2014/07/21 21:56:57  Why not pass the pointer into compress_block_vertical?
krajcevski 2014/07/21 22:03:55  I tried that, it seemed like it was a tad slower,

             ++outPtr;
             curX += colsLeft;
         }

         // If we can advance even further, then just keep memsetting the block
         if ((finalX - curX) >= kR11_EACBlockSz) {
             SkASSERT((curX & 3) == 0);

             const int col = 0;
             const int colsLeft = kR11_EACBlockSz;

-            update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn);
+            update_block_columns(c, col, colsLeft, curAlphaColumn);

             // While we can keep advancing, just keep writing the block.
-            uint64_t lastBlock = compress_block_vertical(c1, c2, c3, c4);
+            uint64_t lastBlock = compress_block_vertical(c[0], c[1], c[2], c[3]);
             while((finalX - curX) >= kR11_EACBlockSz) {
                 *outPtr = lastBlock;
                 ++outPtr;
                 curX += kR11_EACBlockSz;
             }
         }

         // If we haven't advanced within the block then do so.
         if (curX < finalX) {
             const int col = curX & 3;
             const int colsLeft = finalX - curX;

-            update_block_columns(&c1, &c2, &c3, &c4, col, colsLeft, curAlphaColumn);
+            update_block_columns(c, col, colsLeft, curAlphaColumn);

             curX += colsLeft;
         }

         SkASSERT(curX == finalX);

         // Figure out what the next advancement is...
         for (int i = 0; i < kR11_EACBlockSz; ++i) {
             if (nextX[i] == finalX) {
                 const int16_t run = *(fBufferedRuns[i].fRuns);
                 fBufferedRuns[i].fRuns += run;
                 fBufferedRuns[i].fAlphas += run;
                 curAlpha[i] = *(fBufferedRuns[i].fAlphas);
                 nextX[i] += *(fBufferedRuns[i].fRuns);
             }
         }

         finalX = 0xFFFFF;
         for (int i = 0; i < kR11_EACBlockSz; ++i) {
             finalX = SkMin32(nextX[i], finalX);
         }
     }

     // If we didn't land on a block boundary, output the block...
     if ((curX & 3) > 1) {
-        *outPtr = compress_block_vertical(c1, c2, c3, c4);
+        *outPtr = compress_block_vertical(c[0], c[1], c[2], c[3]);
     }

     fNextRun = 0;
 }

+SkBlitter* CreateR11EACBlitter(int width, int height, void* outputBuffer) {
+    return new R11_EACBlitter(width, height, outputBuffer);
+}
+
 } // namespace SkTextureCompressor
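
A rough caller-side sketch for the blitter factory above (the caller-side names are assumptions; the buffer sizing mirrors CompressA8ToR11EAC):

    void* buffer = sk_malloc_throw((width / 4) * (height / 4) * 8);
    SkBlitter* blitter = SkTextureCompressor::CreateR11EACBlitter(width, height, buffer);
    // ... hand the blitter to the scan converter; blitAntiH buffers four
    // rows at a time and writes compressed blocks in flushRuns() ...
    delete blitter;  // ~R11_EACBlitter() flushes any partially buffered rows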