OLD | NEW |
(Empty) | |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 // This file is based on the public domain code "stb_dxt.h" originally written |
| 6 // by Fabian Giesen and Sean Barrett. |
| 7 // |
| 8 // The following changes were made: |
| 9 // - Added support for ATC format. |
| 10 // - Replaced the Principal Component Analysis based calculation to find the |
| 11 // initial base colors with a much simpler bounding box implementation for low |
| 12 // quality only. |
| 13 // - Removed dithering support. |
| 14 // - Some minor optimizations. |
| 15 // - Reformatted the code (mainly with clang-format). |
| 16 // - Swapped red and blue channels in the output to match Skia. |
| 17 |
#include "cc/resources/texture_compress/atc_dxt.h"

#include <string.h>

#include <cmath>
#include <cstdlib>

#include "base/logging.h"
| 22 |
| 23 namespace cc { |
| 24 namespace texture_compress { |
| 25 |
| 26 struct TYPE_ATC_GENERIC : public TYPE_ATC { |
| 27 typedef TYPE_ATC BASE_TYPE; |
| 28 static const int kRemap[8]; |
| 29 static const int kW1Table[4]; |
| 30 static const int kProds[4]; |
| 31 }; |
| 32 |
| 33 struct TYPE_DXT_GENERIC : public TYPE_DXT { |
| 34 typedef TYPE_DXT BASE_TYPE; |
| 35 static const int kRemap[8]; |
| 36 static const int kW1Table[4]; |
| 37 static const int kProds[4]; |
| 38 }; |
| 39 |
| 40 const int TYPE_ATC_GENERIC::kRemap[8] = |
| 41 {0 << 30, 1 << 30, 0 << 30, 1 << 30, 2 << 30, 2 << 30, 3 << 30, 3 << 30}; |
| 42 const int TYPE_ATC_GENERIC::kW1Table[4] = {3, 2, 1, 0}; |
| 43 const int TYPE_ATC_GENERIC::kProds[4] = {0x090000, |
| 44 0x040102, |
| 45 0x010402, |
| 46 0x000900}; |
| 47 |
| 48 const int TYPE_DXT_GENERIC::kRemap[8] = |
| 49 {0 << 30, 2 << 30, 0 << 30, 2 << 30, 3 << 30, 3 << 30, 1 << 30, 1 << 30}; |
| 50 const int TYPE_DXT_GENERIC::kW1Table[4] = {3, 0, 2, 1}; |
| 51 const int TYPE_DXT_GENERIC::kProds[4] = {0x090000, |
| 52 0x000900, |
| 53 0x040102, |
| 54 0x010402}; |
| 55 |
// Number of passes over the block that's done to refine the base colors.
// Used as the upper bound of the refinement loop in CompressColorBlock().
const int kNumRefinements = 2;

// Single-color lookup tables, filled in by Init_ATC_DXT(). Each table has one
// row per 8-bit channel value; PrepareOptTable() stores in column 0 the index
// it picked from its "max" expansion table and in column 1 the index it picked
// from its "min" expansion table. The numeric suffix names the bit widths of
// the two expansion tables passed by Init_ATC_DXT(): 5/5, 6/6 and 5/6.
// NOTE(review): not read in this file; presumably consumed by the
// MatchSingleColorMax/Min helpers declared in the header - confirm.
uint8_t g_o_match55[256][2];
uint8_t g_o_match66[256][2];
uint8_t g_o_match56[256][2];
| 62 |
| 63 namespace { |
| 64 |
// Scales the product a * b back down by 255 with rounding, without a
// division: add half (128), then fold the high byte back in before dropping
// the low byte. Intended for operands in the 8-bit range.
inline int Mul8Bit(int a, int b) {
  const int biased = a * b + 128;
  const int high_byte = biased >> 8;
  return (biased + high_byte) >> 8;
}
| 69 |
// Returns the value one third of the way from a to b, i.e. (2 * a + b) / 3
// truncated (no rounding bias). The division by three is done as a multiply
// by 0xaaab followed by a 17-bit right shift.
inline int Lerp13(int a, int b) {
  const int weighted_sum = a + a + b;
  return (weighted_sum * 0xaaab) >> 17;
}
| 77 |
| 78 inline void Lerp13RGB(uint8_t* out, const uint8_t* p1, const uint8_t* p2) { |
| 79 out[0] = Lerp13(p1[0], p2[0]); |
| 80 out[1] = Lerp13(p1[1], p2[1]); |
| 81 out[2] = Lerp13(p1[2], p2[2]); |
| 82 } |
| 83 |
| 84 // Compute table to reproduce constant colors as accurately as possible. |
| 85 void PrepareOptTable(uint8_t* table, |
| 86 const uint8_t* expand_max, |
| 87 const uint8_t* expand_min, |
| 88 int size_max, |
| 89 int size_min) { |
| 90 for (int i = 0; i < 256; ++i) { |
| 91 int best_err = 256; |
| 92 for (int mn = 0; mn < size_min; ++mn) { |
| 93 for (int mx = 0; mx < size_max; ++mx) { |
| 94 int mine = expand_min[mn]; |
| 95 int maxe = expand_max[mx]; |
| 96 int err = std::abs(Lerp13(maxe, mine) - i); |
| 97 |
| 98 // DX10 spec says that interpolation must be within 3% of "correct" |
| 99 // result, add this as error term. (normally we'd expect a random |
| 100 // distribution of +-1.5% error, but nowhere in the spec does it say |
| 101 // that the error has to be unbiased - better safe than sorry). |
| 102 err += std::abs(maxe - mine) * 3 / 100; |
| 103 |
| 104 if (err < best_err) { |
| 105 table[i * 2 + 0] = mx; |
| 106 table[i * 2 + 1] = mn; |
| 107 best_err = err; |
| 108 } |
| 109 } |
| 110 } |
| 111 } |
| 112 } |
| 113 |
// Expands a 5-bit value to 8 bits by repeating its top three bits in the low
// bits, so that 0 maps to 0 and 31 maps to 255.
inline uint8_t Expand5(int i) {
  const int shifted_up = i << 3;
  const int replicated_top = i >> 2;
  return static_cast<uint8_t>(shifted_up | replicated_top);
}
| 117 |
// Expands a 6-bit value to 8 bits by repeating its top two bits in the low
// bits, so that 0 maps to 0 and 63 maps to 255.
inline uint8_t Expand6(int i) {
  const int shifted_up = i << 2;
  const int replicated_top = i >> 4;
  return static_cast<uint8_t>(shifted_up | replicated_top);
}
| 121 |
| 122 inline uint16_t As16Bit(int r, int g, int b) { |
| 123 return (Mul8Bit(r, 31) << 11) + (Mul8Bit(g, 63) << 5) + Mul8Bit(b, 31); |
| 124 } |
| 125 |
// Truncates |y| toward zero and clamps the result to [p0, p1].
//
// The range check is done on the float before it is converted: converting a
// NaN, an infinity or a value outside the range of int is undefined, and the
// callers feed in the result of a float division. NaN maps to p0.
//
// Assumes p0 <= p1 and that both are small enough to be represented exactly
// as float (callers in this file use 0..31 and 0..63).
inline int sclamp(float y, int p0, int p1) {
  // Written as !(y >= p0) rather than (y < p0) so that NaN is caught here.
  if (!(y >= static_cast<float>(p0))) {
    return p0;
  }
  if (y >= static_cast<float>(p1)) {
    return p1;
  }
  return static_cast<int>(y);
}
| 136 |
#if defined(OS_ANDROID)
// Exchanges the 5-bit field in bits 11-15 with the 5-bit field in bits 0-4 of
// a 16-bit color, leaving the 6-bit middle field (bits 5-10) where it is.
inline uint16_t RGB2BGR(uint16_t rgb) {
  const int high_field = rgb >> 11;
  const int middle_field = rgb & 0x07e0;
  const int low_field = rgb & 0x001f;
  return static_cast<uint16_t>((low_field << 11) | middle_field | high_field);
}
#endif
| 145 |
| 146 // Take two 16-bit base colors and generate 4 32-bit RGBX colors where: |
| 147 // 0 = c0 |
| 148 // 1 = c1 |
| 149 // 2 = (2 * c0 + c1) / 3 |
| 150 // 3 = (2 * c1 + c0) / 3 |
| 151 // |
| 152 // The base colors are expanded by reusing the top bits at the end. That makes |
| 153 // sure that white is still white after being quantized and converted back. |
| 154 // |
| 155 // params: |
| 156 // color (output) 4 RGBA pixels. |
| 157 // c0 base color 0 (16 bit RGB) |
| 158 // c1 base color 1 (16 bit RGB) |
| 159 inline void EvalColors(uint8_t* color, uint16_t c0, uint16_t c1) { |
| 160 // Expand the two base colors to 32-bit |
| 161 // From: [00000000][00000000][rrrrrggg][gggbbbbb] |
| 162 // To: [00000000][bbbbbxxx][ggggggxx][rrrrrxxx] |
| 163 // Where x means repeat the upper bits for that color component. |
| 164 |
| 165 // Take shortcut if either color is zero. Both will never be zero. |
| 166 DCHECK(c0 | c1); |
| 167 if (c0 && c1) { |
| 168 // Combine the two base colors into one register to allow operating on both |
| 169 // pixels at the same time. |
| 170 // [rrrrrggg][gggbbbbb][RRRRRGGG][GGGBBBBB] |
| 171 uint32_t c01 = c1 | (c0 << 16); |
| 172 |
| 173 // Mask out the red components and shift it down one channel to avoid some |
| 174 // shifts when combining the channels. |
| 175 // [00000000][rrrrr000][00000000][RRRRR000] |
| 176 uint32_t c01_r = (c01 & 0xf800f800) >> 8; |
| 177 // Extend to be 8-bit by reusing top bits at the end. |
| 178 // Note that we leave some extra garbage bits in the other channels, but |
| 179 // that's ok since we mask that off when we combine the different |
| 180 // components. |
| 181 // [00000000][rrrrrrrr][xx000000][RRRRRRRR] |
| 182 c01_r |= (c01_r >> 5); |
| 183 |
| 184 // Mask out the green components. |
| 185 // [00000ggg][ggg00000][00000GGG][GGG00000] |
| 186 uint32_t c01_g = c01 & 0x07e007e0; |
| 187 // Shift it into place and extend. |
| 188 // [gggggggg][xxxx0000][GGGGGGGG][xxxx0000] |
| 189 c01_g = ((c01_g << 5) | (c01_g >> 1)); |
| 190 |
| 191 // Mask out the blue components. |
| 192 // [00000000][000bbbbb][00000000][000BBBBB] |
| 193 uint32_t c01_b = c01 & 0x001f001f; |
| 194 // Shift it into place and extend. |
| 195 // [bbbbbbbb][xx000000][BBBBBBBB][xx000000] |
| 196 c01_b = ((c01_b << 11) | (c01_b << 6)); |
| 197 |
| 198 // Combine the components into base color 0. |
| 199 // Shift the components into place and mask of each channel. |
| 200 // [00000000][bbbbbbbb][gggggggg][rrrrrrrr] |
| 201 *reinterpret_cast<uint32_t*>(color + 0) = ((c01_r >> 16) & 0x000000ff) | |
| 202 ((c01_g >> 16) & 0x0000ff00) | |
| 203 ((c01_b >> 8) & 0x00ff0000); |
| 204 |
| 205 // Combine the components into base color 1. |
| 206 // [00000000][BBBBBBBB][GGGGGGGG][RRRRRRRR] |
| 207 *reinterpret_cast<uint32_t*>(color + 4) = (c01_r & 0x000000ff) | |
| 208 (c01_g & 0x0000ff00) | |
| 209 ((c01_b << 8) & 0x00ff0000); |
| 210 |
| 211 Lerp13RGB(color + 8, color, color + 4); |
| 212 Lerp13RGB(color + 12, color + 4, color); |
| 213 } else { |
| 214 // Combine the two base colors into one register, one of them will be zero. |
| 215 // [00000000][00000000][rrrrrggg][gggbbbbb] |
| 216 uint32_t c = c0 | c1; |
| 217 |
| 218 // Mask out the red components and shift it down one channel to avoid some |
| 219 // shifts when combining the channels. |
| 220 // [00000000][00000000][00000000][rrrrr000] |
| 221 uint32_t c_r = (c & 0xf800) >> 8; |
| 222 // Extend to be 8-bit by reusing top bits at the end. |
| 223 // [00000000][00000000][00000000][rrrrrrrr] |
| 224 c_r |= c_r >> 5; |
| 225 |
| 226 // Mask out the green components. |
| 227 // [00000000][00000000][00000ggg][ggg00000] |
| 228 uint32_t c_g = c & 0x07e0; |
| 229 // Shift it into place and extend. Then mask off garbage bits. |
| 230 // [00000000][00000000][gggggggg][xxxx0000] |
| 231 c_g = ((c_g << 5) | (c_g >> 1)) & 0x0000ff00; |
| 232 |
| 233 // Mask out the blue components. |
| 234 // [00000000][00000000][00000000][000bbbbb] |
| 235 uint32_t c_b = c & 0x001f; |
| 236 // Shift it into place and extend. Then mask off garbage bits. |
| 237 // [00000000][bbbbbbbb][xx000000][00000000] |
| 238 c_b = ((c_b << 19) | (c_b << 14)) & 0x00ff0000; |
| 239 |
| 240 size_t zero_offset = !!c0 * 4; |
| 241 size_t nonzero_offset = !!c1 * 4; |
| 242 |
| 243 // Combine the components into non zero base color. |
| 244 // [00000000][bbbbbbbb][gggggggg][rrrrrrrr] |
| 245 *reinterpret_cast<uint32_t*>(color + nonzero_offset) = c_r | c_g | c_b; |
| 246 |
| 247 // We already know that the other base color is zero. |
| 248 *reinterpret_cast<uint32_t*>(color + zero_offset) = 0; |
| 249 |
| 250 color[8 + nonzero_offset + 0] = |
| 251 (color[nonzero_offset + 0] * (2 * 0xaaab)) >> 17; |
| 252 color[8 + nonzero_offset + 1] = |
| 253 (color[nonzero_offset + 1] * (2 * 0xaaab)) >> 17; |
| 254 color[8 + nonzero_offset + 2] = |
| 255 (color[nonzero_offset + 2] * (2 * 0xaaab)) >> 17; |
| 256 |
| 257 color[8 + zero_offset + 0] = (color[nonzero_offset + 0] * 0xaaab) >> 17; |
| 258 color[8 + zero_offset + 1] = (color[nonzero_offset + 1] * 0xaaab) >> 17; |
| 259 color[8 + zero_offset + 2] = (color[nonzero_offset + 2] * 0xaaab) >> 17; |
| 260 } |
| 261 } |
| 262 |
// The color matching function.
//
// Assigns each of the 16 pixels in |block| to one of the four palette entries
// in |color| and returns the 16 2-bit indices packed into 32 bits, with pixel
// 0 in the lowest two bits.
//
// params:
//   block  16 32-bit RGBX colors.
//   color  4 32-bit RGBX palette colors, as produced by EvalColors().
template <typename T>
uint32_t MatchColorsBlock(const uint8_t* block, uint8_t* color) {
  // Direction of the line running from palette entry 1 to palette entry 0.
  const int axis_r = color[0] - color[4];
  const int axis_g = color[1] - color[5];
  const int axis_b = color[2] - color[6];

  // Position of every palette entry along that line.
  int projected[4];
  for (int entry = 0; entry < 4; ++entry) {
    const uint8_t* rgb = color + entry * 4;
    projected[entry] = rgb[0] * axis_r + rgb[1] * axis_g + rgb[2] * axis_b;
  }

  // Think of the colors as arranged on a line; project point onto that line,
  // then choose next color out of available ones. we compute the crossover
  // points for "best color in top half"/"best in bottom half" and then the same
  // inside that subinterval.
  //
  // Relying on this 1d approximation isn't always optimal in terms of euclidean
  // distance, but it's very close and a lot faster.
  // http://cbloomrants.blogspot.com/2008/12/12-08-08-dxtc-summary.html
  const int c0_point = (projected[1] + projected[3]) >> 1;
  const int half_point = (projected[3] + projected[2]) >> 1;
  const int c3_point = (projected[2] + projected[0]) >> 1;

  uint32_t indices = 0;
  const uint8_t* const block_end = block + 16 * 4;
  for (const uint8_t* pixel = block; pixel != block_end; pixel += 4) {
    const int dot = pixel[0] * axis_r + pixel[1] * axis_g + pixel[2] * axis_b;

    int selector = 0;
    if (dot < half_point) {
      selector |= 4;
    }
    if (dot < c0_point) {
      selector |= 2;
    }
    if (dot < c3_point) {
      selector |= 1;
    }

    // Every new index enters at the top two bits; after 16 pixels the first
    // one has been shifted down to bits 0-1.
    indices = (indices >> 2) | T::kRemap[selector];
  }

  return indices;
}
| 303 |
| 304 void GetBaseColors(const uint8_t* block, |
| 305 int v_r, |
| 306 int v_g, |
| 307 int v_b, |
| 308 uint16_t* pmax16, |
| 309 uint16_t* pmin16) { |
| 310 // Pick colors at extreme points. |
| 311 #ifdef VERIFY_RESULTS |
| 312 // Rewritten to match the SIMD implementation, not as efficient. |
| 313 int dots[16]; |
| 314 for (int i = 0; i < 16; ++i) { |
| 315 dots[i] = block[i * 4 + 0] * v_r + block[i * 4 + 1] * v_g + |
| 316 block[i * 4 + 2] * v_b; |
| 317 } |
| 318 int max_dot = dots[0]; |
| 319 int min_dot = dots[0]; |
| 320 for (int i = 1; i < 16; ++i) { |
| 321 if (dots[i] > max_dot) |
| 322 max_dot = dots[i]; |
| 323 if (dots[i] < min_dot) |
| 324 min_dot = dots[i]; |
| 325 } |
| 326 uint32_t max_pixels[16]; |
| 327 uint32_t min_pixels[16]; |
| 328 for (int i = 0; i < 16; ++i) { |
| 329 const uint32_t* p = reinterpret_cast<const uint32_t*>(block) + i; |
| 330 max_pixels[i] = (dots[i] == max_dot) ? *p : 0; |
| 331 min_pixels[i] = (dots[i] == min_dot) ? *p : 0; |
| 332 } |
| 333 uint32_t max_pixel = max_pixels[0]; |
| 334 uint32_t min_pixel = min_pixels[0]; |
| 335 for (int i = 1; i < 16; ++i) { |
| 336 if (max_pixels[i] > max_pixel) { |
| 337 max_pixel = max_pixels[i]; |
| 338 } |
| 339 if (min_pixels[i] > min_pixel) { |
| 340 min_pixel = min_pixels[i]; |
| 341 } |
| 342 } |
| 343 uint8_t* maxp = reinterpret_cast<uint8_t*>(&max_pixel); |
| 344 uint8_t* minp = reinterpret_cast<uint8_t*>(&min_pixel); |
| 345 #else |
| 346 int mind = 0x7fffffff; |
| 347 int maxd = -0x7fffffff; |
| 348 const uint8_t* minp = block; |
| 349 const uint8_t* maxp = block; |
| 350 for (int i = 0; i < 16; ++i) { |
| 351 int dot = block[i * 4 + 0] * v_r + block[i * 4 + 1] * v_g + |
| 352 block[i * 4 + 2] * v_b; |
| 353 |
| 354 if (dot < mind) { |
| 355 mind = dot; |
| 356 minp = block + i * 4; |
| 357 } |
| 358 |
| 359 if (dot > maxd) { |
| 360 maxd = dot; |
| 361 maxp = block + i * 4; |
| 362 } |
| 363 } |
| 364 #endif |
| 365 |
| 366 *pmax16 = As16Bit(maxp[0], maxp[1], maxp[2]); |
| 367 *pmin16 = As16Bit(minp[0], minp[1], minp[2]); |
| 368 } |
| 369 |
| 370 // Figure out the two base colors to use from a block of 16 pixels |
| 371 // by Primary Component Analysis and map along principal axis. |
| 372 // |
| 373 // params: |
| 374 // block 16 32-bit RGBX colors. |
| 375 // pmax16 (output) base color 0 (minimum value), 16-bit RGB |
| 376 // pmin16 (output) base color 1 (maximum value), 16-bit RGB |
| 377 void OptimizeColorsBlock(const uint8_t* block, |
| 378 uint16_t* pmax16, |
| 379 uint16_t* pmin16) { |
| 380 // Determine color distribution. |
| 381 int mu[3]; |
| 382 int min[3]; |
| 383 int max[3]; |
| 384 for (int ch = 0; ch < 3; ++ch) { |
| 385 const uint8_t* bp = block + ch; |
| 386 int muv = bp[0]; |
| 387 int minv = muv; |
| 388 int maxv = muv; |
| 389 for (int i = 4; i < 64; i += 4) { |
| 390 int pixel = bp[i]; |
| 391 muv += pixel; |
| 392 if (pixel < minv) { |
| 393 minv = pixel; |
| 394 } else if (pixel > maxv) { |
| 395 maxv = pixel; |
| 396 } |
| 397 } |
| 398 |
| 399 mu[ch] = (muv + 8) >> 4; |
| 400 min[ch] = minv; |
| 401 max[ch] = maxv; |
| 402 } |
| 403 |
| 404 // Determine covariance matrix. |
| 405 int cov[6] = {0, 0, 0, 0, 0, 0}; |
| 406 for (int i = 0; i < 16; ++i) { |
| 407 int r = block[i * 4 + 0] - mu[0]; |
| 408 int g = block[i * 4 + 1] - mu[1]; |
| 409 int b = block[i * 4 + 2] - mu[2]; |
| 410 |
| 411 cov[0] += r * r; |
| 412 cov[1] += r * g; |
| 413 cov[2] += r * b; |
| 414 cov[3] += g * g; |
| 415 cov[4] += g * b; |
| 416 cov[5] += b * b; |
| 417 } |
| 418 |
| 419 // Convert covariance matrix to float, find principal axis via power iter. |
| 420 float covf[6]; |
| 421 for (int i = 0; i < 6; ++i) { |
| 422 covf[i] = cov[i] / 255.0f; |
| 423 } |
| 424 |
| 425 float vfr = static_cast<float>(max[0] - min[0]); |
| 426 float vfg = static_cast<float>(max[1] - min[1]); |
| 427 float vfb = static_cast<float>(max[2] - min[2]); |
| 428 |
| 429 // Iterate to the power of 4. |
| 430 for (int iter = 0; iter < 4; ++iter) { |
| 431 float r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2]; |
| 432 float g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4]; |
| 433 float b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5]; |
| 434 |
| 435 vfr = r; |
| 436 vfg = g; |
| 437 vfb = b; |
| 438 } |
| 439 |
| 440 double magn = std::abs(vfr); |
| 441 double mag_g = std::abs(vfg); |
| 442 double mag_b = std::abs(vfb); |
| 443 if (mag_g > magn) { |
| 444 magn = mag_g; |
| 445 } |
| 446 if (mag_b > magn) { |
| 447 magn = mag_b; |
| 448 } |
| 449 |
| 450 int v_r = 299; // JPEG YCbCr luma coefs, scaled by 1000. |
| 451 int v_g = 587; |
| 452 int v_b = 114; |
| 453 if (magn >= 4.0f) { // Too small, default to luminance. |
| 454 magn = 512.0 / magn; |
| 455 v_r = static_cast<int>(vfr * magn); |
| 456 v_g = static_cast<int>(vfg * magn); |
| 457 v_b = static_cast<int>(vfb * magn); |
| 458 } |
| 459 |
| 460 GetBaseColors(block, v_r, v_g, v_b, pmax16, pmin16); |
| 461 } |
| 462 |
| 463 // Figure out the two base colors simply using a direction vector between min |
| 464 // and max colors. |
| 465 // |
| 466 // params: |
| 467 // block 16 32-bit RGBX colors. |
| 468 // pmax16 (output) base color 0 (minimum value), 16-bit RGB |
| 469 // pmin16 (output) base color 1 (maximum value), 16-bit RGB |
| 470 void GetApproximateBaseColors(const uint8_t* block, |
| 471 uint16_t* pmax16, |
| 472 uint16_t* pmin16) { |
| 473 uint8_t dir[3]; |
| 474 for (int ch = 0; ch < 3; ++ch) { |
| 475 const uint8_t* bp = block + ch; |
| 476 uint8_t minv = bp[0]; |
| 477 uint8_t maxv = bp[0]; |
| 478 for (int i = 4; i < 64; i += 4) { |
| 479 uint8_t pixel = bp[i]; |
| 480 if (pixel < minv) { |
| 481 minv = pixel; |
| 482 } else if (pixel > maxv) { |
| 483 maxv = pixel; |
| 484 } |
| 485 } |
| 486 |
| 487 dir[ch] = maxv - minv; |
| 488 } |
| 489 |
| 490 GetBaseColors(block, dir[0], dir[1], dir[2], pmax16, pmin16); |
| 491 } |
| 492 |
| 493 // The refinement function. |
| 494 // Tries to optimize colors to suit block contents better. |
| 495 // (By solving a least squares system via normal equations+Cramer's rule) |
| 496 // |
| 497 // params: |
| 498 // block 16 32-bit RGBX colors. |
| 499 // pmax16 (output) base color 0 (minimum value), 16-bit RGB |
| 500 // pmin16 (output) base color 1 (maximum value), 16-bit RGB |
| 501 // mask 16 2-bit color indices. |
| 502 template <typename T> |
| 503 int RefineBlock(const uint8_t* block, |
| 504 uint16_t* pmax16, |
| 505 uint16_t* pmin16, |
| 506 uint32_t mask) { |
| 507 uint16_t min16 = 0; |
| 508 uint16_t max16 = 0; |
| 509 if ((mask ^ (mask << 2)) < 4) { // All pixels have the same index? |
| 510 // Yes, linear system would be singular; solve using optimal |
| 511 // single-color match on average color. |
| 512 int r = 8; |
| 513 int g = 8; |
| 514 int b = 8; |
| 515 for (int i = 0; i < 16; ++i) { |
| 516 r += block[i * 4 + 0]; |
| 517 g += block[i * 4 + 1]; |
| 518 b += block[i * 4 + 2]; |
| 519 } |
| 520 |
| 521 r >>= 4; |
| 522 g >>= 4; |
| 523 b >>= 4; |
| 524 |
| 525 max16 = MatchSingleColorMax<typename T::BASE_TYPE>(r, g, b); |
| 526 min16 = MatchSingleColorMin<typename T::BASE_TYPE>(r, g, b); |
| 527 } else { |
| 528 int at1_r = 0; |
| 529 int at1_g = 0; |
| 530 int at1_b = 0; |
| 531 int at2_r = 0; |
| 532 int at2_g = 0; |
| 533 int at2_b = 0; |
| 534 int akku = 0; |
| 535 uint32_t cm = mask; |
| 536 for (int i = 0; i < 16; ++i, cm >>= 2) { |
| 537 int step = cm & 3; |
| 538 |
| 539 int w1 = T::kW1Table[step]; |
| 540 int r = block[i * 4 + 0]; |
| 541 int g = block[i * 4 + 1]; |
| 542 int b = block[i * 4 + 2]; |
| 543 |
| 544 // Some magic to save a lot of multiplies in the accumulating loop... |
| 545 // (Precomputed products of weights for least squares system, accumulated |
| 546 // inside one 32-bit register.) |
| 547 akku += T::kProds[step]; |
| 548 at1_r += w1 * r; |
| 549 at1_g += w1 * g; |
| 550 at1_b += w1 * b; |
| 551 at2_r += r; |
| 552 at2_g += g; |
| 553 at2_b += b; |
| 554 } |
| 555 |
| 556 at2_r = 3 * at2_r - at1_r; |
| 557 at2_g = 3 * at2_g - at1_g; |
| 558 at2_b = 3 * at2_b - at1_b; |
| 559 |
| 560 // Extract solutions and decide solvability. |
| 561 int xx = akku >> 16; |
| 562 int yy = (akku >> 8) & 0xff; |
| 563 int xy = (akku >> 0) & 0xff; |
| 564 |
| 565 float frb = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy); |
| 566 float fg = frb * 63.0f / 31.0f; |
| 567 |
| 568 // Solve. |
| 569 max16 = sclamp((at1_r * yy - at2_r * xy) * frb + 0.5f, 0, 31) << 11; |
| 570 max16 |= sclamp((at1_g * yy - at2_g * xy) * fg + 0.5f, 0, 63) << 5; |
| 571 max16 |= sclamp((at1_b * yy - at2_b * xy) * frb + 0.5f, 0, 31) << 0; |
| 572 |
| 573 min16 = sclamp((at2_r * xx - at1_r * xy) * frb + 0.5f, 0, 31) << 11; |
| 574 min16 |= sclamp((at2_g * xx - at1_g * xy) * fg + 0.5f, 0, 63) << 5; |
| 575 min16 |= sclamp((at2_b * xx - at1_b * xy) * frb + 0.5f, 0, 31) << 0; |
| 576 } |
| 577 |
| 578 uint16_t oldMin = *pmin16; |
| 579 uint16_t oldMax = *pmax16; |
| 580 *pmin16 = min16; |
| 581 *pmax16 = max16; |
| 582 return oldMin != min16 || oldMax != max16; |
| 583 } |
| 584 |
| 585 // Color block compression. |
| 586 template <typename T, Quality QUALITY> |
| 587 void CompressColorBlock(uint8_t* dst, const uint8_t* block) { |
| 588 // Check if block is constant. |
| 589 int i = 1; |
| 590 uint32_t first_pixel = |
| 591 reinterpret_cast<const uint32_t*>(block)[0] & 0x00ffffff; |
| 592 for (; i < 16; ++i) { |
| 593 if ((reinterpret_cast<const uint32_t*>(block)[i] & 0x00ffffff) != |
| 594 first_pixel) { |
| 595 break; |
| 596 } |
| 597 } |
| 598 |
| 599 uint32_t mask = 0; |
| 600 uint16_t max16 = 0; |
| 601 uint16_t min16 = 0; |
| 602 if (i == 16) { // Constant color |
| 603 int r = block[0]; |
| 604 int g = block[1]; |
| 605 int b = block[2]; |
| 606 max16 = MatchSingleColorMax<typename T::BASE_TYPE>(r, g, b); |
| 607 min16 = MatchSingleColorMin<typename T::BASE_TYPE>(r, g, b); |
| 608 mask = T::kConstantColorIndices; |
| 609 } else { |
| 610 if (QUALITY == kQualityLow) { |
| 611 GetApproximateBaseColors(block, &max16, &min16); |
| 612 } else { |
| 613 // Do Primary Component Analysis and map along principal axis. |
| 614 OptimizeColorsBlock(block, &max16, &min16); |
| 615 } |
| 616 |
| 617 if (max16 != min16) { |
| 618 uint8_t color[4 * 4]; |
| 619 EvalColors(color, max16, min16); |
| 620 mask = MatchColorsBlock<T>(block, color); |
| 621 } |
| 622 |
| 623 if (QUALITY == kQualityHigh) { |
| 624 // Refine (multiple times if requested). |
| 625 for (int i = 0; i < kNumRefinements; ++i) { |
| 626 uint32_t lastmask = mask; |
| 627 |
| 628 if (RefineBlock<T>(block, &max16, &min16, mask)) { |
| 629 if (max16 != min16) { |
| 630 uint8_t color[4 * 4]; |
| 631 EvalColors(color, max16, min16); |
| 632 mask = MatchColorsBlock<T>(block, color); |
| 633 } else { |
| 634 mask = 0; |
| 635 break; |
| 636 } |
| 637 } |
| 638 |
| 639 if (mask == lastmask) { |
| 640 break; |
| 641 } |
| 642 } |
| 643 } |
| 644 } |
| 645 |
| 646 #if defined(OS_ANDROID) |
| 647 // Swapping r and b channels to match Skia. |
| 648 // See skia/config/SkUserConfig.h |
| 649 max16 = RGB2BGR(max16); |
| 650 min16 = RGB2BGR(min16); |
| 651 #endif |
| 652 |
| 653 FormatFixup_Generic<typename T::BASE_TYPE>(&max16, &min16, &mask); |
| 654 |
| 655 uint32_t* dst32 = reinterpret_cast<uint32_t*>(dst); |
| 656 dst32[0] = max16 | (min16 << 16); |
| 657 dst32[1] = mask; |
| 658 } |
| 659 |
// Alpha block compression.
//
// Encodes the alpha channel of one 4x4 block into 8 bytes: the largest alpha,
// the smallest alpha, then 16 3-bit indices packed least significant bit
// first into the remaining 6 bytes.
//
// params:
//   dst  (output) 8 bytes.
//   src  16 32-bit pixels; alpha is read from the fourth byte of each.
void CompressAlphaBlock(uint8_t* dst, const uint8_t* src) {
  const uint8_t* alpha = src + 3;

  // Find min/max alpha.
  int lowest = alpha[0];
  int highest = lowest;
  for (int px = 1; px < 16; ++px) {
    const int value = alpha[px * 4];
    if (value < lowest) {
      lowest = value;
    } else if (value > highest) {
      highest = value;
    }
  }

  // Encode them.
  dst[0] = static_cast<uint8_t>(highest);
  dst[1] = static_cast<uint8_t>(lowest);
  uint8_t* index_bytes = dst + 2;

  if (highest == lowest) {
    // Flat alpha: every pixel uses index 0.
    memset(index_bytes, 0, 6);
    return;
  }

  // Determine bias and emit color indices.
  // Given the choice of mx/mn, these indices are optimal:
  // http://fgiesen.wordpress.com/2009/12/15/dxt5-alpha-block-index-determination/
  const int dist = highest - lowest;
  const int dist2 = dist * 2;
  const int dist4 = dist * 4;
  int bias = (dist < 8) ? (dist - 1) : (dist / 2 + 2);
  bias -= lowest * 7;

  uint64_t packed = 0;
  for (int px = 0; px < 16; ++px) {
    int a = alpha[px * 4] * 7 + bias;

    // Select index. This is a "linear scale" lerp factor between 0 (val=min)
    // and 7 (val=max), found one bit at a time.
    int level = 0;
    if (a >= dist4) {
      level = 4;
      a -= dist4;
    }
    if (a >= dist2) {
      level += 2;
      a -= dist2;
    }
    if (a >= dist) {
      level += 1;
    }

    // Turn linear scale into DXT index (0/1 are extremal pts).
    int index = (8 - level) & 7;
    if (index < 2) {
      index ^= 1;
    }

    packed |= static_cast<uint64_t>(index) << (px * 3);
  }

  // Write the 48 index bits, lowest byte first.
  for (int k = 0; k < 6; ++k) {
    index_bytes[k] = static_cast<uint8_t>(packed >> (k * 8));
  }
}
| 722 |
// Copies a 4x4 pixel block (4 bytes per pixel) out of an image into 64
// contiguous bytes.
//
// params:
//   dst    (output) 64 bytes, four rows of 16 bytes.
//   src    first pixel of the block inside the image.
//   width  image width in pixels; consecutive rows are width * 4 bytes apart.
static void ExtractBlock(uint8_t* dst, const uint8_t* src, int width) {
  const int block_row_bytes = 4 * 4;
  const int image_row_bytes = width * 4;

  uint8_t* out_row = dst;
  const uint8_t* in_row = src;
  for (int row = 0; row < 4; ++row) {
    memcpy(out_row, in_row, block_row_bytes);
    out_row += block_row_bytes;
    in_row += image_row_bytes;
  }
}
| 729 |
| 730 template <typename T, bool OPAQUE, Quality QUALITY> |
| 731 void CompressImage(const uint8_t* src, uint8_t* dst, int width, int height) { |
| 732 for (int y = 0; y < height; y += 4, src += width * 4 * 4) { |
| 733 for (int x = 0; x < width; x += 4) { |
| 734 uint8_t block[64]; |
| 735 ExtractBlock(block, src + x * 4, width); |
| 736 |
| 737 if (!OPAQUE) { |
| 738 CompressAlphaBlock(dst, block); |
| 739 dst += 8; |
| 740 } |
| 741 |
| 742 CompressColorBlock<T, QUALITY>(dst, block); |
| 743 dst += 8; |
| 744 } |
| 745 } |
| 746 } |
| 747 |
| 748 } // namespace |
| 749 |
| 750 void Init_ATC_DXT() { |
| 751 uint8_t expand5[32]; |
| 752 for (int i = 0; i < 32; ++i) { |
| 753 expand5[i] = Expand5(i); |
| 754 } |
| 755 |
| 756 uint8_t expand6[64]; |
| 757 for (int i = 0; i < 64; ++i) { |
| 758 expand6[i] = Expand6(i); |
| 759 } |
| 760 |
| 761 PrepareOptTable(&g_o_match55[0][0], expand5, expand5, 32, 32); |
| 762 PrepareOptTable(&g_o_match66[0][0], expand6, expand6, 64, 64); |
| 763 PrepareOptTable(&g_o_match56[0][0], expand5, expand6, 32, 64); |
| 764 } |
| 765 |
// Compresses |src| (4 bytes per pixel, |width| x |height| pixels) into |dst|
// using the ATC tables at high quality. OPAQUE is true, so no alpha block is
// emitted and each 4x4 block produces 8 bytes.
void CompressATC_Generic(const uint8_t* src,
                         uint8_t* dst,
                         int width,
                         int height) {
  CompressImage<TYPE_ATC_GENERIC, true, kQualityHigh>(src, dst, width, height);
}
| 772 |
// Compresses |src| (4 bytes per pixel, |width| x |height| pixels) into |dst|
// using the ATC tables at high quality. OPAQUE is false, so each 4x4 block
// produces an 8-byte alpha block followed by an 8-byte color block.
void CompressATCIA_Generic(const uint8_t* src,
                           uint8_t* dst,
                           int width,
                           int height) {
  CompressImage<TYPE_ATC_GENERIC, false, kQualityHigh>(src, dst, width, height);
}
| 779 |
// Compresses |src| (4 bytes per pixel, |width| x |height| pixels) into |dst|
// using the DXT tables at high quality. OPAQUE is true, so no alpha block is
// emitted and each 4x4 block produces 8 bytes.
void CompressDXT1_Generic(const uint8_t* src,
                          uint8_t* dst,
                          int width,
                          int height) {
  CompressImage<TYPE_DXT_GENERIC, true, kQualityHigh>(src, dst, width, height);
}
| 786 |
// Compresses |src| (4 bytes per pixel, |width| x |height| pixels) into |dst|
// using the DXT tables at high quality. OPAQUE is false, so each 4x4 block
// produces an 8-byte alpha block followed by an 8-byte color block.
void CompressDXT5_Generic(const uint8_t* src,
                          uint8_t* dst,
                          int width,
                          int height) {
  CompressImage<TYPE_DXT_GENERIC, false, kQualityHigh>(src, dst, width, height);
}
| 793 |
| 794 } // namespace texture_compress |
| 795 } // namespace cc |
OLD | NEW |