Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "texture_compressor_etc1_sse.h" | |
| 6 | |
| 7 #include <assert.h> | |
| 8 #include <smmintrin.h> | |
| 9 #include <stdio.h> | |
| 10 #include <stdlib.h> | |
| 11 #include <string.h> | |
| 12 #include <time.h> | |
| 13 #include <unistd.h> | |
| 14 | |
| 15 #include <cmath> | |
| 16 #include <limits> | |
| 17 #include <sstream> | |
| 18 | |
| 19 #include "base/compiler_specific.h" | |
| 20 #include "base/logging.h" | |
| 21 | |
| 22 // Defining the following macro will cause the error metric function to weigh | |
| 23 // each color channel differently depending on how the human eye can perceive | |
| 24 // them. This can give a slight improvement in image quality at the cost of a | |
| 25 // performance hit. | |
| 26 // #define USE_PERCEIVED_ERROR_METRIC | |
| 27 | |
| 28 namespace { | |
| 29 | |
| 30 template <typename T> | |
| 31 inline T clamp(T val, T min, T max) { | |
| 32 return val < min ? min : (val > max ? max : val); | |
| 33 } | |
| 34 | |
| 35 inline uint8_t round_to_5_bits(float val) { | |
| 36 return clamp<uint8_t>(val * 31.0f / 255.0f + 0.5f, 0, 31); | |
| 37 } | |
| 38 | |
| 39 inline uint8_t round_to_4_bits(float val) { | |
| 40 return clamp<uint8_t>(val * 15.0f / 255.0f + 0.5f, 0, 15); | |
| 41 } | |
| 42 | |
| 43 union Color { | |
| 44 struct BgraColorType { | |
| 45 uint8_t b; | |
| 46 uint8_t g; | |
| 47 uint8_t r; | |
| 48 uint8_t a; | |
| 49 } channels; | |
| 50 uint8_t components[4]; | |
| 51 uint32_t bits; | |
| 52 }; | |
| 53 | |
| 54 /* | |
| 55 * Codeword tables. | |
| 56 * See: Table 3.17.2 | |
| 57 */ | |
| 58 static const int16_t g_codeword_tables[8][4] | |
| 59 __attribute__((aligned(16))) = {{-8, -2, 2, 8}, | |
| 60 {-17, -5, 5, 17}, | |
| 61 {-29, -9, 9, 29}, | |
| 62 {-42, -13, 13, 42}, | |
| 63 {-60, -18, 18, 60}, | |
| 64 {-80, -24, 24, 80}, | |
| 65 {-106, -33, 33, 106}, | |
| 66 {-183, -47, 47, 183}}; | |
| 67 | |
| 68 /* | |
| 69 * Maps modifier indices to pixel index values. | |
| 70 * See: Table 3.17.3 | |
| 71 */ | |
| 72 static const uint8_t g_mod_to_pix[4] = {3, 2, 0, 1}; | |
| 73 | |
| 74 /* | |
| 75 * The ETC1 specification indexes texels as follows: | |
| 76 * | |
| 77 * [a][e][i][m] [ 0][ 4][ 8][12] | |
| 78 * [b][f][j][n] <-> [ 1][ 5][ 9][13] | |
| 79 * [c][g][k][o] [ 2][ 6][10][14] | |
| 80 * [d][h][l][p] [ 3][ 7][11][15] | |
| 81 * | |
| 82 * However, when extracting sub blocks from BGRA data the natural array | |
| 83 * indexing order ends up different: | |
| 84 * | |
| 85 * vertical0: [a][e][b][f] horizontal0: [a][e][i][m] | |
| 86 * [c][g][d][h] [b][f][j][n] | |
| 87 * vertical1: [i][m][j][n] horizontal1: [c][g][k][o] | |
| 88 * [k][o][l][p] [d][h][l][p] | |
| 89 * | |
| 90 * In order to translate from the natural array indices in a sub block to the | |
| 91 * indices (numbers) used by the specification and hardware, we use this table. | |
| 92 */ | |
| 93 static const uint8_t g_idx_to_num[4][8] = { | |
| 94 {0, 4, 1, 5, 2, 6, 3, 7}, // Vertical block 0. | |
| 95 {8, 12, 9, 13, 10, 14, 11, 15}, // Vertical block 1. | |
| 96 {0, 4, 8, 12, 1, 5, 9, 13}, // Horizontal block 0. | |
| 97 {2, 6, 10, 14, 3, 7, 11, 15} // Horizontal block 1. | |
| 98 }; | |
| 99 | |
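To make the index mapping concrete, here is a minimal stand-alone sketch (illustrative only, not part of this CL) of how a 2-bit pixel index for each texel of a sub block is packed into the 32-bit pixel-data word, mirroring the msb/lsb packing used later in ComputeLuminanceSSE and CompressSolid:

```cpp
#include <cstdint>
#include <cstdio>

// Same table as above: natural sub-block order -> texel number from the spec.
static const uint8_t g_idx_to_num_copy[4][8] = {
    {0, 4, 1, 5, 2, 6, 3, 7},        // Vertical block 0.
    {8, 12, 9, 13, 10, 14, 11, 15},  // Vertical block 1.
    {0, 4, 8, 12, 1, 5, 9, 13},      // Horizontal block 0.
    {2, 6, 10, 14, 3, 7, 11, 15}     // Horizontal block 1.
};

int main() {
  uint32_t pix_data = 0;
  // Suppose every texel of vertical sub block 0 uses pixel index 2 (binary 10).
  const uint8_t pix_idx = 2;
  const uint32_t lsb = pix_idx & 0x1;
  const uint32_t msb = pix_idx >> 1;
  for (int i = 0; i < 8; ++i) {
    const int texel_num = g_idx_to_num_copy[0][i];  // Natural index -> texel number.
    pix_data |= msb << (texel_num + 16);  // MSBs occupy the upper 16 bits.
    pix_data |= lsb << texel_num;         // LSBs occupy the lower 16 bits.
  }
  printf("pix_data = 0x%08x\n", pix_data);  // Prints 0x00ff0000.
  return 0;
}
```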
| 100 inline void WriteColors444(uint8_t* block, | |
| 101 const Color& color0, | |
| 102 const Color& color1) { | |
| 103 /* 0, 1, 2 - for ARM */ | |
adrian.belgun (2015/04/17 14:02:58): Please check image channel order for input. This …
| 104 block[2] = (color0.channels.r & 0xf0) | (color1.channels.r >> 4); | |
| 105 block[1] = (color0.channels.g & 0xf0) | (color1.channels.g >> 4); | |
| 106 block[0] = (color0.channels.b & 0xf0) | (color1.channels.b >> 4); | |
| 107 } | |
| 108 | |
| 109 inline void WriteColors555(uint8_t* block, | |
| 110 const Color& color0, | |
| 111 const Color& color1) { | |
| 112 // Table for conversion to 3-bit two's complement format. | |
| 113 static const uint8_t two_compl_trans_table[8] = { | |
| 114 4, // -4 (100b) | |
| 115 5, // -3 (101b) | |
| 116 6, // -2 (110b) | |
| 117 7, // -1 (111b) | |
| 118 0, // 0 (000b) | |
| 119 1, // 1 (001b) | |
| 120 2, // 2 (010b) | |
| 121 3, // 3 (011b) | |
| 122 }; | |
| 123 | |
| 124 int16_t delta_r = | |
| 125 static_cast<int16_t>(color1.channels.r >> 3) - (color0.channels.r >> 3); | |
| 126 int16_t delta_g = | |
| 127 static_cast<int16_t>(color1.channels.g >> 3) - (color0.channels.g >> 3); | |
| 128 int16_t delta_b = | |
| 129 static_cast<int16_t>(color1.channels.b >> 3) - (color0.channels.b >> 3); | |
| 130 DCHECK(delta_r >= -4 && delta_r <= 3); | |
| 131 DCHECK(delta_g >= -4 && delta_g <= 3); | |
| 132 DCHECK(delta_b >= -4 && delta_b <= 3); | |
| 133 | |
| 134 /* 0, 1, 2 - for ARM */ | |
adrian.belgun (2015/04/17 14:02:58): Same comments as for :103.
| 135 block[2] = (color0.channels.r & 0xf8) | two_compl_trans_table[delta_r + 4]; | |
| 136 block[1] = (color0.channels.g & 0xf8) | two_compl_trans_table[delta_g + 4]; | |
| 137 block[0] = (color0.channels.b & 0xf8) | two_compl_trans_table[delta_b + 4]; | |
| 138 } | |
| 139 | |
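As a worked example of the differential encoding above (an illustrative sketch, not part of this CL): if the red channels of the two quantized base colors are 0xA8 and 0x98, the 5-bit values are 21 and 19, the delta is -2, and the stored 3-bit code is 110b (6):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // Same lookup as WriteColors555: delta in [-4, 3] -> 3-bit two's complement.
  static const uint8_t two_compl_trans_table[8] = {4, 5, 6, 7, 0, 1, 2, 3};

  const uint8_t r0 = 0xA8;  // Red channel of base color 0 (already 555-expanded).
  const uint8_t r1 = 0x98;  // Red channel of base color 1.
  const int delta = (r1 >> 3) - (r0 >> 3);  // 19 - 21 = -2; must be in [-4, 3].
  const uint8_t encoded = (r0 & 0xf8) | two_compl_trans_table[delta + 4];
  printf("delta = %d, encoded byte = 0x%02x\n", delta, encoded);  // -2, 0xae
  return 0;
}
```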
| 140 inline void WriteCodewordTable(uint8_t* block, | |
| 141 uint8_t sub_block_id, | |
| 142 uint8_t table) { | |
| 143 DCHECK_LT(sub_block_id, 2); | |
| 144 DCHECK_LT(table, 8); | |
| 145 | |
| 146 uint8_t shift = (2 + (3 - sub_block_id * 3)); | |
| 147 block[3] &= ~(0x07 << shift); | |
| 148 block[3] |= table << shift; | |
| 149 } | |
| 150 | |
| 151 inline void WritePixelData(uint8_t* block, uint32_t pixel_data) { | |
| 152 block[4] |= pixel_data >> 24; | |
| 153 block[5] |= (pixel_data >> 16) & 0xff; | |
| 154 block[6] |= (pixel_data >> 8) & 0xff; | |
| 155 block[7] |= pixel_data & 0xff; | |
| 156 } | |
| 157 | |
| 158 inline void WriteFlip(uint8_t* block, bool flip) { | |
| 159 block[3] &= ~0x01; | |
| 160 block[3] |= static_cast<uint8_t>(flip); | |
| 161 } | |
| 162 | |
| 163 inline void WriteDiff(uint8_t* block, bool diff) { | |
| 164 block[3] &= ~0x02; | |
| 165 block[3] |= static_cast<uint8_t>(diff) << 1; | |
| 166 } | |
| 167 | |
| 168 /** | |
| 169 * Compresses and rounds BGR888 into BGR444. The resulting BGR444 color is | |
| 170 * expanded to BGR888 as it would be in hardware after decompression. The | |
| 171 * actual BGR444 data is available in the four most significant bits of each | |
| 172 * channel. | |
| 173 */ | |
| 174 inline Color MakeColor444(const float* bgr) { | |
| 175 uint8_t b4 = round_to_4_bits(bgr[0]); | |
| 176 uint8_t g4 = round_to_4_bits(bgr[1]); | |
| 177 uint8_t r4 = round_to_4_bits(bgr[2]); | |
| 178 Color bgr444; | |
| 179 bgr444.channels.b = (b4 << 4) | b4; | |
| 180 bgr444.channels.g = (g4 << 4) | g4; | |
| 181 bgr444.channels.r = (r4 << 4) | r4; | |
| 182 bgr444.channels.a = 0x44; /* added by Radu */ | |
| 183 return bgr444; | |
| 184 } | |
| 185 | |
| 186 /** | |
| 187 * Compresses and rounds BGR888 into BGR555. The resulting BGR555 color is | |
| 188 * expanded to BGR888 as it would be in hardware after decompression. The | |
| 189 * actual BGR555 data is available in the five most significant bits of each | |
| 190 * channel. | |
| 191 */ | |
| 192 inline Color MakeColor555(const float* bgr) { | |
| 193 uint8_t b5 = round_to_5_bits(bgr[0]); | |
| 194 uint8_t g5 = round_to_5_bits(bgr[1]); | |
| 195 uint8_t r5 = round_to_5_bits(bgr[2]); | |
| 196 Color bgr555; | |
| 197 bgr555.channels.b = (b5 << 3) | (b5 >> 2); | |
| 198 bgr555.channels.g = (g5 << 3) | (g5 >> 2); | |
| 199 bgr555.channels.r = (r5 << 3) | (r5 >> 2); | |
| 200 bgr555.channels.a = 0x55; /* added by Radu */ | |
| 201 return bgr555; | |
| 202 } | |
| 203 | |
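As a quick numeric check of the quantize-and-expand step above (illustrative sketch, not part of this CL): an 8-bit value of 200 rounds to the 5-bit value 24, which expands back to 198:

```cpp
#include <cstdint>
#include <cstdio>

// Mirrors round_to_5_bits plus the (x << 3) | (x >> 2) expansion in MakeColor555.
int main() {
  const float v = 200.0f;
  const uint8_t q5 = static_cast<uint8_t>(v * 31.0f / 255.0f + 0.5f);  // 24
  const uint8_t expanded = (q5 << 3) | (q5 >> 2);                      // 198
  printf("q5 = %d, expanded = %d\n", q5, expanded);
  return 0;
}
```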
| 204 /** | |
| 205 * Constructs a color from a given base color and luminance value. | |
| 206 */ | |
| 207 inline Color MakeColor(const Color& base, int16_t lum) { | |
| 208 int b = static_cast<int>(base.channels.b) + lum; | |
| 209 int g = static_cast<int>(base.channels.g) + lum; | |
| 210 int r = static_cast<int>(base.channels.r) + lum; | |
| 211 Color color; | |
| 212 color.channels.b = static_cast<uint8_t>(clamp(b, 0, 255)); | |
| 213 color.channels.g = static_cast<uint8_t>(clamp(g, 0, 255)); | |
| 214 color.channels.r = static_cast<uint8_t>(clamp(r, 0, 255)); | |
| 215 return color; | |
| 216 } | |
| 217 | |
| 218 /** | |
| 219 * Calculates the error metric for two colors. A small error signals that the | |
| 220 * colors are similar to each other; a large error signals the opposite. | |
| 221 */ | |
| 222 inline uint32_t GetColorError(const Color& u, const Color& v) { | |
| 223 #ifdef USE_PERCEIVED_ERROR_METRIC | |
| 224 float delta_b = static_cast<float>(u.channels.b) - v.channels.b; | |
| 225 float delta_g = static_cast<float>(u.channels.g) - v.channels.g; | |
| 226 float delta_r = static_cast<float>(u.channels.r) - v.channels.r; | |
| 227 return static_cast<uint32_t>(0.299f * delta_b * delta_b + | |
| 228 0.587f * delta_g * delta_g + | |
| 229 0.114f * delta_r * delta_r); | |
| 230 #else | |
| 231 int delta_b = static_cast<int>(u.channels.b) - v.channels.b; | |
| 232 int delta_g = static_cast<int>(u.channels.g) - v.channels.g; | |
| 233 int delta_r = static_cast<int>(u.channels.r) - v.channels.r; | |
| 234 return delta_b * delta_b + delta_g * delta_g + delta_r * delta_r; | |
| 235 #endif | |
| 236 } | |
| 237 | |
| 238 /**************************************** START OF SSE CODE | |
adrian.belgun (2015/04/17 14:02:58): Use only one line here. Reduce number of stars.
| 239 * ***************************************/ | |
| 240 | |
| 241 struct __sse_data { | |
| 242 /* raw data */ | |
| 243 uint8_t* block; | |
| 244 /* 8 bit packed values */ | |
| 245 __m128i* packed; | |
| 246 /* 32 bit zero extended values - 4x4 arrays */ | |
| 247 __m128i* blue; | |
| 248 __m128i* green; | |
| 249 __m128i* red; | |
| 250 // __m128i *alpha; | |
| 251 }; | |
| 252 | |
| 253 /* commonly used registers */ | |
| 254 static const __m128i __sse_zero = _mm_set1_epi32(0); | |
| 255 static const __m128i __sse_max_int = _mm_set1_epi32(0x7FFFFFFF); | |
| 256 | |
| 257 inline __m128i AddAndClamp(const __m128i x, const __m128i y) { | |
| 258 static const __m128i color_max = _mm_set1_epi32(0xFF); | |
| 259 return _mm_max_epi32(__sse_zero, | |
| 260 _mm_min_epi32(_mm_add_epi32(x, y), color_max)); | |
| 261 } | |
| 262 | |
| 263 inline __m128i GetColorErrorSSE(const __m128i x, const __m128i y) { | |
| 264 __m128i ret = _mm_sub_epi32(x, y); | |
| 265 return _mm_mullo_epi32(ret, ret); | |
| 266 } | |
| 267 | |
| 268 inline __m128i AddChannelError(const __m128i x, | |
| 269 const __m128i y, | |
| 270 const __m128i z) { | |
| 271 return _mm_add_epi32(x, _mm_add_epi32(y, z)); | |
| 272 } | |
| 273 /* | |
| 274 inline void ShuffleImm(__m128i *src, __m128i *dest, int size, uint8_t notimm) { | |
| 275 switch(notimm) { | |
| 276 case 0x1B: | |
| 277 for (int i = 0; i < size; i++) { | |
adrian.belgun (2015/04/17 14:02:58): Braces are optional for single-statement loops. Co…
| 278 dest[i] = _mm_shuffle_epi32(src[i], 0x1B); | |
| 279 } | |
| 280 break; | |
| 281 case 0x4E: | |
| 282 for (int i = 0; i < size; i++) { | |
| 283 dest[i] = _mm_shuffle_epi32(src[i], 0x4E); | |
| 284 } | |
| 285 break; | |
| 286 case 0xB1: | |
| 287 for (int i = 0; i < size; i++) { | |
| 288 dest[i] = _mm_shuffle_epi32(src[i], 0xB1); | |
| 289 } | |
| 290 break; | |
| 291 case 0xE4: | |
| 292 for (int i = 0; i < size; i++) { | |
| 293 dest[i] = _mm_shuffle_epi32(src[i], 0xE4); | |
| 294 } | |
| 295 break; | |
| 296 default: | |
| 297 for (int i = 0; i < size; i++) { | |
| 298 dest[i] = src[i]; | |
| 299 } | |
| 300 }; | |
| 301 } | |
| 302 */ | |
| 303 inline uint32_t GetVerticalError(const __sse_data* data, | |
| 304 const __m128i* blue_avg, | |
| 305 const __m128i* green_avg, | |
| 306 const __m128i* red_avg) { | |
| 307 __m128i error = __sse_zero; | |
| 308 | |
| 309 #pragma unroll | |
| 310 for (int i = 0; i < 4; i++) { | |
| 311 error = _mm_add_epi32(error, GetColorErrorSSE(data->blue[i], blue_avg[0])); | |
| 312 error = | |
| 313 _mm_add_epi32(error, GetColorErrorSSE(data->green[i], green_avg[0])); | |
| 314 error = _mm_add_epi32(error, GetColorErrorSSE(data->red[i], red_avg[0])); | |
| 315 } | |
| 316 | |
| 317 error = _mm_add_epi32(error, _mm_shuffle_epi32(error, 0x4E)); | |
| 318 error = _mm_add_epi32(error, _mm_shuffle_epi32(error, 0xB1)); | |
| 319 | |
| 320 return _mm_cvtsi128_si32(error); | |
| 321 } | |
| 322 | |
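The final pair of shuffles is a standard horizontal-sum idiom: shuffling with 0x4E swaps the two 64-bit halves and shuffling with 0xB1 swaps adjacent 32-bit lanes, so after two add/shuffle steps every lane holds the total. A minimal stand-alone sketch of the idiom (illustrative only; compile with SSE4.1 enabled, as the file under review requires, though only SSE2 intrinsics appear below):

```cpp
#include <smmintrin.h>  // SSE4.1 header, as in this file; only SSE2 intrinsics used.
#include <cstdio>

// Horizontal sum of four 32-bit lanes, as in GetVerticalError/GetHorizontalError:
// 0x4E swaps the two 64-bit halves, 0xB1 swaps adjacent 32-bit lanes.
static inline int HorizontalSum(__m128i v) {
  v = _mm_add_epi32(v, _mm_shuffle_epi32(v, 0x4E));
  v = _mm_add_epi32(v, _mm_shuffle_epi32(v, 0xB1));
  return _mm_cvtsi128_si32(v);
}

int main() {
  const __m128i v = _mm_set_epi32(4, 3, 2, 1);
  printf("%d\n", HorizontalSum(v));  // Prints 10.
  return 0;
}
```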
| 323 inline uint32_t GetHorizontalError(const __sse_data* data, | |
| 324 const __m128i* blue_avg, | |
| 325 const __m128i* green_avg, | |
| 326 const __m128i* red_avg) { | |
| 327 __m128i error = __sse_zero; | |
| 328 int first_index, second_index; | |
| 329 | |
| 330 #pragma unroll | |
| 331 for (int i = 0; i < 2; i++) { | |
| 332 first_index = 2 * i; | |
| 333 second_index = first_index + 1; | |
| 334 | |
| 335 error = _mm_add_epi32( | |
| 336 error, GetColorErrorSSE(data->blue[first_index], blue_avg[i])); | |
| 337 error = _mm_add_epi32( | |
| 338 error, GetColorErrorSSE(data->blue[second_index], blue_avg[i])); | |
| 339 error = _mm_add_epi32( | |
| 340 error, GetColorErrorSSE(data->green[first_index], green_avg[i])); | |
| 341 error = _mm_add_epi32( | |
| 342 error, GetColorErrorSSE(data->green[second_index], green_avg[i])); | |
| 343 error = _mm_add_epi32(error, | |
| 344 GetColorErrorSSE(data->red[first_index], red_avg[i])); | |
| 345 error = _mm_add_epi32( | |
| 346 error, GetColorErrorSSE(data->red[second_index], red_avg[i])); | |
| 347 } | |
| 348 | |
| 349 error = _mm_add_epi32(error, _mm_shuffle_epi32(error, 0x4E)); | |
| 350 error = _mm_add_epi32(error, _mm_shuffle_epi32(error, 0xB1)); | |
| 351 return _mm_cvtsi128_si32(error); | |
| 352 } | |
| 353 | |
| 354 inline void GetAvgColors(const __sse_data* data, | |
| 355 float* output, | |
| 356 bool* __sse_use_diff) { | |
| 357 __m128i sum[2], tmp; | |
| 358 | |
| 359 // TODO(radu.velea): _mm_avg_epu8 on packed data maybe | |
| 360 | |
| 361 /* get avg red */ | |
| 362 /* [S0 S0 S1 S1] */ | |
| 363 sum[0] = _mm_add_epi32(data->red[0], data->red[1]); | |
| 364 sum[0] = _mm_add_epi32(sum[0], _mm_shuffle_epi32(sum[0], 0xB1)); | |
| 365 | |
| 366 /* [S2 S2 S3 S3] */ | |
| 367 sum[1] = _mm_add_epi32(data->red[2], data->red[3]); | |
| 368 sum[1] = _mm_add_epi32(sum[1], _mm_shuffle_epi32(sum[1], 0xB1)); | |
| 369 | |
| 370 float hred[2], vred[2]; | |
| 371 hred[0] = (_mm_cvtsi128_si32( | |
| 372 _mm_add_epi32(sum[0], _mm_shuffle_epi32(sum[0], 0x4E)))) / | |
| 373 8.0f; | |
| 374 hred[1] = (_mm_cvtsi128_si32( | |
| 375 _mm_add_epi32(sum[1], _mm_shuffle_epi32(sum[1], 0x4E)))) / | |
| 376 8.0f; | |
| 377 | |
| 378 tmp = _mm_add_epi32(sum[0], sum[1]); | |
| 379 vred[0] = (_mm_cvtsi128_si32(tmp)) / 8.0f; | |
| 380 vred[1] = (_mm_extract_epi32(tmp, 2)) / 8.0f; | |
| 381 | |
| 382 /* get avg green */ | |
| 383 /* [S0 S0 S1 S1] */ | |
| 384 sum[0] = _mm_add_epi32(data->green[0], data->green[1]); | |
| 385 sum[0] = _mm_add_epi32(sum[0], _mm_shuffle_epi32(sum[0], 0xB1)); | |
| 386 | |
| 387 /* [S2 S2 S3 S3] */ | |
| 388 sum[1] = _mm_add_epi32(data->green[2], data->green[3]); | |
| 389 sum[1] = _mm_add_epi32(sum[1], _mm_shuffle_epi32(sum[1], 0xB1)); | |
| 390 | |
| 391 float hgreen[2], vgreen[2]; | |
| 392 hgreen[0] = (_mm_cvtsi128_si32( | |
| 393 _mm_add_epi32(sum[0], _mm_shuffle_epi32(sum[0], 0x4E)))) / | |
| 394 8.0f; | |
| 395 hgreen[1] = (_mm_cvtsi128_si32( | |
| 396 _mm_add_epi32(sum[1], _mm_shuffle_epi32(sum[1], 0x4E)))) / | |
| 397 8.0f; | |
| 398 | |
| 399 tmp = _mm_add_epi32(sum[0], sum[1]); | |
| 400 vgreen[0] = (_mm_cvtsi128_si32(tmp)) / 8.0f; | |
| 401 vgreen[1] = (_mm_extract_epi32(tmp, 2)) / 8.0f; | |
| 402 | |
| 403 /* get avg blue */ | |
| 404 /* [S0 S0 S1 S1] */ | |
| 405 sum[0] = _mm_add_epi32(data->blue[0], data->blue[1]); | |
| 406 sum[0] = _mm_add_epi32(sum[0], _mm_shuffle_epi32(sum[0], 0xB1)); | |
| 407 | |
| 408 /* [S2 S2 S3 S3] */ | |
| 409 sum[1] = _mm_add_epi32(data->blue[2], data->blue[3]); | |
| 410 sum[1] = _mm_add_epi32(sum[1], _mm_shuffle_epi32(sum[1], 0xB1)); | |
| 411 | |
| 412 float hblue[2], vblue[2]; | |
| 413 hblue[0] = (_mm_cvtsi128_si32( | |
| 414 _mm_add_epi32(sum[0], _mm_shuffle_epi32(sum[0], 0x4E)))) / | |
| 415 8.0f; | |
| 416 hblue[1] = (_mm_cvtsi128_si32( | |
| 417 _mm_add_epi32(sum[1], _mm_shuffle_epi32(sum[1], 0x4E)))) / | |
| 418 8.0f; | |
| 419 | |
| 420 tmp = _mm_add_epi32(sum[0], sum[1]); | |
| 421 vblue[0] = (_mm_cvtsi128_si32(tmp)) / 8.0f; | |
| 422 vblue[1] = (_mm_extract_epi32(tmp, 2)) / 8.0f; | |
| 423 | |
| 424 /* TODO(radu.velea): return ints instead of floats */ | |
| 425 output[0] = vblue[0]; | |
| 426 output[1] = vgreen[0]; | |
| 427 output[2] = vred[0]; | |
| 428 | |
| 429 output[3] = vblue[1]; | |
| 430 output[4] = vgreen[1]; | |
| 431 output[5] = vred[1]; | |
| 432 | |
| 433 output[6] = hblue[0]; | |
| 434 output[7] = hgreen[0]; | |
| 435 output[8] = hred[0]; | |
| 436 | |
| 437 output[9] = hblue[1]; | |
| 438 output[10] = hgreen[1]; | |
| 439 output[11] = hred[1]; | |
| 440 | |
| 441 __m128i threshold_upper = _mm_set1_epi32(3); | |
| 442 __m128i threshold_lower = _mm_set1_epi32(-4); | |
| 443 | |
| 444 __m128 factor_v = _mm_set1_ps(31.0f / 255.0f); | |
| 445 __m128 rounding_v = _mm_set1_ps(0.5f); | |
| 446 __m128 h_avg_0 = _mm_set_ps(hblue[0], hgreen[0], hred[0], 0); | |
| 447 __m128 h_avg_1 = _mm_set_ps(hblue[1], hgreen[1], hred[1], 0); | |
| 448 | |
| 449 __m128 v_avg_0 = _mm_set_ps(vblue[0], vgreen[0], vred[0], 0); | |
| 450 __m128 v_avg_1 = _mm_set_ps(vblue[1], vgreen[1], vred[1], 0); | |
| 451 | |
| 452 h_avg_0 = _mm_mul_ps(h_avg_0, factor_v); | |
| 453 h_avg_1 = _mm_mul_ps(h_avg_1, factor_v); | |
| 454 v_avg_0 = _mm_mul_ps(v_avg_0, factor_v); | |
| 455 v_avg_1 = _mm_mul_ps(v_avg_1, factor_v); | |
| 456 | |
| 457 h_avg_0 = _mm_add_ps(h_avg_0, rounding_v); | |
| 458 h_avg_1 = _mm_add_ps(h_avg_1, rounding_v); | |
| 459 v_avg_0 = _mm_add_ps(v_avg_0, rounding_v); | |
| 460 v_avg_1 = _mm_add_ps(v_avg_1, rounding_v); | |
| 461 | |
| 462 __m128i h_avg_0i = _mm_cvttps_epi32(h_avg_0); | |
| 463 __m128i h_avg_1i = _mm_cvttps_epi32(h_avg_1); | |
| 464 | |
| 465 __m128i v_avg_0i = _mm_cvttps_epi32(v_avg_0); | |
| 466 __m128i v_avg_1i = _mm_cvttps_epi32(v_avg_1); | |
| 467 | |
| 468 h_avg_0i = _mm_sub_epi32(h_avg_1i, h_avg_0i); | |
| 469 v_avg_0i = _mm_sub_epi32(v_avg_1i, v_avg_0i); | |
| 470 | |
| 471 __sse_use_diff[0] = | |
| 472 (0 == _mm_movemask_epi8(_mm_cmplt_epi32(v_avg_0i, threshold_lower))); | |
| 473 __sse_use_diff[0] &= | |
| 474 (0 == _mm_movemask_epi8(_mm_cmpgt_epi32(v_avg_0i, threshold_upper))); | |
| 475 | |
| 476 __sse_use_diff[1] = | |
| 477 (0 == _mm_movemask_epi8(_mm_cmplt_epi32(h_avg_0i, threshold_lower))); | |
| 478 __sse_use_diff[1] &= | |
| 479 (0 == _mm_movemask_epi8(_mm_cmpgt_epi32(h_avg_0i, threshold_upper))); | |
| 480 } | |
| 481 | |
| 482 void ComputeLuminanceSSE(uint8_t* block, | |
| 483 const Color& base, | |
| 484 const int sub_block_id, | |
| 485 const uint8_t* idx_to_num_tab, | |
| 486 const __sse_data* data) { | |
| 487 uint8_t my_best_tbl_idx = 0; | |
| 488 uint32_t my_best_error = 0x7FFFFFFF; | |
| 489 uint8_t my_best_mod_idx[8][8]; // [table][texel] | |
| 490 | |
| 491 const __m128i base_blue = _mm_set1_epi32(base.channels.b); | |
| 492 const __m128i base_green = _mm_set1_epi32(base.channels.g); | |
| 493 const __m128i base_red = _mm_set1_epi32(base.channels.r); | |
| 494 | |
| 495 __m128i test_red, test_blue, test_green, tmp, tmp_blue, tmp_green, tmp_red; | |
| 496 __m128i block_error; | |
| 497 | |
| 498 /* this will have the minimum errors for each 4 pixels */ | |
| 499 __m128i first_half_min; | |
| 500 __m128i second_half_min; | |
| 501 | |
| 502 /* this will have the matching table index combo for each 4 pixels */ | |
| 503 __m128i first_half_pattern; | |
| 504 __m128i second_half_pattern; | |
| 505 | |
| 506 const __m128i first_blue_data_block = data->blue[2 * sub_block_id]; | |
| 507 const __m128i first_green_data_block = data->green[2 * sub_block_id]; | |
| 508 const __m128i first_red_data_block = data->red[2 * sub_block_id]; | |
| 509 | |
| 510 const __m128i second_blue_data_block = data->blue[2 * sub_block_id + 1]; | |
| 511 const __m128i second_green_data_block = data->green[2 * sub_block_id + 1]; | |
| 512 const __m128i second_red_data_block = data->red[2 * sub_block_id + 1]; | |
| 513 | |
| 514 uint32_t min; | |
| 515 | |
| 516 #define ELEMENT_1 3, 2, 1, 0 | |
| 517 #define ELEMENT_2 7, 6, 5, 4 | |
| 518 #define ELEMENT_3 11, 10, 9, 8 | |
| 519 #define ELEMENT_4 15, 14, 13, 12 | |
| 520 | |
| 521 static const __m128i mask_extended[4] = { | |
| 522 _mm_set_epi8(ELEMENT_1, ELEMENT_2, ELEMENT_3, ELEMENT_4), | |
| 523 _mm_set_epi8(ELEMENT_2, ELEMENT_1, ELEMENT_4, ELEMENT_3), | |
| 524 _mm_set_epi8(ELEMENT_3, ELEMENT_4, ELEMENT_1, ELEMENT_2), | |
| 525 _mm_set_epi8(ELEMENT_4, ELEMENT_3, ELEMENT_2, ELEMENT_1)}; | |
| 526 | |
| 527 static const __m128i mask_imm[4] = {_mm_set1_epi32(0x1B), | |
| 528 _mm_set1_epi32(0x4E), | |
| 529 _mm_set1_epi32(0xB1), | |
| 530 _mm_set1_epi32(0xE4)}; | |
| 531 | |
| 532 for (unsigned int tbl_idx = 0; tbl_idx < 8; ++tbl_idx) { | |
| 533 tmp = _mm_set_epi32( | |
| 534 g_codeword_tables[tbl_idx][3], g_codeword_tables[tbl_idx][2], | |
| 535 g_codeword_tables[tbl_idx][1], g_codeword_tables[tbl_idx][0]); | |
| 536 | |
| 537 test_blue = AddAndClamp(tmp, base_blue); | |
| 538 test_green = AddAndClamp(tmp, base_green); | |
| 539 test_red = AddAndClamp(tmp, base_red); | |
| 540 | |
| 541 first_half_min = __sse_max_int; | |
| 542 second_half_min = __sse_max_int; | |
| 543 | |
| 544 first_half_pattern = __sse_zero; | |
| 545 second_half_pattern = __sse_zero; | |
| 546 | |
| 547 #pragma unroll | |
| 548 for (int i = 0; i < 4; i++) { | |
| 549 tmp_blue = _mm_shuffle_epi8(test_blue, mask_extended[i]); | |
| 550 tmp_green = _mm_shuffle_epi8(test_green, mask_extended[i]); | |
| 551 tmp_red = _mm_shuffle_epi8(test_red, mask_extended[i]); | |
| 552 | |
| 553 block_error = | |
| 554 AddChannelError(GetColorErrorSSE(tmp_blue, first_blue_data_block), | |
| 555 GetColorErrorSSE(tmp_green, first_green_data_block), | |
| 556 GetColorErrorSSE(tmp_red, first_red_data_block)); | |
| 557 | |
| 558 /* save winning pattern */ | |
| 559 first_half_pattern = _mm_max_epi32( | |
| 560 first_half_pattern, | |
| 561 _mm_and_si128(mask_imm[i], | |
| 562 _mm_cmpgt_epi32(first_half_min, block_error))); | |
| 563 first_half_min = _mm_min_epi32(first_half_min, block_error); | |
| 564 | |
| 565 /* Second part of the block */ | |
| 566 block_error = | |
| 567 AddChannelError(GetColorErrorSSE(tmp_blue, second_blue_data_block), | |
| 568 GetColorErrorSSE(tmp_green, second_green_data_block), | |
| 569 GetColorErrorSSE(tmp_red, second_red_data_block)); | |
| 570 | |
| 571 /* save winning pattern */ | |
| 572 second_half_pattern = _mm_max_epi32( | |
| 573 second_half_pattern, | |
| 574 _mm_and_si128(mask_imm[i], | |
| 575 _mm_cmpgt_epi32(second_half_min, block_error))); | |
| 576 second_half_min = _mm_min_epi32(second_half_min, block_error); | |
| 577 } | |
| 578 | |
| 579 first_half_min = _mm_add_epi32(first_half_min, second_half_min); | |
| 580 first_half_min = | |
| 581 _mm_add_epi32(first_half_min, _mm_shuffle_epi32(first_half_min, 0x4E)); | |
| 582 first_half_min = | |
| 583 _mm_add_epi32(first_half_min, _mm_shuffle_epi32(first_half_min, 0xB1)); | |
| 584 | |
| 585 min = _mm_cvtsi128_si32(first_half_min); | |
| 586 | |
| 587 if (min < my_best_error) { | |
| 588 my_best_tbl_idx = tbl_idx; | |
| 589 my_best_error = min; | |
| 590 #if O3_OPTIMIZATION | |
| 591 #pragma unroll | |
| 592 for (int i = 0; i < 4; i++) { | |
| 593 my_best_mod_idx[tbl_idx][i] = | |
| 594 (_mm_extract_epi32(first_half_pattern, i) >> (2 * i)) & 3; | |
| 595 my_best_mod_idx[tbl_idx][i + 4] = | |
| 596 (_mm_extract_epi32(second_half_pattern, i) >> (2 * i)) & 3; | |
| 597 } | |
| 598 #endif | |
| 599 my_best_mod_idx[tbl_idx][0] = | |
| 600 (_mm_extract_epi32(first_half_pattern, 0) >> (0)) & 3; | |
| 601 my_best_mod_idx[tbl_idx][4] = | |
| 602 (_mm_extract_epi32(second_half_pattern, 0) >> (0)) & 3; | |
| 603 | |
| 604 my_best_mod_idx[tbl_idx][1] = | |
| 605 (_mm_extract_epi32(first_half_pattern, 1) >> (2)) & 3; | |
| 606 my_best_mod_idx[tbl_idx][5] = | |
| 607 (_mm_extract_epi32(second_half_pattern, 1) >> (2)) & 3; | |
| 608 | |
| 609 my_best_mod_idx[tbl_idx][2] = | |
| 610 (_mm_extract_epi32(first_half_pattern, 2) >> (4)) & 3; | |
| 611 my_best_mod_idx[tbl_idx][6] = | |
| 612 (_mm_extract_epi32(second_half_pattern, 2) >> (4)) & 3; | |
| 613 | |
| 614 my_best_mod_idx[tbl_idx][3] = | |
| 615 (_mm_extract_epi32(first_half_pattern, 3) >> (6)) & 3; | |
| 616 my_best_mod_idx[tbl_idx][7] = | |
| 617 (_mm_extract_epi32(second_half_pattern, 3) >> (6)) & 3; | |
| 618 | |
| 619 if (my_best_error == 0) { | |
| 620 break; | |
| 621 } | |
| 622 } | |
| 623 } | |
| 624 | |
| 625 WriteCodewordTable(block, sub_block_id, my_best_tbl_idx); | |
| 626 | |
| 627 uint32_t pix_data = 0; | |
| 628 uint8_t mod_idx; | |
| 629 uint8_t pix_idx; | |
| 630 uint32_t lsb; | |
| 631 uint32_t msb; | |
| 632 int texel_num; | |
| 633 | |
| 634 for (unsigned int i = 0; i < 8; ++i) { | |
| 635 mod_idx = my_best_mod_idx[my_best_tbl_idx][i]; | |
| 636 pix_idx = g_mod_to_pix[mod_idx]; | |
| 637 | |
| 638 lsb = pix_idx & 0x1; | |
| 639 msb = pix_idx >> 1; | |
| 640 | |
| 641 // Obtain the texel number as specified in the standard. | |
| 642 texel_num = idx_to_num_tab[i]; | |
| 643 pix_data |= msb << (texel_num + 16); | |
| 644 pix_data |= lsb << (texel_num); | |
| 645 } | |
| 646 | |
| 647 WritePixelData(block, pix_data); | |
| 648 } | |
| 649 | |
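For comparison with the scalar compressor, the loop above is essentially a vectorized brute-force search: for every codeword table each texel independently picks the modifier with the smallest squared BGR error against the clamped base+modifier color, and the table with the lowest accumulated error wins. A simplified scalar sketch of that search (hypothetical helper names, not the CL's code):

```cpp
#include <cstdint>

// Same codeword tables as above (Table 3.17.2 of the spec).
static const int16_t kCodewordTables[8][4] = {
    {-8, -2, 2, 8},       {-17, -5, 5, 17},   {-29, -9, 9, 29},
    {-42, -13, 13, 42},   {-60, -18, 18, 60}, {-80, -24, 24, 80},
    {-106, -33, 33, 106}, {-183, -47, 47, 183}};

static int Clamp255(int v) { return v < 0 ? 0 : (v > 255 ? 255 : v); }

// Scalar sketch: pick the (table, per-texel modifier) combination with the
// smallest summed squared error for one 8-texel sub block. base and texels
// are BGR triples; outputs the winning table and per-texel modifier indices.
static void FindBestTable(const uint8_t base[3],
                          const uint8_t texels[8][3],
                          uint8_t* best_table,
                          uint8_t best_mod[8]) {
  uint32_t best_error = UINT32_MAX;
  for (uint8_t tbl = 0; tbl < 8; ++tbl) {
    uint32_t table_error = 0;
    uint8_t mods[8];
    for (int t = 0; t < 8; ++t) {
      uint32_t texel_best = UINT32_MAX;
      for (uint8_t m = 0; m < 4; ++m) {
        uint32_t err = 0;
        for (int c = 0; c < 3; ++c) {
          const int d = Clamp255(base[c] + kCodewordTables[tbl][m]) - texels[t][c];
          err += d * d;
        }
        if (err < texel_best) {
          texel_best = err;
          mods[t] = m;
        }
      }
      table_error += texel_best;
    }
    if (table_error < best_error) {
      best_error = table_error;
      *best_table = tbl;
      for (int t = 0; t < 8; ++t)
        best_mod[t] = mods[t];
    }
  }
}
```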
| 650 void CompressBlock(uint8_t* dst, __sse_data* data) { | |
| 651 /* first 3 vertical 1, second 3 vertical 2, third 3 horizontal 1, last 3 | |
| 652 * horizontal 2 */ | |
| 653 float __sse_avg_colors[12] = { | |
| 654 0, | |
| 655 }; | |
| 656 bool use_differential[2] = {true, true}; | |
| 657 GetAvgColors(data, __sse_avg_colors, use_differential); | |
| 658 Color sub_block_avg[4]; | |
| 659 | |
| 660 /* TODO(radu.velea): remove floating point operations and use only ints + | |
| 661 * normal | |
| 662 * rounding and shifts */ | |
| 663 for (int i = 0, j = 1; i < 4; i += 2, j += 2) { | |
| 664 if (use_differential[i / 2] == false) { | |
| 665 sub_block_avg[i] = MakeColor444(&__sse_avg_colors[i * 3]); | |
| 666 sub_block_avg[j] = MakeColor444(&__sse_avg_colors[j * 3]); | |
| 667 } else { | |
| 668 sub_block_avg[i] = MakeColor555(&__sse_avg_colors[i * 3]); | |
| 669 sub_block_avg[j] = MakeColor555(&__sse_avg_colors[j * 3]); | |
| 670 } | |
| 671 } | |
| 672 | |
| 673 __m128i red_avg[2], green_avg[2], blue_avg[2]; | |
| 674 | |
| 675 // TODO(radu.velea): perfect accuracy, maybe skip floating variables | |
| 676 blue_avg[0] = | |
| 677 _mm_set_epi32((int)__sse_avg_colors[3], (int)__sse_avg_colors[3], | |
| 678 (int)__sse_avg_colors[0], (int)__sse_avg_colors[0]); | |
| 679 | |
| 680 green_avg[0] = | |
| 681 _mm_set_epi32((int)__sse_avg_colors[4], (int)__sse_avg_colors[4], | |
| 682 (int)__sse_avg_colors[1], (int)__sse_avg_colors[1]); | |
| 683 | |
| 684 red_avg[0] = | |
| 685 _mm_set_epi32((int)__sse_avg_colors[5], (int)__sse_avg_colors[5], | |
| 686 (int)__sse_avg_colors[2], (int)__sse_avg_colors[2]); | |
| 687 | |
| 688 uint32_t vertical_error = | |
| 689 GetVerticalError(data, blue_avg, green_avg, red_avg); | |
| 690 | |
| 691 // TODO(radu.velea): perfect accuracy, maybe skip floating variables | |
| 692 blue_avg[0] = _mm_set1_epi32((int)__sse_avg_colors[6]); | |
| 693 blue_avg[1] = _mm_set1_epi32((int)__sse_avg_colors[9]); | |
| 694 | |
| 695 green_avg[0] = _mm_set1_epi32((int)__sse_avg_colors[7]); | |
| 696 green_avg[1] = _mm_set1_epi32((int)__sse_avg_colors[10]); | |
| 697 | |
| 698 red_avg[0] = _mm_set1_epi32((int)__sse_avg_colors[8]); | |
| 699 red_avg[1] = _mm_set1_epi32((int)__sse_avg_colors[11]); | |
| 700 | |
| 701 uint32_t horizontal_error = | |
| 702 GetHorizontalError(data, blue_avg, green_avg, red_avg); | |
| 703 | |
| 704 bool flip = horizontal_error < vertical_error; | |
| 705 | |
| 706 // Clear destination buffer so that we can "or" in the results. | |
| 707 memset(dst, 0, 8); | |
| 708 | |
| 709 WriteDiff(dst, use_differential[!!flip]); | |
| 710 WriteFlip(dst, flip); | |
| 711 | |
| 712 uint8_t sub_block_off_0 = flip ? 2 : 0; | |
| 713 uint8_t sub_block_off_1 = sub_block_off_0 + 1; | |
| 714 | |
| 715 if (use_differential[!!flip]) { | |
| 716 WriteColors555(dst, sub_block_avg[sub_block_off_0], | |
| 717 sub_block_avg[sub_block_off_1]); | |
| 718 } else { | |
| 719 WriteColors444(dst, sub_block_avg[sub_block_off_0], | |
| 720 sub_block_avg[sub_block_off_1]); | |
| 721 } | |
| 722 | |
| 723 if (flip == false) { | |
| 724 /* transpose vertical data into horizontal lines */ | |
| 725 __m128i tmp; | |
| 726 #pragma unroll | |
| 727 for (int i = 0; i < 4; i += 2) { | |
| 728 tmp = data->blue[i]; | |
| 729 data->blue[i] = _mm_add_epi32( | |
| 730 _mm_move_epi64(data->blue[i]), | |
| 731 _mm_shuffle_epi32(_mm_move_epi64(data->blue[i + 1]), 0x4E)); | |
| 732 data->blue[i + 1] = _mm_add_epi32( | |
| 733 _mm_move_epi64(_mm_shuffle_epi32(tmp, 0x4E)), | |
| 734 _mm_shuffle_epi32( | |
| 735 _mm_move_epi64(_mm_shuffle_epi32(data->blue[i + 1], 0x4E)), | |
| 736 0x4E)); | |
| 737 | |
| 738 tmp = data->green[i]; | |
| 739 data->green[i] = _mm_add_epi32( | |
| 740 _mm_move_epi64(data->green[i]), | |
| 741 _mm_shuffle_epi32(_mm_move_epi64(data->green[i + 1]), 0x4E)); | |
| 742 data->green[i + 1] = _mm_add_epi32( | |
| 743 _mm_move_epi64(_mm_shuffle_epi32(tmp, 0x4E)), | |
| 744 _mm_shuffle_epi32( | |
| 745 _mm_move_epi64(_mm_shuffle_epi32(data->green[i + 1], 0x4E)), | |
| 746 0x4E)); | |
| 747 | |
| 748 tmp = data->red[i]; | |
| 749 data->red[i] = _mm_add_epi32( | |
| 750 _mm_move_epi64(data->red[i]), | |
| 751 _mm_shuffle_epi32(_mm_move_epi64(data->red[i + 1]), 0x4E)); | |
| 752 data->red[i + 1] = _mm_add_epi32( | |
| 753 _mm_move_epi64(_mm_shuffle_epi32(tmp, 0x4E)), | |
| 754 _mm_shuffle_epi32( | |
| 755 _mm_move_epi64(_mm_shuffle_epi32(data->red[i + 1], 0x4E)), 0x4E)); | |
| 756 } | |
| 757 | |
| 758 tmp = data->blue[1]; | |
| 759 data->blue[1] = data->blue[2]; | |
| 760 data->blue[2] = tmp; | |
| 761 | |
| 762 tmp = data->green[1]; | |
| 763 data->green[1] = data->green[2]; | |
| 764 data->green[2] = tmp; | |
| 765 | |
| 766 tmp = data->red[1]; | |
| 767 data->red[1] = data->red[2]; | |
| 768 data->red[2] = tmp; | |
| 769 } | |
| 770 | |
| 771 // Compute luminance for the first sub block. | |
| 772 ComputeLuminanceSSE(dst, sub_block_avg[sub_block_off_0], 0, | |
| 773 g_idx_to_num[sub_block_off_0], data); | |
| 774 // Compute luminance for the second sub block. | |
| 775 ComputeLuminanceSSE(dst, sub_block_avg[sub_block_off_1], 1, | |
| 776 g_idx_to_num[sub_block_off_1], data); | |
| 777 } | |
| 778 | |
| 779 static void LegacyExtractBlock(uint8_t* dst, const uint8_t* src, int width) { | |
| 780 for (int j = 0; j < 4; ++j) { | |
| 781 memcpy(&dst[j * 4 * 4], src, 4 * 4); | |
| 782 src += width * 4; | |
| 783 } | |
| 784 } | |
| 785 | |
| 786 inline void TransposeBlock(uint8_t* block, __m128i* transposed /* [4] */) { | |
| 787 __m128i tmp3, tmp2, tmp1, tmp0; | |
| 788 | |
adrian.belgun (2015/04/17 14:02:58): I think something went wrong with 'git cl format'
| 789 transposed[0] = _mm_loadu_si128((__m128i*)(block)); // a0,a1,a2,...a7, ...a15 | |
| 790 transposed[1] = | |
| 791 _mm_loadu_si128((__m128i*)(block + 16)); // b0, b1,b2,...b7.... b15 | |
| 792 transposed[2] = | |
| 793 _mm_loadu_si128((__m128i*)(block + 32)); // c0, c1,c2,...c7....c15 | |
| 794 transposed[3] = | |
| 795 _mm_loadu_si128((__m128i*)(block + 48)); // d0,d1,d2,...d7....d15 | |
| 796 | |
| 797 tmp0 = _mm_unpacklo_epi8( | |
| 798 transposed[0], transposed[1]); // a0,b0, a1,b1, a2,b2, a3,b3,....a7,b7 | |
| 799 tmp1 = _mm_unpacklo_epi8( | |
| 800 transposed[2], transposed[3]); // c0,d0, c1,d1, c2,d2, c3,d3,... c7,d7 | |
| 801 tmp2 = _mm_unpackhi_epi8( | |
| 802 transposed[0], | |
| 803 transposed[1]); // a8,b8, a9,b9, a10,b10, a11,b11,...a15,b15 | |
| 804 tmp3 = _mm_unpackhi_epi8( | |
| 805 transposed[2], | |
| 806 transposed[3]); // c8,d8, c9,d9, c10,d10, c11,d11,...c15,d15 | |
| 807 | |
| 808 transposed[0] = _mm_unpacklo_epi8( | |
| 809 tmp0, tmp2); // a0,a8, b0,b8, a1,a9, b1,b9, ....a3,a11, b3,b11 | |
| 810 transposed[1] = _mm_unpackhi_epi8( | |
| 811 tmp0, tmp2); // a4,a12, b4,b12, a5,a13, b5,b13,....a7,a15,b7,b15 | |
| 812 transposed[2] = | |
| 813 _mm_unpacklo_epi8(tmp1, tmp3); // c0,c8, d0,d8, c1,c9, d1,d9.....d3,d11 | |
| 814 transposed[3] = _mm_unpackhi_epi8( | |
| 815 tmp1, tmp3); // c4,c12,d4,d12, c5,c13, d5,d13,....d7,d15 | |
| 816 | |
| 817 tmp0 = _mm_unpacklo_epi32(transposed[0], transposed[2]); // a0,a8, b0,b8, | |
| 818 // c0,c8, d0,d8, | |
| 819 // a1,a9, b1,b9, | |
| 820 // c1,c9, d1,d9 | |
| 821 tmp1 = _mm_unpackhi_epi32(transposed[0], transposed[2]); // a2,a10, b2,b10, | |
| 822 // c2,c10, d2,d10, | |
| 823 // a3,a11, b3,b11, | |
| 824 // c3,c11, d3,d11 | |
| 825 tmp2 = _mm_unpacklo_epi32(transposed[1], transposed[3]); // a4,a12, b4,b12, | |
| 826 // c4,c12, d4,d12, | |
| 827 // a5,a13, b5,b13, | |
| 828 // c5,c13, d5,d13, | |
| 829 tmp3 = _mm_unpackhi_epi32(transposed[1], | |
| 830 transposed[3]); // a6,a14, b6,b14, c6,c14, d6,d14, | |
| 831 // a7,a15,b7,b15,c7,c15,d7,d15 | |
| 832 | |
| 833 transposed[0] = _mm_unpacklo_epi8(tmp0, tmp2); // a0,a4, a8, a12, b0,b4, | |
| 834 // b8,b12, c0,c4, c8, c12, | |
| 835 // d0,d4, d8, d12 | |
| 836 transposed[1] = _mm_unpackhi_epi8(tmp0, tmp2); // a1,a5, a9, a13, b1,b5, | |
| 837 // b9,b13, c1,c5, c9, c13, | |
| 838 // d1,d5, d9, d13 | |
| 839 transposed[2] = _mm_unpacklo_epi8(tmp1, tmp3); // a2,a6, a10,a14, b2,b6, | |
| 840 // b10,b14, c2,c6, c10,c14, | |
| 841 // d2,d6, d10,d14 | |
| 842 transposed[3] = _mm_unpackhi_epi8(tmp1, tmp3); // a3,a7, a11,a15, b3,b7, | |
| 843 // b11,b15, c3,c7, c11,c15, | |
| 844 // d3,d7, d11,d15 | |
| 845 } | |
| 846 | |
| 847 inline void UnpackBlock(__m128i* packed, | |
| 848 __m128i* red, | |
| 849 __m128i* green, | |
| 850 __m128i* blue, | |
| 851 __m128i* alpha) { | |
| 852 const __m128i zero = _mm_set1_epi8(0); | |
| 853 __m128i tmp_low, tmp_high; | |
| 854 | |
| 855 /* unpack red */ | |
| 856 tmp_low = _mm_unpacklo_epi8(packed[0], zero); | |
| 857 tmp_high = _mm_unpackhi_epi8(packed[0], zero); | |
| 858 | |
| 859 red[0] = _mm_unpacklo_epi16(tmp_low, zero); | |
| 860 red[1] = _mm_unpackhi_epi16(tmp_low, zero); | |
| 861 | |
| 862 red[2] = _mm_unpacklo_epi16(tmp_high, zero); | |
| 863 red[3] = _mm_unpackhi_epi16(tmp_high, zero); | |
| 864 | |
| 865 /* unpack green */ | |
| 866 tmp_low = _mm_unpacklo_epi8(packed[1], zero); | |
| 867 tmp_high = _mm_unpackhi_epi8(packed[1], zero); | |
| 868 | |
| 869 green[0] = _mm_unpacklo_epi16(tmp_low, zero); | |
| 870 green[1] = _mm_unpackhi_epi16(tmp_low, zero); | |
| 871 | |
| 872 green[2] = _mm_unpacklo_epi16(tmp_high, zero); | |
| 873 green[3] = _mm_unpackhi_epi16(tmp_high, zero); | |
| 874 | |
| 875 /* unpack blue */ | |
| 876 tmp_low = _mm_unpacklo_epi8(packed[2], zero); | |
| 877 tmp_high = _mm_unpackhi_epi8(packed[2], zero); | |
| 878 | |
| 879 blue[0] = _mm_unpacklo_epi16(tmp_low, zero); | |
| 880 blue[1] = _mm_unpackhi_epi16(tmp_low, zero); | |
| 881 | |
| 882 blue[2] = _mm_unpacklo_epi16(tmp_high, zero); | |
| 883 blue[3] = _mm_unpackhi_epi16(tmp_high, zero); | |
| 884 | |
| 885 /* unpack alpha */ | |
| 886 tmp_low = _mm_unpacklo_epi8(packed[3], zero); | |
| 887 tmp_high = _mm_unpackhi_epi8(packed[3], zero); | |
| 888 | |
| 889 alpha[0] = _mm_unpacklo_epi16(tmp_low, zero); | |
| 890 alpha[1] = _mm_unpackhi_epi16(tmp_low, zero); | |
| 891 | |
| 892 alpha[2] = _mm_unpacklo_epi16(tmp_high, zero); | |
| 893 alpha[3] = _mm_unpackhi_epi16(tmp_high, zero); | |
| 894 } | |
| 895 | |
| 896 inline int BlockIsConstant(const uint8_t* block, const __m128i* transposed) { | |
| 897 __m128i first = _mm_set1_epi8(block[0]); | |
| 898 first = _mm_cmpeq_epi8(transposed[0], first); | |
| 899 if (_mm_movemask_epi8(first) != 0xFFFF) { | |
| 900 return 0; | |
| 901 } | |
| 902 | |
| 903 first = _mm_set1_epi8(block[1]); | |
| 904 first = _mm_cmpeq_epi8(transposed[1], first); | |
| 905 | |
| 906 if (_mm_movemask_epi8(first) != 0xFFFF) { | |
| 907 return 0; | |
| 908 } | |
| 909 | |
| 910 first = _mm_set1_epi8(block[2]); | |
| 911 first = _mm_cmpeq_epi8(transposed[2], first); | |
| 912 | |
| 913 if (_mm_movemask_epi8(first) != 0xFFFF) { | |
| 914 return 0; | |
| 915 } | |
| 916 | |
| 917 return 1; | |
| 918 } | |
| 919 | |
| 920 inline void CompressSolid(uint8_t* dst, uint8_t* block) { | |
| 921 // Clear destination buffer so that we can "or" in the results. | |
| 922 memset(dst, 0, 8); | |
| 923 | |
| 924 float src_color_float[3] = {static_cast<float>(block[0]), | |
| 925 static_cast<float>(block[1]), | |
| 926 static_cast<float>(block[2])}; | |
| 927 Color base = MakeColor555(src_color_float); | |
| 928 Color constant; | |
| 929 constant.channels.b = block[0]; | |
| 930 constant.channels.g = block[1]; | |
| 931 constant.channels.r = block[2]; | |
| 932 | |
| 933 WriteDiff(dst, true); | |
| 934 WriteFlip(dst, false); | |
| 935 WriteColors555(dst, base, base); | |
| 936 | |
| 937 uint8_t best_tbl_idx = 0; | |
| 938 uint8_t best_mod_idx = 0; | |
| 939 uint32_t best_mod_err = std::numeric_limits<uint32_t>::max(); | |
| 940 | |
| 941 // Try all codeword tables to find the one giving the best results for this | |
| 942 // block. | |
| 943 for (unsigned int tbl_idx = 0; tbl_idx < 8; ++tbl_idx) { | |
| 944 // Try all modifiers in the current table to find which one gives the | |
| 945 // smallest error. | |
| 946 for (unsigned int mod_idx = 0; mod_idx < 4; ++mod_idx) { | |
| 947 int16_t lum = g_codeword_tables[tbl_idx][mod_idx]; | |
| 948 const Color& color = MakeColor(base, lum); | |
| 949 | |
| 950 uint32_t mod_err = GetColorError(constant, color); | |
| 951 if (mod_err < best_mod_err) { | |
| 952 best_tbl_idx = tbl_idx; | |
| 953 best_mod_idx = mod_idx; | |
| 954 best_mod_err = mod_err; | |
| 955 | |
| 956 if (mod_err == 0) | |
| 957 break; // We cannot do any better than this. | |
| 958 } | |
| 959 } | |
| 960 | |
| 961 if (best_mod_err == 0) | |
| 962 break; | |
| 963 } | |
| 964 | |
| 965 WriteCodewordTable(dst, 0, best_tbl_idx); | |
| 966 WriteCodewordTable(dst, 1, best_tbl_idx); | |
| 967 | |
| 968 uint8_t pix_idx = g_mod_to_pix[best_mod_idx]; | |
| 969 uint32_t lsb = pix_idx & 0x1; | |
| 970 uint32_t msb = pix_idx >> 1; | |
| 971 | |
| 972 uint32_t pix_data = 0; | |
| 973 for (unsigned int i = 0; i < 2; ++i) { | |
| 974 for (unsigned int j = 0; j < 8; ++j) { | |
| 975 // Obtain the texel number as specified in the standard. | |
| 976 int texel_num = g_idx_to_num[i][j]; | |
| 977 pix_data |= msb << (texel_num + 16); | |
| 978 pix_data |= lsb << (texel_num); | |
| 979 } | |
| 980 } | |
| 981 | |
| 982 WritePixelData(dst, pix_data); | |
| 983 } | |
| 984 | |
| 985 } // namespace | |
| 986 | |
| 987 namespace cc { | |
| 988 | |
| 989 void TextureCompressorETC1_SSE::Compress(const uint8_t* src, | |
| 990 uint8_t* dst, | |
| 991 int width, | |
| 992 int height, | |
| 993 Quality quality) { | |
| 994 DCHECK(width >= 4 && (width & 3) == 0); | |
| 995 DCHECK(height >= 4 && (height & 3) == 0); | |
| 996 | |
| 997 uint8_t block[64] __attribute__((aligned(16))); | |
| 998 __m128i packed[4]; | |
| 999 __m128i red[4], green[4], blue[4], alpha[4]; | |
| 1000 __sse_data data; | |
| 1001 | |
| 1002 for (int y = 0; y < height; y += 4, src += width * 4 * 4) { | |
| 1003 for (int x = 0; x < width; x += 4, dst += 8) { | |
| 1004 /* SSE */ | |
| 1005 LegacyExtractBlock(block, src + x * 4, width); | |
| 1006 TransposeBlock(block, packed); | |
| 1007 if (BlockIsConstant(block, packed) == 1) { | |
| 1008 /* TODO(radu.velea): handle constant blocks in SSE */ | |
| 1009 CompressSolid(dst, block); | |
| 1010 } else { | |
| 1011 UnpackBlock(packed, blue, green, red, alpha); | |
| 1012 | |
| 1013 data.block = block; | |
| 1014 data.packed = packed; | |
| 1015 data.red = red; | |
| 1016 data.blue = blue; | |
| 1017 data.green = green; | |
| 1018 | |
| 1019 CompressBlock(dst, &data); | |
| 1020 } | |
| 1021 } | |
| 1022 } | |
| 1023 } | |
| 1024 | |
| 1025 } // namespace cc | |
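A minimal usage sketch of the new compressor (illustrative only; the Quality enumerator name below is an assumption, and the include matches this file's own include): the source is 4x4-aligned BGRA data and each 4x4 block produces 8 bytes of ETC1 output.

```cpp
#include <cstdint>
#include <vector>

#include "texture_compressor_etc1_sse.h"  // Same include as this file.

void CompressExample(const uint8_t* bgra, int width, int height,
                     std::vector<uint8_t>* etc1_out) {
  // ETC1 packs every 4x4 block of pixels into 8 bytes.
  etc1_out->resize(static_cast<size_t>(width / 4) * (height / 4) * 8);
  cc::TextureCompressorETC1_SSE compressor;
  // Width and height must be multiples of 4 (DCHECKed by Compress()).
  compressor.Compress(bgra, etc1_out->data(), width, height,
                      cc::TextureCompressor::kQualityHigh);  // Enumerator assumed.
}
```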