OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "cc/raster/texture_compressor_etc1_sse.h" | 5 #include "cc/raster/texture_compressor_etc1_sse.h" |
6 | 6 |
7 #include <emmintrin.h> | 7 #include <emmintrin.h> |
8 | 8 |
9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
(...skipping 20 matching lines...) Expand all Loading... |
31 // This is used to store raw data. | 31 // This is used to store raw data. |
32 uint8_t* block; | 32 uint8_t* block; |
33 // This is used to store 8 bit packed values. | 33 // This is used to store 8 bit packed values. |
34 __m128i* packed; | 34 __m128i* packed; |
35 // This is used to store 32 bit zero extended values into 4x4 arrays. | 35 // This is used to store 32 bit zero extended values into 4x4 arrays. |
36 __m128i* blue; | 36 __m128i* blue; |
37 __m128i* green; | 37 __m128i* green; |
38 __m128i* red; | 38 __m128i* red; |
39 }; | 39 }; |
40 | 40 |
41 // Commonly used registers throughout the code. | |
42 static const __m128i __sse_zero = _mm_set1_epi32(0); | |
43 static const __m128i __sse_max_int = _mm_set1_epi32(0x7FFFFFFF); | |
44 | |
45 inline __m128i AddAndClamp(const __m128i x, const __m128i y) { | 41 inline __m128i AddAndClamp(const __m128i x, const __m128i y) { |
46 static const __m128i color_max = _mm_set1_epi32(0xFF); | 42 static const __m128i color_max = _mm_set1_epi32(0xFF); |
47 return _mm_max_epi16(__sse_zero, | 43 return _mm_max_epi16(_mm_setzero_si128(), |
48 _mm_min_epi16(_mm_add_epi16(x, y), color_max)); | 44 _mm_min_epi16(_mm_add_epi16(x, y), color_max)); |
49 } | 45 } |
50 | 46 |
51 inline __m128i GetColorErrorSSE(const __m128i x, const __m128i y) { | 47 inline __m128i GetColorErrorSSE(const __m128i x, const __m128i y) { |
52 // Changed from _mm_mullo_epi32 (SSE4) to _mm_mullo_epi16 (SSE2). | 48 // Changed from _mm_mullo_epi32 (SSE4) to _mm_mullo_epi16 (SSE2). |
53 __m128i ret = _mm_sub_epi16(x, y); | 49 __m128i ret = _mm_sub_epi16(x, y); |
54 return _mm_mullo_epi16(ret, ret); | 50 return _mm_mullo_epi16(ret, ret); |
55 } | 51 } |
56 | 52 |
57 inline __m128i AddChannelError(const __m128i x, | 53 inline __m128i AddChannelError(const __m128i x, |
58 const __m128i y, | 54 const __m128i y, |
59 const __m128i z) { | 55 const __m128i z) { |
60 return _mm_add_epi32(x, _mm_add_epi32(y, z)); | 56 return _mm_add_epi32(x, _mm_add_epi32(y, z)); |
61 } | 57 } |
62 | 58 |
63 inline uint32_t SumSSE(const __m128i x) { | 59 inline uint32_t SumSSE(const __m128i x) { |
64 __m128i sum = _mm_add_epi32(x, _mm_shuffle_epi32(x, 0x4E)); | 60 __m128i sum = _mm_add_epi32(x, _mm_shuffle_epi32(x, 0x4E)); |
65 sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); | 61 sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); |
66 | 62 |
67 return _mm_cvtsi128_si32(sum); | 63 return _mm_cvtsi128_si32(sum); |
68 } | 64 } |
69 | 65 |
70 inline uint32_t GetVerticalError(const __sse_data* data, | 66 inline uint32_t GetVerticalError(const __sse_data* data, |
71 const __m128i* blue_avg, | 67 const __m128i* blue_avg, |
72 const __m128i* green_avg, | 68 const __m128i* green_avg, |
73 const __m128i* red_avg, | 69 const __m128i* red_avg, |
74 uint32_t* verror) { | 70 uint32_t* verror) { |
75 __m128i error = __sse_zero; | 71 __m128i error = _mm_setzero_si128(); |
76 | 72 |
77 for (int i = 0; i < 4; i++) { | 73 for (int i = 0; i < 4; i++) { |
78 error = _mm_add_epi32(error, GetColorErrorSSE(data->blue[i], blue_avg[0])); | 74 error = _mm_add_epi32(error, GetColorErrorSSE(data->blue[i], blue_avg[0])); |
79 error = | 75 error = |
80 _mm_add_epi32(error, GetColorErrorSSE(data->green[i], green_avg[0])); | 76 _mm_add_epi32(error, GetColorErrorSSE(data->green[i], green_avg[0])); |
81 error = _mm_add_epi32(error, GetColorErrorSSE(data->red[i], red_avg[0])); | 77 error = _mm_add_epi32(error, GetColorErrorSSE(data->red[i], red_avg[0])); |
82 } | 78 } |
83 | 79 |
84 error = _mm_add_epi32(error, _mm_shuffle_epi32(error, 0x4E)); | 80 error = _mm_add_epi32(error, _mm_shuffle_epi32(error, 0x4E)); |
85 | 81 |
86 verror[0] = _mm_cvtsi128_si32(error); | 82 verror[0] = _mm_cvtsi128_si32(error); |
87 verror[1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(error, 0xB1)); | 83 verror[1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(error, 0xB1)); |
88 | 84 |
89 return verror[0] + verror[1]; | 85 return verror[0] + verror[1]; |
90 } | 86 } |
91 | 87 |
92 inline uint32_t GetHorizontalError(const __sse_data* data, | 88 inline uint32_t GetHorizontalError(const __sse_data* data, |
93 const __m128i* blue_avg, | 89 const __m128i* blue_avg, |
94 const __m128i* green_avg, | 90 const __m128i* green_avg, |
95 const __m128i* red_avg, | 91 const __m128i* red_avg, |
96 uint32_t* verror) { | 92 uint32_t* verror) { |
97 __m128i error = __sse_zero; | 93 __m128i error = _mm_setzero_si128(); |
98 int first_index, second_index; | 94 int first_index, second_index; |
99 | 95 |
100 for (int i = 0; i < 2; i++) { | 96 for (int i = 0; i < 2; i++) { |
101 first_index = 2 * i; | 97 first_index = 2 * i; |
102 second_index = first_index + 1; | 98 second_index = first_index + 1; |
103 | 99 |
104 error = _mm_add_epi32( | 100 error = _mm_add_epi32( |
105 error, GetColorErrorSSE(data->blue[first_index], blue_avg[i])); | 101 error, GetColorErrorSSE(data->blue[first_index], blue_avg[i])); |
106 error = _mm_add_epi32( | 102 error = _mm_add_epi32( |
107 error, GetColorErrorSSE(data->blue[second_index], blue_avg[i])); | 103 error, GetColorErrorSSE(data->blue[second_index], blue_avg[i])); |
(...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
294 | 290 |
295 for (unsigned int tbl_idx = 0; tbl_idx < 8; ++tbl_idx) { | 291 for (unsigned int tbl_idx = 0; tbl_idx < 8; ++tbl_idx) { |
296 tmp = _mm_set_epi32( | 292 tmp = _mm_set_epi32( |
297 g_codeword_tables[tbl_idx][3], g_codeword_tables[tbl_idx][2], | 293 g_codeword_tables[tbl_idx][3], g_codeword_tables[tbl_idx][2], |
298 g_codeword_tables[tbl_idx][1], g_codeword_tables[tbl_idx][0]); | 294 g_codeword_tables[tbl_idx][1], g_codeword_tables[tbl_idx][0]); |
299 | 295 |
300 test_blue = AddAndClamp(tmp, base_blue); | 296 test_blue = AddAndClamp(tmp, base_blue); |
301 test_green = AddAndClamp(tmp, base_green); | 297 test_green = AddAndClamp(tmp, base_green); |
302 test_red = AddAndClamp(tmp, base_red); | 298 test_red = AddAndClamp(tmp, base_red); |
303 | 299 |
304 first_half_min = __sse_max_int; | 300 first_half_min = _mm_set1_epi32(0x7FFFFFFF); |
305 second_half_min = __sse_max_int; | 301 second_half_min = _mm_set1_epi32(0x7FFFFFFF); |
306 | 302 |
307 first_half_pattern = __sse_zero; | 303 first_half_pattern = _mm_setzero_si128(); |
308 second_half_pattern = __sse_zero; | 304 second_half_pattern = _mm_setzero_si128(); |
309 | 305 |
310 for (uint8_t imm8 : shuffle_mask) { | 306 for (uint8_t imm8 : shuffle_mask) { |
311 switch (imm8) { | 307 switch (imm8) { |
312 case 0x1B: | 308 case 0x1B: |
313 tmp_blue = _mm_shuffle_epi32(test_blue, 0x1B); | 309 tmp_blue = _mm_shuffle_epi32(test_blue, 0x1B); |
314 tmp_green = _mm_shuffle_epi32(test_green, 0x1B); | 310 tmp_green = _mm_shuffle_epi32(test_green, 0x1B); |
315 tmp_red = _mm_shuffle_epi32(test_red, 0x1B); | 311 tmp_red = _mm_shuffle_epi32(test_red, 0x1B); |
316 break; | 312 break; |
317 case 0x4E: | 313 case 0x4E: |
318 tmp_blue = _mm_shuffle_epi32(test_blue, 0x4E); | 314 tmp_blue = _mm_shuffle_epi32(test_blue, 0x4E); |
(...skipping 493 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
812 data.blue = blue; | 808 data.blue = blue; |
813 data.green = green; | 809 data.green = green; |
814 | 810 |
815 CompressBlock(dst, &data); | 811 CompressBlock(dst, &data); |
816 } | 812 } |
817 } | 813 } |
818 } | 814 } |
819 } | 815 } |
820 | 816 |
821 } // namespace cc | 817 } // namespace cc |
OLD | NEW |