| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "cc/raster/texture_compressor_etc1_sse.h" | 5 #include "cc/raster/texture_compressor_etc1_sse.h" |
| 6 | 6 |
| 7 #include <emmintrin.h> | 7 #include <emmintrin.h> |
| 8 | 8 |
| 9 #include "base/compiler_specific.h" | 9 #include "base/compiler_specific.h" |
| 10 #include "base/logging.h" | 10 #include "base/logging.h" |
| (...skipping 20 matching lines...) Expand all Loading... |
| 31 // This is used to store raw data. | 31 // This is used to store raw data. |
| 32 uint8_t* block; | 32 uint8_t* block; |
| 33 // This is used to store 8 bit packed values. | 33 // This is used to store 8 bit packed values. |
| 34 __m128i* packed; | 34 __m128i* packed; |
| 35 // This is used to store 32 bit zero extended values into 4x4 arrays. | 35 // This is used to store 32 bit zero extended values into 4x4 arrays. |
| 36 __m128i* blue; | 36 __m128i* blue; |
| 37 __m128i* green; | 37 __m128i* green; |
| 38 __m128i* red; | 38 __m128i* red; |
| 39 }; | 39 }; |
| 40 | 40 |
| 41 // Commonly used registers throughout the code. | |
| 42 static const __m128i __sse_zero = _mm_set1_epi32(0); | |
| 43 static const __m128i __sse_max_int = _mm_set1_epi32(0x7FFFFFFF); | |
| 44 | |
| 45 inline __m128i AddAndClamp(const __m128i x, const __m128i y) { | 41 inline __m128i AddAndClamp(const __m128i x, const __m128i y) { |
| 46 static const __m128i color_max = _mm_set1_epi32(0xFF); | 42 static const __m128i color_max = _mm_set1_epi32(0xFF); |
| 47 return _mm_max_epi16(__sse_zero, | 43 return _mm_max_epi16(_mm_setzero_si128(), |
| 48 _mm_min_epi16(_mm_add_epi16(x, y), color_max)); | 44 _mm_min_epi16(_mm_add_epi16(x, y), color_max)); |
| 49 } | 45 } |
| 50 | 46 |
| 51 inline __m128i GetColorErrorSSE(const __m128i x, const __m128i y) { | 47 inline __m128i GetColorErrorSSE(const __m128i x, const __m128i y) { |
| 52 // Changed from _mm_mullo_epi32 (SSE4) to _mm_mullo_epi16 (SSE2). | 48 // Changed from _mm_mullo_epi32 (SSE4) to _mm_mullo_epi16 (SSE2). |
| 53 __m128i ret = _mm_sub_epi16(x, y); | 49 __m128i ret = _mm_sub_epi16(x, y); |
| 54 return _mm_mullo_epi16(ret, ret); | 50 return _mm_mullo_epi16(ret, ret); |
| 55 } | 51 } |
| 56 | 52 |
| 57 inline __m128i AddChannelError(const __m128i x, | 53 inline __m128i AddChannelError(const __m128i x, |
| 58 const __m128i y, | 54 const __m128i y, |
| 59 const __m128i z) { | 55 const __m128i z) { |
| 60 return _mm_add_epi32(x, _mm_add_epi32(y, z)); | 56 return _mm_add_epi32(x, _mm_add_epi32(y, z)); |
| 61 } | 57 } |
| 62 | 58 |
| 63 inline uint32_t SumSSE(const __m128i x) { | 59 inline uint32_t SumSSE(const __m128i x) { |
| 64 __m128i sum = _mm_add_epi32(x, _mm_shuffle_epi32(x, 0x4E)); | 60 __m128i sum = _mm_add_epi32(x, _mm_shuffle_epi32(x, 0x4E)); |
| 65 sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); | 61 sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); |
| 66 | 62 |
| 67 return _mm_cvtsi128_si32(sum); | 63 return _mm_cvtsi128_si32(sum); |
| 68 } | 64 } |
| 69 | 65 |
| 70 inline uint32_t GetVerticalError(const __sse_data* data, | 66 inline uint32_t GetVerticalError(const __sse_data* data, |
| 71 const __m128i* blue_avg, | 67 const __m128i* blue_avg, |
| 72 const __m128i* green_avg, | 68 const __m128i* green_avg, |
| 73 const __m128i* red_avg, | 69 const __m128i* red_avg, |
| 74 uint32_t* verror) { | 70 uint32_t* verror) { |
| 75 __m128i error = __sse_zero; | 71 __m128i error = _mm_setzero_si128(); |
| 76 | 72 |
| 77 for (int i = 0; i < 4; i++) { | 73 for (int i = 0; i < 4; i++) { |
| 78 error = _mm_add_epi32(error, GetColorErrorSSE(data->blue[i], blue_avg[0])); | 74 error = _mm_add_epi32(error, GetColorErrorSSE(data->blue[i], blue_avg[0])); |
| 79 error = | 75 error = |
| 80 _mm_add_epi32(error, GetColorErrorSSE(data->green[i], green_avg[0])); | 76 _mm_add_epi32(error, GetColorErrorSSE(data->green[i], green_avg[0])); |
| 81 error = _mm_add_epi32(error, GetColorErrorSSE(data->red[i], red_avg[0])); | 77 error = _mm_add_epi32(error, GetColorErrorSSE(data->red[i], red_avg[0])); |
| 82 } | 78 } |
| 83 | 79 |
| 84 error = _mm_add_epi32(error, _mm_shuffle_epi32(error, 0x4E)); | 80 error = _mm_add_epi32(error, _mm_shuffle_epi32(error, 0x4E)); |
| 85 | 81 |
| 86 verror[0] = _mm_cvtsi128_si32(error); | 82 verror[0] = _mm_cvtsi128_si32(error); |
| 87 verror[1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(error, 0xB1)); | 83 verror[1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(error, 0xB1)); |
| 88 | 84 |
| 89 return verror[0] + verror[1]; | 85 return verror[0] + verror[1]; |
| 90 } | 86 } |
| 91 | 87 |
| 92 inline uint32_t GetHorizontalError(const __sse_data* data, | 88 inline uint32_t GetHorizontalError(const __sse_data* data, |
| 93 const __m128i* blue_avg, | 89 const __m128i* blue_avg, |
| 94 const __m128i* green_avg, | 90 const __m128i* green_avg, |
| 95 const __m128i* red_avg, | 91 const __m128i* red_avg, |
| 96 uint32_t* verror) { | 92 uint32_t* verror) { |
| 97 __m128i error = __sse_zero; | 93 __m128i error = _mm_setzero_si128(); |
| 98 int first_index, second_index; | 94 int first_index, second_index; |
| 99 | 95 |
| 100 for (int i = 0; i < 2; i++) { | 96 for (int i = 0; i < 2; i++) { |
| 101 first_index = 2 * i; | 97 first_index = 2 * i; |
| 102 second_index = first_index + 1; | 98 second_index = first_index + 1; |
| 103 | 99 |
| 104 error = _mm_add_epi32( | 100 error = _mm_add_epi32( |
| 105 error, GetColorErrorSSE(data->blue[first_index], blue_avg[i])); | 101 error, GetColorErrorSSE(data->blue[first_index], blue_avg[i])); |
| 106 error = _mm_add_epi32( | 102 error = _mm_add_epi32( |
| 107 error, GetColorErrorSSE(data->blue[second_index], blue_avg[i])); | 103 error, GetColorErrorSSE(data->blue[second_index], blue_avg[i])); |
| (...skipping 186 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 294 | 290 |
| 295 for (unsigned int tbl_idx = 0; tbl_idx < 8; ++tbl_idx) { | 291 for (unsigned int tbl_idx = 0; tbl_idx < 8; ++tbl_idx) { |
| 296 tmp = _mm_set_epi32( | 292 tmp = _mm_set_epi32( |
| 297 g_codeword_tables[tbl_idx][3], g_codeword_tables[tbl_idx][2], | 293 g_codeword_tables[tbl_idx][3], g_codeword_tables[tbl_idx][2], |
| 298 g_codeword_tables[tbl_idx][1], g_codeword_tables[tbl_idx][0]); | 294 g_codeword_tables[tbl_idx][1], g_codeword_tables[tbl_idx][0]); |
| 299 | 295 |
| 300 test_blue = AddAndClamp(tmp, base_blue); | 296 test_blue = AddAndClamp(tmp, base_blue); |
| 301 test_green = AddAndClamp(tmp, base_green); | 297 test_green = AddAndClamp(tmp, base_green); |
| 302 test_red = AddAndClamp(tmp, base_red); | 298 test_red = AddAndClamp(tmp, base_red); |
| 303 | 299 |
| 304 first_half_min = __sse_max_int; | 300 first_half_min = _mm_set1_epi32(0x7FFFFFFF); |
| 305 second_half_min = __sse_max_int; | 301 second_half_min = _mm_set1_epi32(0x7FFFFFFF); |
| 306 | 302 |
| 307 first_half_pattern = __sse_zero; | 303 first_half_pattern = _mm_setzero_si128(); |
| 308 second_half_pattern = __sse_zero; | 304 second_half_pattern = _mm_setzero_si128(); |
| 309 | 305 |
| 310 for (uint8_t imm8 : shuffle_mask) { | 306 for (uint8_t imm8 : shuffle_mask) { |
| 311 switch (imm8) { | 307 switch (imm8) { |
| 312 case 0x1B: | 308 case 0x1B: |
| 313 tmp_blue = _mm_shuffle_epi32(test_blue, 0x1B); | 309 tmp_blue = _mm_shuffle_epi32(test_blue, 0x1B); |
| 314 tmp_green = _mm_shuffle_epi32(test_green, 0x1B); | 310 tmp_green = _mm_shuffle_epi32(test_green, 0x1B); |
| 315 tmp_red = _mm_shuffle_epi32(test_red, 0x1B); | 311 tmp_red = _mm_shuffle_epi32(test_red, 0x1B); |
| 316 break; | 312 break; |
| 317 case 0x4E: | 313 case 0x4E: |
| 318 tmp_blue = _mm_shuffle_epi32(test_blue, 0x4E); | 314 tmp_blue = _mm_shuffle_epi32(test_blue, 0x4E); |
| (...skipping 493 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 812 data.blue = blue; | 808 data.blue = blue; |
| 813 data.green = green; | 809 data.green = green; |
| 814 | 810 |
| 815 CompressBlock(dst, &data); | 811 CompressBlock(dst, &data); |
| 816 } | 812 } |
| 817 } | 813 } |
| 818 } | 814 } |
| 819 } | 815 } |
| 820 | 816 |
| 821 } // namespace cc | 817 } // namespace cc |
| OLD | NEW |