| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <stdint.h> |
| 6 |
| 5 #include "build/build_config.h" | 7 #include "build/build_config.h" |
| 6 #include "media/base/simd/convert_rgb_to_yuv.h" | 8 #include "media/base/simd/convert_rgb_to_yuv.h" |
| 7 | 9 |
| 8 #if defined(COMPILER_MSVC) | 10 #if defined(COMPILER_MSVC) |
| 9 #include <intrin.h> | 11 #include <intrin.h> |
| 10 #else | 12 #else |
| 11 #include <mmintrin.h> | 13 #include <mmintrin.h> |
| 12 #include <emmintrin.h> | 14 #include <emmintrin.h> |
| 13 #endif | 15 #endif |
| 14 | 16 |
| 15 #if defined(COMPILER_MSVC) | 17 #if defined(COMPILER_MSVC) |
| 16 #define SIMD_ALIGNED(var) __declspec(align(16)) var | 18 #define SIMD_ALIGNED(var) __declspec(align(16)) var |
| 17 #else | 19 #else |
| 18 #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) | 20 #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) |
| 19 #endif | 21 #endif |
| 20 | 22 |
| 21 namespace media { | 23 namespace media { |
| 22 | 24 |
| 23 #define FIX_SHIFT 12 | 25 #define FIX_SHIFT 12 |
| 24 #define FIX(x) ((x) * (1 << FIX_SHIFT)) | 26 #define FIX(x) ((x) * (1 << FIX_SHIFT)) |
| 25 | 27 |
| 26 // Define a convenient macro to do static cast. | 28 // Define a convenient macro to do static cast. |
| 27 #define INT16_FIX(x) static_cast<int16>(FIX(x)) | 29 #define INT16_FIX(x) static_cast<int16_t>(FIX(x)) |
| 28 | 30 |
| 29 // Android's pixel layout is RGBA, while other platforms | 31 // Android's pixel layout is RGBA, while other platforms |
| 30 // are BGRA. | 32 // are BGRA. |
| 31 #if defined(OS_ANDROID) | 33 #if defined(OS_ANDROID) |
| 32 SIMD_ALIGNED(const int16 ConvertRGBAToYUV_kTable[8 * 3]) = { | 34 SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = { |
| 33 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, | 35 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, |
| 34 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, | 36 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, |
| 35 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, | 37 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, |
| 36 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, | 38 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, |
| 37 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, | 39 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, |
| 38 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, | 40 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, |
| 39 }; | 41 }; |
| 40 #else | 42 #else |
| 41 SIMD_ALIGNED(const int16 ConvertRGBAToYUV_kTable[8 * 3]) = { | 43 SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = { |
| 42 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, | 44 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, |
| 43 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, | 45 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, |
| 44 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, | 46 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, |
| 45 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, | 47 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, |
| 46 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, | 48 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, |
| 47 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, | 49 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, |
| 48 }; | 50 }; |
| 49 #endif | 51 #endif |
| 50 | 52 |
| 51 #undef INT16_FIX | 53 #undef INT16_FIX |
| 52 | 54 |
| 53 // This is the final offset for the conversion from signed yuv values to | 55 // This is the final offset for the conversion from signed yuv values to |
| 54 // unsigned values. It is arranged so that offset of 16 is applied to Y | 56 // unsigned values. It is arranged so that offset of 16 is applied to Y |
| 55 // components and 128 is added to UV components for 2 pixels. | 57 // components and 128 is added to UV components for 2 pixels. |
| 56 SIMD_ALIGNED(const int32 kYOffset[4]) = {16, 16, 16, 16}; | 58 SIMD_ALIGNED(const int32_t kYOffset[4]) = {16, 16, 16, 16}; |
| 57 | 59 |
| 58 static inline uint8 Clamp(int value) { | 60 static inline uint8_t Clamp(int value) { |
| 59 if (value < 0) | 61 if (value < 0) |
| 60 return 0; | 62 return 0; |
| 61 if (value > 255) | 63 if (value > 255) |
| 62 return 255; | 64 return 255; |
| 63 return static_cast<uint8>(value); | 65 return static_cast<uint8_t>(value); |
| 64 } | 66 } |
| 65 | 67 |
| 66 static inline uint8 RGBToY(int r, int g, int b) { | 68 static inline uint8_t RGBToY(int r, int g, int b) { |
| 67 int y = ConvertRGBAToYUV_kTable[0] * b + | 69 int y = ConvertRGBAToYUV_kTable[0] * b + |
| 68 ConvertRGBAToYUV_kTable[1] * g + | 70 ConvertRGBAToYUV_kTable[1] * g + |
| 69 ConvertRGBAToYUV_kTable[2] * r; | 71 ConvertRGBAToYUV_kTable[2] * r; |
| 70 y >>= FIX_SHIFT; | 72 y >>= FIX_SHIFT; |
| 71 return Clamp(y + 16); | 73 return Clamp(y + 16); |
| 72 } | 74 } |
| 73 | 75 |
| 74 static inline uint8 RGBToU(int r, int g, int b, int shift) { | 76 static inline uint8_t RGBToU(int r, int g, int b, int shift) { |
| 75 int u = ConvertRGBAToYUV_kTable[8] * b + | 77 int u = ConvertRGBAToYUV_kTable[8] * b + |
| 76 ConvertRGBAToYUV_kTable[9] * g + | 78 ConvertRGBAToYUV_kTable[9] * g + |
| 77 ConvertRGBAToYUV_kTable[10] * r; | 79 ConvertRGBAToYUV_kTable[10] * r; |
| 78 u >>= FIX_SHIFT + shift; | 80 u >>= FIX_SHIFT + shift; |
| 79 return Clamp(u + 128); | 81 return Clamp(u + 128); |
| 80 } | 82 } |
| 81 | 83 |
| 82 static inline uint8 RGBToV(int r, int g, int b, int shift) { | 84 static inline uint8_t RGBToV(int r, int g, int b, int shift) { |
| 83 int v = ConvertRGBAToYUV_kTable[16] * b + | 85 int v = ConvertRGBAToYUV_kTable[16] * b + |
| 84 ConvertRGBAToYUV_kTable[17] * g + | 86 ConvertRGBAToYUV_kTable[17] * g + |
| 85 ConvertRGBAToYUV_kTable[18] * r; | 87 ConvertRGBAToYUV_kTable[18] * r; |
| 86 v >>= FIX_SHIFT + shift; | 88 v >>= FIX_SHIFT + shift; |
| 87 return Clamp(v + 128); | 89 return Clamp(v + 128); |
| 88 } | 90 } |
| 89 | 91 |
| 90 #define CONVERT_Y(rgb_buf, y_buf) \ | 92 #define CONVERT_Y(rgb_buf, y_buf) \ |
| 91 b = *rgb_buf++; \ | 93 b = *rgb_buf++; \ |
| 92 g = *rgb_buf++; \ | 94 g = *rgb_buf++; \ |
| 93 r = *rgb_buf++; \ | 95 r = *rgb_buf++; \ |
| 94 ++rgb_buf; \ | 96 ++rgb_buf; \ |
| 95 sum_b += b; \ | 97 sum_b += b; \ |
| 96 sum_g += g; \ | 98 sum_g += g; \ |
| 97 sum_r += r; \ | 99 sum_r += r; \ |
| 98 *y_buf++ = RGBToY(r, g, b); | 100 *y_buf++ = RGBToY(r, g, b); |
| 99 | 101 |
| 100 static inline void ConvertRGBToYUV_V2H2(const uint8* rgb_buf_1, | 102 static inline void ConvertRGBToYUV_V2H2(const uint8_t* rgb_buf_1, |
| 101 const uint8* rgb_buf_2, | 103 const uint8_t* rgb_buf_2, |
| 102 uint8* y_buf_1, | 104 uint8_t* y_buf_1, |
| 103 uint8* y_buf_2, | 105 uint8_t* y_buf_2, |
| 104 uint8* u_buf, | 106 uint8_t* u_buf, |
| 105 uint8* v_buf) { | 107 uint8_t* v_buf) { |
| 106 int sum_b = 0; | 108 int sum_b = 0; |
| 107 int sum_g = 0; | 109 int sum_g = 0; |
| 108 int sum_r = 0; | 110 int sum_r = 0; |
| 109 int r, g, b; | 111 int r, g, b; |
| 110 | 112 |
| 111 | 113 |
| 112 | 114 |
| 113 CONVERT_Y(rgb_buf_1, y_buf_1); | 115 CONVERT_Y(rgb_buf_1, y_buf_1); |
| 114 CONVERT_Y(rgb_buf_1, y_buf_1); | 116 CONVERT_Y(rgb_buf_1, y_buf_1); |
| 115 CONVERT_Y(rgb_buf_2, y_buf_2); | 117 CONVERT_Y(rgb_buf_2, y_buf_2); |
| 116 CONVERT_Y(rgb_buf_2, y_buf_2); | 118 CONVERT_Y(rgb_buf_2, y_buf_2); |
| 117 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2); | 119 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2); |
| 118 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2); | 120 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2); |
| 119 } | 121 } |
| 120 | 122 |
| 121 static inline void ConvertRGBToYUV_V2H1(const uint8* rgb_buf_1, | 123 static inline void ConvertRGBToYUV_V2H1(const uint8_t* rgb_buf_1, |
| 122 const uint8* rgb_buf_2, | 124 const uint8_t* rgb_buf_2, |
| 123 uint8* y_buf_1, | 125 uint8_t* y_buf_1, |
| 124 uint8* y_buf_2, | 126 uint8_t* y_buf_2, |
| 125 uint8* u_buf, | 127 uint8_t* u_buf, |
| 126 uint8* v_buf) { | 128 uint8_t* v_buf) { |
| 127 int sum_b = 0; | 129 int sum_b = 0; |
| 128 int sum_g = 0; | 130 int sum_g = 0; |
| 129 int sum_r = 0; | 131 int sum_r = 0; |
| 130 int r, g, b; | 132 int r, g, b; |
| 131 | 133 |
| 132 CONVERT_Y(rgb_buf_1, y_buf_1); | 134 CONVERT_Y(rgb_buf_1, y_buf_1); |
| 133 CONVERT_Y(rgb_buf_2, y_buf_2); | 135 CONVERT_Y(rgb_buf_2, y_buf_2); |
| 134 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); | 136 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); |
| 135 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); | 137 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); |
| 136 } | 138 } |
| 137 | 139 |
| 138 static inline void ConvertRGBToYUV_V1H2(const uint8* rgb_buf, | 140 static inline void ConvertRGBToYUV_V1H2(const uint8_t* rgb_buf, |
| 139 uint8* y_buf, | 141 uint8_t* y_buf, |
| 140 uint8* u_buf, | 142 uint8_t* u_buf, |
| 141 uint8* v_buf) { | 143 uint8_t* v_buf) { |
| 142 int sum_b = 0; | 144 int sum_b = 0; |
| 143 int sum_g = 0; | 145 int sum_g = 0; |
| 144 int sum_r = 0; | 146 int sum_r = 0; |
| 145 int r, g, b; | 147 int r, g, b; |
| 146 | 148 |
| 147 CONVERT_Y(rgb_buf, y_buf); | 149 CONVERT_Y(rgb_buf, y_buf); |
| 148 CONVERT_Y(rgb_buf, y_buf); | 150 CONVERT_Y(rgb_buf, y_buf); |
| 149 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); | 151 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); |
| 150 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); | 152 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); |
| 151 } | 153 } |
| 152 | 154 |
| 153 static inline void ConvertRGBToYUV_V1H1(const uint8* rgb_buf, | 155 static inline void ConvertRGBToYUV_V1H1(const uint8_t* rgb_buf, |
| 154 uint8* y_buf, | 156 uint8_t* y_buf, |
| 155 uint8* u_buf, | 157 uint8_t* u_buf, |
| 156 uint8* v_buf) { | 158 uint8_t* v_buf) { |
| 157 int sum_b = 0; | 159 int sum_b = 0; |
| 158 int sum_g = 0; | 160 int sum_g = 0; |
| 159 int sum_r = 0; | 161 int sum_r = 0; |
| 160 int r, g, b; | 162 int r, g, b; |
| 161 | 163 |
| 162 CONVERT_Y(rgb_buf, y_buf); | 164 CONVERT_Y(rgb_buf, y_buf); |
| 163 *u_buf++ = RGBToU(r, g, b, 0); | 165 *u_buf++ = RGBToU(r, g, b, 0); |
| 164 *v_buf++ = RGBToV(r, g, b, 0); | 166 *v_buf++ = RGBToV(r, g, b, 0); |
| 165 } | 167 } |
| 166 | 168 |
| 167 static void ConvertRGB32ToYUVRow_SSE2(const uint8* rgb_buf_1, | 169 static void ConvertRGB32ToYUVRow_SSE2(const uint8_t* rgb_buf_1, |
| 168 const uint8* rgb_buf_2, | 170 const uint8_t* rgb_buf_2, |
| 169 uint8* y_buf_1, | 171 uint8_t* y_buf_1, |
| 170 uint8* y_buf_2, | 172 uint8_t* y_buf_2, |
| 171 uint8* u_buf, | 173 uint8_t* u_buf, |
| 172 uint8* v_buf, | 174 uint8_t* v_buf, |
| 173 int width) { | 175 int width) { |
| 174 while (width >= 4) { | 176 while (width >= 4) { |
| 175 // Name for the Y pixels: | 177 // Name for the Y pixels: |
| 176 // Row 1: a b c d | 178 // Row 1: a b c d |
| 177 // Row 2: e f g h | 179 // Row 2: e f g h |
| 178 // | 180 // |
| 179 // First row 4 pixels. | 181 // First row 4 pixels. |
| 180 __m128i rgb_row_1 = _mm_loadu_si128( | 182 __m128i rgb_row_1 = _mm_loadu_si128( |
| 181 reinterpret_cast<const __m128i*>(rgb_buf_1)); | 183 reinterpret_cast<const __m128i*>(rgb_buf_1)); |
| 182 __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1); | 184 __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1); |
| (...skipping 23 matching lines...) Expand all Loading... |
| 206 (2 << 6) | (2 << 2))); | 208 (2 << 6) | (2 << 2))); |
| 207 __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd); | 209 __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd); |
| 208 | 210 |
| 209 // Down shift back to 8bits range. | 211 // Down shift back to 8bits range. |
| 210 __m128i y_offset = _mm_load_si128( | 212 __m128i y_offset = _mm_load_si128( |
| 211 reinterpret_cast<const __m128i*>(kYOffset)); | 213 reinterpret_cast<const __m128i*>(kYOffset)); |
| 212 y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT); | 214 y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT); |
| 213 y_abcd = _mm_add_epi32(y_abcd, y_offset); | 215 y_abcd = _mm_add_epi32(y_abcd, y_offset); |
| 214 y_abcd = _mm_packs_epi32(y_abcd, y_abcd); | 216 y_abcd = _mm_packs_epi32(y_abcd, y_abcd); |
| 215 y_abcd = _mm_packus_epi16(y_abcd, y_abcd); | 217 y_abcd = _mm_packus_epi16(y_abcd, y_abcd); |
| 216 *reinterpret_cast<uint32*>(y_buf_1) = _mm_cvtsi128_si32(y_abcd); | 218 *reinterpret_cast<uint32_t*>(y_buf_1) = _mm_cvtsi128_si32(y_abcd); |
| 217 y_buf_1 += 4; | 219 y_buf_1 += 4; |
| 218 | 220 |
| 219 // Second row 4 pixels. | 221 // Second row 4 pixels. |
| 220 __m128i rgb_row_2 = _mm_loadu_si128( | 222 __m128i rgb_row_2 = _mm_loadu_si128( |
| 221 reinterpret_cast<const __m128i*>(rgb_buf_2)); | 223 reinterpret_cast<const __m128i*>(rgb_buf_2)); |
| 222 __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2); | 224 __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2); |
| 223 __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2); | 225 __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2); |
| 224 __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2); | 226 __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2); |
| 225 | 227 |
| 226 // Add two rows together. | 228 // Add two rows together. |
| (...skipping 12 matching lines...) Expand all Loading... |
| 239 (3 << 6) | (1 << 4) | (3 << 2) | 1)); | 241 (3 << 6) | (1 << 4) | (3 << 2) | 1)); |
| 240 __m128i r_efgh = _mm_castps_si128( | 242 __m128i r_efgh = _mm_castps_si128( |
| 241 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h), | 243 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h), |
| 242 _mm_castsi128_ps(rgb_e_f), | 244 _mm_castsi128_ps(rgb_e_f), |
| 243 (2 << 6) | (2 << 2))); | 245 (2 << 6) | (2 << 2))); |
| 244 __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh); | 246 __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh); |
| 245 y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT); | 247 y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT); |
| 246 y_efgh = _mm_add_epi32(y_efgh, y_offset); | 248 y_efgh = _mm_add_epi32(y_efgh, y_offset); |
| 247 y_efgh = _mm_packs_epi32(y_efgh, y_efgh); | 249 y_efgh = _mm_packs_epi32(y_efgh, y_efgh); |
| 248 y_efgh = _mm_packus_epi16(y_efgh, y_efgh); | 250 y_efgh = _mm_packus_epi16(y_efgh, y_efgh); |
| 249 *reinterpret_cast<uint32*>(y_buf_2) = _mm_cvtsi128_si32(y_efgh); | 251 *reinterpret_cast<uint32_t*>(y_buf_2) = _mm_cvtsi128_si32(y_efgh); |
| 250 y_buf_2 += 4; | 252 y_buf_2 += 4; |
| 251 | 253 |
| 252 __m128i rgb_ae_cg = _mm_castps_si128( | 254 __m128i rgb_ae_cg = _mm_castps_si128( |
| 253 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), | 255 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), |
| 254 _mm_castsi128_ps(rgb_ae_bf), | 256 _mm_castsi128_ps(rgb_ae_bf), |
| 255 (3 << 6) | (2 << 4) | (3 << 2) | 2)); | 257 (3 << 6) | (2 << 4) | (3 << 2) | 2)); |
| 256 __m128i rgb_bf_dh = _mm_castps_si128( | 258 __m128i rgb_bf_dh = _mm_castps_si128( |
| 257 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), | 259 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), |
| 258 _mm_castsi128_ps(rgb_ae_bf), | 260 _mm_castsi128_ps(rgb_ae_bf), |
| 259 (1 << 6) | (1 << 2))); | 261 (1 << 6) | (1 << 2))); |
| 260 | 262 |
| 261 // This is a 2x2 subsampling for 2 pixels. | 263 // This is a 2x2 subsampling for 2 pixels. |
| 262 __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh); | 264 __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh); |
| 263 | 265 |
| 264 // Do a multiply add with U table. | 266 // Do a multiply add with U table. |
| 265 __m128i u_a_b = _mm_madd_epi16( | 267 __m128i u_a_b = _mm_madd_epi16( |
| 266 rgb_abef_cdgh, | 268 rgb_abef_cdgh, |
| 267 _mm_load_si128( | 269 _mm_load_si128( |
| 268 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8))); | 270 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8))); |
| 269 u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) | 1)), | 271 u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) | 1)), |
| 270 _mm_shuffle_epi32(u_a_b, (2 << 2))); | 272 _mm_shuffle_epi32(u_a_b, (2 << 2))); |
| 271 // Right shift 14 because of 12 from fixed point and 2 from subsampling. | 273 // Right shift 14 because of 12 from fixed point and 2 from subsampling. |
| 272 u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2); | 274 u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2); |
| 273 __m128i uv_offset = _mm_slli_epi32(y_offset, 3); | 275 __m128i uv_offset = _mm_slli_epi32(y_offset, 3); |
| 274 u_a_b = _mm_add_epi32(u_a_b, uv_offset); | 276 u_a_b = _mm_add_epi32(u_a_b, uv_offset); |
| 275 u_a_b = _mm_packs_epi32(u_a_b, u_a_b); | 277 u_a_b = _mm_packs_epi32(u_a_b, u_a_b); |
| 276 u_a_b = _mm_packus_epi16(u_a_b, u_a_b); | 278 u_a_b = _mm_packus_epi16(u_a_b, u_a_b); |
| 277 *reinterpret_cast<uint16*>(u_buf) = | 279 *reinterpret_cast<uint16_t*>(u_buf) = |
| 278 static_cast<uint16>(_mm_extract_epi16(u_a_b, 0)); | 280 static_cast<uint16_t>(_mm_extract_epi16(u_a_b, 0)); |
| 279 u_buf += 2; | 281 u_buf += 2; |
| 280 | 282 |
| 281 __m128i v_a_b = _mm_madd_epi16( | 283 __m128i v_a_b = _mm_madd_epi16( |
| 282 rgb_abef_cdgh, | 284 rgb_abef_cdgh, |
| 283 _mm_load_si128( | 285 _mm_load_si128( |
| 284 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16))); | 286 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16))); |
| 285 v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) | 1)), | 287 v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) | 1)), |
| 286 _mm_shuffle_epi32(v_a_b, (2 << 2))); | 288 _mm_shuffle_epi32(v_a_b, (2 << 2))); |
| 287 v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2); | 289 v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2); |
| 288 v_a_b = _mm_add_epi32(v_a_b, uv_offset); | 290 v_a_b = _mm_add_epi32(v_a_b, uv_offset); |
| 289 v_a_b = _mm_packs_epi32(v_a_b, v_a_b); | 291 v_a_b = _mm_packs_epi32(v_a_b, v_a_b); |
| 290 v_a_b = _mm_packus_epi16(v_a_b, v_a_b); | 292 v_a_b = _mm_packus_epi16(v_a_b, v_a_b); |
| 291 *reinterpret_cast<uint16*>(v_buf) = | 293 *reinterpret_cast<uint16_t*>(v_buf) = |
| 292 static_cast<uint16>(_mm_extract_epi16(v_a_b, 0)); | 294 static_cast<uint16_t>(_mm_extract_epi16(v_a_b, 0)); |
| 293 v_buf += 2; | 295 v_buf += 2; |
| 294 | 296 |
| 295 rgb_buf_1 += 16; | 297 rgb_buf_1 += 16; |
| 296 rgb_buf_2 += 16; | 298 rgb_buf_2 += 16; |
| 297 | 299 |
| 298 // Move forward by 4 pixels. | 300 // Move forward by 4 pixels. |
| 299 width -= 4; | 301 width -= 4; |
| 300 } | 302 } |
| 301 | 303 |
| 302 // Just use C code to convert the remaining pixels. | 304 // Just use C code to convert the remaining pixels. |
| 303 if (width >= 2) { | 305 if (width >= 2) { |
| 304 ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); | 306 ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); |
| 305 rgb_buf_1 += 8; | 307 rgb_buf_1 += 8; |
| 306 rgb_buf_2 += 8; | 308 rgb_buf_2 += 8; |
| 307 y_buf_1 += 2; | 309 y_buf_1 += 2; |
| 308 y_buf_2 += 2; | 310 y_buf_2 += 2; |
| 309 ++u_buf; | 311 ++u_buf; |
| 310 ++v_buf; | 312 ++v_buf; |
| 311 width -= 2; | 313 width -= 2; |
| 312 } | 314 } |
| 313 | 315 |
| 314 if (width) | 316 if (width) |
| 315 ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); | 317 ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); |
| 316 } | 318 } |
| 317 | 319 |
| 318 extern void ConvertRGB32ToYUV_SSE2(const uint8* rgbframe, | 320 extern void ConvertRGB32ToYUV_SSE2(const uint8_t* rgbframe, |
| 319 uint8* yplane, | 321 uint8_t* yplane, |
| 320 uint8* uplane, | 322 uint8_t* uplane, |
| 321 uint8* vplane, | 323 uint8_t* vplane, |
| 322 int width, | 324 int width, |
| 323 int height, | 325 int height, |
| 324 int rgbstride, | 326 int rgbstride, |
| 325 int ystride, | 327 int ystride, |
| 326 int uvstride) { | 328 int uvstride) { |
| 327 while (height >= 2) { | 329 while (height >= 2) { |
| 328 ConvertRGB32ToYUVRow_SSE2(rgbframe, | 330 ConvertRGB32ToYUVRow_SSE2(rgbframe, |
| 329 rgbframe + rgbstride, | 331 rgbframe + rgbstride, |
| 330 yplane, | 332 yplane, |
| 331 yplane + ystride, | 333 yplane + ystride, |
| (...skipping 17 matching lines...) Expand all Loading... |
| 349 yplane += 2; | 351 yplane += 2; |
| 350 ++uplane; | 352 ++uplane; |
| 351 ++vplane; | 353 ++vplane; |
| 352 width -= 2; | 354 width -= 2; |
| 353 } | 355 } |
| 354 | 356 |
| 355 if (width) | 357 if (width) |
| 356 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); | 358 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); |
| 357 } | 359 } |
| 358 | 360 |
| 359 void ConvertRGB32ToYUV_SSE2_Reference(const uint8* rgbframe, | 361 void ConvertRGB32ToYUV_SSE2_Reference(const uint8_t* rgbframe, |
| 360 uint8* yplane, | 362 uint8_t* yplane, |
| 361 uint8* uplane, | 363 uint8_t* uplane, |
| 362 uint8* vplane, | 364 uint8_t* vplane, |
| 363 int width, | 365 int width, |
| 364 int height, | 366 int height, |
| 365 int rgbstride, | 367 int rgbstride, |
| 366 int ystride, | 368 int ystride, |
| 367 int uvstride) { | 369 int uvstride) { |
| 368 while (height >= 2) { | 370 while (height >= 2) { |
| 369 int i = 0; | 371 int i = 0; |
| 370 | 372 |
| 371 // Convert a 2x2 block. | 373 // Convert a 2x2 block. |
| 372 while (i + 2 <= width) { | 374 while (i + 2 <= width) { |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 408 ++vplane; | 410 ++vplane; |
| 409 width -= 2; | 411 width -= 2; |
| 410 } | 412 } |
| 411 | 413 |
| 412 // Handle the last pixel in the last row. | 414 // Handle the last pixel in the last row. |
| 413 if (width) | 415 if (width) |
| 414 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); | 416 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); |
| 415 } | 417 } |
| 416 | 418 |
| 417 } // namespace media | 419 } // namespace media |
| OLD | NEW |