OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "build/build_config.h" | 5 #include "build/build_config.h" |
6 #include "media/base/simd/convert_rgb_to_yuv.h" | 6 #include "media/base/simd/convert_rgb_to_yuv.h" |
7 | 7 |
8 #if defined(COMPILER_MSVC) | 8 #if defined(COMPILER_MSVC) |
9 #include <intrin.h> | 9 #include <intrin.h> |
10 #else | 10 #else |
11 #include <mmintrin.h> | 11 #include <mmintrin.h> |
12 #include <emmintrin.h> | 12 #include <emmintrin.h> |
13 #endif | 13 #endif |
14 | 14 |
15 #if defined(COMPILER_MSVC) | 15 #if defined(COMPILER_MSVC) |
16 #define SIMD_ALIGNED(var) __declspec(align(16)) var | 16 #define SIMD_ALIGNED(var) __declspec(align(16)) var |
17 #else | 17 #else |
18 #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) | 18 #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) |
19 #endif | 19 #endif |
20 | 20 |
21 namespace media { | 21 namespace media { |
22 | 22 |
// Fixed-point helpers: FIX(x) scales x by 2^FIX_SHIFT (4096), so the
// conversion coefficients are stored with 12 fractional bits and results are
// brought back to integer range with a right shift by FIX_SHIFT.
23 #define FIX_SHIFT 12 | 23 #define FIX_SHIFT 12 |
24 #define FIX(x) ((x) * (1 << FIX_SHIFT)) | 24 #define FIX(x) ((x) * (1 << FIX_SHIFT)) |
25 | 25 |
26 // Define a convenient macro to do static cast. | 26 // Define a convenient macro to do static cast. |
// INT16_FIX(x) is FIX(x) truncated to a 16-bit signed integer; it is only
// used to build the coefficient table below and is #undef'd afterwards.
27 #define INT16_FIX(x) static_cast<int16>(FIX(x)) | 27 #define INT16_FIX(x) static_cast<int16_t>(FIX(x)) |
28 | 28 |
29 // Android's pixel layout is RGBA, while other platforms | 29 // Android's pixel layout is RGBA, while other platforms |
30 // are BGRA. | 30 // are BGRA. |
//
// Layout of the table: 3 groups of 8 fixed-point entries, 16-byte aligned.
//   entries  0..7  : Y coefficients
//   entries  8..15 : U coefficients
//   entries 16..23 : V coefficients
// Each group holds the same {c0, c1, c2, 0} quadruple repeated for every
// pixel slot; the 0 sits in the position of the fourth byte of a pixel,
// which the scalar path also skips.
// The two variants contain the same weights; only the order of the first and
// third entry of each quadruple is swapped to follow the byte order in memory.
31 #if defined(OS_ANDROID) | 31 #if defined(OS_ANDROID) |
32 SIMD_ALIGNED(const int16 ConvertRGBAToYUV_kTable[8 * 3]) = { | 32 SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = { |
33 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, | 33 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, |
34 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, | 34 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, |
35 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, | 35 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, |
36 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, | 36 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, |
37 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, | 37 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, |
38 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, | 38 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, |
39 }; | 39 }; |
40 #else | 40 #else |
41 SIMD_ALIGNED(const int16 ConvertRGBAToYUV_kTable[8 * 3]) = { | 41 SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = { |
42 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, | 42 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, |
43 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, | 43 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, |
44 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, | 44 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, |
45 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, | 45 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, |
46 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, | 46 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, |
47 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, | 47 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, |
48 }; | 48 }; |
49 #endif | 49 #endif |
50 | 50 |
51 #undef INT16_FIX | 51 #undef INT16_FIX |
52 | 52 |
53 // This is the final offset for the conversion from signed yuv values to | 53 // This is the final offset for the conversion from signed yuv values to |
54 // unsigned values. It is arranged so that offset of 16 is applied to Y | 54 // unsigned values. It is arranged so that offset of 16 is applied to Y |
55 // components and 128 is added to UV components for 2 pixels. | 55 // components and 128 is added to UV components for 2 pixels. |
// Note: the array itself only stores the Y offset (16) in all four 32-bit
// lanes. The UV offset of 128 is not stored here; ConvertRGB32ToYUVRow_SSE2
// derives it by shifting the loaded vector left by 3 (16 << 3 == 128).
56 SIMD_ALIGNED(const int32 kYOffset[4]) = {16, 16, 16, 16}; | 56 SIMD_ALIGNED(const int32_t kYOffset[4]) = {16, 16, 16, 16}; |
57 | 57 |
// Saturates |value| to the range [0, 255] and returns it as an unsigned
// 8-bit value: negatives become 0, anything above 255 becomes 255.
58 static inline uint8 Clamp(int value) { | 58 static inline uint8_t Clamp(int value) { |
59 if (value < 0) | 59 if (value < 0) |
60 return 0; | 60 return 0; |
61 if (value > 255) | 61 if (value > 255) |
62 return 255; | 62 return 255; |
63 return static_cast<uint8>(value); | 63 return static_cast<uint8_t>(value); |
64 } | 64 } |
65 | 65 |
// Computes the Y (luma) byte for one pixel.
// The three channel values are weighted with table entries 0..2, the
// fixed-point fraction is dropped with a right shift by FIX_SHIFT, 16 is
// added and the result is clamped to [0, 255].
// Table entry 0 is always paired with |b|, i.e. with the first byte that
// CONVERT_Y reads from the pixel; the OS_ANDROID table reorders its weights
// to match, so the parameter names follow the BGRA byte order.
66 static inline uint8 RGBToY(int r, int g, int b) { | 66 static inline uint8_t RGBToY(int r, int g, int b) { |
67 int y = ConvertRGBAToYUV_kTable[0] * b + | 67 int y = ConvertRGBAToYUV_kTable[0] * b + |
68 ConvertRGBAToYUV_kTable[1] * g + | 68 ConvertRGBAToYUV_kTable[1] * g + |
69 ConvertRGBAToYUV_kTable[2] * r; | 69 ConvertRGBAToYUV_kTable[2] * r; |
70 y >>= FIX_SHIFT; | 70 y >>= FIX_SHIFT; |
71 return Clamp(y + 16); | 71 return Clamp(y + 16); |
72 } | 72 } |
73 | 73 |
// Computes the U (chroma) byte from channel values using table entries 8..10.
// |shift| is an extra right shift applied together with FIX_SHIFT so that
// callers can pass channel *sums* and get an average: callers in this file
// pass 0 for one pixel, 1 for the sum of two pixels and 2 for the sum of four.
// The shifted value is offset by 128 and clamped to [0, 255].
74 static inline uint8 RGBToU(int r, int g, int b, int shift) { | 74 static inline uint8_t RGBToU(int r, int g, int b, int shift) { |
75 int u = ConvertRGBAToYUV_kTable[8] * b + | 75 int u = ConvertRGBAToYUV_kTable[8] * b + |
76 ConvertRGBAToYUV_kTable[9] * g + | 76 ConvertRGBAToYUV_kTable[9] * g + |
77 ConvertRGBAToYUV_kTable[10] * r; | 77 ConvertRGBAToYUV_kTable[10] * r; |
// The weighted sum can be negative here; the code relies on >> keeping the
// sign for negative values.
78 u >>= FIX_SHIFT + shift; | 78 u >>= FIX_SHIFT + shift; |
79 return Clamp(u + 128); | 79 return Clamp(u + 128); |
80 } | 80 } |
81 | 81 |
// Computes the V (chroma) byte from channel values using table entries
// 16..18. |shift| has the same meaning as in RGBToU: an additional right
// shift (0, 1 or 2 in this file) that averages summed channel values.
// The shifted value is offset by 128 and clamped to [0, 255].
82 static inline uint8 RGBToV(int r, int g, int b, int shift) { | 82 static inline uint8_t RGBToV(int r, int g, int b, int shift) { |
83 int v = ConvertRGBAToYUV_kTable[16] * b + | 83 int v = ConvertRGBAToYUV_kTable[16] * b + |
84 ConvertRGBAToYUV_kTable[17] * g + | 84 ConvertRGBAToYUV_kTable[17] * g + |
85 ConvertRGBAToYUV_kTable[18] * r; | 85 ConvertRGBAToYUV_kTable[18] * r; |
// The weighted sum can be negative here; the code relies on >> keeping the
// sign for negative values.
86 v >>= FIX_SHIFT + shift; | 86 v >>= FIX_SHIFT + shift; |
87 return Clamp(v + 128); | 87 return Clamp(v + 128); |
88 } | 88 } |
89 | 89 |
// CONVERT_Y(rgb_buf, y_buf): converts one 4-byte pixel to a Y byte.
//  - Reads three bytes from rgb_buf into b, g, r (in that order) and skips
//    the fourth byte, advancing rgb_buf by 4 in total.
//  - Adds the three values to the running sums sum_b, sum_g, sum_r, which
//    the callers later use to compute averaged U/V values.
//  - Stores RGBToY(r, g, b) to *y_buf and advances y_buf by 1.
// The expansion site must declare int locals r, g, b, sum_r, sum_g, sum_b.
// The macro expands to several statements (no do/while wrapper), so it must
// not be used as the body of an unbraced if/for/while.
90 #define CONVERT_Y(rgb_buf, y_buf) \ | 90 #define CONVERT_Y(rgb_buf, y_buf) \ |
91 b = *rgb_buf++; \ | 91 b = *rgb_buf++; \ |
92 g = *rgb_buf++; \ | 92 g = *rgb_buf++; \ |
93 r = *rgb_buf++; \ | 93 r = *rgb_buf++; \ |
94 ++rgb_buf; \ | 94 ++rgb_buf; \ |
95 sum_b += b; \ | 95 sum_b += b; \ |
96 sum_g += g; \ | 96 sum_g += g; \ |
97 sum_r += r; \ | 97 sum_r += r; \ |
98 *y_buf++ = RGBToY(r, g, b); | 98 *y_buf++ = RGBToY(r, g, b); |
99 | 99 |
// Converts a 2x2 block of pixels (2 rows, 2 pixels per row).
//  rgb_buf_1 / rgb_buf_2: two 4-byte pixels (8 bytes) are read from each row.
//  y_buf_1 / y_buf_2:     two Y bytes are written for each row.
//  u_buf / v_buf:         one U and one V byte are written, computed from the
//                         channel sums of all four pixels (shift of 2 divides
//                         the sums by 4).
// All pointers are passed by value, so the increments performed here do not
// advance the caller's pointers; callers step their own pointers afterwards.
100 static inline void ConvertRGBToYUV_V2H2(const uint8* rgb_buf_1, | 100 static inline void ConvertRGBToYUV_V2H2(const uint8_t* rgb_buf_1, |
101 const uint8* rgb_buf_2, | 101 const uint8_t* rgb_buf_2, |
102 uint8* y_buf_1, | 102 uint8_t* y_buf_1, |
103 uint8* y_buf_2, | 103 uint8_t* y_buf_2, |
104 uint8* u_buf, | 104 uint8_t* u_buf, |
105 uint8* v_buf) { | 105 uint8_t* v_buf) { |
106 int sum_b = 0; | 106 int sum_b = 0; |
107 int sum_g = 0; | 107 int sum_g = 0; |
108 int sum_r = 0; | 108 int sum_r = 0; |
109 int r, g, b; | 109 int r, g, b; |
110 | 110 |
111 | 111 |
112 | 112 |
// Two pixels from the first row, then two from the second; each call also
// accumulates into sum_r/sum_g/sum_b.
113 CONVERT_Y(rgb_buf_1, y_buf_1); | 113 CONVERT_Y(rgb_buf_1, y_buf_1); |
114 CONVERT_Y(rgb_buf_1, y_buf_1); | 114 CONVERT_Y(rgb_buf_1, y_buf_1); |
115 CONVERT_Y(rgb_buf_2, y_buf_2); | 115 CONVERT_Y(rgb_buf_2, y_buf_2); |
116 CONVERT_Y(rgb_buf_2, y_buf_2); | 116 CONVERT_Y(rgb_buf_2, y_buf_2); |
117 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2); | 117 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2); |
118 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2); | 118 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2); |
119 } | 119 } |
120 | 120 |
// Converts a 2x1 block: one pixel from each of two rows (used for the last
// column when the width is odd).
//  rgb_buf_1 / rgb_buf_2: one 4-byte pixel is read from each row.
//  y_buf_1 / y_buf_2:     one Y byte is written for each row.
//  u_buf / v_buf:         one U and one V byte are written, computed from the
//                         channel sums of the two pixels (shift of 1 divides
//                         the sums by 2).
121 static inline void ConvertRGBToYUV_V2H1(const uint8* rgb_buf_1, | 121 static inline void ConvertRGBToYUV_V2H1(const uint8_t* rgb_buf_1, |
122 const uint8* rgb_buf_2, | 122 const uint8_t* rgb_buf_2, |
123 uint8* y_buf_1, | 123 uint8_t* y_buf_1, |
124 uint8* y_buf_2, | 124 uint8_t* y_buf_2, |
125 uint8* u_buf, | 125 uint8_t* u_buf, |
126 uint8* v_buf) { | 126 uint8_t* v_buf) { |
127 int sum_b = 0; | 127 int sum_b = 0; |
128 int sum_g = 0; | 128 int sum_g = 0; |
129 int sum_r = 0; | 129 int sum_r = 0; |
130 int r, g, b; | 130 int r, g, b; |
131 | 131 |
132 CONVERT_Y(rgb_buf_1, y_buf_1); | 132 CONVERT_Y(rgb_buf_1, y_buf_1); |
133 CONVERT_Y(rgb_buf_2, y_buf_2); | 133 CONVERT_Y(rgb_buf_2, y_buf_2); |
134 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); | 134 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); |
135 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); | 135 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); |
136 } | 136 } |
137 | 137 |
// Converts a 1x2 block: two adjacent pixels of a single row.
//  rgb_buf: two 4-byte pixels (8 bytes) are read.
//  y_buf:   two Y bytes are written.
//  u_buf / v_buf: one U and one V byte are written, computed from the channel
//                 sums of the two pixels (shift of 1 divides the sums by 2).
138 static inline void ConvertRGBToYUV_V1H2(const uint8* rgb_buf, | 138 static inline void ConvertRGBToYUV_V1H2(const uint8_t* rgb_buf, |
139 uint8* y_buf, | 139 uint8_t* y_buf, |
140 uint8* u_buf, | 140 uint8_t* u_buf, |
141 uint8* v_buf) { | 141 uint8_t* v_buf) { |
142 int sum_b = 0; | 142 int sum_b = 0; |
143 int sum_g = 0; | 143 int sum_g = 0; |
144 int sum_r = 0; | 144 int sum_r = 0; |
145 int r, g, b; | 145 int r, g, b; |
146 | 146 |
147 CONVERT_Y(rgb_buf, y_buf); | 147 CONVERT_Y(rgb_buf, y_buf); |
148 CONVERT_Y(rgb_buf, y_buf); | 148 CONVERT_Y(rgb_buf, y_buf); |
149 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); | 149 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); |
150 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); | 150 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); |
151 } | 151 } |
152 | 152 |
// Converts a single pixel: reads one 4-byte pixel from rgb_buf and writes one
// Y, one U and one V byte. No averaging takes place, so U and V are computed
// from that pixel's own channel values with a shift of 0.
153 static inline void ConvertRGBToYUV_V1H1(const uint8* rgb_buf, | 153 static inline void ConvertRGBToYUV_V1H1(const uint8_t* rgb_buf, |
154 uint8* y_buf, | 154 uint8_t* y_buf, |
155 uint8* u_buf, | 155 uint8_t* u_buf, |
156 uint8* v_buf) { | 156 uint8_t* v_buf) { |
// The sum_* locals are never read in this function; they exist only because
// CONVERT_Y unconditionally updates them.
157 int sum_b = 0; | 157 int sum_b = 0; |
158 int sum_g = 0; | 158 int sum_g = 0; |
159 int sum_r = 0; | 159 int sum_r = 0; |
160 int r, g, b; | 160 int r, g, b; |
161 | 161 |
162 CONVERT_Y(rgb_buf, y_buf); | 162 CONVERT_Y(rgb_buf, y_buf); |
// r, g and b still hold the channel values of the pixel read by CONVERT_Y.
163 *u_buf++ = RGBToU(r, g, b, 0); | 163 *u_buf++ = RGBToU(r, g, b, 0); |
164 *v_buf++ = RGBToV(r, g, b, 0); | 164 *v_buf++ = RGBToV(r, g, b, 0); |
165 } | 165 } |
166 | 166 |
167 static void ConvertRGB32ToYUVRow_SSE2(const uint8* rgb_buf_1, | 167 static void ConvertRGB32ToYUVRow_SSE2(const uint8_t* rgb_buf_1, |
168 const uint8* rgb_buf_2, | 168 const uint8_t* rgb_buf_2, |
169 uint8* y_buf_1, | 169 uint8_t* y_buf_1, |
170 uint8* y_buf_2, | 170 uint8_t* y_buf_2, |
171 uint8* u_buf, | 171 uint8_t* u_buf, |
172 uint8* v_buf, | 172 uint8_t* v_buf, |
173 int width) { | 173 int width) { |
174 while (width >= 4) { | 174 while (width >= 4) { |
175 // Name for the Y pixels: | 175 // Name for the Y pixels: |
176 // Row 1: a b c d | 176 // Row 1: a b c d |
177 // Row 2: e f g h | 177 // Row 2: e f g h |
178 // | 178 // |
179 // First row 4 pixels. | 179 // First row 4 pixels. |
180 __m128i rgb_row_1 = _mm_loadu_si128( | 180 __m128i rgb_row_1 = _mm_loadu_si128( |
181 reinterpret_cast<const __m128i*>(rgb_buf_1)); | 181 reinterpret_cast<const __m128i*>(rgb_buf_1)); |
182 __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1); | 182 __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1); |
(...skipping 23 matching lines...) Expand all Loading... |
206 (2 << 6) | (2 << 2))); | 206 (2 << 6) | (2 << 2))); |
207 __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd); | 207 __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd); |
208 | 208 |
209 // Down shift back to 8bits range. | 209 // Down shift back to 8bits range. |
210 __m128i y_offset = _mm_load_si128( | 210 __m128i y_offset = _mm_load_si128( |
211 reinterpret_cast<const __m128i*>(kYOffset)); | 211 reinterpret_cast<const __m128i*>(kYOffset)); |
212 y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT); | 212 y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT); |
213 y_abcd = _mm_add_epi32(y_abcd, y_offset); | 213 y_abcd = _mm_add_epi32(y_abcd, y_offset); |
214 y_abcd = _mm_packs_epi32(y_abcd, y_abcd); | 214 y_abcd = _mm_packs_epi32(y_abcd, y_abcd); |
215 y_abcd = _mm_packus_epi16(y_abcd, y_abcd); | 215 y_abcd = _mm_packus_epi16(y_abcd, y_abcd); |
216 *reinterpret_cast<uint32*>(y_buf_1) = _mm_cvtsi128_si32(y_abcd); | 216 *reinterpret_cast<uint32_t*>(y_buf_1) = _mm_cvtsi128_si32(y_abcd); |
217 y_buf_1 += 4; | 217 y_buf_1 += 4; |
218 | 218 |
219 // Second row 4 pixels. | 219 // Second row 4 pixels. |
220 __m128i rgb_row_2 = _mm_loadu_si128( | 220 __m128i rgb_row_2 = _mm_loadu_si128( |
221 reinterpret_cast<const __m128i*>(rgb_buf_2)); | 221 reinterpret_cast<const __m128i*>(rgb_buf_2)); |
222 __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2); | 222 __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2); |
223 __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2); | 223 __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2); |
224 __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2); | 224 __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2); |
225 | 225 |
226 // Add two rows together. | 226 // Add two rows together. |
(...skipping 12 matching lines...) Expand all Loading... |
239 (3 << 6) | (1 << 4) | (3 << 2) | 1)); | 239 (3 << 6) | (1 << 4) | (3 << 2) | 1)); |
240 __m128i r_efgh = _mm_castps_si128( | 240 __m128i r_efgh = _mm_castps_si128( |
241 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h), | 241 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h), |
242 _mm_castsi128_ps(rgb_e_f), | 242 _mm_castsi128_ps(rgb_e_f), |
243 (2 << 6) | (2 << 2))); | 243 (2 << 6) | (2 << 2))); |
244 __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh); | 244 __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh); |
245 y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT); | 245 y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT); |
246 y_efgh = _mm_add_epi32(y_efgh, y_offset); | 246 y_efgh = _mm_add_epi32(y_efgh, y_offset); |
247 y_efgh = _mm_packs_epi32(y_efgh, y_efgh); | 247 y_efgh = _mm_packs_epi32(y_efgh, y_efgh); |
248 y_efgh = _mm_packus_epi16(y_efgh, y_efgh); | 248 y_efgh = _mm_packus_epi16(y_efgh, y_efgh); |
249 *reinterpret_cast<uint32*>(y_buf_2) = _mm_cvtsi128_si32(y_efgh); | 249 *reinterpret_cast<uint32_t*>(y_buf_2) = _mm_cvtsi128_si32(y_efgh); |
250 y_buf_2 += 4; | 250 y_buf_2 += 4; |
251 | 251 |
252 __m128i rgb_ae_cg = _mm_castps_si128( | 252 __m128i rgb_ae_cg = _mm_castps_si128( |
253 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), | 253 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), |
254 _mm_castsi128_ps(rgb_ae_bf), | 254 _mm_castsi128_ps(rgb_ae_bf), |
255 (3 << 6) | (2 << 4) | (3 << 2) | 2)); | 255 (3 << 6) | (2 << 4) | (3 << 2) | 2)); |
256 __m128i rgb_bf_dh = _mm_castps_si128( | 256 __m128i rgb_bf_dh = _mm_castps_si128( |
257 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), | 257 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), |
258 _mm_castsi128_ps(rgb_ae_bf), | 258 _mm_castsi128_ps(rgb_ae_bf), |
259 (1 << 6) | (1 << 2))); | 259 (1 << 6) | (1 << 2))); |
260 | 260 |
261 // This is a 2x2 subsampling for 2 pixels. | 261 // This is a 2x2 subsampling for 2 pixels. |
262 __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh); | 262 __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh); |
263 | 263 |
264 // Do a multiply add with U table. | 264 // Do a multiply add with U table. |
265 __m128i u_a_b = _mm_madd_epi16( | 265 __m128i u_a_b = _mm_madd_epi16( |
266 rgb_abef_cdgh, | 266 rgb_abef_cdgh, |
267 _mm_load_si128( | 267 _mm_load_si128( |
268 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8))); | 268 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8))); |
269 u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) | 1)), | 269 u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) | 1)), |
270 _mm_shuffle_epi32(u_a_b, (2 << 2))); | 270 _mm_shuffle_epi32(u_a_b, (2 << 2))); |
271 // Right shift 14 because of 12 from fixed point and 2 from subsampling. | 271 // Right shift 14 because of 12 from fixed point and 2 from subsampling. |
272 u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2); | 272 u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2); |
273 __m128i uv_offset = _mm_slli_epi32(y_offset, 3); | 273 __m128i uv_offset = _mm_slli_epi32(y_offset, 3); |
274 u_a_b = _mm_add_epi32(u_a_b, uv_offset); | 274 u_a_b = _mm_add_epi32(u_a_b, uv_offset); |
275 u_a_b = _mm_packs_epi32(u_a_b, u_a_b); | 275 u_a_b = _mm_packs_epi32(u_a_b, u_a_b); |
276 u_a_b = _mm_packus_epi16(u_a_b, u_a_b); | 276 u_a_b = _mm_packus_epi16(u_a_b, u_a_b); |
277 *reinterpret_cast<uint16*>(u_buf) = | 277 *reinterpret_cast<uint16_t*>(u_buf) = |
278 static_cast<uint16>(_mm_extract_epi16(u_a_b, 0)); | 278 static_cast<uint16_t>(_mm_extract_epi16(u_a_b, 0)); |
279 u_buf += 2; | 279 u_buf += 2; |
280 | 280 |
281 __m128i v_a_b = _mm_madd_epi16( | 281 __m128i v_a_b = _mm_madd_epi16( |
282 rgb_abef_cdgh, | 282 rgb_abef_cdgh, |
283 _mm_load_si128( | 283 _mm_load_si128( |
284 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16))); | 284 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16))); |
285 v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) | 1)), | 285 v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) | 1)), |
286 _mm_shuffle_epi32(v_a_b, (2 << 2))); | 286 _mm_shuffle_epi32(v_a_b, (2 << 2))); |
287 v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2); | 287 v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2); |
288 v_a_b = _mm_add_epi32(v_a_b, uv_offset); | 288 v_a_b = _mm_add_epi32(v_a_b, uv_offset); |
289 v_a_b = _mm_packs_epi32(v_a_b, v_a_b); | 289 v_a_b = _mm_packs_epi32(v_a_b, v_a_b); |
290 v_a_b = _mm_packus_epi16(v_a_b, v_a_b); | 290 v_a_b = _mm_packus_epi16(v_a_b, v_a_b); |
291 *reinterpret_cast<uint16*>(v_buf) = | 291 *reinterpret_cast<uint16_t*>(v_buf) = |
292 static_cast<uint16>(_mm_extract_epi16(v_a_b, 0)); | 292 static_cast<uint16_t>(_mm_extract_epi16(v_a_b, 0)); |
293 v_buf += 2; | 293 v_buf += 2; |
294 | 294 |
295 rgb_buf_1 += 16; | 295 rgb_buf_1 += 16; |
296 rgb_buf_2 += 16; | 296 rgb_buf_2 += 16; |
297 | 297 |
298 // Move forward by 4 pixels. | 298 // Move forward by 4 pixels. |
299 width -= 4; | 299 width -= 4; |
300 } | 300 } |
301 | 301 |
302 // Just use C code to convert the remaining pixels. | 302 // Just use C code to convert the remaining pixels. |
303 if (width >= 2) { | 303 if (width >= 2) { |
304 ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); | 304 ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); |
305 rgb_buf_1 += 8; | 305 rgb_buf_1 += 8; |
306 rgb_buf_2 += 8; | 306 rgb_buf_2 += 8; |
307 y_buf_1 += 2; | 307 y_buf_1 += 2; |
308 y_buf_2 += 2; | 308 y_buf_2 += 2; |
309 ++u_buf; | 309 ++u_buf; |
310 ++v_buf; | 310 ++v_buf; |
311 width -= 2; | 311 width -= 2; |
312 } | 312 } |
313 | 313 |
314 if (width) | 314 if (width) |
315 ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); | 315 ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); |
316 } | 316 } |
317 | 317 |
318 extern void ConvertRGB32ToYUV_SSE2(const uint8* rgbframe, | 318 extern void ConvertRGB32ToYUV_SSE2(const uint8_t* rgbframe, |
319 uint8* yplane, | 319 uint8_t* yplane, |
320 uint8* uplane, | 320 uint8_t* uplane, |
321 uint8* vplane, | 321 uint8_t* vplane, |
322 int width, | 322 int width, |
323 int height, | 323 int height, |
324 int rgbstride, | 324 int rgbstride, |
325 int ystride, | 325 int ystride, |
326 int uvstride) { | 326 int uvstride) { |
327 while (height >= 2) { | 327 while (height >= 2) { |
328 ConvertRGB32ToYUVRow_SSE2(rgbframe, | 328 ConvertRGB32ToYUVRow_SSE2(rgbframe, |
329 rgbframe + rgbstride, | 329 rgbframe + rgbstride, |
330 yplane, | 330 yplane, |
331 yplane + ystride, | 331 yplane + ystride, |
(...skipping 17 matching lines...) Expand all Loading... |
349 yplane += 2; | 349 yplane += 2; |
350 ++uplane; | 350 ++uplane; |
351 ++vplane; | 351 ++vplane; |
352 width -= 2; | 352 width -= 2; |
353 } | 353 } |
354 | 354 |
355 if (width) | 355 if (width) |
356 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); | 356 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); |
357 } | 357 } |
358 | 358 |
359 void ConvertRGB32ToYUV_SSE2_Reference(const uint8* rgbframe, | 359 void ConvertRGB32ToYUV_SSE2_Reference(const uint8_t* rgbframe, |
360 uint8* yplane, | 360 uint8_t* yplane, |
361 uint8* uplane, | 361 uint8_t* uplane, |
362 uint8* vplane, | 362 uint8_t* vplane, |
363 int width, | 363 int width, |
364 int height, | 364 int height, |
365 int rgbstride, | 365 int rgbstride, |
366 int ystride, | 366 int ystride, |
367 int uvstride) { | 367 int uvstride) { |
368 while (height >= 2) { | 368 while (height >= 2) { |
369 int i = 0; | 369 int i = 0; |
370 | 370 |
371 // Convert a 2x2 block. | 371 // Convert a 2x2 block. |
372 while (i + 2 <= width) { | 372 while (i + 2 <= width) { |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
408 ++vplane; | 408 ++vplane; |
409 width -= 2; | 409 width -= 2; |
410 } | 410 } |
411 | 411 |
412 // Handle the last pixel in the last row. | 412 // Handle the last pixel in the last row. |
413 if (width) | 413 if (width) |
414 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); | 414 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); |
415 } | 415 } |
416 | 416 |
417 } // namespace media | 417 } // namespace media |
OLD | NEW |