Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(223)

Side by Side Diff: media/base/simd/convert_rgb_to_yuv_sse2.cc

Issue 2694113002: Delete media/base/yuv_convert and dependents. Prefer libyuv. (Closed)
Patch Set: Fix media_unittests. Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « media/base/simd/convert_rgb_to_yuv_c.cc ('k') | media/base/simd/convert_rgb_to_yuv_ssse3.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <stdint.h>
6
7 #include "build/build_config.h"
8 #include "media/base/simd/convert_rgb_to_yuv.h"
9
10 #if defined(COMPILER_MSVC)
11 #include <intrin.h>
12 #else
13 #include <mmintrin.h>
14 #include <emmintrin.h>
15 #endif
16
// Declares |var| with 16-byte alignment. The attribute spelling and position
// are compiler specific: the GCC-style attribute trails the declarator, while
// MSVC's __declspec precedes it.
#if !defined(COMPILER_MSVC)
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#else
#define SIMD_ALIGNED(var) __declspec(align(16)) var
#endif
22
23 namespace media {
24
25 #define FIX_SHIFT 12
26 #define FIX(x) ((x) * (1 << FIX_SHIFT))
27
28 // Define a convenient macro to do static cast.
29 #define INT16_FIX(x) static_cast<int16_t>(FIX(x))
30
31 // Android's pixel layout is RGBA, while other platforms
32 // are BGRA.
33 #if defined(OS_ANDROID)
34 SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = {
35 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0,
36 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0,
37 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0,
38 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0,
39 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0,
40 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0,
41 };
42 #else
43 SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = {
44 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0,
45 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0,
46 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0,
47 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0,
48 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0,
49 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0,
50 };
51 #endif
52
53 #undef INT16_FIX
54
55 // Bias of 16 that is added to each of the four 32-bit luma sums after they
56 // have been shifted down from fixed point. The SSE2 row converter also derives
57 // the chroma bias of 128 from this vector by shifting each lane left by 3.
58 SIMD_ALIGNED(const int32_t kYOffset[4]) = {16, 16, 16, 16};
59
// Saturates |value| to the range of uint8_t: anything below 0 becomes 0 and
// anything above 255 becomes 255.
static inline uint8_t Clamp(int value) {
  const int floored = value < 0 ? 0 : value;
  return static_cast<uint8_t>(floored > 255 ? 255 : floored);
}
67
68 static inline uint8_t RGBToY(int r, int g, int b) {
69 int y = ConvertRGBAToYUV_kTable[0] * b +
70 ConvertRGBAToYUV_kTable[1] * g +
71 ConvertRGBAToYUV_kTable[2] * r;
72 y >>= FIX_SHIFT;
73 return Clamp(y + 16);
74 }
75
76 static inline uint8_t RGBToU(int r, int g, int b, int shift) {
77 int u = ConvertRGBAToYUV_kTable[8] * b +
78 ConvertRGBAToYUV_kTable[9] * g +
79 ConvertRGBAToYUV_kTable[10] * r;
80 u >>= FIX_SHIFT + shift;
81 return Clamp(u + 128);
82 }
83
84 static inline uint8_t RGBToV(int r, int g, int b, int shift) {
85 int v = ConvertRGBAToYUV_kTable[16] * b +
86 ConvertRGBAToYUV_kTable[17] * g +
87 ConvertRGBAToYUV_kTable[18] * r;
88 v >>= FIX_SHIFT + shift;
89 return Clamp(v + 128);
90 }
91
// Converts the 4-byte pixel at |rgb_buf| to one Y byte written at |y_buf|, then
// advances |rgb_buf| by one pixel and |y_buf| by one byte.
//
// The expansion relies on these int locals being declared by the caller:
//   b, g, r             - receive the pixel's first three bytes, in that order;
//   sum_b, sum_g, sum_r - running totals the channel values are added to.
// The pixel's fourth byte is skipped.
//
// The body is wrapped in do { } while (0) and the arguments are parenthesized
// so that the macro behaves as a single statement, e.g. under an unbraced
// if/else.
#define CONVERT_Y(rgb_buf, y_buf)  \
  do {                             \
    b = (rgb_buf)[0];              \
    g = (rgb_buf)[1];              \
    r = (rgb_buf)[2];              \
    (rgb_buf) += 4;                \
    sum_b += b;                    \
    sum_g += g;                    \
    sum_r += r;                    \
    *(y_buf)++ = RGBToY(r, g, b);  \
  } while (0)
101
102 static inline void ConvertRGBToYUV_V2H2(const uint8_t* rgb_buf_1,
103 const uint8_t* rgb_buf_2,
104 uint8_t* y_buf_1,
105 uint8_t* y_buf_2,
106 uint8_t* u_buf,
107 uint8_t* v_buf) {
108 int sum_b = 0;
109 int sum_g = 0;
110 int sum_r = 0;
111 int r, g, b;
112
113
114
115 CONVERT_Y(rgb_buf_1, y_buf_1);
116 CONVERT_Y(rgb_buf_1, y_buf_1);
117 CONVERT_Y(rgb_buf_2, y_buf_2);
118 CONVERT_Y(rgb_buf_2, y_buf_2);
119 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2);
120 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2);
121 }
122
123 static inline void ConvertRGBToYUV_V2H1(const uint8_t* rgb_buf_1,
124 const uint8_t* rgb_buf_2,
125 uint8_t* y_buf_1,
126 uint8_t* y_buf_2,
127 uint8_t* u_buf,
128 uint8_t* v_buf) {
129 int sum_b = 0;
130 int sum_g = 0;
131 int sum_r = 0;
132 int r, g, b;
133
134 CONVERT_Y(rgb_buf_1, y_buf_1);
135 CONVERT_Y(rgb_buf_2, y_buf_2);
136 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1);
137 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1);
138 }
139
140 static inline void ConvertRGBToYUV_V1H2(const uint8_t* rgb_buf,
141 uint8_t* y_buf,
142 uint8_t* u_buf,
143 uint8_t* v_buf) {
144 int sum_b = 0;
145 int sum_g = 0;
146 int sum_r = 0;
147 int r, g, b;
148
149 CONVERT_Y(rgb_buf, y_buf);
150 CONVERT_Y(rgb_buf, y_buf);
151 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1);
152 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1);
153 }
154
155 static inline void ConvertRGBToYUV_V1H1(const uint8_t* rgb_buf,
156 uint8_t* y_buf,
157 uint8_t* u_buf,
158 uint8_t* v_buf) {
159 int sum_b = 0;
160 int sum_g = 0;
161 int sum_r = 0;
162 int r, g, b;
163
164 CONVERT_Y(rgb_buf, y_buf);
165 *u_buf++ = RGBToU(r, g, b, 0);
166 *v_buf++ = RGBToV(r, g, b, 0);
167 }
168
169 static void ConvertRGB32ToYUVRow_SSE2(const uint8_t* rgb_buf_1,
170 const uint8_t* rgb_buf_2,
171 uint8_t* y_buf_1,
172 uint8_t* y_buf_2,
173 uint8_t* u_buf,
174 uint8_t* v_buf,
175 int width) {
176 while (width >= 4) {
177 // Name for the Y pixels:
178 // Row 1: a b c d
179 // Row 2: e f g h
180 //
181 // First row 4 pixels.
182 __m128i rgb_row_1 = _mm_loadu_si128(
183 reinterpret_cast<const __m128i*>(rgb_buf_1));
184 __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1);
185
186 __m128i y_table = _mm_load_si128(
187 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable));
188
189 __m128i rgb_a_b = _mm_unpackhi_epi8(rgb_row_1, zero_1);
190 rgb_a_b = _mm_madd_epi16(rgb_a_b, y_table);
191
192 __m128i rgb_c_d = _mm_unpacklo_epi8(rgb_row_1, zero_1);
193 rgb_c_d = _mm_madd_epi16(rgb_c_d, y_table);
194
195 // Do a crazh shuffle so that we get:
196 // v------------ Multiply Add
197 // BG: a b c d
198 // A0: a b c d
199 __m128i bg_abcd = _mm_castps_si128(
200 _mm_shuffle_ps(
201 _mm_castsi128_ps(rgb_c_d),
202 _mm_castsi128_ps(rgb_a_b),
203 (3 << 6) | (1 << 4) | (3 << 2) | 1));
204 __m128i r_abcd = _mm_castps_si128(
205 _mm_shuffle_ps(
206 _mm_castsi128_ps(rgb_c_d),
207 _mm_castsi128_ps(rgb_a_b),
208 (2 << 6) | (2 << 2)));
209 __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd);
210
211 // Down shift back to 8bits range.
212 __m128i y_offset = _mm_load_si128(
213 reinterpret_cast<const __m128i*>(kYOffset));
214 y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT);
215 y_abcd = _mm_add_epi32(y_abcd, y_offset);
216 y_abcd = _mm_packs_epi32(y_abcd, y_abcd);
217 y_abcd = _mm_packus_epi16(y_abcd, y_abcd);
218 *reinterpret_cast<uint32_t*>(y_buf_1) = _mm_cvtsi128_si32(y_abcd);
219 y_buf_1 += 4;
220
221 // Second row 4 pixels.
222 __m128i rgb_row_2 = _mm_loadu_si128(
223 reinterpret_cast<const __m128i*>(rgb_buf_2));
224 __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2);
225 __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2);
226 __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2);
227
228 // Add two rows together.
229 __m128i rgb_ae_bf =
230 _mm_add_epi16(_mm_unpackhi_epi8(rgb_row_1, zero_2), rgb_e_f);
231 __m128i rgb_cg_dh =
232 _mm_add_epi16(_mm_unpacklo_epi8(rgb_row_1, zero_2), rgb_g_h);
233
234 // Multiply add like the previous row.
235 rgb_e_f = _mm_madd_epi16(rgb_e_f, y_table);
236 rgb_g_h = _mm_madd_epi16(rgb_g_h, y_table);
237
238 __m128i bg_efgh = _mm_castps_si128(
239 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h),
240 _mm_castsi128_ps(rgb_e_f),
241 (3 << 6) | (1 << 4) | (3 << 2) | 1));
242 __m128i r_efgh = _mm_castps_si128(
243 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h),
244 _mm_castsi128_ps(rgb_e_f),
245 (2 << 6) | (2 << 2)));
246 __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh);
247 y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT);
248 y_efgh = _mm_add_epi32(y_efgh, y_offset);
249 y_efgh = _mm_packs_epi32(y_efgh, y_efgh);
250 y_efgh = _mm_packus_epi16(y_efgh, y_efgh);
251 *reinterpret_cast<uint32_t*>(y_buf_2) = _mm_cvtsi128_si32(y_efgh);
252 y_buf_2 += 4;
253
254 __m128i rgb_ae_cg = _mm_castps_si128(
255 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh),
256 _mm_castsi128_ps(rgb_ae_bf),
257 (3 << 6) | (2 << 4) | (3 << 2) | 2));
258 __m128i rgb_bf_dh = _mm_castps_si128(
259 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh),
260 _mm_castsi128_ps(rgb_ae_bf),
261 (1 << 6) | (1 << 2)));
262
263 // This is a 2x2 subsampling for 2 pixels.
264 __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh);
265
266 // Do a multiply add with U table.
267 __m128i u_a_b = _mm_madd_epi16(
268 rgb_abef_cdgh,
269 _mm_load_si128(
270 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8)));
271 u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) | 1)),
272 _mm_shuffle_epi32(u_a_b, (2 << 2)));
273 // Right shift 14 because of 12 from fixed point and 2 from subsampling.
274 u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2);
275 __m128i uv_offset = _mm_slli_epi32(y_offset, 3);
276 u_a_b = _mm_add_epi32(u_a_b, uv_offset);
277 u_a_b = _mm_packs_epi32(u_a_b, u_a_b);
278 u_a_b = _mm_packus_epi16(u_a_b, u_a_b);
279 *reinterpret_cast<uint16_t*>(u_buf) =
280 static_cast<uint16_t>(_mm_extract_epi16(u_a_b, 0));
281 u_buf += 2;
282
283 __m128i v_a_b = _mm_madd_epi16(
284 rgb_abef_cdgh,
285 _mm_load_si128(
286 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16)));
287 v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) | 1)),
288 _mm_shuffle_epi32(v_a_b, (2 << 2)));
289 v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2);
290 v_a_b = _mm_add_epi32(v_a_b, uv_offset);
291 v_a_b = _mm_packs_epi32(v_a_b, v_a_b);
292 v_a_b = _mm_packus_epi16(v_a_b, v_a_b);
293 *reinterpret_cast<uint16_t*>(v_buf) =
294 static_cast<uint16_t>(_mm_extract_epi16(v_a_b, 0));
295 v_buf += 2;
296
297 rgb_buf_1 += 16;
298 rgb_buf_2 += 16;
299
300 // Move forward by 4 pixels.
301 width -= 4;
302 }
303
304 // Just use C code to convert the remaining pixels.
305 if (width >= 2) {
306 ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf);
307 rgb_buf_1 += 8;
308 rgb_buf_2 += 8;
309 y_buf_1 += 2;
310 y_buf_2 += 2;
311 ++u_buf;
312 ++v_buf;
313 width -= 2;
314 }
315
316 if (width)
317 ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf);
318 }
319
320 extern void ConvertRGB32ToYUV_SSE2(const uint8_t* rgbframe,
321 uint8_t* yplane,
322 uint8_t* uplane,
323 uint8_t* vplane,
324 int width,
325 int height,
326 int rgbstride,
327 int ystride,
328 int uvstride) {
329 while (height >= 2) {
330 ConvertRGB32ToYUVRow_SSE2(rgbframe,
331 rgbframe + rgbstride,
332 yplane,
333 yplane + ystride,
334 uplane,
335 vplane,
336 width);
337 rgbframe += 2 * rgbstride;
338 yplane += 2 * ystride;
339 uplane += uvstride;
340 vplane += uvstride;
341 height -= 2;
342 }
343
344 if (!height)
345 return;
346
347 // Handle the last row.
348 while (width >= 2) {
349 ConvertRGBToYUV_V1H2(rgbframe, yplane, uplane, vplane);
350 rgbframe += 8;
351 yplane += 2;
352 ++uplane;
353 ++vplane;
354 width -= 2;
355 }
356
357 if (width)
358 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane);
359 }
360
361 void ConvertRGB32ToYUV_SSE2_Reference(const uint8_t* rgbframe,
362 uint8_t* yplane,
363 uint8_t* uplane,
364 uint8_t* vplane,
365 int width,
366 int height,
367 int rgbstride,
368 int ystride,
369 int uvstride) {
370 while (height >= 2) {
371 int i = 0;
372
373 // Convert a 2x2 block.
374 while (i + 2 <= width) {
375 ConvertRGBToYUV_V2H2(rgbframe + i * 4,
376 rgbframe + rgbstride + i * 4,
377 yplane + i,
378 yplane + ystride + i,
379 uplane + i / 2,
380 vplane + i / 2);
381 i += 2;
382 }
383
384 // Convert the last pixel of two rows.
385 if (i < width) {
386 ConvertRGBToYUV_V2H1(rgbframe + i * 4,
387 rgbframe + rgbstride + i * 4,
388 yplane + i,
389 yplane + ystride + i,
390 uplane + i / 2,
391 vplane + i / 2);
392 }
393
394 rgbframe += 2 * rgbstride;
395 yplane += 2 * ystride;
396 uplane += uvstride;
397 vplane += uvstride;
398 height -= 2;
399 }
400
401 if (!height)
402 return;
403
404 // Handle the last row.
405 while (width >= 2) {
406 ConvertRGBToYUV_V1H2(rgbframe, yplane, uplane, vplane);
407 rgbframe += 8;
408 yplane += 2;
409 ++uplane;
410 ++vplane;
411 width -= 2;
412 }
413
414 // Handle the last pixel in the last row.
415 if (width)
416 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane);
417 }
418
419 } // namespace media
OLDNEW
« no previous file with comments | « media/base/simd/convert_rgb_to_yuv_c.cc ('k') | media/base/simd/convert_rgb_to_yuv_ssse3.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698