| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include <stdint.h> | |
| 6 | |
| 7 #include "build/build_config.h" | |
| 8 #include "media/base/simd/convert_rgb_to_yuv.h" | |
| 9 | |
| 10 #if defined(COMPILER_MSVC) | |
| 11 #include <intrin.h> | |
| 12 #else | |
| 13 #include <mmintrin.h> | |
| 14 #include <emmintrin.h> | |
| 15 #endif | |
| 16 | |
// SIMD_ALIGNED(var) declares |var| with 16-byte alignment using the
// compiler-specific spelling, so that it can be read with aligned SSE2 loads.
#if defined(COMPILER_MSVC)
#define SIMD_ALIGNED(var) __declspec(align(16)) var
#else
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#endif
| 22 | |
| 23 namespace media { | |
| 24 | |
| 25 #define FIX_SHIFT 12 | |
| 26 #define FIX(x) ((x) * (1 << FIX_SHIFT)) | |
| 27 | |
| 28 // Define a convenient macro to do static cast. | |
| 29 #define INT16_FIX(x) static_cast<int16_t>(FIX(x)) | |
| 30 | |
| 31 // Android's pixel layout is RGBA, while other platforms | |
| 32 // are BGRA. | |
| 33 #if defined(OS_ANDROID) | |
| 34 SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = { | |
| 35 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, | |
| 36 INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, | |
| 37 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, | |
| 38 -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, | |
| 39 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, | |
| 40 INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, | |
| 41 }; | |
| 42 #else | |
| 43 SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = { | |
| 44 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, | |
| 45 INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, | |
| 46 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, | |
| 47 INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, | |
| 48 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, | |
| 49 -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, | |
| 50 }; | |
| 51 #endif | |
| 52 | |
| 53 #undef INT16_FIX | |
| 54 | |
| 55 // This is the final offset for the conversion from signed yuv values to | |
| 56 // unsigned values. It is arranged so that offset of 16 is applied to Y | |
| 57 // components and 128 is added to UV components for 2 pixels. | |
| 58 SIMD_ALIGNED(const int32_t kYOffset[4]) = {16, 16, 16, 16}; | |
| 59 | |
// Saturates |value| to the range [0, 255] and returns it as a byte.
static inline uint8_t Clamp(int value) {
  const int bounded = value < 0 ? 0 : (value > 255 ? 255 : value);
  return static_cast<uint8_t>(bounded);
}
| 67 | |
| 68 static inline uint8_t RGBToY(int r, int g, int b) { | |
| 69 int y = ConvertRGBAToYUV_kTable[0] * b + | |
| 70 ConvertRGBAToYUV_kTable[1] * g + | |
| 71 ConvertRGBAToYUV_kTable[2] * r; | |
| 72 y >>= FIX_SHIFT; | |
| 73 return Clamp(y + 16); | |
| 74 } | |
| 75 | |
| 76 static inline uint8_t RGBToU(int r, int g, int b, int shift) { | |
| 77 int u = ConvertRGBAToYUV_kTable[8] * b + | |
| 78 ConvertRGBAToYUV_kTable[9] * g + | |
| 79 ConvertRGBAToYUV_kTable[10] * r; | |
| 80 u >>= FIX_SHIFT + shift; | |
| 81 return Clamp(u + 128); | |
| 82 } | |
| 83 | |
| 84 static inline uint8_t RGBToV(int r, int g, int b, int shift) { | |
| 85 int v = ConvertRGBAToYUV_kTable[16] * b + | |
| 86 ConvertRGBAToYUV_kTable[17] * g + | |
| 87 ConvertRGBAToYUV_kTable[18] * r; | |
| 88 v >>= FIX_SHIFT + shift; | |
| 89 return Clamp(v + 128); | |
| 90 } | |
| 91 | |
// Converts one 4-byte pixel to Y. Reads three channel bytes from |rgb_buf|
// into the caller's locals |b|, |g| and |r| (in memory order), skips the
// fourth byte, adds the channels to the caller's |sum_b|, |sum_g| and |sum_r|,
// and writes the Y value to |y_buf|. Both pointer arguments are advanced
// (|rgb_buf| by 4 bytes, |y_buf| by 1), so they must be modifiable lvalues.
// Wrapped in do/while(0) so that the expansion is a single statement.
#define CONVERT_Y(rgb_buf, y_buf)   \
  do {                              \
    b = *(rgb_buf)++;               \
    g = *(rgb_buf)++;               \
    r = *(rgb_buf)++;               \
    ++(rgb_buf);                    \
    sum_b += b;                     \
    sum_g += g;                     \
    sum_r += r;                     \
    *(y_buf)++ = RGBToY(r, g, b);   \
  } while (0)
| 101 | |
| 102 static inline void ConvertRGBToYUV_V2H2(const uint8_t* rgb_buf_1, | |
| 103 const uint8_t* rgb_buf_2, | |
| 104 uint8_t* y_buf_1, | |
| 105 uint8_t* y_buf_2, | |
| 106 uint8_t* u_buf, | |
| 107 uint8_t* v_buf) { | |
| 108 int sum_b = 0; | |
| 109 int sum_g = 0; | |
| 110 int sum_r = 0; | |
| 111 int r, g, b; | |
| 112 | |
| 113 | |
| 114 | |
| 115 CONVERT_Y(rgb_buf_1, y_buf_1); | |
| 116 CONVERT_Y(rgb_buf_1, y_buf_1); | |
| 117 CONVERT_Y(rgb_buf_2, y_buf_2); | |
| 118 CONVERT_Y(rgb_buf_2, y_buf_2); | |
| 119 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2); | |
| 120 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2); | |
| 121 } | |
| 122 | |
| 123 static inline void ConvertRGBToYUV_V2H1(const uint8_t* rgb_buf_1, | |
| 124 const uint8_t* rgb_buf_2, | |
| 125 uint8_t* y_buf_1, | |
| 126 uint8_t* y_buf_2, | |
| 127 uint8_t* u_buf, | |
| 128 uint8_t* v_buf) { | |
| 129 int sum_b = 0; | |
| 130 int sum_g = 0; | |
| 131 int sum_r = 0; | |
| 132 int r, g, b; | |
| 133 | |
| 134 CONVERT_Y(rgb_buf_1, y_buf_1); | |
| 135 CONVERT_Y(rgb_buf_2, y_buf_2); | |
| 136 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); | |
| 137 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); | |
| 138 } | |
| 139 | |
| 140 static inline void ConvertRGBToYUV_V1H2(const uint8_t* rgb_buf, | |
| 141 uint8_t* y_buf, | |
| 142 uint8_t* u_buf, | |
| 143 uint8_t* v_buf) { | |
| 144 int sum_b = 0; | |
| 145 int sum_g = 0; | |
| 146 int sum_r = 0; | |
| 147 int r, g, b; | |
| 148 | |
| 149 CONVERT_Y(rgb_buf, y_buf); | |
| 150 CONVERT_Y(rgb_buf, y_buf); | |
| 151 *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); | |
| 152 *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); | |
| 153 } | |
| 154 | |
| 155 static inline void ConvertRGBToYUV_V1H1(const uint8_t* rgb_buf, | |
| 156 uint8_t* y_buf, | |
| 157 uint8_t* u_buf, | |
| 158 uint8_t* v_buf) { | |
| 159 int sum_b = 0; | |
| 160 int sum_g = 0; | |
| 161 int sum_r = 0; | |
| 162 int r, g, b; | |
| 163 | |
| 164 CONVERT_Y(rgb_buf, y_buf); | |
| 165 *u_buf++ = RGBToU(r, g, b, 0); | |
| 166 *v_buf++ = RGBToV(r, g, b, 0); | |
| 167 } | |
| 168 | |
| 169 static void ConvertRGB32ToYUVRow_SSE2(const uint8_t* rgb_buf_1, | |
| 170 const uint8_t* rgb_buf_2, | |
| 171 uint8_t* y_buf_1, | |
| 172 uint8_t* y_buf_2, | |
| 173 uint8_t* u_buf, | |
| 174 uint8_t* v_buf, | |
| 175 int width) { | |
| 176 while (width >= 4) { | |
| 177 // Name for the Y pixels: | |
| 178 // Row 1: a b c d | |
| 179 // Row 2: e f g h | |
| 180 // | |
| 181 // First row 4 pixels. | |
| 182 __m128i rgb_row_1 = _mm_loadu_si128( | |
| 183 reinterpret_cast<const __m128i*>(rgb_buf_1)); | |
| 184 __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1); | |
| 185 | |
| 186 __m128i y_table = _mm_load_si128( | |
| 187 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable)); | |
| 188 | |
| 189 __m128i rgb_a_b = _mm_unpackhi_epi8(rgb_row_1, zero_1); | |
| 190 rgb_a_b = _mm_madd_epi16(rgb_a_b, y_table); | |
| 191 | |
| 192 __m128i rgb_c_d = _mm_unpacklo_epi8(rgb_row_1, zero_1); | |
| 193 rgb_c_d = _mm_madd_epi16(rgb_c_d, y_table); | |
| 194 | |
| 195 // Do a crazh shuffle so that we get: | |
| 196 // v------------ Multiply Add | |
| 197 // BG: a b c d | |
| 198 // A0: a b c d | |
| 199 __m128i bg_abcd = _mm_castps_si128( | |
| 200 _mm_shuffle_ps( | |
| 201 _mm_castsi128_ps(rgb_c_d), | |
| 202 _mm_castsi128_ps(rgb_a_b), | |
| 203 (3 << 6) | (1 << 4) | (3 << 2) | 1)); | |
| 204 __m128i r_abcd = _mm_castps_si128( | |
| 205 _mm_shuffle_ps( | |
| 206 _mm_castsi128_ps(rgb_c_d), | |
| 207 _mm_castsi128_ps(rgb_a_b), | |
| 208 (2 << 6) | (2 << 2))); | |
| 209 __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd); | |
| 210 | |
| 211 // Down shift back to 8bits range. | |
| 212 __m128i y_offset = _mm_load_si128( | |
| 213 reinterpret_cast<const __m128i*>(kYOffset)); | |
| 214 y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT); | |
| 215 y_abcd = _mm_add_epi32(y_abcd, y_offset); | |
| 216 y_abcd = _mm_packs_epi32(y_abcd, y_abcd); | |
| 217 y_abcd = _mm_packus_epi16(y_abcd, y_abcd); | |
| 218 *reinterpret_cast<uint32_t*>(y_buf_1) = _mm_cvtsi128_si32(y_abcd); | |
| 219 y_buf_1 += 4; | |
| 220 | |
| 221 // Second row 4 pixels. | |
| 222 __m128i rgb_row_2 = _mm_loadu_si128( | |
| 223 reinterpret_cast<const __m128i*>(rgb_buf_2)); | |
| 224 __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2); | |
| 225 __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2); | |
| 226 __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2); | |
| 227 | |
| 228 // Add two rows together. | |
| 229 __m128i rgb_ae_bf = | |
| 230 _mm_add_epi16(_mm_unpackhi_epi8(rgb_row_1, zero_2), rgb_e_f); | |
| 231 __m128i rgb_cg_dh = | |
| 232 _mm_add_epi16(_mm_unpacklo_epi8(rgb_row_1, zero_2), rgb_g_h); | |
| 233 | |
| 234 // Multiply add like the previous row. | |
| 235 rgb_e_f = _mm_madd_epi16(rgb_e_f, y_table); | |
| 236 rgb_g_h = _mm_madd_epi16(rgb_g_h, y_table); | |
| 237 | |
| 238 __m128i bg_efgh = _mm_castps_si128( | |
| 239 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h), | |
| 240 _mm_castsi128_ps(rgb_e_f), | |
| 241 (3 << 6) | (1 << 4) | (3 << 2) | 1)); | |
| 242 __m128i r_efgh = _mm_castps_si128( | |
| 243 _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h), | |
| 244 _mm_castsi128_ps(rgb_e_f), | |
| 245 (2 << 6) | (2 << 2))); | |
| 246 __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh); | |
| 247 y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT); | |
| 248 y_efgh = _mm_add_epi32(y_efgh, y_offset); | |
| 249 y_efgh = _mm_packs_epi32(y_efgh, y_efgh); | |
| 250 y_efgh = _mm_packus_epi16(y_efgh, y_efgh); | |
| 251 *reinterpret_cast<uint32_t*>(y_buf_2) = _mm_cvtsi128_si32(y_efgh); | |
| 252 y_buf_2 += 4; | |
| 253 | |
| 254 __m128i rgb_ae_cg = _mm_castps_si128( | |
| 255 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), | |
| 256 _mm_castsi128_ps(rgb_ae_bf), | |
| 257 (3 << 6) | (2 << 4) | (3 << 2) | 2)); | |
| 258 __m128i rgb_bf_dh = _mm_castps_si128( | |
| 259 _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), | |
| 260 _mm_castsi128_ps(rgb_ae_bf), | |
| 261 (1 << 6) | (1 << 2))); | |
| 262 | |
| 263 // This is a 2x2 subsampling for 2 pixels. | |
| 264 __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh); | |
| 265 | |
| 266 // Do a multiply add with U table. | |
| 267 __m128i u_a_b = _mm_madd_epi16( | |
| 268 rgb_abef_cdgh, | |
| 269 _mm_load_si128( | |
| 270 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8))); | |
| 271 u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) | 1)), | |
| 272 _mm_shuffle_epi32(u_a_b, (2 << 2))); | |
| 273 // Right shift 14 because of 12 from fixed point and 2 from subsampling. | |
| 274 u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2); | |
| 275 __m128i uv_offset = _mm_slli_epi32(y_offset, 3); | |
| 276 u_a_b = _mm_add_epi32(u_a_b, uv_offset); | |
| 277 u_a_b = _mm_packs_epi32(u_a_b, u_a_b); | |
| 278 u_a_b = _mm_packus_epi16(u_a_b, u_a_b); | |
| 279 *reinterpret_cast<uint16_t*>(u_buf) = | |
| 280 static_cast<uint16_t>(_mm_extract_epi16(u_a_b, 0)); | |
| 281 u_buf += 2; | |
| 282 | |
| 283 __m128i v_a_b = _mm_madd_epi16( | |
| 284 rgb_abef_cdgh, | |
| 285 _mm_load_si128( | |
| 286 reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16))); | |
| 287 v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) | 1)), | |
| 288 _mm_shuffle_epi32(v_a_b, (2 << 2))); | |
| 289 v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2); | |
| 290 v_a_b = _mm_add_epi32(v_a_b, uv_offset); | |
| 291 v_a_b = _mm_packs_epi32(v_a_b, v_a_b); | |
| 292 v_a_b = _mm_packus_epi16(v_a_b, v_a_b); | |
| 293 *reinterpret_cast<uint16_t*>(v_buf) = | |
| 294 static_cast<uint16_t>(_mm_extract_epi16(v_a_b, 0)); | |
| 295 v_buf += 2; | |
| 296 | |
| 297 rgb_buf_1 += 16; | |
| 298 rgb_buf_2 += 16; | |
| 299 | |
| 300 // Move forward by 4 pixels. | |
| 301 width -= 4; | |
| 302 } | |
| 303 | |
| 304 // Just use C code to convert the remaining pixels. | |
| 305 if (width >= 2) { | |
| 306 ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); | |
| 307 rgb_buf_1 += 8; | |
| 308 rgb_buf_2 += 8; | |
| 309 y_buf_1 += 2; | |
| 310 y_buf_2 += 2; | |
| 311 ++u_buf; | |
| 312 ++v_buf; | |
| 313 width -= 2; | |
| 314 } | |
| 315 | |
| 316 if (width) | |
| 317 ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); | |
| 318 } | |
| 319 | |
| 320 extern void ConvertRGB32ToYUV_SSE2(const uint8_t* rgbframe, | |
| 321 uint8_t* yplane, | |
| 322 uint8_t* uplane, | |
| 323 uint8_t* vplane, | |
| 324 int width, | |
| 325 int height, | |
| 326 int rgbstride, | |
| 327 int ystride, | |
| 328 int uvstride) { | |
| 329 while (height >= 2) { | |
| 330 ConvertRGB32ToYUVRow_SSE2(rgbframe, | |
| 331 rgbframe + rgbstride, | |
| 332 yplane, | |
| 333 yplane + ystride, | |
| 334 uplane, | |
| 335 vplane, | |
| 336 width); | |
| 337 rgbframe += 2 * rgbstride; | |
| 338 yplane += 2 * ystride; | |
| 339 uplane += uvstride; | |
| 340 vplane += uvstride; | |
| 341 height -= 2; | |
| 342 } | |
| 343 | |
| 344 if (!height) | |
| 345 return; | |
| 346 | |
| 347 // Handle the last row. | |
| 348 while (width >= 2) { | |
| 349 ConvertRGBToYUV_V1H2(rgbframe, yplane, uplane, vplane); | |
| 350 rgbframe += 8; | |
| 351 yplane += 2; | |
| 352 ++uplane; | |
| 353 ++vplane; | |
| 354 width -= 2; | |
| 355 } | |
| 356 | |
| 357 if (width) | |
| 358 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); | |
| 359 } | |
| 360 | |
| 361 void ConvertRGB32ToYUV_SSE2_Reference(const uint8_t* rgbframe, | |
| 362 uint8_t* yplane, | |
| 363 uint8_t* uplane, | |
| 364 uint8_t* vplane, | |
| 365 int width, | |
| 366 int height, | |
| 367 int rgbstride, | |
| 368 int ystride, | |
| 369 int uvstride) { | |
| 370 while (height >= 2) { | |
| 371 int i = 0; | |
| 372 | |
| 373 // Convert a 2x2 block. | |
| 374 while (i + 2 <= width) { | |
| 375 ConvertRGBToYUV_V2H2(rgbframe + i * 4, | |
| 376 rgbframe + rgbstride + i * 4, | |
| 377 yplane + i, | |
| 378 yplane + ystride + i, | |
| 379 uplane + i / 2, | |
| 380 vplane + i / 2); | |
| 381 i += 2; | |
| 382 } | |
| 383 | |
| 384 // Convert the last pixel of two rows. | |
| 385 if (i < width) { | |
| 386 ConvertRGBToYUV_V2H1(rgbframe + i * 4, | |
| 387 rgbframe + rgbstride + i * 4, | |
| 388 yplane + i, | |
| 389 yplane + ystride + i, | |
| 390 uplane + i / 2, | |
| 391 vplane + i / 2); | |
| 392 } | |
| 393 | |
| 394 rgbframe += 2 * rgbstride; | |
| 395 yplane += 2 * ystride; | |
| 396 uplane += uvstride; | |
| 397 vplane += uvstride; | |
| 398 height -= 2; | |
| 399 } | |
| 400 | |
| 401 if (!height) | |
| 402 return; | |
| 403 | |
| 404 // Handle the last row. | |
| 405 while (width >= 2) { | |
| 406 ConvertRGBToYUV_V1H2(rgbframe, yplane, uplane, vplane); | |
| 407 rgbframe += 8; | |
| 408 yplane += 2; | |
| 409 ++uplane; | |
| 410 ++vplane; | |
| 411 width -= 2; | |
| 412 } | |
| 413 | |
| 414 // Handle the last pixel in the last row. | |
| 415 if (width) | |
| 416 ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); | |
| 417 } | |
| 418 | |
| 419 } // namespace media | |
| OLD | NEW |