Index: media/base/simd/convert_rgb_to_yuv_sse2.cc |
diff --git a/media/base/simd/convert_rgb_to_yuv_sse2.cc b/media/base/simd/convert_rgb_to_yuv_sse2.cc |
deleted file mode 100644 |
index a8732f93b156685efc4b6af0a684ff24ad190f28..0000000000000000000000000000000000000000 |
--- a/media/base/simd/convert_rgb_to_yuv_sse2.cc |
+++ /dev/null |
@@ -1,419 +0,0 @@ |
-// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
-// Use of this source code is governed by a BSD-style license that can be |
-// found in the LICENSE file. |
- |
-#include <stdint.h> |
- |
-#include "build/build_config.h" |
-#include "media/base/simd/convert_rgb_to_yuv.h" |
- |
-#if defined(COMPILER_MSVC) |
-#include <intrin.h> |
-#else |
-#include <mmintrin.h> |
-#include <emmintrin.h> |
-#endif |
- |
-#if defined(COMPILER_MSVC) |
-#define SIMD_ALIGNED(var) __declspec(align(16)) var |
-#else |
-#define SIMD_ALIGNED(var) var __attribute__((aligned(16))) |
-#endif |
- |
-namespace media { |
- |
-#define FIX_SHIFT 12 |
-#define FIX(x) ((x) * (1 << FIX_SHIFT)) |
- |
-// Define a convenient macro to do static cast. |
-#define INT16_FIX(x) static_cast<int16_t>(FIX(x)) |
- |
-// Android's pixel layout is RGBA, while other platforms |
-// are BGRA. |
-#if defined(OS_ANDROID) |
-SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = { |
- INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, |
- INT16_FIX(0.257), INT16_FIX(0.504), INT16_FIX(0.098), 0, |
- -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, |
- -INT16_FIX(0.148), -INT16_FIX(0.291), INT16_FIX(0.439), 0, |
- INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, |
- INT16_FIX(0.439), -INT16_FIX(0.368), -INT16_FIX(0.071), 0, |
-}; |
-#else |
-SIMD_ALIGNED(const int16_t ConvertRGBAToYUV_kTable[8 * 3]) = { |
- INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, |
- INT16_FIX(0.098), INT16_FIX(0.504), INT16_FIX(0.257), 0, |
- INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, |
- INT16_FIX(0.439), -INT16_FIX(0.291), -INT16_FIX(0.148), 0, |
- -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, |
- -INT16_FIX(0.071), -INT16_FIX(0.368), INT16_FIX(0.439), 0, |
-}; |
-#endif |
- |
-#undef INT16_FIX |
- |
-// This is the final offset for the conversion from signed yuv values to |
-// unsigned values. It is arranged so that offset of 16 is applied to Y |
-// components and 128 is added to UV components for 2 pixels. |
-SIMD_ALIGNED(const int32_t kYOffset[4]) = {16, 16, 16, 16}; |
- |
-static inline uint8_t Clamp(int value) { |
- if (value < 0) |
- return 0; |
- if (value > 255) |
- return 255; |
- return static_cast<uint8_t>(value); |
-} |
- |
-static inline uint8_t RGBToY(int r, int g, int b) { |
- int y = ConvertRGBAToYUV_kTable[0] * b + |
- ConvertRGBAToYUV_kTable[1] * g + |
- ConvertRGBAToYUV_kTable[2] * r; |
- y >>= FIX_SHIFT; |
- return Clamp(y + 16); |
-} |
- |
-static inline uint8_t RGBToU(int r, int g, int b, int shift) { |
- int u = ConvertRGBAToYUV_kTable[8] * b + |
- ConvertRGBAToYUV_kTable[9] * g + |
- ConvertRGBAToYUV_kTable[10] * r; |
- u >>= FIX_SHIFT + shift; |
- return Clamp(u + 128); |
-} |
- |
-static inline uint8_t RGBToV(int r, int g, int b, int shift) { |
- int v = ConvertRGBAToYUV_kTable[16] * b + |
- ConvertRGBAToYUV_kTable[17] * g + |
- ConvertRGBAToYUV_kTable[18] * r; |
- v >>= FIX_SHIFT + shift; |
- return Clamp(v + 128); |
-} |
- |
-#define CONVERT_Y(rgb_buf, y_buf) \ |
- b = *rgb_buf++; \ |
- g = *rgb_buf++; \ |
- r = *rgb_buf++; \ |
- ++rgb_buf; \ |
- sum_b += b; \ |
- sum_g += g; \ |
- sum_r += r; \ |
- *y_buf++ = RGBToY(r, g, b); |
- |
-static inline void ConvertRGBToYUV_V2H2(const uint8_t* rgb_buf_1, |
- const uint8_t* rgb_buf_2, |
- uint8_t* y_buf_1, |
- uint8_t* y_buf_2, |
- uint8_t* u_buf, |
- uint8_t* v_buf) { |
- int sum_b = 0; |
- int sum_g = 0; |
- int sum_r = 0; |
- int r, g, b; |
- |
- |
- |
- CONVERT_Y(rgb_buf_1, y_buf_1); |
- CONVERT_Y(rgb_buf_1, y_buf_1); |
- CONVERT_Y(rgb_buf_2, y_buf_2); |
- CONVERT_Y(rgb_buf_2, y_buf_2); |
- *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 2); |
- *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 2); |
-} |
- |
-static inline void ConvertRGBToYUV_V2H1(const uint8_t* rgb_buf_1, |
- const uint8_t* rgb_buf_2, |
- uint8_t* y_buf_1, |
- uint8_t* y_buf_2, |
- uint8_t* u_buf, |
- uint8_t* v_buf) { |
- int sum_b = 0; |
- int sum_g = 0; |
- int sum_r = 0; |
- int r, g, b; |
- |
- CONVERT_Y(rgb_buf_1, y_buf_1); |
- CONVERT_Y(rgb_buf_2, y_buf_2); |
- *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); |
- *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); |
-} |
- |
-static inline void ConvertRGBToYUV_V1H2(const uint8_t* rgb_buf, |
- uint8_t* y_buf, |
- uint8_t* u_buf, |
- uint8_t* v_buf) { |
- int sum_b = 0; |
- int sum_g = 0; |
- int sum_r = 0; |
- int r, g, b; |
- |
- CONVERT_Y(rgb_buf, y_buf); |
- CONVERT_Y(rgb_buf, y_buf); |
- *u_buf++ = RGBToU(sum_r, sum_g, sum_b, 1); |
- *v_buf++ = RGBToV(sum_r, sum_g, sum_b, 1); |
-} |
- |
-static inline void ConvertRGBToYUV_V1H1(const uint8_t* rgb_buf, |
- uint8_t* y_buf, |
- uint8_t* u_buf, |
- uint8_t* v_buf) { |
- int sum_b = 0; |
- int sum_g = 0; |
- int sum_r = 0; |
- int r, g, b; |
- |
- CONVERT_Y(rgb_buf, y_buf); |
- *u_buf++ = RGBToU(r, g, b, 0); |
- *v_buf++ = RGBToV(r, g, b, 0); |
-} |
- |
-static void ConvertRGB32ToYUVRow_SSE2(const uint8_t* rgb_buf_1, |
- const uint8_t* rgb_buf_2, |
- uint8_t* y_buf_1, |
- uint8_t* y_buf_2, |
- uint8_t* u_buf, |
- uint8_t* v_buf, |
- int width) { |
- while (width >= 4) { |
- // Name for the Y pixels: |
- // Row 1: a b c d |
- // Row 2: e f g h |
- // |
- // First row 4 pixels. |
- __m128i rgb_row_1 = _mm_loadu_si128( |
- reinterpret_cast<const __m128i*>(rgb_buf_1)); |
- __m128i zero_1 = _mm_xor_si128(rgb_row_1, rgb_row_1); |
- |
- __m128i y_table = _mm_load_si128( |
- reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable)); |
- |
- __m128i rgb_a_b = _mm_unpackhi_epi8(rgb_row_1, zero_1); |
- rgb_a_b = _mm_madd_epi16(rgb_a_b, y_table); |
- |
- __m128i rgb_c_d = _mm_unpacklo_epi8(rgb_row_1, zero_1); |
- rgb_c_d = _mm_madd_epi16(rgb_c_d, y_table); |
- |
- // Do a crazh shuffle so that we get: |
- // v------------ Multiply Add |
- // BG: a b c d |
- // A0: a b c d |
- __m128i bg_abcd = _mm_castps_si128( |
- _mm_shuffle_ps( |
- _mm_castsi128_ps(rgb_c_d), |
- _mm_castsi128_ps(rgb_a_b), |
- (3 << 6) | (1 << 4) | (3 << 2) | 1)); |
- __m128i r_abcd = _mm_castps_si128( |
- _mm_shuffle_ps( |
- _mm_castsi128_ps(rgb_c_d), |
- _mm_castsi128_ps(rgb_a_b), |
- (2 << 6) | (2 << 2))); |
- __m128i y_abcd = _mm_add_epi32(bg_abcd, r_abcd); |
- |
- // Down shift back to 8bits range. |
- __m128i y_offset = _mm_load_si128( |
- reinterpret_cast<const __m128i*>(kYOffset)); |
- y_abcd = _mm_srai_epi32(y_abcd, FIX_SHIFT); |
- y_abcd = _mm_add_epi32(y_abcd, y_offset); |
- y_abcd = _mm_packs_epi32(y_abcd, y_abcd); |
- y_abcd = _mm_packus_epi16(y_abcd, y_abcd); |
- *reinterpret_cast<uint32_t*>(y_buf_1) = _mm_cvtsi128_si32(y_abcd); |
- y_buf_1 += 4; |
- |
- // Second row 4 pixels. |
- __m128i rgb_row_2 = _mm_loadu_si128( |
- reinterpret_cast<const __m128i*>(rgb_buf_2)); |
- __m128i zero_2 = _mm_xor_si128(rgb_row_2, rgb_row_2); |
- __m128i rgb_e_f = _mm_unpackhi_epi8(rgb_row_2, zero_2); |
- __m128i rgb_g_h = _mm_unpacklo_epi8(rgb_row_2, zero_2); |
- |
- // Add two rows together. |
- __m128i rgb_ae_bf = |
- _mm_add_epi16(_mm_unpackhi_epi8(rgb_row_1, zero_2), rgb_e_f); |
- __m128i rgb_cg_dh = |
- _mm_add_epi16(_mm_unpacklo_epi8(rgb_row_1, zero_2), rgb_g_h); |
- |
- // Multiply add like the previous row. |
- rgb_e_f = _mm_madd_epi16(rgb_e_f, y_table); |
- rgb_g_h = _mm_madd_epi16(rgb_g_h, y_table); |
- |
- __m128i bg_efgh = _mm_castps_si128( |
- _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h), |
- _mm_castsi128_ps(rgb_e_f), |
- (3 << 6) | (1 << 4) | (3 << 2) | 1)); |
- __m128i r_efgh = _mm_castps_si128( |
- _mm_shuffle_ps(_mm_castsi128_ps(rgb_g_h), |
- _mm_castsi128_ps(rgb_e_f), |
- (2 << 6) | (2 << 2))); |
- __m128i y_efgh = _mm_add_epi32(bg_efgh, r_efgh); |
- y_efgh = _mm_srai_epi32(y_efgh, FIX_SHIFT); |
- y_efgh = _mm_add_epi32(y_efgh, y_offset); |
- y_efgh = _mm_packs_epi32(y_efgh, y_efgh); |
- y_efgh = _mm_packus_epi16(y_efgh, y_efgh); |
- *reinterpret_cast<uint32_t*>(y_buf_2) = _mm_cvtsi128_si32(y_efgh); |
- y_buf_2 += 4; |
- |
- __m128i rgb_ae_cg = _mm_castps_si128( |
- _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), |
- _mm_castsi128_ps(rgb_ae_bf), |
- (3 << 6) | (2 << 4) | (3 << 2) | 2)); |
- __m128i rgb_bf_dh = _mm_castps_si128( |
- _mm_shuffle_ps(_mm_castsi128_ps(rgb_cg_dh), |
- _mm_castsi128_ps(rgb_ae_bf), |
- (1 << 6) | (1 << 2))); |
- |
- // This is a 2x2 subsampling for 2 pixels. |
- __m128i rgb_abef_cdgh = _mm_add_epi16(rgb_ae_cg, rgb_bf_dh); |
- |
- // Do a multiply add with U table. |
- __m128i u_a_b = _mm_madd_epi16( |
- rgb_abef_cdgh, |
- _mm_load_si128( |
- reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 8))); |
- u_a_b = _mm_add_epi32(_mm_shuffle_epi32(u_a_b, ((3 << 2) | 1)), |
- _mm_shuffle_epi32(u_a_b, (2 << 2))); |
- // Right shift 14 because of 12 from fixed point and 2 from subsampling. |
- u_a_b = _mm_srai_epi32(u_a_b, FIX_SHIFT + 2); |
- __m128i uv_offset = _mm_slli_epi32(y_offset, 3); |
- u_a_b = _mm_add_epi32(u_a_b, uv_offset); |
- u_a_b = _mm_packs_epi32(u_a_b, u_a_b); |
- u_a_b = _mm_packus_epi16(u_a_b, u_a_b); |
- *reinterpret_cast<uint16_t*>(u_buf) = |
- static_cast<uint16_t>(_mm_extract_epi16(u_a_b, 0)); |
- u_buf += 2; |
- |
- __m128i v_a_b = _mm_madd_epi16( |
- rgb_abef_cdgh, |
- _mm_load_si128( |
- reinterpret_cast<const __m128i*>(ConvertRGBAToYUV_kTable + 16))); |
- v_a_b = _mm_add_epi32(_mm_shuffle_epi32(v_a_b, ((3 << 2) | 1)), |
- _mm_shuffle_epi32(v_a_b, (2 << 2))); |
- v_a_b = _mm_srai_epi32(v_a_b, FIX_SHIFT + 2); |
- v_a_b = _mm_add_epi32(v_a_b, uv_offset); |
- v_a_b = _mm_packs_epi32(v_a_b, v_a_b); |
- v_a_b = _mm_packus_epi16(v_a_b, v_a_b); |
- *reinterpret_cast<uint16_t*>(v_buf) = |
- static_cast<uint16_t>(_mm_extract_epi16(v_a_b, 0)); |
- v_buf += 2; |
- |
- rgb_buf_1 += 16; |
- rgb_buf_2 += 16; |
- |
- // Move forward by 4 pixels. |
- width -= 4; |
- } |
- |
- // Just use C code to convert the remaining pixels. |
- if (width >= 2) { |
- ConvertRGBToYUV_V2H2(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); |
- rgb_buf_1 += 8; |
- rgb_buf_2 += 8; |
- y_buf_1 += 2; |
- y_buf_2 += 2; |
- ++u_buf; |
- ++v_buf; |
- width -= 2; |
- } |
- |
- if (width) |
- ConvertRGBToYUV_V2H1(rgb_buf_1, rgb_buf_2, y_buf_1, y_buf_2, u_buf, v_buf); |
-} |
- |
-extern void ConvertRGB32ToYUV_SSE2(const uint8_t* rgbframe, |
- uint8_t* yplane, |
- uint8_t* uplane, |
- uint8_t* vplane, |
- int width, |
- int height, |
- int rgbstride, |
- int ystride, |
- int uvstride) { |
- while (height >= 2) { |
- ConvertRGB32ToYUVRow_SSE2(rgbframe, |
- rgbframe + rgbstride, |
- yplane, |
- yplane + ystride, |
- uplane, |
- vplane, |
- width); |
- rgbframe += 2 * rgbstride; |
- yplane += 2 * ystride; |
- uplane += uvstride; |
- vplane += uvstride; |
- height -= 2; |
- } |
- |
- if (!height) |
- return; |
- |
- // Handle the last row. |
- while (width >= 2) { |
- ConvertRGBToYUV_V1H2(rgbframe, yplane, uplane, vplane); |
- rgbframe += 8; |
- yplane += 2; |
- ++uplane; |
- ++vplane; |
- width -= 2; |
- } |
- |
- if (width) |
- ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); |
-} |
- |
-void ConvertRGB32ToYUV_SSE2_Reference(const uint8_t* rgbframe, |
- uint8_t* yplane, |
- uint8_t* uplane, |
- uint8_t* vplane, |
- int width, |
- int height, |
- int rgbstride, |
- int ystride, |
- int uvstride) { |
- while (height >= 2) { |
- int i = 0; |
- |
- // Convert a 2x2 block. |
- while (i + 2 <= width) { |
- ConvertRGBToYUV_V2H2(rgbframe + i * 4, |
- rgbframe + rgbstride + i * 4, |
- yplane + i, |
- yplane + ystride + i, |
- uplane + i / 2, |
- vplane + i / 2); |
- i += 2; |
- } |
- |
- // Convert the last pixel of two rows. |
- if (i < width) { |
- ConvertRGBToYUV_V2H1(rgbframe + i * 4, |
- rgbframe + rgbstride + i * 4, |
- yplane + i, |
- yplane + ystride + i, |
- uplane + i / 2, |
- vplane + i / 2); |
- } |
- |
- rgbframe += 2 * rgbstride; |
- yplane += 2 * ystride; |
- uplane += uvstride; |
- vplane += uvstride; |
- height -= 2; |
- } |
- |
- if (!height) |
- return; |
- |
- // Handle the last row. |
- while (width >= 2) { |
- ConvertRGBToYUV_V1H2(rgbframe, yplane, uplane, vplane); |
- rgbframe += 8; |
- yplane += 2; |
- ++uplane; |
- ++vplane; |
- width -= 2; |
- } |
- |
- // Handle the last pixel in the last row. |
- if (width) |
- ConvertRGBToYUV_V1H1(rgbframe, yplane, uplane, vplane); |
-} |
- |
-} // namespace media |