Index: media/base/yuv_convert.cc |
=================================================================== |
--- media/base/yuv_convert.cc (revision 44590) |
+++ media/base/yuv_convert.cc (working copy) |
@@ -28,29 +28,30 @@ |
#endif |
#endif |
-#if USE_SSE |
+#if USE_SSE2 |
#include <emmintrin.h> |
#endif |
namespace media { |
-// 16.16 fixed point arithmetic. |
+// 16.16 fixed point arithmetic |
const int kFractionBits = 16; |
const int kFractionMax = 1 << kFractionBits; |
+const int kFractionMask = ((1 << kFractionBits) - 1); |
// Convert a frame of YUV to 32 bit ARGB. |
void ConvertYUVToRGB32(const uint8* y_buf, |
const uint8* u_buf, |
const uint8* v_buf, |
uint8* rgb_buf, |
- int width, |
- int height, |
+ int source_width, |
+ int source_height, |
int y_pitch, |
int uv_pitch, |
int rgb_pitch, |
YUVType yuv_type) { |
unsigned int y_shift = yuv_type; |
- for (int y = 0; y < height; ++y) { |
+ for (int y = 0; y < source_height; ++y) { |
uint8* rgb_row = rgb_buf + y * rgb_pitch; |
const uint8* y_ptr = y_buf + y * y_pitch; |
const uint8* u_ptr = u_buf + (y >> y_shift) * uv_pitch; |
@@ -60,26 +61,25 @@ |
u_ptr, |
v_ptr, |
rgb_row, |
- width); |
+ source_width); |
} |
// MMX used for FastConvertYUVToRGB32Row requires emms instruction. |
EMMS(); |
} |
-#if USE_MMX |
-#if USE_SSE |
+#if USE_SSE2 |
// FilterRows combines two rows of the image using linear interpolation. |
// SSE2 version blends 8 pixels at a time. |
static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, |
- int width, int scaled_y_fraction) { |
+ int source_width, int source_y_fraction) { |
__m128i zero = _mm_setzero_si128(); |
__m128i y1_fraction = _mm_set1_epi16( |
- static_cast<uint16>(scaled_y_fraction >> 8)); |
+ static_cast<uint16>(source_y_fraction >> 8)); |
__m128i y0_fraction = _mm_set1_epi16( |
- static_cast<uint16>((scaled_y_fraction >> 8) ^ 255)); |
+ static_cast<uint16>(256 - (source_y_fraction >> 8))); |
- uint8* end = ybuf + width; |
+ uint8* end = ybuf + source_width; |
if (ybuf < end) { |
do { |
__m128i y0 = _mm_loadl_epi64(reinterpret_cast<__m128i const*>(y0_ptr)); |
@@ -99,17 +99,17 @@ |
} |
} |
-#else |
+#elif USE_MMX |
// MMX version blends 4 pixels at a time. |
static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, |
- int width, int scaled_y_fraction) { |
+ int source_width, int source_y_fraction) { |
__m64 zero = _mm_setzero_si64(); |
__m64 y1_fraction = _mm_set1_pi16( |
- static_cast<int16>(scaled_y_fraction >> 8)); |
+ static_cast<int16>(source_y_fraction >> 8)); |
__m64 y0_fraction = _mm_set1_pi16( |
- static_cast<int16>((scaled_y_fraction >> 8) ^ 255)); |
+ static_cast<int16>(256 - (source_y_fraction >> 8))); |
- uint8* end = ybuf + width; |
+ uint8* end = ybuf + source_width; |
if (ybuf < end) { |
do { |
__m64 y0 = _mm_cvtsi32_si64(*reinterpret_cast<const int *>(y0_ptr)); |
@@ -128,38 +128,39 @@ |
} while (ybuf < end); |
} |
} |
- |
-#endif // USE_SSE |
-#else // no MMX or SSE |
+#else // no MMX or SSE2 |
// C version blends 4 pixels at a time. |
static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, |
- int width, int scaled_y_fraction) { |
- int y0_fraction = kFractionMax - scaled_y_fraction; |
- int y1_fraction = scaled_y_fraction; |
- uint8* end = ybuf + width; |
+ int source_width, int source_y_fraction) { |
+ // Use 8 bit weights that sum to 256, as the SSE2 and MMX versions do; the |
+ // full 16.16 weights would not match the >> 8 applied to each blend below. |
+ // Each pass handles 4 bytes, so up to 3 bytes past source_width are touched. |
+ int y1_fraction = source_y_fraction >> 8; |
+ int y0_fraction = 256 - y1_fraction; |
+ uint8* end = ybuf + source_width; |
if (ybuf < end) { |
do { |
- ybuf[0] = (y0_ptr[0] * (y0_fraction) + y1_ptr[0] * (y1_fraction)) >> 16; |
- ybuf[1] = (y0_ptr[1] * (y0_fraction) + y1_ptr[1] * (y1_fraction)) >> 16; |
- ybuf[2] = (y0_ptr[2] * (y0_fraction) + y1_ptr[2] * (y1_fraction)) >> 16; |
- ybuf[3] = (y0_ptr[3] * (y0_fraction) + y1_ptr[3] * (y1_fraction)) >> 16; |
+ ybuf[0] = (y0_ptr[0] * (y0_fraction) + y1_ptr[0] * (y1_fraction)) >> 8; |
+ ybuf[1] = (y0_ptr[1] * (y0_fraction) + y1_ptr[1] * (y1_fraction)) >> 8; |
+ ybuf[2] = (y0_ptr[2] * (y0_fraction) + y1_ptr[2] * (y1_fraction)) >> 8; |
+ ybuf[3] = (y0_ptr[3] * (y0_fraction) + y1_ptr[3] * (y1_fraction)) >> 8; |
y0_ptr += 4; |
y1_ptr += 4; |
ybuf += 4; |
} while (ybuf < end); |
} |
} |
-#endif // USE_MMX |
+#endif |
// Scale a frame of YUV to 32 bit ARGB. |
void ScaleYUVToRGB32(const uint8* y_buf, |
const uint8* u_buf, |
const uint8* v_buf, |
uint8* rgb_buf, |
+ int source_width, |
+ int source_height, |
int width, |
int height, |
- int scaled_width, |
- int scaled_height, |
int y_pitch, |
int uv_pitch, |
int rgb_pitch, |
@@ -170,7 +171,8 @@ |
// Disable filtering if the screen is too big (to avoid buffer overflows). |
// This should never happen to regular users: they don't have monitors |
// wider than 8192 pixels. |
- if (width > kFilterBufferSize) |
+ // TODO(fbarchard): Allow rotated videos to filter. |
+ if (source_width > kFilterBufferSize || view_rotate) |
filter = FILTER_NONE; |
unsigned int y_shift = yuv_type; |
@@ -185,46 +187,46 @@ |
(view_rotate == ROTATE_270) || |
(view_rotate == MIRROR_ROTATE_0) || |
(view_rotate == MIRROR_ROTATE_90)) { |
- y_buf += width - 1; |
- u_buf += width / 2 - 1; |
- v_buf += width / 2 - 1; |
- width = -width; |
+ y_buf += source_width - 1; |
+ u_buf += source_width / 2 - 1; |
+ v_buf += source_width / 2 - 1; |
+ source_width = -source_width; |
} |
// Rotations that start at bottom of image. |
if ((view_rotate == ROTATE_90) || |
(view_rotate == ROTATE_180) || |
(view_rotate == MIRROR_ROTATE_90) || |
(view_rotate == MIRROR_ROTATE_180)) { |
- y_buf += (height - 1) * y_pitch; |
- u_buf += ((height >> y_shift) - 1) * uv_pitch; |
- v_buf += ((height >> y_shift) - 1) * uv_pitch; |
- height = -height; |
+ y_buf += (source_height - 1) * y_pitch; |
+ u_buf += ((source_height >> y_shift) - 1) * uv_pitch; |
+ v_buf += ((source_height >> y_shift) - 1) * uv_pitch; |
+ source_height = -source_height; |
} |
// Handle zero sized destination. |
- if (scaled_width == 0 || scaled_height == 0) |
+ if (width == 0 || height == 0) |
return; |
- int scaled_dx = width * kFractionMax / scaled_width; |
- int scaled_dy = height * kFractionMax / scaled_height; |
- int scaled_dx_uv = scaled_dx; |
+ int source_dx = source_width * kFractionMax / width; |
+ int source_dy = source_height * kFractionMax / height; |
+ int source_dx_uv = source_dx; |
if ((view_rotate == ROTATE_90) || |
(view_rotate == ROTATE_270)) { |
- int tmp = scaled_height; |
- scaled_height = scaled_width; |
- scaled_width = tmp; |
- tmp = height; |
+ int tmp = height; |
height = width; |
width = tmp; |
- int original_dx = scaled_dx; |
- int original_dy = scaled_dy; |
- scaled_dx = ((original_dy >> kFractionBits) * y_pitch) << kFractionBits; |
- scaled_dx_uv = ((original_dy >> kFractionBits) * uv_pitch) << kFractionBits; |
- scaled_dy = original_dx; |
+ tmp = source_height; |
+ source_height = source_width; |
+ source_width = tmp; |
+ int original_dx = source_dx; |
+ int original_dy = source_dy; |
+ source_dx = ((original_dy >> kFractionBits) * y_pitch) << kFractionBits; |
+ source_dx_uv = ((original_dy >> kFractionBits) * uv_pitch) << kFractionBits; |
+ source_dy = original_dx; |
if (view_rotate == ROTATE_90) { |
y_pitch = -1; |
uv_pitch = -1; |
- height = -height; |
+ source_height = -source_height; |
} else { |
y_pitch = 1; |
uv_pitch = 1; |
@@ -236,8 +238,9 @@ |
uint8 ybuf[kFilterBufferSize + 16]; |
uint8 ubuf[kFilterBufferSize / 2 + 16]; |
uint8 vbuf[kFilterBufferSize / 2 + 16]; |
- int yscale_fixed = (height << kFractionBits) / scaled_height; |
- for (int y = 0; y < scaled_height; ++y) { |
+ // TODO(fbarchard): Fixed point math is off by 1 on negatives. |
+ int yscale_fixed = (source_height << kFractionBits) / height; |
+ for (int y = 0; y < height; ++y) { |
uint8* dest_pixel = rgb_buf + y * rgb_pitch; |
int source_y_subpixel = (y * yscale_fixed); |
int source_y = source_y_subpixel >> kFractionBits; |
@@ -250,8 +253,8 @@ |
const uint8* v0_ptr = v_buf + (source_y >> y_shift) * uv_pitch; |
const uint8* v1_ptr = v0_ptr + uv_pitch; |
- int scaled_y_fraction = source_y_subpixel & (kFractionMax - 1); |
- int scaled_uv_fraction = (source_y_subpixel >> y_shift) & (kFractionMax - 1); |
+ int source_y_fraction = source_y_subpixel & kFractionMask; |
+ int source_uv_fraction = (source_y_subpixel >> y_shift) & kFractionMask; |
const uint8* y_ptr = y0_ptr; |
const uint8* u_ptr = u0_ptr; |
@@ -260,42 +263,63 @@ |
// TODO(fbarchard): Remove memcpy when not necessary. |
if (filter == media::FILTER_BILINEAR) { |
if (yscale_fixed != kFractionMax && |
- scaled_y_fraction && ((source_y + 1) < height)) { |
- FilterRows(ybuf, y0_ptr, y1_ptr, width, scaled_y_fraction); |
+ source_y_fraction && ((source_y + 1) < source_height)) { |
+ FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); |
} else { |
- memcpy(ybuf, y0_ptr, width); |
+ memcpy(ybuf, y0_ptr, source_width); |
} |
y_ptr = ybuf; |
- ybuf[width] = ybuf[width-1]; |
- int uv_width = (width + 1) / 2; |
+ ybuf[source_width] = ybuf[source_width-1]; |
+ int uv_source_width = (source_width + 1) / 2; |
if (yscale_fixed != kFractionMax && |
- scaled_uv_fraction && |
- (((source_y >> y_shift) + 1) < (height >> y_shift))) { |
- FilterRows(ubuf, u0_ptr, u1_ptr, uv_width, scaled_uv_fraction); |
- FilterRows(vbuf, v0_ptr, v1_ptr, uv_width, scaled_uv_fraction); |
+ source_uv_fraction && |
+ (((source_y >> y_shift) + 1) < (source_height >> y_shift))) { |
+ FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction); |
+ FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction); |
} else { |
- memcpy(ubuf, u0_ptr, uv_width); |
- memcpy(vbuf, v0_ptr, uv_width); |
+ memcpy(ubuf, u0_ptr, uv_source_width); |
+ memcpy(vbuf, v0_ptr, uv_source_width); |
} |
u_ptr = ubuf; |
v_ptr = vbuf; |
- ubuf[uv_width] = ubuf[uv_width - 1]; |
- vbuf[uv_width] = vbuf[uv_width - 1]; |
+ ubuf[uv_source_width] = ubuf[uv_source_width - 1]; |
+ vbuf[uv_source_width] = vbuf[uv_source_width - 1]; |
} |
- if (scaled_dx == kFractionMax) { // Not scaled |
+ if (source_dx == kFractionMax) { // Not scaled |
FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, |
- dest_pixel, scaled_width); |
+ dest_pixel, width); |
} else { |
if (filter == FILTER_BILINEAR) |
LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, |
- dest_pixel, scaled_width, scaled_dx); |
- else |
+ dest_pixel, width, source_dx); |
+ else { |
+// Specialized scalers and rotation. |
+#if USE_MMX && defined(_MSC_VER) |
+ if (width == (source_width * 2)) { |
+ DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, |
+ dest_pixel, width); |
+ } else if ((source_dx & kFractionMask) == 0) { |
+ // Scaling by an integer scale factor, e.g. half. |
+ ConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, |
+ dest_pixel, width, |
+ source_dx >> kFractionBits); |
+ } else if (source_dx_uv == source_dx) { // Not rotated. |
+ ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, |
+ dest_pixel, width, source_dx); |
+ } else { |
+ RotateConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, |
+ dest_pixel, width, |
+ source_dx >> kFractionBits, |
+ source_dx_uv >> kFractionBits); |
+ } |
+#else |
ScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, |
- dest_pixel, scaled_width, scaled_dx); |
+ dest_pixel, width, source_dx); |
+#endif |
+ } |
} |
} |
- |
- // MMX used for FastConvertYUVToRGB32Row requires emms instruction. |
+ // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms. |
EMMS(); |
} |