| Index: media/base/yuv_convert.cc
|
| ===================================================================
|
| --- media/base/yuv_convert.cc (revision 45148)
|
| +++ media/base/yuv_convert.cc (working copy)
|
| @@ -70,14 +70,12 @@
|
|
|
| #if USE_SSE2
|
| // FilterRows combines two rows of the image using linear interpolation.
|
| -// SSE2 version blends 8 pixels at a time.
|
| +// Blends 8 pixels at a time.
|
| static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
|
| int source_width, int source_y_fraction) {
|
| __m128i zero = _mm_setzero_si128();
|
| __m128i y1_fraction = _mm_set1_epi16(
|
| static_cast<uint16>(source_y_fraction >> 8));
|
| - __m128i y0_fraction = _mm_set1_epi16(
|
| - static_cast<uint16>(256 - (source_y_fraction >> 8)));
|
|
|
| uint8* end = ybuf + source_width;
|
| if (ybuf < end) {
|
| @@ -86,64 +84,69 @@
|
| __m128i y1 = _mm_loadl_epi64(reinterpret_cast<__m128i const*>(y1_ptr));
|
| y0 = _mm_unpacklo_epi8(y0, zero);
|
| y1 = _mm_unpacklo_epi8(y1, zero);
|
| - y0 = _mm_mullo_epi16(y0, y0_fraction);
|
| + y1 = _mm_sub_epi16(y1, y0);
|
| y1 = _mm_mullo_epi16(y1, y1_fraction);
|
| - y0 = _mm_add_epi16(y0, y1); // 8.8 fixed point result
|
| - y0 = _mm_srli_epi16(y0, 8);
|
| - y0 = _mm_packus_epi16(y0, y0);
|
| - _mm_storel_epi64(reinterpret_cast<__m128i *>(ybuf), y0);
|
| + y1 = _mm_srai_epi16(y1, 8);
|
| + y1 = _mm_add_epi16(y1, y0);
|
| + y1 = _mm_packus_epi16(y1, y1);
|
| + _mm_storel_epi64(reinterpret_cast<__m128i *>(ybuf), y1);
|
| y0_ptr += 8;
|
| y1_ptr += 8;
|
| ybuf += 8;
|
| } while (ybuf < end);
|
| }
|
| }
|
| -
|
| #elif USE_MMX
|
| -// MMX version blends 4 pixels at a time.
|
| static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
|
| int source_width, int source_y_fraction) {
|
| __m64 zero = _mm_setzero_si64();
|
| __m64 y1_fraction = _mm_set1_pi16(
|
| - static_cast<int16>(source_y_fraction >> 8));
|
| - __m64 y0_fraction = _mm_set1_pi16(
|
| - static_cast<int16>(256 - (source_y_fraction >> 8)));
|
| + static_cast<int16>(source_y_fraction >> 8));
|
|
|
| uint8* end = ybuf + source_width;
|
| if (ybuf < end) {
|
| do {
|
| - __m64 y0 = _mm_cvtsi32_si64(*reinterpret_cast<const int *>(y0_ptr));
|
| - __m64 y1 = _mm_cvtsi32_si64(*reinterpret_cast<const int *>(y1_ptr));
|
| - y0 = _mm_unpacklo_pi8(y0, zero);
|
| - y1 = _mm_unpacklo_pi8(y1, zero);
|
| - y0 = _mm_mullo_pi16(y0, y0_fraction);
|
| + __m64 y2 = *reinterpret_cast<const __m64 *>(y0_ptr);
|
| + __m64 y3 = *reinterpret_cast<const __m64 *>(y1_ptr);
|
| + __m64 y0 = _mm_unpacklo_pi8(y2, zero);
|
| + __m64 y1 = _mm_unpacklo_pi8(y3, zero);
|
| + y2 = _mm_unpackhi_pi8(y2, zero);
|
| + y3 = _mm_unpackhi_pi8(y3, zero);
|
| + y1 = _mm_sub_pi16(y1, y0);
|
| + y3 = _mm_sub_pi16(y3, y2);
|
| y1 = _mm_mullo_pi16(y1, y1_fraction);
|
| - y0 = _mm_add_pi16(y0, y1); // 8.8 fixed point result
|
| - y0 = _mm_srli_pi16(y0, 8);
|
| - y0 = _mm_packs_pu16(y0, y0);
|
| - *reinterpret_cast<int *>(ybuf) = _mm_cvtsi64_si32(y0);
|
| - y0_ptr += 4;
|
| - y1_ptr += 4;
|
| - ybuf += 4;
|
| + y3 = _mm_mullo_pi16(y3, y1_fraction);
|
| + y1 = _mm_srai_pi16(y1, 8);
|
| + y3 = _mm_srai_pi16(y3, 8);
|
| + y1 = _mm_add_pi16(y1, y0);
|
| + y3 = _mm_add_pi16(y3, y2);
|
| + y0 = _mm_packs_pu16(y1, y3);
|
| + *reinterpret_cast<__m64 *>(ybuf) = y0;
|
| + y0_ptr += 8;
|
| + y1_ptr += 8;
|
| + ybuf += 8;
|
| } while (ybuf < end);
|
| }
|
| }
|
| #else // no MMX or SSE2
|
| -// C version blends 4 pixels at a time.
|
| +
|
| static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
|
| int source_width, int source_y_fraction) {
|
| - int y1_fraction = source_y_fraction >> 8;
|
| - int y0_fraction = 256 - (source_y_fraction >> 8);
|
| + int y1_fraction = (source_y_fraction >> 8);
|
| uint8* end = ybuf + source_width;
|
| if (ybuf < end) {
|
| do {
|
| - ybuf[0] = (y0_ptr[0] * (y0_fraction) + y1_ptr[0] * (y1_fraction)) >> 8;
|
| - ybuf[1] = (y0_ptr[1] * (y0_fraction) + y1_ptr[1] * (y1_fraction)) >> 8;
|
| - ybuf[2] = (y0_ptr[2] * (y0_fraction) + y1_ptr[2] * (y1_fraction)) >> 8;
|
| - ybuf[3] = (y0_ptr[3] * (y0_fraction) + y1_ptr[3] * (y1_fraction)) >> 8;
|
| - y0_ptr += 4;
|
| - y1_ptr += 4;
|
| - ybuf += 4;
|
| + ybuf[0] = y0_ptr[0] + (((y1_ptr[0] - y0_ptr[0]) * y1_fraction) >> 8);
|
| + ybuf[1] = y0_ptr[1] + (((y1_ptr[1] - y0_ptr[1]) * y1_fraction) >> 8);
|
| + ybuf[2] = y0_ptr[2] + (((y1_ptr[2] - y0_ptr[2]) * y1_fraction) >> 8);
|
| + ybuf[3] = y0_ptr[3] + (((y1_ptr[3] - y0_ptr[3]) * y1_fraction) >> 8);
|
| + ybuf[4] = y0_ptr[4] + (((y1_ptr[4] - y0_ptr[4]) * y1_fraction) >> 8);
|
| + ybuf[5] = y0_ptr[5] + (((y1_ptr[5] - y0_ptr[5]) * y1_fraction) >> 8);
|
| + ybuf[6] = y0_ptr[6] + (((y1_ptr[6] - y0_ptr[6]) * y1_fraction) >> 8);
|
| + ybuf[7] = y0_ptr[7] + (((y1_ptr[7] - y0_ptr[7]) * y1_fraction) >> 8);
|
| + y0_ptr += 8;
|
| + y1_ptr += 8;
|
| + ybuf += 8;
|
| } while (ybuf < end);
|
| }
|
| }
|
| @@ -261,7 +264,7 @@
|
| const uint8* v_ptr = v0_ptr;
|
| // Apply vertical filtering if necessary.
|
| // TODO(fbarchard): Remove memcpy when not necessary.
|
| - if (filter == media::FILTER_BILINEAR) {
|
| + if (filter & media::FILTER_BILINEAR_V) {
|
| if (yscale_fixed != kFractionMax &&
|
| source_y_fraction && ((source_y + 1) < source_height)) {
|
| FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction);
|
| @@ -289,7 +292,7 @@
|
| FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
|
| dest_pixel, width);
|
| } else {
|
| - if (filter == FILTER_BILINEAR)
|
| + if (filter & FILTER_BILINEAR_H)
|
| LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr,
|
| dest_pixel, width, source_dx);
|
| else {
|
|
|