| OLD | NEW |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // This webpage shows layout of YV12 and other YUV formats | 5 // This webpage shows layout of YV12 and other YUV formats |
| 6 // http://www.fourcc.org/yuv.php | 6 // http://www.fourcc.org/yuv.php |
| 7 // The actual conversion is best described here | 7 // The actual conversion is best described here |
| 8 // http://en.wikipedia.org/wiki/YUV | 8 // http://en.wikipedia.org/wiki/YUV |
| 9 // An article on optimizing YUV conversion using tables instead of multiplies | 9 // An article on optimizing YUV conversion using tables instead of multiplies |
| 10 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf | 10 // http://lestourtereaux.free.fr/papers/data/yuvrgb.pdf |
| (...skipping 52 matching lines...) | (...skipping 52 matching lines...) |
| 63 rgb_row, | 63 rgb_row, |
| 64 width); | 64 width); |
| 65 } | 65 } |
| 66 | 66 |
| 67 // MMX used for FastConvertYUVToRGB32Row requires emms instruction. | 67 // MMX used for FastConvertYUVToRGB32Row requires emms instruction. |
| 68 EMMS(); | 68 EMMS(); |
| 69 } | 69 } |
| 70 | 70 |
| 71 #if USE_SSE2 | 71 #if USE_SSE2 |
| 72 // FilterRows combines two rows of the image using linear interpolation. | 72 // FilterRows combines two rows of the image using linear interpolation. |
| 73 // SSE2 version blends 8 pixels at a time. | 73 // Blends 8 pixels at a time. |
| 74 static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, | 74 static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, |
| 75 int source_width, int source_y_fraction) { | 75 int source_width, int source_y_fraction) { |
| 76 __m128i zero = _mm_setzero_si128(); | 76 __m128i zero = _mm_setzero_si128(); |
| 77 __m128i y1_fraction = _mm_set1_epi16( | 77 __m128i y1_fraction = _mm_set1_epi16( |
| 78 static_cast<uint16>(source_y_fraction >> 8)); | 78 static_cast<uint16>(source_y_fraction >> 8)); |
| 79 __m128i y0_fraction = _mm_set1_epi16( | |
| 80 static_cast<uint16>(256 - (source_y_fraction >> 8))); | |
| 81 | 79 |
| 82 uint8* end = ybuf + source_width; | 80 uint8* end = ybuf + source_width; |
| 83 if (ybuf < end) { | 81 if (ybuf < end) { |
| 84 do { | 82 do { |
| 85 __m128i y0 = _mm_loadl_epi64(reinterpret_cast<__m128i const*>(y0_ptr)); | 83 __m128i y0 = _mm_loadl_epi64(reinterpret_cast<__m128i const*>(y0_ptr)); |
| 86 __m128i y1 = _mm_loadl_epi64(reinterpret_cast<__m128i const*>(y1_ptr)); | 84 __m128i y1 = _mm_loadl_epi64(reinterpret_cast<__m128i const*>(y1_ptr)); |
| 87 y0 = _mm_unpacklo_epi8(y0, zero); | 85 y0 = _mm_unpacklo_epi8(y0, zero); |
| 88 y1 = _mm_unpacklo_epi8(y1, zero); | 86 y1 = _mm_unpacklo_epi8(y1, zero); |
| 89 y0 = _mm_mullo_epi16(y0, y0_fraction); | 87 y1 = _mm_sub_epi16(y1, y0); |
| 90 y1 = _mm_mullo_epi16(y1, y1_fraction); | 88 y1 = _mm_mullo_epi16(y1, y1_fraction); |
| 91 y0 = _mm_add_epi16(y0, y1); // 8.8 fixed point result | 89 y1 = _mm_srai_epi16(y1, 8); |
| 92 y0 = _mm_srli_epi16(y0, 8); | 90 y1 = _mm_add_epi16(y1, y0); |
| 93 y0 = _mm_packus_epi16(y0, y0); | 91 y1 = _mm_packus_epi16(y1, y1); |
| 94 _mm_storel_epi64(reinterpret_cast<__m128i *>(ybuf), y0); | 92 _mm_storel_epi64(reinterpret_cast<__m128i *>(ybuf), y1); |
| 95 y0_ptr += 8; | 93 y0_ptr += 8; |
| 96 y1_ptr += 8; | 94 y1_ptr += 8; |
| 97 ybuf += 8; | 95 ybuf += 8; |
| 98 } while (ybuf < end); | 96 } while (ybuf < end); |
| 99 } | 97 } |
| 100 } | 98 } |
| 101 | |
| 102 #elif USE_MMX | 99 #elif USE_MMX |
| 103 // MMX version blends 4 pixels at a time. | |
| 104 static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, | 100 static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, |
| 105 int source_width, int source_y_fraction) { | 101 int source_width, int source_y_fraction) { |
| 106 __m64 zero = _mm_setzero_si64(); | 102 __m64 zero = _mm_setzero_si64(); |
| 107 __m64 y1_fraction = _mm_set1_pi16( | 103 __m64 y1_fraction = _mm_set1_pi16( |
| 108 static_cast<int16>(source_y_fraction >> 8)); | 104 static_cast<int16>(source_y_fraction >> 8)); |
| 109 __m64 y0_fraction = _mm_set1_pi16( | |
| 110 static_cast<int16>(256 - (source_y_fraction >> 8))); | |
| 111 | 105 |
| 112 uint8* end = ybuf + source_width; | 106 uint8* end = ybuf + source_width; |
| 113 if (ybuf < end) { | 107 if (ybuf < end) { |
| 114 do { | 108 do { |
| 115 __m64 y0 = _mm_cvtsi32_si64(*reinterpret_cast<const int *>(y0_ptr)); | 109 __m64 y2 = *reinterpret_cast<const __m64 *>(y0_ptr); |
| 116 __m64 y1 = _mm_cvtsi32_si64(*reinterpret_cast<const int *>(y1_ptr)); | 110 __m64 y3 = *reinterpret_cast<const __m64 *>(y1_ptr); |
| 117 y0 = _mm_unpacklo_pi8(y0, zero); | 111 __m64 y0 = _mm_unpacklo_pi8(y2, zero); |
| 118 y1 = _mm_unpacklo_pi8(y1, zero); | 112 __m64 y1 = _mm_unpacklo_pi8(y3, zero); |
| 119 y0 = _mm_mullo_pi16(y0, y0_fraction); | 113 y2 = _mm_unpackhi_pi8(y2, zero); |
| 114 y3 = _mm_unpackhi_pi8(y3, zero); |
| 115 y1 = _mm_sub_pi16(y1, y0); |
| 116 y3 = _mm_sub_pi16(y3, y2); |
| 120 y1 = _mm_mullo_pi16(y1, y1_fraction); | 117 y1 = _mm_mullo_pi16(y1, y1_fraction); |
| 121 y0 = _mm_add_pi16(y0, y1); // 8.8 fixed point result | 118 y3 = _mm_mullo_pi16(y3, y1_fraction); |
| 122 y0 = _mm_srli_pi16(y0, 8); | 119 y1 = _mm_srai_pi16(y1, 8); |
| 123 y0 = _mm_packs_pu16(y0, y0); | 120 y3 = _mm_srai_pi16(y3, 8); |
| 124 *reinterpret_cast<int *>(ybuf) = _mm_cvtsi64_si32(y0); | 121 y1 = _mm_add_pi16(y1, y0); |
| 125 y0_ptr += 4; | 122 y3 = _mm_add_pi16(y3, y2); |
| 126 y1_ptr += 4; | 123 y0 = _mm_packs_pu16(y1, y3); |
| 127 ybuf += 4; | 124 *reinterpret_cast<__m64 *>(ybuf) = y0; |
| 125 y0_ptr += 8; |
| 126 y1_ptr += 8; |
| 127 ybuf += 8; |
| 128 } while (ybuf < end); | 128 } while (ybuf < end); |
| 129 } | 129 } |
| 130 } | 130 } |
| 131 #else // no MMX or SSE2 | 131 #else // no MMX or SSE2 |
| 132 // C version blends 4 pixels at a time. | 132 |
| 133 static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, | 133 static void FilterRows(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr, |
| 134 int source_width, int source_y_fraction) { | 134 int source_width, int source_y_fraction) { |
| 135 int y1_fraction = source_y_fraction >> 8; | 135 int y1_fraction = (source_y_fraction >> 8); |
| 136 int y0_fraction = 256 - (source_y_fraction >> 8); | |
| 137 uint8* end = ybuf + source_width; | 136 uint8* end = ybuf + source_width; |
| 138 if (ybuf < end) { | 137 if (ybuf < end) { |
| 139 do { | 138 do { |
| 140 ybuf[0] = (y0_ptr[0] * (y0_fraction) + y1_ptr[0] * (y1_fraction)) >> 8; | 139 ybuf[0] = y0_ptr[0] + (((y1_ptr[0] - y0_ptr[0]) * y1_fraction) >> 8); |
| 141 ybuf[1] = (y0_ptr[1] * (y0_fraction) + y1_ptr[1] * (y1_fraction)) >> 8; | 140 ybuf[1] = y0_ptr[1] + (((y1_ptr[1] - y0_ptr[1]) * y1_fraction) >> 8); |
| 142 ybuf[2] = (y0_ptr[2] * (y0_fraction) + y1_ptr[2] * (y1_fraction)) >> 8; | 141 ybuf[2] = y0_ptr[2] + (((y1_ptr[2] - y0_ptr[2]) * y1_fraction) >> 8); |
| 143 ybuf[3] = (y0_ptr[3] * (y0_fraction) + y1_ptr[3] * (y1_fraction)) >> 8; | 142 ybuf[3] = y0_ptr[3] + (((y1_ptr[3] - y0_ptr[3]) * y1_fraction) >> 8); |
| 144 y0_ptr += 4; | 143 ybuf[4] = y0_ptr[4] + (((y1_ptr[4] - y0_ptr[4]) * y1_fraction) >> 8); |
| 145 y1_ptr += 4; | 144 ybuf[5] = y0_ptr[5] + (((y1_ptr[5] - y0_ptr[5]) * y1_fraction) >> 8); |
| 146 ybuf += 4; | 145 ybuf[6] = y0_ptr[6] + (((y1_ptr[6] - y0_ptr[6]) * y1_fraction) >> 8); |
| 146 ybuf[7] = y0_ptr[7] + (((y1_ptr[7] - y0_ptr[7]) * y1_fraction) >> 8); |
| 147 y0_ptr += 8; |
| 148 y1_ptr += 8; |
| 149 ybuf += 8; |
| 147 } while (ybuf < end); | 150 } while (ybuf < end); |
| 148 } | 151 } |
| 149 } | 152 } |
| 150 #endif | 153 #endif |
| 151 | 154 |
| 152 // Scale a frame of YUV to 32 bit ARGB. | 155 // Scale a frame of YUV to 32 bit ARGB. |
| 153 void ScaleYUVToRGB32(const uint8* y_buf, | 156 void ScaleYUVToRGB32(const uint8* y_buf, |
| 154 const uint8* u_buf, | 157 const uint8* u_buf, |
| 155 const uint8* v_buf, | 158 const uint8* v_buf, |
| 156 uint8* rgb_buf, | 159 uint8* rgb_buf, |
| (...skipping 97 matching lines...) | (...skipping 97 matching lines...) |
| 254 const uint8* v1_ptr = v0_ptr + uv_pitch; | 257 const uint8* v1_ptr = v0_ptr + uv_pitch; |
| 255 | 258 |
| 256 int source_y_fraction = source_y_subpixel & kFractionMask; | 259 int source_y_fraction = source_y_subpixel & kFractionMask; |
| 257 int source_uv_fraction = (source_y_subpixel >> y_shift) & kFractionMask; | 260 int source_uv_fraction = (source_y_subpixel >> y_shift) & kFractionMask; |
| 258 | 261 |
| 259 const uint8* y_ptr = y0_ptr; | 262 const uint8* y_ptr = y0_ptr; |
| 260 const uint8* u_ptr = u0_ptr; | 263 const uint8* u_ptr = u0_ptr; |
| 261 const uint8* v_ptr = v0_ptr; | 264 const uint8* v_ptr = v0_ptr; |
| 262 // Apply vertical filtering if necessary. | 265 // Apply vertical filtering if necessary. |
| 263 // TODO(fbarchard): Remove memcpy when not necessary. | 266 // TODO(fbarchard): Remove memcpy when not necessary. |
| 264 if (filter == media::FILTER_BILINEAR) { | 267 if (filter & media::FILTER_BILINEAR_V) { |
| 265 if (yscale_fixed != kFractionMax && | 268 if (yscale_fixed != kFractionMax && |
| 266 source_y_fraction && ((source_y + 1) < source_height)) { | 269 source_y_fraction && ((source_y + 1) < source_height)) { |
| 267 FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); | 270 FilterRows(ybuf, y0_ptr, y1_ptr, source_width, source_y_fraction); |
| 268 } else { | 271 } else { |
| 269 memcpy(ybuf, y0_ptr, source_width); | 272 memcpy(ybuf, y0_ptr, source_width); |
| 270 } | 273 } |
| 271 y_ptr = ybuf; | 274 y_ptr = ybuf; |
| 272 ybuf[source_width] = ybuf[source_width-1]; | 275 ybuf[source_width] = ybuf[source_width-1]; |
| 273 int uv_source_width = (source_width + 1) / 2; | 276 int uv_source_width = (source_width + 1) / 2; |
| 274 if (yscale_fixed != kFractionMax && | 277 if (yscale_fixed != kFractionMax && |
| 275 source_uv_fraction && | 278 source_uv_fraction && |
| 276 (((source_y >> y_shift) + 1) < (source_height >> y_shift))) { | 279 (((source_y >> y_shift) + 1) < (source_height >> y_shift))) { |
| 277 FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction); | 280 FilterRows(ubuf, u0_ptr, u1_ptr, uv_source_width, source_uv_fraction); |
| 278 FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction); | 281 FilterRows(vbuf, v0_ptr, v1_ptr, uv_source_width, source_uv_fraction); |
| 279 } else { | 282 } else { |
| 280 memcpy(ubuf, u0_ptr, uv_source_width); | 283 memcpy(ubuf, u0_ptr, uv_source_width); |
| 281 memcpy(vbuf, v0_ptr, uv_source_width); | 284 memcpy(vbuf, v0_ptr, uv_source_width); |
| 282 } | 285 } |
| 283 u_ptr = ubuf; | 286 u_ptr = ubuf; |
| 284 v_ptr = vbuf; | 287 v_ptr = vbuf; |
| 285 ubuf[uv_source_width] = ubuf[uv_source_width - 1]; | 288 ubuf[uv_source_width] = ubuf[uv_source_width - 1]; |
| 286 vbuf[uv_source_width] = vbuf[uv_source_width - 1]; | 289 vbuf[uv_source_width] = vbuf[uv_source_width - 1]; |
| 287 } | 290 } |
| 288 if (source_dx == kFractionMax) { // Not scaled | 291 if (source_dx == kFractionMax) { // Not scaled |
| 289 FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, | 292 FastConvertYUVToRGB32Row(y_ptr, u_ptr, v_ptr, |
| 290 dest_pixel, width); | 293 dest_pixel, width); |
| 291 } else { | 294 } else { |
| 292 if (filter == FILTER_BILINEAR) | 295 if (filter & FILTER_BILINEAR_H) |
| 293 LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, | 296 LinearScaleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, |
| 294 dest_pixel, width, source_dx); | 297 dest_pixel, width, source_dx); |
| 295 else { | 298 else { |
| 296 // Specialized scalers and rotation. | 299 // Specialized scalers and rotation. |
| 297 #if USE_MMX && defined(_MSC_VER) | 300 #if USE_MMX && defined(_MSC_VER) |
| 298 if (width == (source_width * 2)) { | 301 if (width == (source_width * 2)) { |
| 299 DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, | 302 DoubleYUVToRGB32Row(y_ptr, u_ptr, v_ptr, |
| 300 dest_pixel, width); | 303 dest_pixel, width); |
| 301 } else if ((source_dx & kFractionMask) == 0) { | 304 } else if ((source_dx & kFractionMask) == 0) { |
| 302 // Scaling by an integer scale factor, e.g. half. | 305 // Scaling by an integer scale factor, e.g. half. |
| (...skipping 14 matching lines...) | (...skipping 14 matching lines...) |
| 317 dest_pixel, width, source_dx); | 320 dest_pixel, width, source_dx); |
| 318 #endif | 321 #endif |
| 319 } | 322 } |
| 320 } | 323 } |
| 321 } | 324 } |
| 322 // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms. | 325 // MMX used for FastConvertYUVToRGB32Row and FilterRows requires emms. |
| 323 EMMS(); | 326 EMMS(); |
| 324 } | 327 } |
| 325 | 328 |
| 326 } // namespace media | 329 } // namespace media |
| OLD | NEW |