| Index: source/row_win.cc
|
| diff --git a/source/row_win.cc b/source/row_win.cc
|
| index 64c029ce2de6c5c732f5b57b25222bacac344e60..e5b27b835e57d5b314709a24f2171cb9f9ed3d6d 100644
|
| --- a/source/row_win.cc
|
| +++ b/source/row_win.cc
|
| @@ -37,7 +37,20 @@ extern "C" {
|
| u_buf += 4; \
|
| xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
|
| xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
|
| + y_buf += 8;
|
| +
|
| +// Read 4 U and 4 V from 422 (V at u_buf + offset), upsample to 8 UV in xmm0. Also loads 8 Y (each byte doubled) into xmm4 and 8 alpha into xmm5.
|
| +#define READYUVA422 \
|
| + xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \
|
| + xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
|
| + xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
|
| + xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
|
| + u_buf += 4; \
|
| + xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
|
| + xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
|
| y_buf += 8; \
|
| + xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \
|
| + a_buf += 8;
|
|
|
| // Convert 8 pixels: 8 UV and 8 Y.
|
| #define YUVTORGB(yuvconstants) \
|
| @@ -78,9 +91,9 @@ extern "C" {
|
| xmm1 = _mm_loadu_si128(&xmm2); \
|
| xmm2 = _mm_unpacklo_epi16(xmm2, xmm0); \
|
| xmm1 = _mm_unpackhi_epi16(xmm1, xmm0); \
|
| - _mm_storeu_si128((__m128i *)dst_argb, xmm2); \
|
| - _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \
|
| - dst_argb += 32;
|
| + _mm_storeu_si128((__m128i *)dst_abgr, xmm2); \
|
| + _mm_storeu_si128((__m128i *)(dst_abgr + 16), xmm1); \
|
| + dst_abgr += 32;
|
|
|
|
|
| #if defined(HAS_I422TOARGBROW_SSSE3)
|
| @@ -106,7 +119,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
| void I422ToABGRRow_SSSE3(const uint8* y_buf,
|
| const uint8* u_buf,
|
| const uint8* v_buf,
|
| - uint8* dst_argb,
|
| + uint8* dst_abgr,
|
| struct YuvConstants* yuvconstants,
|
| int width) {
|
| __m128i xmm0, xmm1, xmm2, xmm4;
|
| @@ -120,6 +133,45 @@ void I422ToABGRRow_SSSE3(const uint8* y_buf,
|
| }
|
| }
|
| #endif
|
| +
|
| +#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
|
| +void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,  // Builds a row of dst_argb from Y, U, V and alpha planes using the READYUVA422, YUVTORGB and STOREARGB macros.
|
| + const uint8* u_buf,
|
| + const uint8* v_buf,  // Only used to compute offset below.
|
| + const uint8* a_buf,
|
| + uint8* dst_argb,
|
| + struct YuvConstants* yuvconstants,  // Forwarded to YUVTORGB.
|
| + int width) {
|
| + __m128i xmm0, xmm1, xmm2, xmm4, xmm5;  // Locals the macros refer to by name; READYUVA422 writes xmm0, xmm1, xmm4 and xmm5.
|
| + const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;  // READYUVA422 reads V at u_buf + offset, so only u_buf is advanced.
|
| + while (width > 0) {  // Each pass reads 8 Y, 4 U, 4 V and 8 alpha bytes.
|
| + READYUVA422
|
| + YUVTORGB(yuvconstants)
|
| + STOREARGB  // NOTE(review): confirm this macro takes alpha from xmm5 rather than a fixed opaque value.
|
| + width -= 8;  // NOTE(review): a width that is not a multiple of 8 still runs a whole 8-pixel pass; presumably callers guarantee a multiple of 8 - confirm.
|
| + }
|
| +}
|
| +#endif
|
| +
|
| +#if defined(HAS_I422ALPHATOABGRROW_SSSE3)
|
| +void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,  // Builds a row of dst_abgr from Y, U, V and alpha planes using the READYUVA422, YUVTORGB and STOREABGR macros.
|
| + const uint8* u_buf,
|
| + const uint8* v_buf,  // Only used to compute offset below.
|
| + const uint8* a_buf,
|
| + uint8* dst_abgr,
|
| + struct YuvConstants* yuvconstants,  // Forwarded to YUVTORGB.
|
| + int width) {
|
| + __m128i xmm0, xmm1, xmm2, xmm4, xmm5;  // Locals the macros refer to by name; READYUVA422 writes xmm0, xmm1, xmm4 and xmm5.
|
| + const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;  // READYUVA422 reads V at u_buf + offset, so only u_buf is advanced.
|
| + while (width > 0) {  // Each pass reads 8 Y, 4 U, 4 V and 8 alpha bytes.
|
| + READYUVA422
|
| + YUVTORGB(yuvconstants)
|
| + STOREABGR  // NOTE(review): confirm this macro takes alpha from xmm5 rather than a fixed opaque value.
|
| + width -= 8;  // NOTE(review): a width that is not a multiple of 8 still runs a whole 8-pixel pass; presumably callers guarantee a multiple of 8 - confirm.
|
| + }
|
| +}
|
| +#endif
|
| +
|
| // 32 bit
|
| #else // defined(_M_X64)
|
| #ifdef HAS_ARGBTOYROW_SSSE3
|
|
|