source/row_win.cc - Issue 1364813002: yuy2 avx2 initial change

Unified Diff: source/row_win.cc

Issue 1364813002: yuy2 avx2 initial change (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master

Patch Set: avx2 yuy2/uyvy to argb Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/row_win.cc

diff --git a/source/row_win.cc b/source/row_win.cc

index 2af97ae40e705630909843b66b89eb3dc7efe312..752eb78dfca637ca582e4e9ef07d6aeb2d24f3b6 100644

--- a/source/row_win.cc

+++ b/source/row_win.cc

@@ -243,6 +243,30 @@ static const uvec8 kShuffleMaskARGBToRAW_0 = {

2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 128u, 128u, 128u, 128u, 8u, 14u, 13u, 12u

};

+// YUY2 shuffle: duplicate each Y byte, expanding 16 Y to 32 Y (mask repeated for each 16-byte half).

+static const lvec8 kShuffleYUY2Y = {

+ 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14,

+ 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14

+};

+// YUY2 shuffle: repeat each UV pair, upsampling 8 UV to 16 UV (mask repeated for each 16-byte half).

+static const lvec8 kShuffleYUY2UV = {

+ 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15,

+ 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15

+};

+// UYVY shuffle: duplicate each Y byte, expanding 16 Y to 32 Y (mask repeated for each 16-byte half).

+static const lvec8 kShuffleUYVYY = {

+ 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15,

+ 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15

+};

+// UYVY shuffle: repeat each UV pair, upsampling 8 UV to 16 UV (mask repeated for each 16-byte half).

+static const lvec8 kShuffleUYVYUV = {

+ 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14,

+ 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14

+};

// Duplicates gray value 3 times and fills in alpha opaque.

__declspec(naked)

void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {

@@ -1899,6 +1923,24 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,

__asm lea eax, [eax + 16] \

}

+// Read 32 bytes of YUY2 (16 pixels): 16 Y, with 8 UV upsampled to 16 UV.

+#define READYUY2_AVX2 __asm { \

+ __asm vmovdqu ymm4, [eax] /* YUY2 */ \

+ __asm vpshufb ymm4, ymm4, ymmword ptr kShuffleYUY2Y \

+ __asm vmovdqu ymm0, [eax] /* UV */ \

+ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleYUY2UV \

+ __asm lea eax, [eax + 32] \

+ }

+// Read 32 bytes of UYVY (16 pixels): 16 Y, with 8 UV upsampled to 16 UV.

+#define READUYVY_AVX2 __asm { \

+ __asm vmovdqu ymm4, [eax] /* UYVY */ \

+ __asm vpshufb ymm4, ymm4, ymmword ptr kShuffleUYVYY \

+ __asm vmovdqu ymm0, [eax] /* UV */ \

+ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleUYVYUV \

+ __asm lea eax, [eax + 32] \

+ }

// Convert 16 pixels: 16 UV and 16 Y.

#define YUVTORGB_AVX2(YuvConstants) __asm { \

__asm vpmaddubsw ymm2, ymm0, ymmword ptr [YuvConstants + KUVTOR] /* R UV */\

@@ -2168,6 +2210,65 @@ void NV12ToARGBRow_AVX2(const uint8* y_buf,

}

#endif // HAS_NV12TOARGBROW_AVX2

+// 16 pixels.

+// 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).

+// Arguments are read from the stack. ebp is saved and restored because it

+// holds the yuvconstants pointer for the duration of the loop.

+// The loop counts width down by 16 per iteration and repeats while the

+// remainder is > 0, so it always converts whole groups of 16 pixels.

+// NOTE(review): presumably callers pass a width that is a positive multiple

+// of 16 - confirm against the dispatch code.

+__declspec(naked)

+void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,

+ uint8* dst_argb,

+ struct YuvConstants* yuvconstants,

+ int width) {

+ __asm {

+ push ebp

+ // Offsets below: 4 for the pushed ebp, then the argument offset.

+ mov eax, [esp + 4 + 4] // yuy2

+ mov edx, [esp + 4 + 8] // argb

+ mov ebp, [esp + 4 + 12] // yuvconstants

+ mov ecx, [esp + 4 + 16] // width

+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha

+ convertloop:

+ READYUY2_AVX2

+ YUVTORGB_AVX2(ebp)

+ STOREARGB_AVX2

+ sub ecx, 16

+ jg convertloop

+ pop ebp

+ vzeroupper

+ ret

+ }

+}

+// 16 pixels.

+// 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes).

+// Arguments are read from the stack. ebp is saved and restored because it

+// holds the yuvconstants pointer for the duration of the loop.

+// The loop counts width down by 16 per iteration and repeats while the

+// remainder is > 0, so it always converts whole groups of 16 pixels.

+// NOTE(review): presumably callers pass a width that is a positive multiple

+// of 16 - confirm against the dispatch code.

+__declspec(naked)

+void UYVYToARGBRow_AVX2(const uint8* src_uyvy,

+ uint8* dst_argb,

+ struct YuvConstants* yuvconstants,

+ int width) {

+ __asm {

+ push ebp

+ // Offsets below: 4 for the pushed ebp, then the argument offset.

+ mov eax, [esp + 4 + 4] // uyvy

+ mov edx, [esp + 4 + 8] // argb

+ mov ebp, [esp + 4 + 12] // yuvconstants

+ mov ecx, [esp + 4 + 16] // width

+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha

+ convertloop:

+ READUYVY_AVX2

+ YUVTORGB_AVX2(ebp)

+ STOREARGB_AVX2

+ sub ecx, 16

+ jg convertloop

+ pop ebp

+ vzeroupper

+ ret

+ }

+}

#ifdef HAS_I422TOBGRAROW_AVX2

// 16 pixels

// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).

@@ -2338,17 +2439,7 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,

__asm lea eax, [eax + 8] \

}

-// YUY2 shuf 8 Y to 16 Y.

-static const vec8 kShuffleYUY2Y = {

- 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14

-};

-// YUY2 shuf 4 UV to 8 UV.

-static const vec8 kShuffleYUY2UV = {

- 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15

-};

-// Read 4 YUY2 with 8 Y and update 4 UV to 8 UV.

+// Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV.

#define READYUY2 __asm { \

__asm movdqu xmm4, [eax] /* YUY2 */ \

__asm pshufb xmm4, xmmword ptr kShuffleYUY2Y \

@@ -2357,24 +2448,13 @@ static const vec8 kShuffleYUY2UV = {

__asm lea eax, [eax + 16] \

}

-// UYVY shuf 8 Y to 16 Y.

-static const vec8 kShuffleUYVYY = {

- 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15

-};

-// UYVY shuf 4 UV to 8 UV.

-static const vec8 kShuffleUYVYUV = {

- 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14

-};

-// Read 4 UYVY with 8 Y and update 4 UV to 8 UV.

+// Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV.

#define READUYVY __asm { \

__asm movdqu xmm4, [eax] /* UYVY */ \

__asm pshufb xmm4, xmmword ptr kShuffleUYVYY \

__asm movdqu xmm0, [eax] /* UV */ \

__asm pshufb xmm0, xmmword ptr kShuffleUYVYUV \

__asm lea eax, [eax + 16] \

- __asm lea eax, [eax + 8] \

}

// Convert 8 pixels: 8 UV and 8 Y.

« no previous file with comments | « source/row_gcc.cc ('k') | no next file » | no next file with comments »