source/row_win.cc - Issue 1388273002: Reimplement NV21ToARGB to allow different color matrix.

Unified Diff: source/row_win.cc

Issue 1388273002: Reimplement NV21ToARGB to allow different color matrix. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master

Patch Set: include scale_row.h for scaling macros Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/row_win.cc

diff --git a/source/row_win.cc b/source/row_win.cc

index 6e35c70c6c94aa924a5268f6d9958acefde0397c..d0e691cdb04ff7e8d5fef8f7667102be757d9f89 100644

--- a/source/row_win.cc

+++ b/source/row_win.cc

@@ -319,6 +319,12 @@ static const lvec8 kShuffleUYVYUV = {

0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14

};

+// NV21 shuf 8 VU to 16 UV.

+static const lvec8 kShuffleNV21 = {

+ 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,

+};

// Duplicates gray value 3 times and fills in alpha opaque.

__declspec(naked)

void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {

@@ -1992,6 +1998,18 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,

__asm lea eax, [eax + 16] \

}

+// Read 8 UV from NV21, upsample to 16 UV.

+#define READNV21_AVX2 __asm { \

+ __asm vmovdqu xmm0, [esi] /* UV */ \

+ __asm lea esi, [esi + 16] \

+ __asm vpermq ymm0, ymm0, 0xd8 \

+ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleNV21 \

+ __asm vmovdqu xmm4, [eax] /* Y */ \

+ __asm vpermq ymm4, ymm4, 0xd8 \

+ __asm vpunpcklbw ymm4, ymm4, ymm4 \

+ __asm lea eax, [eax + 16] \

+ }

// Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV.

#define READYUY2_AVX2 __asm { \

__asm vmovdqu ymm4, [eax] /* YUY2 */ \

@@ -2365,6 +2383,41 @@ void NV12ToARGBRow_AVX2(const uint8* y_buf,

}

#endif // HAS_NV12TOARGBROW_AVX2

+#ifdef HAS_NV21TOARGBROW_AVX2

+// 16 pixels.

+// 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).

+__declspec(naked)

+void NV21ToARGBRow_AVX2(const uint8* y_buf,

+ const uint8* vu_buf,

+ uint8* dst_argb,

+ struct YuvConstants* yuvconstants,

+ int width) {

+ __asm {

+ push esi

+ push ebx

+ mov eax, [esp + 8 + 4] // Y

+ mov esi, [esp + 8 + 8] // VU

+ mov edx, [esp + 8 + 12] // argb

+ mov ebx, [esp + 8 + 16] // yuvconstants

+ mov ecx, [esp + 8 + 20] // width

+ vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha

+ convertloop:

+ READNV21_AVX2

+ YUVTORGB_AVX2(ebx)

+ STOREARGB_AVX2

+ sub ecx, 16

+ jg convertloop

+ pop ebx

+ pop esi

+ vzeroupper

+ ret

+ }

+#endif // HAS_NV21TOARGBROW_AVX2

// 16 pixels.

// 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).

__declspec(naked)

@@ -2608,6 +2661,16 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,

__asm lea eax, [eax + 8] \

}

+// Read 4 VU from NV21, upsample to 8 UV.

+#define READNV21 __asm { \

+ __asm movq xmm0, qword ptr [esi] /* UV */ \

+ __asm lea esi, [esi + 8] \

+ __asm pshufb xmm0, xmmword ptr kShuffleNV21 \

+ __asm movq xmm4, qword ptr [eax] \

+ __asm punpcklbw xmm4, xmm4 \

+ __asm lea eax, [eax + 8] \

+ }

// Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV.

#define READYUY2 __asm { \

__asm movdqu xmm4, [eax] /* YUY2 */ \

@@ -3153,6 +3216,38 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,

}

// 8 pixels.

+// 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).

+__declspec(naked)

+void NV21ToARGBRow_SSSE3(const uint8* y_buf,

+ const uint8* vu_buf,

+ uint8* dst_argb,

+ struct YuvConstants* yuvconstants,

+ int width) {

+ __asm {

+ push esi

+ push ebx

+ mov eax, [esp + 8 + 4] // Y

+ mov esi, [esp + 8 + 8] // VU

+ mov edx, [esp + 8 + 12] // argb

+ mov ebx, [esp + 8 + 16] // yuvconstants

+ mov ecx, [esp + 8 + 20] // width

+ pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha

+ convertloop:

+ READNV21

+ YUVTORGB(ebx)

+ STOREARGB

+ sub ecx, 8

+ jg convertloop

+ pop ebx

+ pop esi

+ ret

+ }

+// 8 pixels.

// 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes).

__declspec(naked)

void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,

« no previous file with comments | « source/row_neon64.cc ('k') | source/scale_gcc.cc » ('j') | no next file with comments »