Index: source/row_win.cc |
diff --git a/source/row_win.cc b/source/row_win.cc |
index 1e2a24c9e69d74a9a0e68f49f0f0c291f5c9699d..2c6de73ec7f4b9f21d98b822f832832b014033da 100644 |
--- a/source/row_win.cc |
+++ b/source/row_win.cc |
@@ -327,11 +327,11 @@ static const lvec8 kShuffleNV21 = { |
// Duplicates gray value 3 times and fills in alpha opaque. |
__declspec(naked) |
-void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { |
+void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width) { |
__asm { |
mov eax, [esp + 4] // src_y |
mov edx, [esp + 8] // dst_argb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
pcmpeqb xmm5, xmm5 // generate mask 0xff000000 |
pslld xmm5, 24 |
@@ -356,11 +356,11 @@ void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { |
#ifdef HAS_J400TOARGBROW_AVX2 |
// Duplicates gray value 3 times and fills in alpha opaque. |
__declspec(naked) |
-void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int pix) { |
+void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width) { |
__asm { |
mov eax, [esp + 4] // src_y |
mov edx, [esp + 8] // dst_argb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000 |
vpslld ymm5, ymm5, 24 |
@@ -386,11 +386,11 @@ void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int pix) { |
#endif // HAS_J400TOARGBROW_AVX2 |
__declspec(naked) |
-void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) { |
+void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width) { |
__asm { |
mov eax, [esp + 4] // src_rgb24 |
mov edx, [esp + 8] // dst_argb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
pcmpeqb xmm5, xmm5 // generate mask 0xff000000 |
pslld xmm5, 24 |
movdqa xmm4, xmmword ptr kShuffleMaskRGB24ToARGB |
@@ -425,11 +425,11 @@ void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix) { |
__declspec(naked) |
void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, |
- int pix) { |
+ int width) { |
__asm { |
mov eax, [esp + 4] // src_raw |
mov edx, [esp + 8] // dst_argb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
pcmpeqb xmm5, xmm5 // generate mask 0xff000000 |
pslld xmm5, 24 |
movdqa xmm4, xmmword ptr kShuffleMaskRAWToARGB |
@@ -471,7 +471,7 @@ void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, |
// 20 instructions. |
__declspec(naked) |
void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, |
- int pix) { |
+ int width) { |
__asm { |
mov eax, 0x01080108 // generate multiplier to repeat 5 bits |
movd xmm5, eax |
@@ -489,7 +489,7 @@ void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, |
mov eax, [esp + 4] // src_rgb565 |
mov edx, [esp + 8] // dst_argb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
sub edx, eax |
sub edx, eax |
@@ -527,7 +527,7 @@ void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, |
// G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3 |
__declspec(naked) |
void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, |
- int pix) { |
+ int width) { |
__asm { |
mov eax, 0x01080108 // generate multiplier to repeat 5 bits |
vmovd xmm5, eax |
@@ -545,7 +545,7 @@ void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, |
mov eax, [esp + 4] // src_rgb565 |
mov edx, [esp + 8] // dst_argb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
sub edx, eax |
sub edx, eax |
@@ -578,7 +578,7 @@ void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, |
#ifdef HAS_ARGB1555TOARGBROW_AVX2 |
__declspec(naked) |
void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb, |
- int pix) { |
+ int width) { |
__asm { |
mov eax, 0x01080108 // generate multiplier to repeat 5 bits |
vmovd xmm5, eax |
@@ -594,7 +594,7 @@ void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb, |
mov eax, [esp + 4] // src_argb1555 |
mov edx, [esp + 8] // dst_argb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
sub edx, eax |
sub edx, eax |
@@ -630,7 +630,7 @@ void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb, |
#ifdef HAS_ARGB4444TOARGBROW_AVX2 |
__declspec(naked) |
void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb, |
- int pix) { |
+ int width) { |
__asm { |
mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f |
vmovd xmm4, eax |
@@ -638,7 +638,7 @@ void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb, |
vpslld ymm5, ymm4, 4 // 0xf0f0f0f0 for high nibbles |
mov eax, [esp + 4] // src_argb4444 |
mov edx, [esp + 8] // dst_argb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
sub edx, eax |
sub edx, eax |
@@ -668,7 +668,7 @@ void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb, |
// 24 instructions |
__declspec(naked) |
void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, |
- int pix) { |
+ int width) { |
__asm { |
mov eax, 0x01080108 // generate multiplier to repeat 5 bits |
movd xmm5, eax |
@@ -685,7 +685,7 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, |
mov eax, [esp + 4] // src_argb1555 |
mov edx, [esp + 8] // dst_argb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
sub edx, eax |
sub edx, eax |
@@ -721,7 +721,7 @@ void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb, |
// 18 instructions. |
__declspec(naked) |
void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, |
- int pix) { |
+ int width) { |
__asm { |
mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f |
movd xmm4, eax |
@@ -730,7 +730,7 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, |
pslld xmm5, 4 |
mov eax, [esp + 4] // src_argb4444 |
mov edx, [esp + 8] // dst_argb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
sub edx, eax |
sub edx, eax |
@@ -758,11 +758,11 @@ void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb, |
} |
__declspec(naked) |
-void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) { |
+void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width) { |
__asm { |
mov eax, [esp + 4] // src_argb |
mov edx, [esp + 8] // dst_rgb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24 |
convertloop: |
@@ -796,11 +796,11 @@ void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) { |
} |
__declspec(naked) |
-void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) { |
+void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width) { |
__asm { |
mov eax, [esp + 4] // src_argb |
mov edx, [esp + 8] // dst_rgb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW |
convertloop: |
@@ -834,11 +834,11 @@ void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix) { |
} |
__declspec(naked) |
-void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { |
+void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width) { |
__asm { |
mov eax, [esp + 4] // src_argb |
mov edx, [esp + 8] // dst_rgb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
pcmpeqb xmm3, xmm3 // generate mask 0x0000001f |
psrld xmm3, 27 |
pcmpeqb xmm4, xmm4 // generate mask 0x000007e0 |
@@ -872,13 +872,13 @@ void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { |
__declspec(naked) |
void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb, |
- const uint32 dither4, int pix) { |
+ const uint32 dither4, int width) { |
__asm { |
mov eax, [esp + 4] // src_argb |
mov edx, [esp + 8] // dst_rgb |
movd xmm6, [esp + 12] // dither4 |
- mov ecx, [esp + 16] // pix |
+ mov ecx, [esp + 16] // width |
punpcklbw xmm6, xmm6 // make dither 16 bytes |
movdqa xmm7, xmm6 |
punpcklwd xmm6, xmm6 |
@@ -918,12 +918,12 @@ void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb, |
#ifdef HAS_ARGBTORGB565DITHERROW_AVX2 |
__declspec(naked) |
void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb, |
- const uint32 dither4, int pix) { |
+ const uint32 dither4, int width) { |
__asm { |
mov eax, [esp + 4] // src_argb |
mov edx, [esp + 8] // dst_rgb |
vbroadcastss xmm6, [esp + 12] // dither4 |
- mov ecx, [esp + 16] // pix |
+ mov ecx, [esp + 16] // width |
vpunpcklbw xmm6, xmm6, xmm6 // make dither 32 bytes |
vpermq ymm6, ymm6, 0xd8 |
vpunpcklwd ymm6, ymm6, ymm6 |
@@ -960,11 +960,11 @@ void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb, |
// TODO(fbarchard): Improve sign extension/packing. |
__declspec(naked) |
-void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { |
+void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width) { |
__asm { |
mov eax, [esp + 4] // src_argb |
mov edx, [esp + 8] // dst_rgb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
pcmpeqb xmm4, xmm4 // generate mask 0x0000001f |
psrld xmm4, 27 |
movdqa xmm5, xmm4 // generate mask 0x000003e0 |
@@ -1001,11 +1001,11 @@ void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { |
} |
__declspec(naked) |
-void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { |
+void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width) { |
__asm { |
mov eax, [esp + 4] // src_argb |
mov edx, [esp + 8] // dst_rgb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
pcmpeqb xmm4, xmm4 // generate mask 0xf000f000 |
psllw xmm4, 12 |
movdqa xmm3, xmm4 // generate mask 0x00f000f0 |
@@ -1031,11 +1031,11 @@ void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix) { |
#ifdef HAS_ARGBTORGB565ROW_AVX2 |
__declspec(naked) |
-void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) { |
+void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width) { |
__asm { |
mov eax, [esp + 4] // src_argb |
mov edx, [esp + 8] // dst_rgb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f |
vpsrld ymm3, ymm3, 27 |
vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0 |
@@ -1068,11 +1068,11 @@ void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) { |
#ifdef HAS_ARGBTOARGB1555ROW_AVX2 |
__declspec(naked) |
-void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) { |
+void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width) { |
__asm { |
mov eax, [esp + 4] // src_argb |
mov edx, [esp + 8] // dst_rgb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
vpcmpeqb ymm4, ymm4, ymm4 |
vpsrld ymm4, ymm4, 27 // generate mask 0x0000001f |
vpslld ymm5, ymm4, 5 // generate mask 0x000003e0 |
@@ -1108,11 +1108,11 @@ void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) { |
#ifdef HAS_ARGBTOARGB4444ROW_AVX2 |
__declspec(naked) |
-void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) { |
+void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width) { |
__asm { |
mov eax, [esp + 4] // src_argb |
mov edx, [esp + 8] // dst_rgb |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xf000f000 |
vpsllw ymm4, ymm4, 12 |
vpsrlw ymm3, ymm4, 8 // generate mask 0x00f000f0 |
@@ -1139,11 +1139,11 @@ void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int pix) { |
// Convert 16 ARGB pixels (64 bytes) to 16 Y values. |
__declspec(naked) |
-void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { |
+void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { |
__asm { |
mov eax, [esp + 4] /* src_argb */ |
mov edx, [esp + 8] /* dst_y */ |
- mov ecx, [esp + 12] /* pix */ |
+ mov ecx, [esp + 12] /* width */ |
movdqa xmm4, xmmword ptr kARGBToY |
movdqa xmm5, xmmword ptr kAddY16 |
@@ -1174,11 +1174,11 @@ void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { |
// Convert 16 ARGB pixels (64 bytes) to 16 YJ values. |
// Same as ARGBToYRow but different coefficients, no add 16, but do rounding. |
__declspec(naked) |
-void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { |
+void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { |
__asm { |
mov eax, [esp + 4] /* src_argb */ |
mov edx, [esp + 8] /* dst_y */ |
- mov ecx, [esp + 12] /* pix */ |
+ mov ecx, [esp + 12] /* width */ |
movdqa xmm4, xmmword ptr kARGBToYJ |
movdqa xmm5, xmmword ptr kAddYJ64 |
@@ -1215,11 +1215,11 @@ static const lvec32 kPermdARGBToY_AVX = { |
// Convert 32 ARGB pixels (128 bytes) to 32 Y values. |
__declspec(naked) |
-void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { |
+void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) { |
__asm { |
mov eax, [esp + 4] /* src_argb */ |
mov edx, [esp + 8] /* dst_y */ |
- mov ecx, [esp + 12] /* pix */ |
+ mov ecx, [esp + 12] /* width */ |
vbroadcastf128 ymm4, xmmword ptr kARGBToY |
vbroadcastf128 ymm5, xmmword ptr kAddY16 |
vmovdqu ymm6, ymmword ptr kPermdARGBToY_AVX |
@@ -1254,11 +1254,11 @@ void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { |
#ifdef HAS_ARGBTOYJROW_AVX2 |
// Convert 32 ARGB pixels (128 bytes) to 32 YJ values. |
__declspec(naked) |
-void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { |
+void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width) { |
__asm { |
mov eax, [esp + 4] /* src_argb */ |
mov edx, [esp + 8] /* dst_y */ |
- mov ecx, [esp + 12] /* pix */ |
+ mov ecx, [esp + 12] /* width */ |
vbroadcastf128 ymm4, xmmword ptr kARGBToYJ |
vbroadcastf128 ymm5, xmmword ptr kAddYJ64 |
vmovdqu ymm6, ymmword ptr kPermdARGBToY_AVX |
@@ -1293,11 +1293,11 @@ void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix) { |
#endif // HAS_ARGBTOYJROW_AVX2 |
__declspec(naked) |
-void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { |
+void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { |
__asm { |
mov eax, [esp + 4] /* src_argb */ |
mov edx, [esp + 8] /* dst_y */ |
- mov ecx, [esp + 12] /* pix */ |
+ mov ecx, [esp + 12] /* width */ |
movdqa xmm4, xmmword ptr kBGRAToY |
movdqa xmm5, xmmword ptr kAddY16 |
@@ -1326,11 +1326,11 @@ void BGRAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { |
} |
__declspec(naked) |
-void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { |
+void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { |
__asm { |
mov eax, [esp + 4] /* src_argb */ |
mov edx, [esp + 8] /* dst_y */ |
- mov ecx, [esp + 12] /* pix */ |
+ mov ecx, [esp + 12] /* width */ |
movdqa xmm4, xmmword ptr kABGRToY |
movdqa xmm5, xmmword ptr kAddY16 |
@@ -1359,11 +1359,11 @@ void ABGRToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { |
} |
__declspec(naked) |
-void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix) { |
+void RGBAToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width) { |
__asm { |
mov eax, [esp + 4] /* src_argb */ |
mov edx, [esp + 8] /* dst_y */ |
- mov ecx, [esp + 12] /* pix */ |
+ mov ecx, [esp + 12] /* width */ |
movdqa xmm4, xmmword ptr kRGBAToY |
movdqa xmm5, xmmword ptr kAddY16 |
@@ -1401,7 +1401,7 @@ void ARGBToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, |
mov esi, [esp + 8 + 8] // src_stride_argb |
mov edx, [esp + 8 + 12] // dst_u |
mov edi, [esp + 8 + 16] // dst_v |
- mov ecx, [esp + 8 + 20] // pix |
+ mov ecx, [esp + 8 + 20] // width |
movdqa xmm5, xmmword ptr kAddUV128 |
movdqa xmm6, xmmword ptr kARGBToV |
movdqa xmm7, xmmword ptr kARGBToU |
@@ -1471,7 +1471,7 @@ void ARGBToUVJRow_SSSE3(const uint8* src_argb0, int src_stride_argb, |
mov esi, [esp + 8 + 8] // src_stride_argb |
mov edx, [esp + 8 + 12] // dst_u |
mov edi, [esp + 8 + 16] // dst_v |
- mov ecx, [esp + 8 + 20] // pix |
+ mov ecx, [esp + 8 + 20] // width |
movdqa xmm5, xmmword ptr kAddUVJ128 |
movdqa xmm6, xmmword ptr kARGBToVJ |
movdqa xmm7, xmmword ptr kARGBToUJ |
@@ -1543,7 +1543,7 @@ void ARGBToUVRow_AVX2(const uint8* src_argb0, int src_stride_argb, |
mov esi, [esp + 8 + 8] // src_stride_argb |
mov edx, [esp + 8 + 12] // dst_u |
mov edi, [esp + 8 + 16] // dst_v |
- mov ecx, [esp + 8 + 20] // pix |
+ mov ecx, [esp + 8 + 20] // width |
vbroadcastf128 ymm5, xmmword ptr kAddUV128 |
vbroadcastf128 ymm6, xmmword ptr kARGBToV |
vbroadcastf128 ymm7, xmmword ptr kARGBToU |
@@ -1606,7 +1606,7 @@ void ARGBToUV444Row_SSSE3(const uint8* src_argb0, |
mov eax, [esp + 4 + 4] // src_argb |
mov edx, [esp + 4 + 8] // dst_u |
mov edi, [esp + 4 + 12] // dst_v |
- mov ecx, [esp + 4 + 16] // pix |
+ mov ecx, [esp + 4 + 16] // width |
movdqa xmm5, xmmword ptr kAddUV128 |
movdqa xmm6, xmmword ptr kARGBToV |
movdqa xmm7, xmmword ptr kARGBToU |
@@ -1663,7 +1663,7 @@ void ARGBToUV422Row_SSSE3(const uint8* src_argb0, |
mov eax, [esp + 4 + 4] // src_argb |
mov edx, [esp + 4 + 8] // dst_u |
mov edi, [esp + 4 + 12] // dst_v |
- mov ecx, [esp + 4 + 16] // pix |
+ mov ecx, [esp + 4 + 16] // width |
movdqa xmm5, xmmword ptr kAddUV128 |
movdqa xmm6, xmmword ptr kARGBToV |
movdqa xmm7, xmmword ptr kARGBToU |
@@ -1723,7 +1723,7 @@ void BGRAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, |
mov esi, [esp + 8 + 8] // src_stride_argb |
mov edx, [esp + 8 + 12] // dst_u |
mov edi, [esp + 8 + 16] // dst_v |
- mov ecx, [esp + 8 + 20] // pix |
+ mov ecx, [esp + 8 + 20] // width |
movdqa xmm5, xmmword ptr kAddUV128 |
movdqa xmm6, xmmword ptr kBGRAToV |
movdqa xmm7, xmmword ptr kBGRAToU |
@@ -1793,7 +1793,7 @@ void ABGRToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, |
mov esi, [esp + 8 + 8] // src_stride_argb |
mov edx, [esp + 8 + 12] // dst_u |
mov edi, [esp + 8 + 16] // dst_v |
- mov ecx, [esp + 8 + 20] // pix |
+ mov ecx, [esp + 8 + 20] // width |
movdqa xmm5, xmmword ptr kAddUV128 |
movdqa xmm6, xmmword ptr kABGRToV |
movdqa xmm7, xmmword ptr kABGRToU |
@@ -1863,7 +1863,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb, |
mov esi, [esp + 8 + 8] // src_stride_argb |
mov edx, [esp + 8 + 12] // dst_u |
mov edi, [esp + 8 + 16] // dst_v |
- mov ecx, [esp + 8 + 20] // pix |
+ mov ecx, [esp + 8 + 20] // width |
movdqa xmm5, xmmword ptr kAddUV128 |
movdqa xmm6, xmmword ptr kRGBAToV |
movdqa xmm7, xmmword ptr kRGBAToU |
@@ -3638,13 +3638,13 @@ void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width) { |
#ifdef HAS_SPLITUVROW_SSE2 |
__declspec(naked) |
-void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { |
+void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { |
__asm { |
push edi |
mov eax, [esp + 4 + 4] // src_uv |
mov edx, [esp + 4 + 8] // dst_u |
mov edi, [esp + 4 + 12] // dst_v |
- mov ecx, [esp + 4 + 16] // pix |
+ mov ecx, [esp + 4 + 16] // width |
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff |
psrlw xmm5, 8 |
sub edi, edx |
@@ -3676,13 +3676,13 @@ void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { |
#ifdef HAS_SPLITUVROW_AVX2 |
__declspec(naked) |
-void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) { |
+void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) { |
__asm { |
push edi |
mov eax, [esp + 4 + 4] // src_uv |
mov edx, [esp + 4 + 8] // dst_u |
mov edi, [esp + 4 + 12] // dst_v |
- mov ecx, [esp + 4 + 16] // pix |
+ mov ecx, [esp + 4 + 16] // width |
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff |
vpsrlw ymm5, ymm5, 8 |
sub edi, edx |
@@ -4024,11 +4024,11 @@ void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count) { |
#ifdef HAS_YUY2TOYROW_AVX2 |
__declspec(naked) |
-void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix) { |
+void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width) { |
__asm { |
mov eax, [esp + 4] // src_yuy2 |
mov edx, [esp + 8] // dst_y |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff |
vpsrlw ymm5, ymm5, 8 |
@@ -4051,7 +4051,7 @@ void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix) { |
__declspec(naked) |
void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, |
- uint8* dst_u, uint8* dst_v, int pix) { |
+ uint8* dst_u, uint8* dst_v, int width) { |
__asm { |
push esi |
push edi |
@@ -4059,7 +4059,7 @@ void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, |
mov esi, [esp + 8 + 8] // stride_yuy2 |
mov edx, [esp + 8 + 12] // dst_u |
mov edi, [esp + 8 + 16] // dst_v |
- mov ecx, [esp + 8 + 20] // pix |
+ mov ecx, [esp + 8 + 20] // width |
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff |
vpsrlw ymm5, ymm5, 8 |
sub edi, edx |
@@ -4095,13 +4095,13 @@ void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2, |
__declspec(naked) |
void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, |
- uint8* dst_u, uint8* dst_v, int pix) { |
+ uint8* dst_u, uint8* dst_v, int width) { |
__asm { |
push edi |
mov eax, [esp + 4 + 4] // src_yuy2 |
mov edx, [esp + 4 + 8] // dst_u |
mov edi, [esp + 4 + 12] // dst_v |
- mov ecx, [esp + 4 + 16] // pix |
+ mov ecx, [esp + 4 + 16] // width |
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff |
vpsrlw ymm5, ymm5, 8 |
sub edi, edx |
@@ -4134,11 +4134,11 @@ void YUY2ToUV422Row_AVX2(const uint8* src_yuy2, |
__declspec(naked) |
void UYVYToYRow_AVX2(const uint8* src_uyvy, |
- uint8* dst_y, int pix) { |
+ uint8* dst_y, int width) { |
__asm { |
mov eax, [esp + 4] // src_uyvy |
mov edx, [esp + 8] // dst_y |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
convertloop: |
vmovdqu ymm0, [eax] |
@@ -4159,7 +4159,7 @@ void UYVYToYRow_AVX2(const uint8* src_uyvy, |
__declspec(naked) |
void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, |
- uint8* dst_u, uint8* dst_v, int pix) { |
+ uint8* dst_u, uint8* dst_v, int width) { |
__asm { |
push esi |
push edi |
@@ -4167,7 +4167,7 @@ void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, |
mov esi, [esp + 8 + 8] // stride_uyvy |
mov edx, [esp + 8 + 12] // dst_u |
mov edi, [esp + 8 + 16] // dst_v |
- mov ecx, [esp + 8 + 20] // pix |
+ mov ecx, [esp + 8 + 20] // width |
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff |
vpsrlw ymm5, ymm5, 8 |
sub edi, edx |
@@ -4203,13 +4203,13 @@ void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy, |
__declspec(naked) |
void UYVYToUV422Row_AVX2(const uint8* src_uyvy, |
- uint8* dst_u, uint8* dst_v, int pix) { |
+ uint8* dst_u, uint8* dst_v, int width) { |
__asm { |
push edi |
mov eax, [esp + 4 + 4] // src_uyvy |
mov edx, [esp + 4 + 8] // dst_u |
mov edi, [esp + 4 + 12] // dst_v |
- mov ecx, [esp + 4 + 16] // pix |
+ mov ecx, [esp + 4 + 16] // width |
vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff |
vpsrlw ymm5, ymm5, 8 |
sub edi, edx |
@@ -4244,11 +4244,11 @@ void UYVYToUV422Row_AVX2(const uint8* src_uyvy, |
#ifdef HAS_YUY2TOYROW_SSE2 |
__declspec(naked) |
void YUY2ToYRow_SSE2(const uint8* src_yuy2, |
- uint8* dst_y, int pix) { |
+ uint8* dst_y, int width) { |
__asm { |
mov eax, [esp + 4] // src_yuy2 |
mov edx, [esp + 8] // dst_y |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff |
psrlw xmm5, 8 |
@@ -4269,7 +4269,7 @@ void YUY2ToYRow_SSE2(const uint8* src_yuy2, |
__declspec(naked) |
void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, |
- uint8* dst_u, uint8* dst_v, int pix) { |
+ uint8* dst_u, uint8* dst_v, int width) { |
__asm { |
push esi |
push edi |
@@ -4277,7 +4277,7 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, |
mov esi, [esp + 8 + 8] // stride_yuy2 |
mov edx, [esp + 8 + 12] // dst_u |
mov edi, [esp + 8 + 16] // dst_v |
- mov ecx, [esp + 8 + 20] // pix |
+ mov ecx, [esp + 8 + 20] // width |
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff |
psrlw xmm5, 8 |
sub edi, edx |
@@ -4312,13 +4312,13 @@ void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2, |
__declspec(naked) |
void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, |
- uint8* dst_u, uint8* dst_v, int pix) { |
+ uint8* dst_u, uint8* dst_v, int width) { |
__asm { |
push edi |
mov eax, [esp + 4 + 4] // src_yuy2 |
mov edx, [esp + 4 + 8] // dst_u |
mov edi, [esp + 4 + 12] // dst_v |
- mov ecx, [esp + 4 + 16] // pix |
+ mov ecx, [esp + 4 + 16] // width |
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff |
psrlw xmm5, 8 |
sub edi, edx |
@@ -4348,11 +4348,11 @@ void YUY2ToUV422Row_SSE2(const uint8* src_yuy2, |
__declspec(naked) |
void UYVYToYRow_SSE2(const uint8* src_uyvy, |
- uint8* dst_y, int pix) { |
+ uint8* dst_y, int width) { |
__asm { |
mov eax, [esp + 4] // src_uyvy |
mov edx, [esp + 8] // dst_y |
- mov ecx, [esp + 12] // pix |
+ mov ecx, [esp + 12] // width |
convertloop: |
movdqu xmm0, [eax] |
@@ -4371,7 +4371,7 @@ void UYVYToYRow_SSE2(const uint8* src_uyvy, |
__declspec(naked) |
void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, |
- uint8* dst_u, uint8* dst_v, int pix) { |
+ uint8* dst_u, uint8* dst_v, int width) { |
__asm { |
push esi |
push edi |
@@ -4379,7 +4379,7 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, |
mov esi, [esp + 8 + 8] // stride_uyvy |
mov edx, [esp + 8 + 12] // dst_u |
mov edi, [esp + 8 + 16] // dst_v |
- mov ecx, [esp + 8 + 20] // pix |
+ mov ecx, [esp + 8 + 20] // width |
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff |
psrlw xmm5, 8 |
sub edi, edx |
@@ -4414,13 +4414,13 @@ void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy, |
__declspec(naked) |
void UYVYToUV422Row_SSE2(const uint8* src_uyvy, |
- uint8* dst_u, uint8* dst_v, int pix) { |
+ uint8* dst_u, uint8* dst_v, int width) { |
__asm { |
push edi |
mov eax, [esp + 4 + 4] // src_uyvy |
mov edx, [esp + 4 + 8] // dst_u |
mov edi, [esp + 4 + 12] // dst_v |
- mov ecx, [esp + 4 + 16] // pix |
+ mov ecx, [esp + 4 + 16] // width |
pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff |
psrlw xmm5, 8 |
sub edi, edx |
@@ -6126,13 +6126,13 @@ void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, |
// For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. |
__declspec(naked) |
void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, |
- const uint8* shuffler, int pix) { |
+ const uint8* shuffler, int width) { |
__asm { |
mov eax, [esp + 4] // src_argb |
mov edx, [esp + 8] // dst_argb |
mov ecx, [esp + 12] // shuffler |
movdqu xmm5, [ecx] |
- mov ecx, [esp + 16] // pix |
+ mov ecx, [esp + 16] // width |
wloop: |
movdqu xmm0, [eax] |
@@ -6152,13 +6152,13 @@ void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, |
#ifdef HAS_ARGBSHUFFLEROW_AVX2 |
__declspec(naked) |
void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, |
- const uint8* shuffler, int pix) { |
+ const uint8* shuffler, int width) { |
__asm { |
mov eax, [esp + 4] // src_argb |
mov edx, [esp + 8] // dst_argb |
mov ecx, [esp + 12] // shuffler |
vbroadcastf128 ymm5, [ecx] // same shuffle in high as low. |
- mov ecx, [esp + 16] // pix |
+ mov ecx, [esp + 16] // width |
wloop: |
vmovdqu ymm0, [eax] |
@@ -6180,14 +6180,14 @@ void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb, |
__declspec(naked) |
void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, |
- const uint8* shuffler, int pix) { |
+ const uint8* shuffler, int width) { |
__asm { |
push ebx |
push esi |
mov eax, [esp + 8 + 4] // src_argb |
mov edx, [esp + 8 + 8] // dst_argb |
mov esi, [esp + 8 + 12] // shuffler |
- mov ecx, [esp + 8 + 16] // pix |
+ mov ecx, [esp + 8 + 16] // width |
pxor xmm5, xmm5 |
mov ebx, [esi] // shuffler |