Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(21)

Unified Diff: source/scale_win.cc

Issue 2084533006: YUV scale filter columns improved filtering accuracy (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: bump version Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/scale_neon.cc ('k') | unit_test/scale_test.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/scale_win.cc
diff --git a/source/scale_win.cc b/source/scale_win.cc
index 21b1ed923fa65087a0b213337b52dd9d4b049a25..f17097365cc07c0640238592208d677007522bf5 100644
--- a/source/scale_win.cc
+++ b/source/scale_win.cc
@@ -860,6 +860,16 @@ void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
}
#endif // HAS_SCALEADDROW_AVX2
+// Constant for making pixels signed to avoid pmaddubsw
+// saturation.
+static uvec8 kFsub80 =
+ { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
+
+// Constant for making pixels unsigned and adding .5 for rounding.
+static uvec16 kFadd40 =
+ { 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040 };
+
// Bilinear column filtering. SSSE3 version.
__declspec(naked)
void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
@@ -877,6 +887,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
movd xmm5, eax
pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction.
psrlw xmm6, 9
+ pcmpeqb xmm7, xmm7 // generate 0x0001
+ psrlw xmm7, 15
pextrw eax, xmm2, 1 // get x0 integer. preroll
sub ecx, 2
jl xloop29
@@ -899,20 +911,22 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
movd xmm4, ebx
pshufb xmm1, xmm5 // 0011
punpcklwd xmm0, xmm4
+ psubb xmm0, xmmword ptr kFsub80 // make pixels signed.
pxor xmm1, xmm6 // 0..7f and 7f..0
- pmaddubsw xmm0, xmm1 // 16 bit, 2 pixels.
+ paddusb xmm1, xmm7 // +1 so 0..7f and 80..1
+ pmaddubsw xmm1, xmm0 // 16 bit, 2 pixels.
pextrw eax, xmm2, 1 // get x0 integer. next iteration.
pextrw edx, xmm2, 3 // get x1 integer. next iteration.
- psrlw xmm0, 7 // 8.7 fixed point to low 8 bits.
- packuswb xmm0, xmm0 // 8 bits, 2 pixels.
- movd ebx, xmm0
+ paddw xmm1, xmmword ptr kFadd40 // make pixels unsigned and round.
+ psrlw xmm1, 7 // 8.7 fixed point to low 8 bits.
+ packuswb xmm1, xmm1 // 8 bits, 2 pixels.
+ movd ebx, xmm1
mov [edi], bx
lea edi, [edi + 2]
sub ecx, 2 // 2 pixels
jge xloop2
xloop29:
-
add ecx, 2 - 1
jl xloop99
@@ -921,11 +935,14 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
movd xmm0, ebx
psrlw xmm2, 9 // 7 bit fractions.
pshufb xmm2, xmm5 // 0011
+ psubb xmm0, xmmword ptr kFsub80 // make pixels signed.
pxor xmm2, xmm6 // 0..7f and 7f..0
- pmaddubsw xmm0, xmm2 // 16 bit
- psrlw xmm0, 7 // 8.7 fixed point to low 8 bits.
- packuswb xmm0, xmm0 // 8 bits
- movd ebx, xmm0
+ paddusb xmm2, xmm7 // +1 so 0..7f and 80..1
+ pmaddubsw xmm2, xmm0 // 16 bit
+ paddw xmm2, xmmword ptr kFadd40 // make pixels unsigned and round.
+ psrlw xmm2, 7 // 8.7 fixed point to low 8 bits.
+ packuswb xmm2, xmm2 // 8 bits
+ movd ebx, xmm2
mov [edi], bl
xloop99:
« no previous file with comments | « source/scale_neon.cc ('k') | unit_test/scale_test.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698