Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1008)

Unified Diff: unit_test/planar_test.cc

Issue 1505433002: AVX2 YUV alpha blender and improved unittests (Closed) | Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: off by 1 fix on win | Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/row_win.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: unit_test/planar_test.cc
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc
index fc22fe139091ee65c10a80b9511631061bc34f93..f5a8b2129f35909d66e61d4afd78a1c3ec5b64fb 100644
--- a/unit_test/planar_test.cc
+++ b/unit_test/planar_test.cc
@@ -1163,16 +1163,14 @@ TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) {
EXPECT_LE(max_diff, 1);
}
-#ifdef HAS_BLENDPLANEROW_SSSE3
+#ifdef HAS_BLENDPLANEROW_AVX2
// TODO(fbarchard): Switch to I420Blend.
-static void TestBlendPlane(int width, int height, int benchmark_iterations,
- int invert, int off) {
+static void TestBlendPlaneRow(int width, int height, int benchmark_iterations,
+ int invert, int off) {
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3);
+ int has_avx2 = TestCpuFlag(kCpuHasAVX2);
width = width * height;
height = 1;
- if (width < 1) {
- width = 1;
- }
if (width < 256) {
width = 256;
}
@@ -1181,23 +1179,39 @@ static void TestBlendPlane(int width, int height, int benchmark_iterations,
align_buffer_64(src_argb_a, kStride * height + off);
align_buffer_64(src_argb_b, kStride * height + off);
align_buffer_64(src_argb_alpha, kStride * height + off);
- align_buffer_64(dst_argb_c, kStride * height);
- align_buffer_64(dst_argb_opt, kStride * height);
+ align_buffer_64(dst_argb_c, kStride * height + off);
+ align_buffer_64(dst_argb_opt, kStride * height + off);
+ memset(dst_argb_c, 255, kStride * height + off);
+ memset(dst_argb_opt, 255, kStride * height + off);
if (has_ssse3) {
- for (int i = 0; i < 255; ++i) {
- src_argb_a[i] = i;
- src_argb_b[i] = 255 - i;
- src_argb_alpha[i] = 255;
+ // Test source is maintained exactly if alpha is 255.
+ for (int i = 0; i < 256; ++i) {
+ src_argb_a[i + off] = i;
+ src_argb_b[i + off] = 255 - i;
+ src_argb_alpha[i + off] = 255;
}
- memset(dst_argb_opt, 0xfb, kStride * height);
BlendPlaneRow_SSSE3(src_argb_a + off,
src_argb_b + off,
src_argb_alpha + off,
- dst_argb_opt,
- width * height);
- for (int i = 0; i < kStride * height; ++i) {
- EXPECT_EQ(src_argb_a[i], dst_argb_opt[i]);
+ dst_argb_opt + off,
+ 256);
+ for (int i = 0; i < 256; ++i) {
+ EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
+ }
+ // Test destination is maintained exactly if alpha is 0.
+ for (int i = 0; i < 256; ++i) {
+ src_argb_a[i + off] = i;
+ src_argb_b[i + off] = 255 - i;
+ src_argb_alpha[i + off] = 0;
+ }
+ BlendPlaneRow_SSSE3(src_argb_a + off,
+ src_argb_b + off,
+ src_argb_alpha + off,
+ dst_argb_opt + off,
+ 256);
+ for (int i = 0; i < 256; ++i) {
+ EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
}
}
for (int i = 0; i < kStride * height; ++i) {
@@ -1205,34 +1219,122 @@ static void TestBlendPlane(int width, int height, int benchmark_iterations,
src_argb_b[i + off] = (fastrand() & 0xff);
src_argb_alpha[i + off] = (fastrand() & 0xff);
}
- memset(dst_argb_c, 255, kStride * height);
- memset(dst_argb_opt, 255, kStride * height);
BlendPlaneRow_C(src_argb_a + off,
src_argb_b + off,
src_argb_alpha + off,
- dst_argb_c,
+ dst_argb_c + off,
width * height);
for (int i = 0; i < benchmark_iterations; ++i) {
- if (has_ssse3) {
- BlendPlaneRow_SSSE3(src_argb_a + off,
- src_argb_b + off,
- src_argb_alpha + off,
- dst_argb_opt,
- width * height);
+ if (has_avx2) {
+ BlendPlaneRow_AVX2(src_argb_a + off,
+ src_argb_b + off,
+ src_argb_alpha + off,
+ dst_argb_opt + off,
+ width * height);
} else {
- BlendPlaneRow_C(src_argb_a + off,
- src_argb_b + off,
- src_argb_alpha + off,
- dst_argb_opt,
- width * height);
+ if (has_ssse3) {
+ BlendPlaneRow_SSSE3(src_argb_a + off,
+ src_argb_b + off,
+ src_argb_alpha + off,
+ dst_argb_opt + off,
+ width * height);
+ } else {
+ BlendPlaneRow_C(src_argb_a + off,
+ src_argb_b + off,
+ src_argb_alpha + off,
+ dst_argb_opt + off,
+ width * height);
+ }
}
}
for (int i = 0; i < kStride * height; ++i) {
- EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]);
+ EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
}
free_aligned_buffer_64(src_argb_a);
free_aligned_buffer_64(src_argb_b);
+ free_aligned_buffer_64(src_argb_alpha);
+ free_aligned_buffer_64(dst_argb_c);
+ free_aligned_buffer_64(dst_argb_opt);
+ return;
+}
+
+TEST_F(LibYUVPlanarTest, BlendPlaneRow_Opt) {
+ TestBlendPlaneRow(benchmark_width_, benchmark_height_, benchmark_iterations_,
+ +1, 0);
+}
+TEST_F(LibYUVPlanarTest, BlendPlaneRow_Unaligned) {
+ TestBlendPlaneRow(benchmark_width_, benchmark_height_, benchmark_iterations_,
+ +1, 1);
+}
+#endif
+
+static void TestBlendPlane(int width, int height, int benchmark_iterations,
+ int disable_cpu_flags, int benchmark_cpu_info,
+ int invert, int off) {
+ if (width < 1) {
+ width = 1;
+ }
+ const int kBpp = 1;
+ const int kStride = width * kBpp;
+ align_buffer_64(src_argb_a, kStride * height + off);
+ align_buffer_64(src_argb_b, kStride * height + off);
+ align_buffer_64(src_argb_alpha, kStride * height + off);
+ align_buffer_64(dst_argb_c, kStride * height + off);
+ align_buffer_64(dst_argb_opt, kStride * height + off);
+ memset(dst_argb_c, 255, kStride * height + off);
+ memset(dst_argb_opt, 255, kStride * height + off);
+
+ // Test source is maintained exactly if alpha is 255.
+ for (int i = 0; i < width; ++i) {
+ src_argb_a[i + off] = i & 255;
+ src_argb_b[i + off] = 255 - (i & 255);
+ }
+ memset(src_argb_alpha + off, 255, width);
+ BlendPlane(src_argb_a + off, width,
+ src_argb_b + off, width,
+ src_argb_alpha + off, width,
+ dst_argb_opt + off, width,
+ width, 1);
+ for (int i = 0; i < width; ++i) {
+ EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]);
+ }
+ // Test destination is maintained exactly if alpha is 0.
+ memset(src_argb_alpha + off, 0, width);
+ BlendPlane(src_argb_a + off, width,
+ src_argb_b + off, width,
+ src_argb_alpha + off, width,
+ dst_argb_opt + off, width,
+ width, 1);
+ for (int i = 0; i < width; ++i) {
+ EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]);
+ }
+ for (int i = 0; i < kStride * height; ++i) {
+ src_argb_a[i + off] = (fastrand() & 0xff);
+ src_argb_b[i + off] = (fastrand() & 0xff);
+ src_argb_alpha[i + off] = (fastrand() & 0xff);
+ }
+
+ MaskCpuFlags(disable_cpu_flags);
+ BlendPlane(src_argb_a + off, width,
+ src_argb_b + off, width,
+ src_argb_alpha + off, width,
+ dst_argb_c + off, width,
+ width, height);
+ MaskCpuFlags(benchmark_cpu_info);
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ BlendPlane(src_argb_a + off, width,
+ src_argb_b + off, width,
+ src_argb_alpha + off, width,
+ dst_argb_opt + off, width,
+ width, height);
+ }
+ for (int i = 0; i < kStride * height; ++i) {
+ EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]);
+ }
+ free_aligned_buffer_64(src_argb_a);
+ free_aligned_buffer_64(src_argb_b);
+ free_aligned_buffer_64(src_argb_alpha);
free_aligned_buffer_64(dst_argb_c);
free_aligned_buffer_64(dst_argb_opt);
return;
@@ -1240,9 +1342,106 @@ static void TestBlendPlane(int width, int height, int benchmark_iterations,
TEST_F(LibYUVPlanarTest, BlendPlane_Opt) {
TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
- +1, 0);
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
+}
+TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) {
+ TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
+}
+
+#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a))
+
+static void TestI420Blend(int width, int height, int benchmark_iterations,
+ int disable_cpu_flags, int benchmark_cpu_info,
+ int invert, int off) {
+ width = ((width) > 0) ? (width) : 1;
+ const int kStrideUV = SUBSAMPLE(width, 2);
+ const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2);
+ align_buffer_64(src_y0, width * height + off);
+ align_buffer_64(src_u0, kSizeUV + off);
+ align_buffer_64(src_v0, kSizeUV + off);
+ align_buffer_64(src_y1, width * height + off);
+ align_buffer_64(src_u1, kSizeUV + off);
+ align_buffer_64(src_v1, kSizeUV + off);
+ align_buffer_64(src_a, width * height + off);
+ align_buffer_64(dst_y_c, width * height + off);
+ align_buffer_64(dst_u_c, kSizeUV + off);
+ align_buffer_64(dst_v_c, kSizeUV + off);
+ align_buffer_64(dst_y_opt, width * height + off);
+ align_buffer_64(dst_u_opt, kSizeUV + off);
+ align_buffer_64(dst_v_opt, kSizeUV + off);
+
+ MemRandomize(src_y0, width * height + off);
+ MemRandomize(src_u0, kSizeUV + off);
+ MemRandomize(src_v0, kSizeUV + off);
+ MemRandomize(src_y1, width * height + off);
+ MemRandomize(src_u1, kSizeUV + off);
+ MemRandomize(src_v1, kSizeUV + off);
+ MemRandomize(src_a, width * height + off);
+ memset(dst_y_c, 255, width * height + off);
+ memset(dst_u_c, 255, kSizeUV + off);
+ memset(dst_v_c, 255, kSizeUV + off);
+ memset(dst_y_opt, 255, width * height + off);
+ memset(dst_u_opt, 255, kSizeUV + off);
+ memset(dst_v_opt, 255, kSizeUV + off);
+
+ MaskCpuFlags(disable_cpu_flags);
+ I420Blend(src_y0 + off, width,
+ src_u0 + off, kStrideUV,
+ src_v0 + off, kStrideUV,
+ src_y1 + off, width,
+ src_u1 + off, kStrideUV,
+ src_v1 + off, kStrideUV,
+ src_a + off, width,
+ dst_y_c + off, width,
+ dst_u_c + off, kStrideUV,
+ dst_v_c + off, kStrideUV,
+ width, height);
+ MaskCpuFlags(benchmark_cpu_info);
+ for (int i = 0; i < benchmark_iterations; ++i) {
+ I420Blend(src_y0 + off, width,
+ src_u0 + off, kStrideUV,
+ src_v0 + off, kStrideUV,
+ src_y1 + off, width,
+ src_u1 + off, kStrideUV,
+ src_v1 + off, kStrideUV,
+ src_a + off, width,
+ dst_y_opt + off, width,
+ dst_u_opt + off, kStrideUV,
+ dst_v_opt + off, kStrideUV,
+ width, height);
+ }
+ for (int i = 0; i < width * height; ++i) {
+ EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]);
+ }
+ for (int i = 0; i < kSizeUV; ++i) {
+ EXPECT_NEAR(dst_u_c[i + off], dst_u_opt[i + off], 1); // Subsample off by 1
+ EXPECT_NEAR(dst_v_c[i + off], dst_v_opt[i + off], 1);
+ }
+ free_aligned_buffer_64(src_y0);
+ free_aligned_buffer_64(src_u0);
+ free_aligned_buffer_64(src_v0);
+ free_aligned_buffer_64(src_y1);
+ free_aligned_buffer_64(src_u1);
+ free_aligned_buffer_64(src_v1);
+ free_aligned_buffer_64(src_a);
+ free_aligned_buffer_64(dst_y_c);
+ free_aligned_buffer_64(dst_u_c);
+ free_aligned_buffer_64(dst_v_c);
+ free_aligned_buffer_64(dst_y_opt);
+ free_aligned_buffer_64(dst_u_opt);
+ free_aligned_buffer_64(dst_v_opt);
+ return;
+}
+
+TEST_F(LibYUVPlanarTest, I420Blend_Opt) {
+ TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 0);
+}
+TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) {
+ TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_,
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 1);
}
-#endif
TEST_F(LibYUVPlanarTest, TestAffine) {
SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]);
« no previous file with comments | « source/row_win.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698