Index: unit_test/planar_test.cc |
diff --git a/unit_test/planar_test.cc b/unit_test/planar_test.cc |
index fc22fe139091ee65c10a80b9511631061bc34f93..f5a8b2129f35909d66e61d4afd78a1c3ec5b64fb 100644 |
--- a/unit_test/planar_test.cc |
+++ b/unit_test/planar_test.cc |
@@ -1163,16 +1163,14 @@ TEST_F(LibYUVPlanarTest, ARGBBlend_Opt) { |
EXPECT_LE(max_diff, 1); |
} |
-#ifdef HAS_BLENDPLANEROW_SSSE3 |
+#ifdef HAS_BLENDPLANEROW_AVX2 |
// TODO(fbarchard): Switch to I420Blend. |
-static void TestBlendPlane(int width, int height, int benchmark_iterations, |
- int invert, int off) { |
+static void TestBlendPlaneRow(int width, int height, int benchmark_iterations, |
+ int invert, int off) { |
int has_ssse3 = TestCpuFlag(kCpuHasSSSE3); |
+ int has_avx2 = TestCpuFlag(kCpuHasAVX2); |
width = width * height; |
height = 1; |
- if (width < 1) { |
- width = 1; |
- } |
if (width < 256) { |
width = 256; |
} |
@@ -1181,23 +1179,39 @@ static void TestBlendPlane(int width, int height, int benchmark_iterations, |
align_buffer_64(src_argb_a, kStride * height + off); |
align_buffer_64(src_argb_b, kStride * height + off); |
align_buffer_64(src_argb_alpha, kStride * height + off); |
- align_buffer_64(dst_argb_c, kStride * height); |
- align_buffer_64(dst_argb_opt, kStride * height); |
+ align_buffer_64(dst_argb_c, kStride * height + off); |
+ align_buffer_64(dst_argb_opt, kStride * height + off); |
+ memset(dst_argb_c, 255, kStride * height + off); |
+ memset(dst_argb_opt, 255, kStride * height + off); |
if (has_ssse3) { |
- for (int i = 0; i < 255; ++i) { |
- src_argb_a[i] = i; |
- src_argb_b[i] = 255 - i; |
- src_argb_alpha[i] = 255; |
+ // Test source is maintained exactly if alpha is 255. |
+ for (int i = 0; i < 256; ++i) { |
+ src_argb_a[i + off] = i; |
+ src_argb_b[i + off] = 255 - i; |
+ src_argb_alpha[i + off] = 255; |
} |
- memset(dst_argb_opt, 0xfb, kStride * height); |
BlendPlaneRow_SSSE3(src_argb_a + off, |
src_argb_b + off, |
src_argb_alpha + off, |
- dst_argb_opt, |
- width * height); |
- for (int i = 0; i < kStride * height; ++i) { |
- EXPECT_EQ(src_argb_a[i], dst_argb_opt[i]); |
+ dst_argb_opt + off, |
+ 256); |
+ for (int i = 0; i < 256; ++i) { |
+ EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]); |
+ } |
+ // Test destination is maintained exactly if alpha is 0. |
+ for (int i = 0; i < 256; ++i) { |
+ src_argb_a[i + off] = i; |
+ src_argb_b[i + off] = 255 - i; |
+ src_argb_alpha[i + off] = 0; |
+ } |
+ BlendPlaneRow_SSSE3(src_argb_a + off, |
+ src_argb_b + off, |
+ src_argb_alpha + off, |
+ dst_argb_opt + off, |
+ 256); |
+ for (int i = 0; i < 256; ++i) { |
+ EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]); |
} |
} |
for (int i = 0; i < kStride * height; ++i) { |
@@ -1205,34 +1219,122 @@ static void TestBlendPlane(int width, int height, int benchmark_iterations, |
src_argb_b[i + off] = (fastrand() & 0xff); |
src_argb_alpha[i + off] = (fastrand() & 0xff); |
} |
- memset(dst_argb_c, 255, kStride * height); |
- memset(dst_argb_opt, 255, kStride * height); |
BlendPlaneRow_C(src_argb_a + off, |
src_argb_b + off, |
src_argb_alpha + off, |
- dst_argb_c, |
+ dst_argb_c + off, |
width * height); |
for (int i = 0; i < benchmark_iterations; ++i) { |
- if (has_ssse3) { |
- BlendPlaneRow_SSSE3(src_argb_a + off, |
- src_argb_b + off, |
- src_argb_alpha + off, |
- dst_argb_opt, |
- width * height); |
+ if (has_avx2) { |
+ BlendPlaneRow_AVX2(src_argb_a + off, |
+ src_argb_b + off, |
+ src_argb_alpha + off, |
+ dst_argb_opt + off, |
+ width * height); |
} else { |
- BlendPlaneRow_C(src_argb_a + off, |
- src_argb_b + off, |
- src_argb_alpha + off, |
- dst_argb_opt, |
- width * height); |
+ if (has_ssse3) { |
+ BlendPlaneRow_SSSE3(src_argb_a + off, |
+ src_argb_b + off, |
+ src_argb_alpha + off, |
+ dst_argb_opt + off, |
+ width * height); |
+ } else { |
+ BlendPlaneRow_C(src_argb_a + off, |
+ src_argb_b + off, |
+ src_argb_alpha + off, |
+ dst_argb_opt + off, |
+ width * height); |
+ } |
} |
} |
for (int i = 0; i < kStride * height; ++i) { |
- EXPECT_EQ(dst_argb_c[i], dst_argb_opt[i]); |
+ EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]); |
} |
free_aligned_buffer_64(src_argb_a); |
free_aligned_buffer_64(src_argb_b); |
+ free_aligned_buffer_64(src_argb_alpha); |
+ free_aligned_buffer_64(dst_argb_c); |
+ free_aligned_buffer_64(dst_argb_opt); |
+ return; |
+} |
+ |
+TEST_F(LibYUVPlanarTest, BlendPlaneRow_Opt) { |
+ TestBlendPlaneRow(benchmark_width_, benchmark_height_, benchmark_iterations_, |
+ +1, 0); |
+} |
+TEST_F(LibYUVPlanarTest, BlendPlaneRow_Unaligned) { |
+ TestBlendPlaneRow(benchmark_width_, benchmark_height_, benchmark_iterations_, |
+ +1, 1); |
+} |
+#endif |
+ |
+static void TestBlendPlane(int width, int height, int benchmark_iterations, |
+ int disable_cpu_flags, int benchmark_cpu_info, |
+ int invert, int off) { |
+ if (width < 1) { |
+ width = 1; |
+ } |
+ const int kBpp = 1; |
+ const int kStride = width * kBpp; |
+ align_buffer_64(src_argb_a, kStride * height + off); |
+ align_buffer_64(src_argb_b, kStride * height + off); |
+ align_buffer_64(src_argb_alpha, kStride * height + off); |
+ align_buffer_64(dst_argb_c, kStride * height + off); |
+ align_buffer_64(dst_argb_opt, kStride * height + off); |
+ memset(dst_argb_c, 255, kStride * height + off); |
+ memset(dst_argb_opt, 255, kStride * height + off); |
+ |
+ // Test source is maintained exactly if alpha is 255. |
+ for (int i = 0; i < width; ++i) { |
+ src_argb_a[i + off] = i & 255; |
+ src_argb_b[i + off] = 255 - (i & 255); |
+ } |
+ memset(src_argb_alpha + off, 255, width); |
+ BlendPlane(src_argb_a + off, width, |
+ src_argb_b + off, width, |
+ src_argb_alpha + off, width, |
+ dst_argb_opt + off, width, |
+ width, 1); |
+ for (int i = 0; i < width; ++i) { |
+ EXPECT_EQ(src_argb_a[i + off], dst_argb_opt[i + off]); |
+ } |
+ // Test destination is maintained exactly if alpha is 0. |
+ memset(src_argb_alpha + off, 0, width); |
+ BlendPlane(src_argb_a + off, width, |
+ src_argb_b + off, width, |
+ src_argb_alpha + off, width, |
+ dst_argb_opt + off, width, |
+ width, 1); |
+ for (int i = 0; i < width; ++i) { |
+ EXPECT_EQ(src_argb_b[i + off], dst_argb_opt[i + off]); |
+ } |
+ for (int i = 0; i < kStride * height; ++i) { |
+ src_argb_a[i + off] = (fastrand() & 0xff); |
+ src_argb_b[i + off] = (fastrand() & 0xff); |
+ src_argb_alpha[i + off] = (fastrand() & 0xff); |
+ } |
+ |
+ MaskCpuFlags(disable_cpu_flags); |
+ BlendPlane(src_argb_a + off, width, |
+ src_argb_b + off, width, |
+ src_argb_alpha + off, width, |
+ dst_argb_c + off, width, |
+ width, height); |
+ MaskCpuFlags(benchmark_cpu_info); |
+ for (int i = 0; i < benchmark_iterations; ++i) { |
+ BlendPlane(src_argb_a + off, width, |
+ src_argb_b + off, width, |
+ src_argb_alpha + off, width, |
+ dst_argb_opt + off, width, |
+ width, height); |
+ } |
+ for (int i = 0; i < kStride * height; ++i) { |
+ EXPECT_EQ(dst_argb_c[i + off], dst_argb_opt[i + off]); |
+ } |
+ free_aligned_buffer_64(src_argb_a); |
+ free_aligned_buffer_64(src_argb_b); |
+ free_aligned_buffer_64(src_argb_alpha); |
free_aligned_buffer_64(dst_argb_c); |
free_aligned_buffer_64(dst_argb_opt); |
return; |
@@ -1240,9 +1342,106 @@ static void TestBlendPlane(int width, int height, int benchmark_iterations, |
TEST_F(LibYUVPlanarTest, BlendPlane_Opt) { |
TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, |
- +1, 0); |
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
+} |
+TEST_F(LibYUVPlanarTest, BlendPlane_Unaligned) { |
+ TestBlendPlane(benchmark_width_, benchmark_height_, benchmark_iterations_, |
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
+} |
+ |
+#define SUBSAMPLE(v, a) ((((v) + (a) - 1)) / (a)) |
+ |
+static void TestI420Blend(int width, int height, int benchmark_iterations, |
+ int disable_cpu_flags, int benchmark_cpu_info, |
+ int invert, int off) { |
+ width = ((width) > 0) ? (width) : 1; |
+ const int kStrideUV = SUBSAMPLE(width, 2); |
+ const int kSizeUV = kStrideUV * SUBSAMPLE(height, 2); |
+ align_buffer_64(src_y0, width * height + off); |
+ align_buffer_64(src_u0, kSizeUV + off); |
+ align_buffer_64(src_v0, kSizeUV + off); |
+ align_buffer_64(src_y1, width * height + off); |
+ align_buffer_64(src_u1, kSizeUV + off); |
+ align_buffer_64(src_v1, kSizeUV + off); |
+ align_buffer_64(src_a, width * height + off); |
+ align_buffer_64(dst_y_c, width * height + off); |
+ align_buffer_64(dst_u_c, kSizeUV + off); |
+ align_buffer_64(dst_v_c, kSizeUV + off); |
+ align_buffer_64(dst_y_opt, width * height + off); |
+ align_buffer_64(dst_u_opt, kSizeUV + off); |
+ align_buffer_64(dst_v_opt, kSizeUV + off); |
+ |
+ MemRandomize(src_y0, width * height + off); |
+ MemRandomize(src_u0, kSizeUV + off); |
+ MemRandomize(src_v0, kSizeUV + off); |
+ MemRandomize(src_y1, width * height + off); |
+ MemRandomize(src_u1, kSizeUV + off); |
+ MemRandomize(src_v1, kSizeUV + off); |
+ MemRandomize(src_a, width * height + off); |
+ memset(dst_y_c, 255, width * height + off); |
+ memset(dst_u_c, 255, kSizeUV + off); |
+ memset(dst_v_c, 255, kSizeUV + off); |
+ memset(dst_y_opt, 255, width * height + off); |
+ memset(dst_u_opt, 255, kSizeUV + off); |
+ memset(dst_v_opt, 255, kSizeUV + off); |
+ |
+ MaskCpuFlags(disable_cpu_flags); |
+ I420Blend(src_y0 + off, width, |
+ src_u0 + off, kStrideUV, |
+ src_v0 + off, kStrideUV, |
+ src_y1 + off, width, |
+ src_u1 + off, kStrideUV, |
+ src_v1 + off, kStrideUV, |
+ src_a + off, width, |
+ dst_y_c + off, width, |
+ dst_u_c + off, kStrideUV, |
+ dst_v_c + off, kStrideUV, |
+ width, height); |
+ MaskCpuFlags(benchmark_cpu_info); |
+ for (int i = 0; i < benchmark_iterations; ++i) { |
+ I420Blend(src_y0 + off, width, |
+ src_u0 + off, kStrideUV, |
+ src_v0 + off, kStrideUV, |
+ src_y1 + off, width, |
+ src_u1 + off, kStrideUV, |
+ src_v1 + off, kStrideUV, |
+ src_a + off, width, |
+ dst_y_opt + off, width, |
+ dst_u_opt + off, kStrideUV, |
+ dst_v_opt + off, kStrideUV, |
+ width, height); |
+ } |
+ for (int i = 0; i < width * height; ++i) { |
+ EXPECT_EQ(dst_y_c[i + off], dst_y_opt[i + off]); |
+ } |
+ for (int i = 0; i < kSizeUV; ++i) { |
+ EXPECT_NEAR(dst_u_c[i + off], dst_u_opt[i + off], 1); // Subsample off by 1 |
+ EXPECT_NEAR(dst_v_c[i + off], dst_v_opt[i + off], 1); |
+ } |
+ free_aligned_buffer_64(src_y0); |
+ free_aligned_buffer_64(src_u0); |
+ free_aligned_buffer_64(src_v0); |
+ free_aligned_buffer_64(src_y1); |
+ free_aligned_buffer_64(src_u1); |
+ free_aligned_buffer_64(src_v1); |
+ free_aligned_buffer_64(src_a); |
+ free_aligned_buffer_64(dst_y_c); |
+ free_aligned_buffer_64(dst_u_c); |
+ free_aligned_buffer_64(dst_v_c); |
+ free_aligned_buffer_64(dst_y_opt); |
+ free_aligned_buffer_64(dst_u_opt); |
+ free_aligned_buffer_64(dst_v_opt); |
+ return; |
+} |
+ |
+TEST_F(LibYUVPlanarTest, I420Blend_Opt) { |
+ TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_, |
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 0); |
+} |
+TEST_F(LibYUVPlanarTest, I420Blend_Unaligned) { |
+ TestI420Blend(benchmark_width_, benchmark_height_, benchmark_iterations_, |
+ disable_cpu_flags_, benchmark_cpu_info_, +1, 1); |
} |
-#endif |
TEST_F(LibYUVPlanarTest, TestAffine) { |
SIMD_ALIGNED(uint8 orig_pixels_0[1280][4]); |