Index: source/libvpx/test/convolve_test.cc |
=================================================================== |
--- source/libvpx/test/convolve_test.cc (revision 293588) |
+++ source/libvpx/test/convolve_test.cc (working copy) |
@@ -10,6 +10,7 @@ |
#include <string.h> |
#include "test/acm_random.h" |
+#include "test/clear_system_state.h" |
#include "test/register_state_check.h" |
#include "test/util.h" |
#include "third_party/googletest/src/include/gtest/gtest.h" |
@@ -16,6 +17,7 @@ |
#include "./vpx_config.h" |
#include "./vp9_rtcd.h" |
+#include "vp9/common/vp9_common.h" |
#include "vp9/common/vp9_filter.h" |
#include "vpx_mem/vpx_mem.h" |
#include "vpx_ports/mem.h" |
@@ -31,13 +33,16 @@ |
int w, int h); |
struct ConvolveFunctions { |
- ConvolveFunctions(ConvolveFunc h8, ConvolveFunc h8_avg, |
+ ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, |
+ ConvolveFunc h8, ConvolveFunc h8_avg, |
ConvolveFunc v8, ConvolveFunc v8_avg, |
ConvolveFunc hv8, ConvolveFunc hv8_avg, |
int bd) |
- : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg), |
- hv8_avg_(hv8_avg), use_highbd_(bd) {} |
+ : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), |
+ v8_avg_(v8_avg), hv8_avg_(hv8_avg), use_highbd_(bd) {} |
+ ConvolveFunc copy_; |
+ ConvolveFunc avg_; |
ConvolveFunc h8_; |
ConvolveFunc v8_; |
ConvolveFunc hv8_; |
@@ -298,6 +303,8 @@ |
vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1; |
output_ = reinterpret_cast<uint8_t*>( |
vpx_memalign(kDataAlignment, kOutputBufferSize)); |
+ output_ref_ = reinterpret_cast<uint8_t*>( |
+ vpx_memalign(kDataAlignment, kOutputBufferSize)); |
#if CONFIG_VP9_HIGHBITDEPTH |
input16_ = reinterpret_cast<uint16_t*>( |
vpx_memalign(kDataAlignment, |
@@ -304,19 +311,27 @@ |
(kInputBufferSize + 1) * sizeof(uint16_t))) + 1; |
output16_ = reinterpret_cast<uint16_t*>( |
vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); |
+ output16_ref_ = reinterpret_cast<uint16_t*>( |
+ vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); |
#endif |
} |
+ virtual void TearDown() { libvpx_test::ClearSystemState(); } |
+ |
static void TearDownTestCase() { |
vpx_free(input_ - 1); |
input_ = NULL; |
vpx_free(output_); |
output_ = NULL; |
+ vpx_free(output_ref_); |
+ output_ref_ = NULL; |
#if CONFIG_VP9_HIGHBITDEPTH |
vpx_free(input16_ - 1); |
input16_ = NULL; |
vpx_free(output16_); |
output16_ = NULL; |
+ vpx_free(output16_ref_); |
+ output16_ref_ = NULL; |
#endif |
} |
@@ -382,6 +397,13 @@ |
#endif |
} |
+ void CopyOutputToRef() { // Snapshot the output buffers into the ref buffers. |
+ vpx_memcpy(output_ref_, output_, kOutputBufferSize); |
+#if CONFIG_VP9_HIGHBITDEPTH // kOutputBufferSize counts samples, not bytes. |
+ vpx_memcpy(output16_ref_, output16_, kOutputBufferSize * sizeof(uint16_t)); |
+#endif |
+ } |
+ |
void CheckGuardBlocks() { |
for (int i = 0; i < kOutputBufferSize; ++i) { |
if (IsIndexInBorder(i)) |
@@ -415,6 +437,19 @@ |
#endif |
} |
+ uint8_t *output_ref() const { |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (UUT_->use_highbd_ == 0) { |
+ return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft(); |
+ } else { |
+ return CONVERT_TO_BYTEPTR(output16_ref_ + BorderTop() * kOuterBlockSize + |
+ BorderLeft()); |
+ } |
+#else |
+ return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft(); |
+#endif |
+ } |
+ |
uint16_t lookup(uint8_t *list, int index) const { |
#if CONFIG_VP9_HIGHBITDEPTH |
if (UUT_->use_highbd_ == 0) { |
@@ -493,9 +528,11 @@ |
const ConvolveFunctions* UUT_; |
static uint8_t* input_; |
static uint8_t* output_; |
+ static uint8_t* output_ref_; |
#if CONFIG_VP9_HIGHBITDEPTH |
static uint16_t* input16_; |
static uint16_t* output16_; |
+ static uint16_t* output16_ref_; |
int mask_; |
#endif |
}; |
@@ -502,9 +539,11 @@ |
uint8_t* ConvolveTest::input_ = NULL; |
uint8_t* ConvolveTest::output_ = NULL; |
+uint8_t* ConvolveTest::output_ref_ = NULL; |
#if CONFIG_VP9_HIGHBITDEPTH |
uint16_t* ConvolveTest::input16_ = NULL; |
uint16_t* ConvolveTest::output16_ = NULL; |
+uint16_t* ConvolveTest::output16_ref_ = NULL; |
#endif |
TEST_P(ConvolveTest, GuardBlocks) { |
@@ -511,6 +550,43 @@ |
CheckGuardBlocks(); |
} |
+TEST_P(ConvolveTest, Copy) { |
+ uint8_t* const in = input(); |
+ uint8_t* const out = output(); |
+ |
+ ASM_REGISTER_STATE_CHECK( |
+ UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, |
+ Width(), Height())); |
+ |
+ CheckGuardBlocks(); |
+ |
+ for (int y = 0; y < Height(); ++y) |
+ for (int x = 0; x < Width(); ++x) |
+ ASSERT_EQ(lookup(out, y * kOutputStride + x), |
+ lookup(in, y * kInputStride + x)) |
+ << "(" << x << "," << y << ")"; |
+} |
+ |
+TEST_P(ConvolveTest, Avg) { |
+ uint8_t* const in = input(); |
+ uint8_t* const out = output(); |
+ uint8_t* const out_ref = output_ref(); |
+ CopyOutputToRef(); // Snapshot the output buffer before avg_() writes to it. |
+ |
+ ASM_REGISTER_STATE_CHECK( |
+ UUT_->avg_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0, |
+ Width(), Height())); |
+ |
+ CheckGuardBlocks(); |
+ |
+ for (int y = 0; y < Height(); ++y) |
+ for (int x = 0; x < Width(); ++x) |
+ ASSERT_EQ(lookup(out, y * kOutputStride + x), |
+ ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) + |
+ lookup(out_ref, y * kOutputStride + x), 1)) |
+ << "(" << x << "," << y << ")"; |
+} |
+ |
TEST_P(ConvolveTest, CopyHoriz) { |
uint8_t* const in = input(); |
uint8_t* const out = output(); |
@@ -1188,6 +1264,30 @@ |
} |
#endif // HAVE_SSE2 && ARCH_X86_64 |
+void wrap_convolve_copy_c_8(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride, |
+ filter_x, filter_x_stride, |
+ filter_y, filter_y_stride, w, h, 8); |
+} |
+ |
+void wrap_convolve_avg_c_8(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride, |
+ filter_x, filter_x_stride, |
+ filter_y, filter_y_stride, w, h, 8); |
+} |
+ |
void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride, |
uint8_t *dst, ptrdiff_t dst_stride, |
const int16_t *filter_x, |
@@ -1260,6 +1360,30 @@ |
filter_y, filter_y_stride, w, h, 8); |
} |
+void wrap_convolve_copy_c_10(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride, |
+ filter_x, filter_x_stride, |
+ filter_y, filter_y_stride, w, h, 10); |
+} |
+ |
+void wrap_convolve_avg_c_10(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride, |
+ filter_x, filter_x_stride, |
+ filter_y, filter_y_stride, w, h, 10); |
+} |
+ |
void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride, |
uint8_t *dst, ptrdiff_t dst_stride, |
const int16_t *filter_x, |
@@ -1332,6 +1456,30 @@ |
filter_y, filter_y_stride, w, h, 10); |
} |
+void wrap_convolve_copy_c_12(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride, |
+ filter_x, filter_x_stride, |
+ filter_y, filter_y_stride, w, h, 12); |
+} |
+ |
+void wrap_convolve_avg_c_12(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride, |
+ filter_x, filter_x_stride, |
+ filter_y, filter_y_stride, w, h, 12); |
+} |
+ |
void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride, |
uint8_t *dst, ptrdiff_t dst_stride, |
const int16_t *filter_x, |
@@ -1405,6 +1553,7 @@ |
} |
const ConvolveFunctions convolve8_c( |
+ wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, |
wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8, |
wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, |
wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8); |
@@ -1423,6 +1572,7 @@ |
make_tuple(32, 64, &convolve8_c), |
make_tuple(64, 64, &convolve8_c))); |
const ConvolveFunctions convolve10_c( |
+ wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, |
wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10, |
wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, |
wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10); |
@@ -1441,6 +1591,7 @@ |
make_tuple(32, 64, &convolve10_c), |
make_tuple(64, 64, &convolve10_c))); |
const ConvolveFunctions convolve12_c( |
+ wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, |
wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12, |
wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, |
wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12); |
@@ -1462,6 +1613,7 @@ |
#else |
const ConvolveFunctions convolve8_c( |
+ vp9_convolve_copy_c, vp9_convolve_avg_c, |
vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c, |
vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c, |
vp9_convolve8_c, vp9_convolve8_avg_c, 0); |
@@ -1485,14 +1637,17 @@ |
#if HAVE_SSE2 && ARCH_X86_64 |
#if CONFIG_VP9_HIGHBITDEPTH |
const ConvolveFunctions convolve8_sse2( |
+ wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, |
wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8, |
wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8, |
wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8); |
const ConvolveFunctions convolve10_sse2( |
+ wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, |
wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10, |
wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10, |
wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10); |
const ConvolveFunctions convolve12_sse2( |
+ wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, |
wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12, |
wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12, |
wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12); |
@@ -1538,6 +1693,7 @@ |
make_tuple(64, 64, &convolve12_sse2))); |
#else |
const ConvolveFunctions convolve8_sse2( |
+ vp9_convolve_copy_sse2, vp9_convolve_avg_sse2, |
vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2, |
vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2, |
vp9_convolve8_sse2, vp9_convolve8_avg_sse2, 0); |
@@ -1561,6 +1717,7 @@ |
#if HAVE_SSSE3 |
const ConvolveFunctions convolve8_ssse3( |
+ vp9_convolve_copy_c, vp9_convolve_avg_c, |
vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3, |
vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3, |
vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3, 0); |
@@ -1583,6 +1740,7 @@ |
#if HAVE_AVX2 && HAVE_SSSE3 |
const ConvolveFunctions convolve8_avx2( |
+ vp9_convolve_copy_c, vp9_convolve_avg_c, |
vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3, |
vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3, |
vp9_convolve8_avx2, vp9_convolve8_avg_ssse3, 0); |
@@ -1603,11 +1761,20 @@ |
make_tuple(64, 64, &convolve8_avx2))); |
#endif // HAVE_AVX2 && HAVE_SSSE3 |
+#if HAVE_NEON |
#if HAVE_NEON_ASM |
const ConvolveFunctions convolve8_neon( |
+ vp9_convolve_copy_neon, vp9_convolve_avg_neon, |
vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon, |
vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon, |
vp9_convolve8_neon, vp9_convolve8_avg_neon, 0); |
+#else // HAVE_NEON_ASM |
+const ConvolveFunctions convolve8_neon( |
+ vp9_convolve_copy_neon, vp9_convolve_avg_neon, |
+ vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon, |
+ vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon, |
+ vp9_convolve8_neon, vp9_convolve8_avg_neon, 0); |
+#endif // HAVE_NEON_ASM |
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values( |
make_tuple(4, 4, &convolve8_neon), |
@@ -1623,10 +1790,11 @@ |
make_tuple(64, 32, &convolve8_neon), |
make_tuple(32, 64, &convolve8_neon), |
make_tuple(64, 64, &convolve8_neon))); |
-#endif |
+#endif // HAVE_NEON |
#if HAVE_DSPR2 |
const ConvolveFunctions convolve8_dspr2( |
+ vp9_convolve_copy_dspr2, vp9_convolve_avg_dspr2, |
vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2, |
vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2, |
vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2, 0); |