| Index: source/libvpx/test/convolve_test.cc
|
| ===================================================================
|
| --- source/libvpx/test/convolve_test.cc (revision 293588)
|
| +++ source/libvpx/test/convolve_test.cc (working copy)
|
| @@ -10,6 +10,7 @@
|
|
|
| #include <string.h>
|
| #include "test/acm_random.h"
|
| +#include "test/clear_system_state.h"
|
| #include "test/register_state_check.h"
|
| #include "test/util.h"
|
| #include "third_party/googletest/src/include/gtest/gtest.h"
|
| @@ -16,6 +17,7 @@
|
|
|
| #include "./vpx_config.h"
|
| #include "./vp9_rtcd.h"
|
| +#include "vp9/common/vp9_common.h"
|
| #include "vp9/common/vp9_filter.h"
|
| #include "vpx_mem/vpx_mem.h"
|
| #include "vpx_ports/mem.h"
|
| @@ -31,13 +33,16 @@
|
| int w, int h);
|
|
|
| struct ConvolveFunctions {
|
| - ConvolveFunctions(ConvolveFunc h8, ConvolveFunc h8_avg,
|
| + ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg,
|
| + ConvolveFunc h8, ConvolveFunc h8_avg,
|
| ConvolveFunc v8, ConvolveFunc v8_avg,
|
| ConvolveFunc hv8, ConvolveFunc hv8_avg,
|
| int bd)
|
| - : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg),
|
| - hv8_avg_(hv8_avg), use_highbd_(bd) {}
|
| + : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg),
|
| + v8_avg_(v8_avg), hv8_avg_(hv8_avg), use_highbd_(bd) {}
|
|
|
| + ConvolveFunc copy_;
|
| + ConvolveFunc avg_;
|
| ConvolveFunc h8_;
|
| ConvolveFunc v8_;
|
| ConvolveFunc hv8_;
|
| @@ -298,6 +303,8 @@
|
| vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
|
| output_ = reinterpret_cast<uint8_t*>(
|
| vpx_memalign(kDataAlignment, kOutputBufferSize));
|
| + output_ref_ = reinterpret_cast<uint8_t*>(
|
| + vpx_memalign(kDataAlignment, kOutputBufferSize));
|
| #if CONFIG_VP9_HIGHBITDEPTH
|
| input16_ = reinterpret_cast<uint16_t*>(
|
| vpx_memalign(kDataAlignment,
|
| @@ -304,19 +311,27 @@
|
| (kInputBufferSize + 1) * sizeof(uint16_t))) + 1;
|
| output16_ = reinterpret_cast<uint16_t*>(
|
| vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
|
| + output16_ref_ = reinterpret_cast<uint16_t*>(
|
| + vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
|
| #endif
|
| }
|
|
|
| + virtual void TearDown() { libvpx_test::ClearSystemState(); }
|
| +
|
| static void TearDownTestCase() {
|
| vpx_free(input_ - 1);
|
| input_ = NULL;
|
| vpx_free(output_);
|
| output_ = NULL;
|
| + vpx_free(output_ref_);
|
| + output_ref_ = NULL;
|
| #if CONFIG_VP9_HIGHBITDEPTH
|
| vpx_free(input16_ - 1);
|
| input16_ = NULL;
|
| vpx_free(output16_);
|
| output16_ = NULL;
|
| + vpx_free(output16_ref_);
|
| + output16_ref_ = NULL;
|
| #endif
|
| }
|
|
|
| @@ -382,6 +397,13 @@
|
| #endif
|
| }
|
|
|
| + void CopyOutputToRef() {  // Snapshot output_ / output16_ into their *_ref_ copies.
|
| + vpx_memcpy(output_ref_, output_, kOutputBufferSize);
|
| +#if CONFIG_VP9_HIGHBITDEPTH  // 16-bit buffers: kOutputBufferSize counts elements, so scale to bytes.
|
| + vpx_memcpy(output16_ref_, output16_, kOutputBufferSize * sizeof(uint16_t));
|
| +#endif
|
| + }
|
| +
|
| void CheckGuardBlocks() {
|
| for (int i = 0; i < kOutputBufferSize; ++i) {
|
| if (IsIndexInBorder(i))
|
| @@ -415,6 +437,19 @@
|
| #endif
|
| }
|
|
|
| + uint8_t *output_ref() const {  // Reference buffer offset by BorderTop() rows plus BorderLeft().
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + if (UUT_->use_highbd_ == 0) {
|
| + return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
|
| + } else {  // use_highbd_ != 0: same offset into output16_ref_, passed through CONVERT_TO_BYTEPTR.
|
| + return CONVERT_TO_BYTEPTR(output16_ref_ + BorderTop() * kOuterBlockSize +
|
| + BorderLeft());
|
| + }
|
| +#else
|
| + return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
|
| +#endif
|
| + }
|
| +
|
| uint16_t lookup(uint8_t *list, int index) const {
|
| #if CONFIG_VP9_HIGHBITDEPTH
|
| if (UUT_->use_highbd_ == 0) {
|
| @@ -493,9 +528,11 @@
|
| const ConvolveFunctions* UUT_;
|
| static uint8_t* input_;
|
| static uint8_t* output_;
|
| + static uint8_t* output_ref_;
|
| #if CONFIG_VP9_HIGHBITDEPTH
|
| static uint16_t* input16_;
|
| static uint16_t* output16_;
|
| + static uint16_t* output16_ref_;
|
| int mask_;
|
| #endif
|
| };
|
| @@ -502,9 +539,11 @@
|
|
|
| uint8_t* ConvolveTest::input_ = NULL;
|
| uint8_t* ConvolveTest::output_ = NULL;
|
| +uint8_t* ConvolveTest::output_ref_ = NULL;
|
| #if CONFIG_VP9_HIGHBITDEPTH
|
| uint16_t* ConvolveTest::input16_ = NULL;
|
| uint16_t* ConvolveTest::output16_ = NULL;
|
| +uint16_t* ConvolveTest::output16_ref_ = NULL;
|
| #endif
|
|
|
| TEST_P(ConvolveTest, GuardBlocks) {
|
| @@ -511,6 +550,43 @@
|
| CheckGuardBlocks();
|
| }
|
|
|
| +TEST_P(ConvolveTest, Copy) {  // Runs copy_ and expects output == input for every pixel.
|
| + uint8_t* const in = input();
|
| + uint8_t* const out = output();
|
| +
|
| + ASM_REGISTER_STATE_CHECK(
|
| + UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
|
| + Width(), Height()));
|
| +
|
| + CheckGuardBlocks();
|
| +
|
| + for (int y = 0; y < Height(); ++y)  // Compare each output pixel with the input pixel at (x, y).
|
| + for (int x = 0; x < Width(); ++x)
|
| + ASSERT_EQ(lookup(out, y * kOutputStride + x),
|
| + lookup(in, y * kInputStride + x))
|
| + << "(" << x << "," << y << ")";
|
| +}
|
| +
|
| +TEST_P(ConvolveTest, Avg) {  // Runs avg_ and checks it against input combined with the prior output.
|
| + uint8_t* const in = input();
|
| + uint8_t* const out = output();
|
| + uint8_t* const out_ref = output_ref();
|
| + CopyOutputToRef();  // Save the output contents before avg_ writes to them.
|
| +
|
| + ASM_REGISTER_STATE_CHECK(
|
| + UUT_->avg_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
|
| + Width(), Height()));
|
| +
|
| + CheckGuardBlocks();
|
| +
|
| + for (int y = 0; y < Height(); ++y)  // Expected: ROUND_POWER_OF_TWO(input + saved output, 1).
|
| + for (int x = 0; x < Width(); ++x)
|
| + ASSERT_EQ(lookup(out, y * kOutputStride + x),
|
| + ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) +
|
| + lookup(out_ref, y * kOutputStride + x), 1))
|
| + << "(" << x << "," << y << ")";
|
| +}
|
| +
|
| TEST_P(ConvolveTest, CopyHoriz) {
|
| uint8_t* const in = input();
|
| uint8_t* const out = output();
|
| @@ -1188,6 +1264,30 @@
|
| }
|
| #endif // HAVE_SSE2 && ARCH_X86_64
|
|
|
| +void wrap_convolve_copy_c_8(const uint8_t *src, ptrdiff_t src_stride,
|
| + uint8_t *dst, ptrdiff_t dst_stride,
|
| + const int16_t *filter_x,
|
| + int filter_x_stride,
|
| + const int16_t *filter_y,
|
| + int filter_y_stride,
|
| + int w, int h) {
|
| + vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
|
| + filter_x, filter_x_stride,
|
| + filter_y, filter_y_stride, w, h, 8);
|
| +}
|
| +
|
| +void wrap_convolve_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
|
| + uint8_t *dst, ptrdiff_t dst_stride,
|
| + const int16_t *filter_x,
|
| + int filter_x_stride,
|
| + const int16_t *filter_y,
|
| + int filter_y_stride,
|
| + int w, int h) {
|
| + vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
|
| + filter_x, filter_x_stride,
|
| + filter_y, filter_y_stride, w, h, 8);
|
| +}
|
| +
|
| void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
|
| uint8_t *dst, ptrdiff_t dst_stride,
|
| const int16_t *filter_x,
|
| @@ -1260,6 +1360,30 @@
|
| filter_y, filter_y_stride, w, h, 8);
|
| }
|
|
|
| +void wrap_convolve_copy_c_10(const uint8_t *src, ptrdiff_t src_stride,
|
| + uint8_t *dst, ptrdiff_t dst_stride,
|
| + const int16_t *filter_x,
|
| + int filter_x_stride,
|
| + const int16_t *filter_y,
|
| + int filter_y_stride,
|
| + int w, int h) {
|
| + vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
|
| + filter_x, filter_x_stride,
|
| + filter_y, filter_y_stride, w, h, 10);
|
| +}
|
| +
|
| +void wrap_convolve_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
|
| + uint8_t *dst, ptrdiff_t dst_stride,
|
| + const int16_t *filter_x,
|
| + int filter_x_stride,
|
| + const int16_t *filter_y,
|
| + int filter_y_stride,
|
| + int w, int h) {
|
| + vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
|
| + filter_x, filter_x_stride,
|
| + filter_y, filter_y_stride, w, h, 10);
|
| +}
|
| +
|
| void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
|
| uint8_t *dst, ptrdiff_t dst_stride,
|
| const int16_t *filter_x,
|
| @@ -1332,6 +1456,30 @@
|
| filter_y, filter_y_stride, w, h, 10);
|
| }
|
|
|
| +void wrap_convolve_copy_c_12(const uint8_t *src, ptrdiff_t src_stride,
|
| + uint8_t *dst, ptrdiff_t dst_stride,
|
| + const int16_t *filter_x,
|
| + int filter_x_stride,
|
| + const int16_t *filter_y,
|
| + int filter_y_stride,
|
| + int w, int h) {
|
| + vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
|
| + filter_x, filter_x_stride,
|
| + filter_y, filter_y_stride, w, h, 12);
|
| +}
|
| +
|
| +void wrap_convolve_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
|
| + uint8_t *dst, ptrdiff_t dst_stride,
|
| + const int16_t *filter_x,
|
| + int filter_x_stride,
|
| + const int16_t *filter_y,
|
| + int filter_y_stride,
|
| + int w, int h) {
|
| + vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
|
| + filter_x, filter_x_stride,
|
| + filter_y, filter_y_stride, w, h, 12);
|
| +}
|
| +
|
| void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
|
| uint8_t *dst, ptrdiff_t dst_stride,
|
| const int16_t *filter_x,
|
| @@ -1405,6 +1553,7 @@
|
| }
|
|
|
| const ConvolveFunctions convolve8_c(
|
| + wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
|
| wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
|
| wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
|
| wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
|
| @@ -1423,6 +1572,7 @@
|
| make_tuple(32, 64, &convolve8_c),
|
| make_tuple(64, 64, &convolve8_c)));
|
| const ConvolveFunctions convolve10_c(
|
| + wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
|
| wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
|
| wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
|
| wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
|
| @@ -1441,6 +1591,7 @@
|
| make_tuple(32, 64, &convolve10_c),
|
| make_tuple(64, 64, &convolve10_c)));
|
| const ConvolveFunctions convolve12_c(
|
| + wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
|
| wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
|
| wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
|
| wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
|
| @@ -1462,6 +1613,7 @@
|
| #else
|
|
|
| const ConvolveFunctions convolve8_c(
|
| + vp9_convolve_copy_c, vp9_convolve_avg_c,
|
| vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
|
| vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
|
| vp9_convolve8_c, vp9_convolve8_avg_c, 0);
|
| @@ -1485,14 +1637,17 @@
|
| #if HAVE_SSE2 && ARCH_X86_64
|
| #if CONFIG_VP9_HIGHBITDEPTH
|
| const ConvolveFunctions convolve8_sse2(
|
| + wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
|
| wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
|
| wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
|
| wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
|
| const ConvolveFunctions convolve10_sse2(
|
| + wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
|
| wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
|
| wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
|
| wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
|
| const ConvolveFunctions convolve12_sse2(
|
| + wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
|
| wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
|
| wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
|
| wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
|
| @@ -1538,6 +1693,7 @@
|
| make_tuple(64, 64, &convolve12_sse2)));
|
| #else
|
| const ConvolveFunctions convolve8_sse2(
|
| + vp9_convolve_copy_sse2, vp9_convolve_avg_sse2,
|
| vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2,
|
| vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2,
|
| vp9_convolve8_sse2, vp9_convolve8_avg_sse2, 0);
|
| @@ -1561,6 +1717,7 @@
|
|
|
| #if HAVE_SSSE3
|
| const ConvolveFunctions convolve8_ssse3(
|
| + vp9_convolve_copy_c, vp9_convolve_avg_c,
|
| vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
|
| vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
|
| vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3, 0);
|
| @@ -1583,6 +1740,7 @@
|
|
|
| #if HAVE_AVX2 && HAVE_SSSE3
|
| const ConvolveFunctions convolve8_avx2(
|
| + vp9_convolve_copy_c, vp9_convolve_avg_c,
|
| vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
|
| vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
|
| vp9_convolve8_avx2, vp9_convolve8_avg_ssse3, 0);
|
| @@ -1603,11 +1761,20 @@
|
| make_tuple(64, 64, &convolve8_avx2)));
|
| #endif // HAVE_AVX2 && HAVE_SSSE3
|
|
|
| +#if HAVE_NEON
|
| #if HAVE_NEON_ASM
|
| const ConvolveFunctions convolve8_neon(
|
| + vp9_convolve_copy_neon, vp9_convolve_avg_neon,
|
| vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
|
| vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
|
| vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
|
| +#else // !HAVE_NEON_ASM
|
| +const ConvolveFunctions convolve8_neon(
|
| + vp9_convolve_copy_neon, vp9_convolve_avg_neon,
|
| + vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
|
| + vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
|
| + vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
|
| +#endif // HAVE_NEON_ASM
|
|
|
| INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
|
| make_tuple(4, 4, &convolve8_neon),
|
| @@ -1623,10 +1790,11 @@
|
| make_tuple(64, 32, &convolve8_neon),
|
| make_tuple(32, 64, &convolve8_neon),
|
| make_tuple(64, 64, &convolve8_neon)));
|
| -#endif
|
| +#endif // HAVE_NEON
|
|
|
| #if HAVE_DSPR2
|
| const ConvolveFunctions convolve8_dspr2(
|
| + vp9_convolve_copy_dspr2, vp9_convolve_avg_dspr2,
|
| vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2,
|
| vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2,
|
| vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2, 0);
|
|
|