Index: source/libvpx/test/convolve_test.cc |
=================================================================== |
--- source/libvpx/test/convolve_test.cc (revision 292072) |
+++ source/libvpx/test/convolve_test.cc (working copy) |
@@ -21,6 +21,9 @@ |
#include "vpx_ports/mem.h" |
namespace { |
+ |
+static const unsigned int kMaxDimension = 64; |
+ |
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride, |
uint8_t *dst, ptrdiff_t dst_stride, |
const int16_t *filter_x, int filter_x_stride, |
@@ -30,9 +33,10 @@ |
struct ConvolveFunctions { |
ConvolveFunctions(ConvolveFunc h8, ConvolveFunc h8_avg, |
ConvolveFunc v8, ConvolveFunc v8_avg, |
- ConvolveFunc hv8, ConvolveFunc hv8_avg) |
+ ConvolveFunc hv8, ConvolveFunc hv8_avg, |
+ int bd) |
: h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg), |
- hv8_avg_(hv8_avg) {} |
+ hv8_avg_(hv8_avg), use_high_bd_(bd) {} |
ConvolveFunc h8_; |
ConvolveFunc v8_; |
@@ -40,6 +44,7 @@ |
ConvolveFunc h8_avg_; |
ConvolveFunc v8_avg_; |
ConvolveFunc hv8_avg_; |
+ int use_high_bd_; // 0 if high bitdepth not used, else the actual bit depth. |
}; |
typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam; |
@@ -68,7 +73,120 @@ |
const int kInterp_Extend = 4; |
const unsigned int intermediate_height = |
(kInterp_Extend - 1) + output_height + kInterp_Extend; |
+ unsigned int i, j; |
+ // Size of intermediate_buffer is max_intermediate_height * filter_max_width, |
+ // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height |
+ // + kInterp_Extend |
+ // = 3 + 64 + 4 |
+ // = 71 |
+ // and filter_max_width = 64 |
+ // |
+ uint8_t intermediate_buffer[71 * kMaxDimension]; |
+ const int intermediate_next_stride = 1 - intermediate_height * output_width; |
+ |
+ // Horizontal pass (src -> transposed intermediate). |
+ uint8_t *output_ptr = intermediate_buffer; |
+ const int src_next_row_stride = src_stride - output_width; |
+ src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); |
+ for (i = 0; i < intermediate_height; ++i) { |
+ for (j = 0; j < output_width; ++j) { |
+ // Apply filter... |
+ const int temp = (src_ptr[0] * HFilter[0]) + |
+ (src_ptr[1] * HFilter[1]) + |
+ (src_ptr[2] * HFilter[2]) + |
+ (src_ptr[3] * HFilter[3]) + |
+ (src_ptr[4] * HFilter[4]) + |
+ (src_ptr[5] * HFilter[5]) + |
+ (src_ptr[6] * HFilter[6]) + |
+ (src_ptr[7] * HFilter[7]) + |
+ (VP9_FILTER_WEIGHT >> 1); // Rounding |
+ |
+ // Normalize back to 0-255... |
+ *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT); |
+ ++src_ptr; |
+ output_ptr += intermediate_height; |
+ } |
+ src_ptr += src_next_row_stride; |
+ output_ptr += intermediate_next_stride; |
+ } |
+ |
+ // Vertical pass (transposed intermediate -> dst). |
+ src_ptr = intermediate_buffer; |
+ const int dst_next_row_stride = dst_stride - output_width; |
+ for (i = 0; i < output_height; ++i) { |
+ for (j = 0; j < output_width; ++j) { |
+ // Apply filter... |
+ const int temp = (src_ptr[0] * VFilter[0]) + |
+ (src_ptr[1] * VFilter[1]) + |
+ (src_ptr[2] * VFilter[2]) + |
+ (src_ptr[3] * VFilter[3]) + |
+ (src_ptr[4] * VFilter[4]) + |
+ (src_ptr[5] * VFilter[5]) + |
+ (src_ptr[6] * VFilter[6]) + |
+ (src_ptr[7] * VFilter[7]) + |
+ (VP9_FILTER_WEIGHT >> 1); // Rounding |
+ |
+ // Normalize back to 0-255... |
+ *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT); |
+ src_ptr += intermediate_height; |
+ } |
+ src_ptr += intermediate_next_stride; |
+ dst_ptr += dst_next_row_stride; |
+ } |
+} |
+ |
+void block2d_average_c(uint8_t *src, |
+ unsigned int src_stride, |
+ uint8_t *output_ptr, |
+ unsigned int output_stride, |
+ unsigned int output_width, |
+ unsigned int output_height) { |
+ unsigned int i, j; |
+ for (i = 0; i < output_height; ++i) { |
+ for (j = 0; j < output_width; ++j) { |
+ output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; |
+ } |
+ output_ptr += output_stride; |
+ } |
+} |
+ |
+void filter_average_block2d_8_c(const uint8_t *src_ptr, |
+ const unsigned int src_stride, |
+ const int16_t *HFilter, |
+ const int16_t *VFilter, |
+ uint8_t *dst_ptr, |
+ unsigned int dst_stride, |
+ unsigned int output_width, |
+ unsigned int output_height) { |
+ uint8_t tmp[kMaxDimension * kMaxDimension]; |
+ // Filter into tmp (stride kMaxDimension), then round-average tmp into dst. |
+ assert(output_width <= kMaxDimension); |
+ assert(output_height <= kMaxDimension); |
+ filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, |
+ kMaxDimension, output_width, output_height); |
+ block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, |
+ output_width, output_height); |
+} |
+ |
+#if CONFIG_VP9_HIGHBITDEPTH |
+void high_filter_block2d_8_c(const uint16_t *src_ptr, |
+ const unsigned int src_stride, |
+ const int16_t *HFilter, |
+ const int16_t *VFilter, |
+ uint16_t *dst_ptr, |
+ unsigned int dst_stride, |
+ unsigned int output_width, |
+ unsigned int output_height, |
+ int bd) { |
+ // Between passes, we use an intermediate buffer whose height is extended to |
+ // have enough horizontally filtered values as input for the vertical pass. |
+ // This buffer is allocated to be big enough for the largest block type we |
+ // support. |
+ const int kInterp_Extend = 4; |
+ const unsigned int intermediate_height = |
+ (kInterp_Extend - 1) + output_height + kInterp_Extend; |
+ |
/* Size of intermediate_buffer is max_intermediate_height * filter_max_width, |
* where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height |
* + kInterp_Extend |
@@ -76,12 +194,12 @@ |
* = 23 |
* and filter_max_width = 16 |
*/ |
- uint8_t intermediate_buffer[71 * 64]; |
+ uint16_t intermediate_buffer[71 * kMaxDimension]; |
const int intermediate_next_stride = 1 - intermediate_height * output_width; |
// Horizontal pass (src -> transposed intermediate). |
{ |
- uint8_t *output_ptr = intermediate_buffer; |
+ uint16_t *output_ptr = intermediate_buffer; |
const int src_next_row_stride = src_stride - output_width; |
unsigned int i, j; |
src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); |
@@ -99,7 +217,7 @@ |
(VP9_FILTER_WEIGHT >> 1); // Rounding |
// Normalize back to 0-255... |
- *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT); |
+ *output_ptr = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd); |
++src_ptr; |
output_ptr += intermediate_height; |
} |
@@ -110,7 +228,7 @@ |
// Vertical pass (transposed intermediate -> dst). |
{ |
- uint8_t *src_ptr = intermediate_buffer; |
+ uint16_t *src_ptr = intermediate_buffer; |
const int dst_next_row_stride = dst_stride - output_width; |
unsigned int i, j; |
for (i = 0; i < output_height; ++i) { |
@@ -127,7 +245,7 @@ |
(VP9_FILTER_WEIGHT >> 1); // Rounding |
// Normalize back to 0-255... |
- *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT); |
+ *dst_ptr++ = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd); |
src_ptr += intermediate_height; |
} |
src_ptr += intermediate_next_stride; |
@@ -136,12 +254,13 @@ |
} |
} |
-void block2d_average_c(uint8_t *src, |
- unsigned int src_stride, |
- uint8_t *output_ptr, |
- unsigned int output_stride, |
- unsigned int output_width, |
- unsigned int output_height) { |
+void high_block2d_average_c(uint16_t *src, |
+ unsigned int src_stride, |
+ uint16_t *output_ptr, |
+ unsigned int output_stride, |
+ unsigned int output_width, |
+ unsigned int output_height, |
+ int bd) { |
unsigned int i, j; |
for (i = 0; i < output_height; ++i) { |
for (j = 0; j < output_width; ++j) { |
@@ -151,23 +270,25 @@ |
} |
} |
-void filter_average_block2d_8_c(const uint8_t *src_ptr, |
- const unsigned int src_stride, |
- const int16_t *HFilter, |
- const int16_t *VFilter, |
- uint8_t *dst_ptr, |
- unsigned int dst_stride, |
- unsigned int output_width, |
- unsigned int output_height) { |
- uint8_t tmp[64 * 64]; |
+void high_filter_average_block2d_8_c(const uint16_t *src_ptr, |
+ const unsigned int src_stride, |
+ const int16_t *HFilter, |
+ const int16_t *VFilter, |
+ uint16_t *dst_ptr, |
+ unsigned int dst_stride, |
+ unsigned int output_width, |
+ unsigned int output_height, |
+ int bd) { |
+ uint16_t tmp[kMaxDimension * kMaxDimension];  // Scratch output of the filter pass. |
- assert(output_width <= 64); |
- assert(output_height <= 64); |
- filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64, |
- output_width, output_height); |
- block2d_average_c(tmp, 64, dst_ptr, dst_stride, |
- output_width, output_height); |
+ assert(output_width <= kMaxDimension); |
+ assert(output_height <= kMaxDimension); |
+ high_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, |
+ kMaxDimension, output_width, output_height, bd); |
+ high_block2d_average_c(tmp, kMaxDimension, dst_ptr, dst_stride, |
+ output_width, output_height, bd); |
} |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> { |
public: |
@@ -177,6 +298,13 @@ |
vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1; |
output_ = reinterpret_cast<uint8_t*>( |
vpx_memalign(kDataAlignment, kOutputBufferSize)); |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ input16_ = reinterpret_cast<uint16_t*>( |
+ vpx_memalign(kDataAlignment, |
+ (kInputBufferSize + 1) * sizeof(uint16_t))) + 1; |
+ output16_ = reinterpret_cast<uint16_t*>( |
+ vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); |
+#endif |
} |
static void TearDownTestCase() { |
@@ -184,6 +312,12 @@ |
input_ = NULL; |
vpx_free(output_); |
output_ = NULL; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ vpx_free(input16_ - 1); |
+ input16_ = NULL; |
+ vpx_free(output16_); |
+ output16_ = NULL; |
+#endif |
} |
protected: |
@@ -191,7 +325,6 @@ |
static const int kOuterBlockSize = 256; |
static const int kInputStride = kOuterBlockSize; |
static const int kOutputStride = kOuterBlockSize; |
- static const int kMaxDimension = 64; |
static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize; |
static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize; |
@@ -212,6 +345,12 @@ |
virtual void SetUp() { |
UUT_ = GET_PARAM(2); |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (UUT_->use_high_bd_ != 0) |
+ mask_ = (1 << UUT_->use_high_bd_) - 1; |
+ else |
+ mask_ = 255; |
+#endif |
/* Set up guard blocks for an inner block centered in the outer block */ |
for (int i = 0; i < kOutputBufferSize; ++i) { |
if (IsIndexInBorder(i)) |
@@ -222,15 +361,25 @@ |
::libvpx_test::ACMRandom prng; |
for (int i = 0; i < kInputBufferSize; ++i) { |
- if (i & 1) |
+ if (i & 1) { |
input_[i] = 255; |
- else |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ input16_[i] = mask_; |
+#endif |
+ } else { |
input_[i] = prng.Rand8Extremes(); |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ input16_[i] = prng.Rand16() & mask_; |
+#endif |
+ } |
} |
} |
void SetConstantInput(int value) { |
memset(input_, value, kInputBufferSize); |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ vpx_memset16(input16_, value, kInputBufferSize); |
+#endif |
} |
void CheckGuardBlocks() { |
@@ -240,20 +389,123 @@ |
} |
} |
- uint8_t* input() const { |
+ uint8_t *input() const { |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (UUT_->use_high_bd_ == 0) { |
+ return input_ + BorderTop() * kOuterBlockSize + BorderLeft(); |
+ } else { |
+ return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize + |
+ BorderLeft()); |
+ } |
+#else |
return input_ + BorderTop() * kOuterBlockSize + BorderLeft(); |
+#endif |
} |
- uint8_t* output() const { |
+ uint8_t *output() const { |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (UUT_->use_high_bd_ == 0) { |
+ return output_ + BorderTop() * kOuterBlockSize + BorderLeft(); |
+ } else { |
+ return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize + |
+ BorderLeft()); |
+ } |
+#else |
return output_ + BorderTop() * kOuterBlockSize + BorderLeft(); |
+#endif |
} |
+ uint16_t lookup(uint8_t *list, int index) const { |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (UUT_->use_high_bd_ == 0) { |
+ return list[index]; |
+ } else { |
+ return CONVERT_TO_SHORTPTR(list)[index]; |
+ } |
+#else |
+ return list[index]; |
+#endif |
+ } |
+ |
+ void assign_val(uint8_t *list, int index, uint16_t val) const { |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (UUT_->use_high_bd_ == 0) { |
+ list[index] = static_cast<uint8_t>(val);  // 8-bit path: narrow to a byte. |
+ } else { |
+ CONVERT_TO_SHORTPTR(list)[index] = val;  // High bitdepth: store all 16 bits. |
+ } |
+#else |
+ list[index] = static_cast<uint8_t>(val);  // 8-bit only build: narrow to a byte. |
+#endif |
+ } |
+ |
+ void wrapper_filter_average_block2d_8_c(const uint8_t *src_ptr, |
+ const unsigned int src_stride, |
+ const int16_t *HFilter, |
+ const int16_t *VFilter, |
+ uint8_t *dst_ptr, |
+ unsigned int dst_stride, |
+ unsigned int output_width, |
+ unsigned int output_height) { |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (UUT_->use_high_bd_ == 0) { |
+ filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, |
+ dst_ptr, dst_stride, output_width, |
+ output_height); |
+ } else { |
+ high_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride, |
+ HFilter, VFilter, |
+ CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, |
+ output_width, output_height, |
+ UUT_->use_high_bd_); |
+ } |
+#else |
+ filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, |
+ dst_ptr, dst_stride, output_width, |
+ output_height); |
+#endif |
+ } |
+ |
+ void wrapper_filter_block2d_8_c(const uint8_t *src_ptr, |
+ const unsigned int src_stride, |
+ const int16_t *HFilter, |
+ const int16_t *VFilter, |
+ uint8_t *dst_ptr, |
+ unsigned int dst_stride, |
+ unsigned int output_width, |
+ unsigned int output_height) { |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (UUT_->use_high_bd_ == 0) { |
+ filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, |
+ dst_ptr, dst_stride, output_width, output_height); |
+ } else { |
+ high_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride, |
+ HFilter, VFilter, |
+ CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, |
+ output_width, output_height, UUT_->use_high_bd_); |
+ } |
+#else |
+ filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, |
+ dst_ptr, dst_stride, output_width, output_height); |
+#endif |
+ } |
+ |
const ConvolveFunctions* UUT_; |
static uint8_t* input_; |
static uint8_t* output_; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ static uint16_t* input16_; |
+ static uint16_t* output16_; |
+ int mask_; |
+#endif |
}; |
+ |
uint8_t* ConvolveTest::input_ = NULL; |
uint8_t* ConvolveTest::output_ = NULL; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+uint16_t* ConvolveTest::input16_ = NULL; |
+uint16_t* ConvolveTest::output16_ = NULL; |
+#endif |
TEST_P(ConvolveTest, GuardBlocks) { |
CheckGuardBlocks(); |
@@ -272,7 +524,8 @@ |
for (int y = 0; y < Height(); ++y) |
for (int x = 0; x < Width(); ++x) |
- ASSERT_EQ(out[y * kOutputStride + x], in[y * kInputStride + x]) |
+ ASSERT_EQ(lookup(out, y * kOutputStride + x), |
+ lookup(in, y * kInputStride + x)) |
<< "(" << x << "," << y << ")"; |
} |
@@ -289,7 +542,8 @@ |
for (int y = 0; y < Height(); ++y) |
for (int x = 0; x < Width(); ++x) |
- ASSERT_EQ(out[y * kOutputStride + x], in[y * kInputStride + x]) |
+ ASSERT_EQ(lookup(out, y * kOutputStride + x), |
+ lookup(in, y * kInputStride + x)) |
<< "(" << x << "," << y << ")"; |
} |
@@ -306,7 +560,8 @@ |
for (int y = 0; y < Height(); ++y) |
for (int x = 0; x < Width(); ++x) |
- ASSERT_EQ(out[y * kOutputStride + x], in[y * kInputStride + x]) |
+ ASSERT_EQ(lookup(out, y * kOutputStride + x), |
+ lookup(in, y * kInputStride + x)) |
<< "(" << x << "," << y << ")"; |
} |
@@ -339,9 +594,19 @@ |
TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) { |
uint8_t* const in = input(); |
uint8_t* const out = output(); |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ uint8_t ref8[kOutputStride * kMaxDimension]; |
+ uint16_t ref16[kOutputStride * kMaxDimension]; |
+ uint8_t* ref; |
+ if (UUT_->use_high_bd_ == 0) { |
+ ref = ref8; |
+ } else { |
+ ref = CONVERT_TO_BYTEPTR(ref16); |
+ } |
+#else |
uint8_t ref[kOutputStride * kMaxDimension]; |
+#endif |
- |
for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { |
const InterpKernel *filters = |
vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank)); |
@@ -350,10 +615,10 @@ |
for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { |
for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { |
- filter_block2d_8_c(in, kInputStride, |
- filters[filter_x], filters[filter_y], |
- ref, kOutputStride, |
- Width(), Height()); |
+ wrapper_filter_block2d_8_c(in, kInputStride, |
+ filters[filter_x], filters[filter_y], |
+ ref, kOutputStride, |
+ Width(), Height()); |
if (filters == eighttap_smooth || (filter_x && filter_y)) |
ASM_REGISTER_STATE_CHECK( |
@@ -375,7 +640,8 @@ |
for (int y = 0; y < Height(); ++y) |
for (int x = 0; x < Width(); ++x) |
- ASSERT_EQ(ref[y * kOutputStride + x], out[y * kOutputStride + x]) |
+ ASSERT_EQ(lookup(ref, y * kOutputStride + x), |
+ lookup(out, y * kOutputStride + x)) |
<< "mismatch at (" << x << "," << y << "), " |
<< "filters (" << filter_bank << "," |
<< filter_x << "," << filter_y << ")"; |
@@ -387,16 +653,36 @@ |
TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) { |
uint8_t* const in = input(); |
uint8_t* const out = output(); |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ uint8_t ref8[kOutputStride * kMaxDimension]; |
+ uint16_t ref16[kOutputStride * kMaxDimension]; |
+ uint8_t* ref; |
+ if (UUT_->use_high_bd_ == 0) { |
+ ref = ref8; |
+ } else { |
+ ref = CONVERT_TO_BYTEPTR(ref16); |
+ } |
+#else |
uint8_t ref[kOutputStride * kMaxDimension]; |
+#endif |
// Populate ref and out with some random data |
::libvpx_test::ACMRandom prng; |
for (int y = 0; y < Height(); ++y) { |
for (int x = 0; x < Width(); ++x) { |
- const uint8_t r = prng.Rand8Extremes(); |
+ uint16_t r; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (UUT_->use_high_bd_ == 0 || UUT_->use_high_bd_ == 8) { |
+ r = prng.Rand8Extremes(); |
+ } else { |
+ r = prng.Rand16() & mask_; |
+ } |
+#else |
+ r = prng.Rand8Extremes(); |
+#endif |
- out[y * kOutputStride + x] = r; |
- ref[y * kOutputStride + x] = r; |
+ assign_val(out, y * kOutputStride + x, r); |
+ assign_val(ref, y * kOutputStride + x, r); |
} |
} |
@@ -408,10 +694,10 @@ |
for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { |
for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { |
- filter_average_block2d_8_c(in, kInputStride, |
- filters[filter_x], filters[filter_y], |
- ref, kOutputStride, |
- Width(), Height()); |
+ wrapper_filter_average_block2d_8_c(in, kInputStride, |
+ filters[filter_x], filters[filter_y], |
+ ref, kOutputStride, |
+ Width(), Height()); |
if (filters == eighttap_smooth || (filter_x && filter_y)) |
ASM_REGISTER_STATE_CHECK( |
@@ -433,7 +719,8 @@ |
for (int y = 0; y < Height(); ++y) |
for (int x = 0; x < Width(); ++x) |
- ASSERT_EQ(ref[y * kOutputStride + x], out[y * kOutputStride + x]) |
+ ASSERT_EQ(lookup(ref, y * kOutputStride + x), |
+ lookup(out, y * kOutputStride + x)) |
<< "mismatch at (" << x << "," << y << "), " |
<< "filters (" << filter_bank << "," |
<< filter_x << "," << filter_y << ")"; |
@@ -442,6 +729,103 @@ |
} |
} |
+TEST_P(ConvolveTest, FilterExtremes) { |
+ uint8_t *const in = input(); |
+ uint8_t *const out = output(); |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ uint8_t ref8[kOutputStride * kMaxDimension]; |
+ uint16_t ref16[kOutputStride * kMaxDimension]; |
+ uint8_t *ref; |
+ if (UUT_->use_high_bd_ == 0) { |
+ ref = ref8; |
+ } else { |
+ ref = CONVERT_TO_BYTEPTR(ref16); |
+ } |
+#else |
+ uint8_t ref[kOutputStride * kMaxDimension]; |
+#endif |
+ |
+ // Seed ref and out with identical random pixel values. |
+ ::libvpx_test::ACMRandom prng; |
+ for (int y = 0; y < Height(); ++y) { |
+ for (int x = 0; x < Width(); ++x) { |
+ uint16_t r; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ if (UUT_->use_high_bd_ == 0 || UUT_->use_high_bd_ == 8) { |
+ r = prng.Rand8Extremes(); |
+ } else { |
+ r = prng.Rand16() & mask_; |
+ } |
+#else |
+ r = prng.Rand8Extremes(); |
+#endif |
+ assign_val(out, y * kOutputStride + x, r); |
+ assign_val(ref, y * kOutputStride + x, r); |
+ } |
+ } |
+ // Drive 8x8 input patches of extreme values (0 or max) from seed bit patterns. |
+ for (int axis = 0; axis < 2; axis++) { |
+ int seed_val = 0; |
+ while (seed_val < 256) {  // Each pass consumes 8 consecutive seed values. |
+ for (int y = 0; y < 8; ++y) { |
+ for (int x = 0; x < 8; ++x) { |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ assign_val(in, y * kInputStride + x - SUBPEL_TAPS / 2 + 1, |
+ ((seed_val >> (axis ? y : x)) & 1) * mask_); |
+#else |
+ assign_val(in, y * kInputStride + x - SUBPEL_TAPS / 2 + 1, |
+ ((seed_val >> (axis ? y : x)) & 1) * 255); |
+#endif |
+ if (axis) seed_val++; |
+ } |
+ if (axis) |
+ seed_val -= 8; |
+ else |
+ seed_val++; |
+ } |
+ if (axis) seed_val += 8; |
+ |
+ for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { |
+ const InterpKernel *filters = |
+ vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank)); |
+ const InterpKernel *const eighttap_smooth = |
+ vp9_get_interp_kernel(EIGHTTAP_SMOOTH); |
+ for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { |
+ for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { |
+ wrapper_filter_block2d_8_c(in, kInputStride, |
+ filters[filter_x], filters[filter_y], |
+ ref, kOutputStride, |
+ Width(), Height()); |
+ if (filters == eighttap_smooth || (filter_x && filter_y)) |
+ ASM_REGISTER_STATE_CHECK( |
+ UUT_->hv8_(in, kInputStride, out, kOutputStride, |
+ filters[filter_x], 16, filters[filter_y], 16, |
+ Width(), Height())); |
+ else if (filter_y) |
+ ASM_REGISTER_STATE_CHECK( |
+ UUT_->v8_(in, kInputStride, out, kOutputStride, |
+ kInvalidFilter, 16, filters[filter_y], 16, |
+ Width(), Height())); |
+ else |
+ ASM_REGISTER_STATE_CHECK( |
+ UUT_->h8_(in, kInputStride, out, kOutputStride, |
+ filters[filter_x], 16, kInvalidFilter, 16, |
+ Width(), Height())); |
+ |
+ for (int y = 0; y < Height(); ++y) |
+ for (int x = 0; x < Width(); ++x) |
+ ASSERT_EQ(lookup(ref, y * kOutputStride + x), |
+ lookup(out, y * kOutputStride + x)) |
+ << "mismatch at (" << x << "," << y << "), " |
+ << "filters (" << filter_bank << "," |
+ << filter_x << "," << filter_y << ")"; |
+ } |
+ } |
+ } |
+ } |
+ } |
+} |
+ |
DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = { |
{ 0, 0, 0, 0, 0, 0, 0, 128}, |
{ 0, 0, 0, 0, 0, 0, 128}, |
@@ -505,7 +889,8 @@ |
kPixelSelected + ((kInitialSubPelOffset |
+ kFilterPeriodAdjust * kInputPixelStep) |
>> SUBPEL_BITS); |
- ASSERT_EQ(in[ref_x], out[x]) << "x == " << x << "width = " << Width(); |
+ ASSERT_EQ(lookup(in, ref_x), lookup(out, x)) |
+ << "x == " << x << "width = " << Width(); |
} |
/* Test the vertical filter. */ |
@@ -520,7 +905,8 @@ |
kPixelSelected + ((kInitialSubPelOffset |
+ kFilterPeriodAdjust * kInputPixelStep) |
>> SUBPEL_BITS); |
- ASSERT_EQ(in[ref_y * kInputStride], out[y * kInputStride]) << "y == " << y; |
+ ASSERT_EQ(lookup(in, ref_y * kInputStride), lookup(out, y * kInputStride)) |
+ << "y == " << y; |
} |
/* Test the horizontal and vertical filters in combination. */ |
@@ -543,7 +929,8 @@ |
+ kFilterPeriodAdjustX * kInputPixelStep) |
>> SUBPEL_BITS); |
- ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x]) |
+ ASSERT_EQ(lookup(in, ref_y * kInputStride + ref_x), |
+ lookup(out, y * kOutputStride + x)) |
<< "x == " << x << ", y == " << y; |
} |
} |
@@ -570,7 +957,8 @@ |
for (int y = 0; y < Height(); ++y) { |
for (int x = 0; x < Width(); ++x) { |
- ASSERT_EQ(in[y * kInputStride + x], out[y * kOutputStride + x]) |
+ ASSERT_EQ(lookup(in, y * kInputStride + x), |
+ lookup(out, y * kOutputStride + x)) |
<< "x == " << x << ", y == " << y |
<< ", frac == " << frac << ", step == " << step; |
} |
@@ -581,10 +969,480 @@ |
using std::tr1::make_tuple; |
+#if CONFIG_VP9_HIGHBITDEPTH |
+#if HAVE_SSE2 && ARCH_X86_64 |
+void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, |
+ w, h, 8); |
+} |
+ |
+void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 8); |
+} |
+ |
+void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 8); |
+} |
+ |
+void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, |
+ w, h, 8); |
+} |
+ |
+void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 8); |
+} |
+ |
+void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 8); |
+} |
+ |
+void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 10); |
+} |
+ |
+void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 10); |
+} |
+ |
+void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 10); |
+} |
+ |
+void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 10); |
+} |
+ |
+void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 10); |
+} |
+ |
+void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, |
+ w, h, 10); |
+} |
+ |
+void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, |
+ w, h, 12); |
+} |
+ |
+void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, |
+ w, h, 12); |
+} |
+ |
+void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, |
+ w, h, 12); |
+} |
+ |
+void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 12); |
+} |
+ |
+void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 12); |
+} |
+ |
+void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 12); |
+} |
+#endif // HAVE_SSE2 && ARCH_X86_64 |
+ |
+void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 8); |
+} |
+ |
+void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 8); |
+} |
+ |
+void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 8); |
+} |
+ |
+void wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride, |
+ uint8_t *dst, ptrdiff_t dst_stride, |
+ const int16_t *filter_x, |
+ int filter_x_stride, |
+ const int16_t *filter_y, |
+ int filter_y_stride, |
+ int w, int h) { |
+ vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x, |
+ filter_x_stride, filter_y, filter_y_stride, w, h, 8); |
+} |
+ |
+// Adapter matching the ConvolveFunc parameter list: hands every argument to |
+// vp9_high_convolve8_c unchanged and appends 8 as the final argument |
+// (presumably the bit depth -- confirm against the vp9 prototype). |
+void wrap_convolve8_c_8( |
+    const uint8_t *src, ptrdiff_t src_stride, |
+    uint8_t *dst, ptrdiff_t dst_stride, |
+    const int16_t *filter_x, int filter_x_stride, |
+    const int16_t *filter_y, int filter_y_stride, |
+    int w, int h) { |
+  const int bd = 8; |
+  vp9_high_convolve8_c( |
+      src, src_stride, dst, dst_stride, |
+      filter_x, filter_x_stride, filter_y, filter_y_stride, |
+      w, h, bd); |
+} |
+ |
+// Adapter matching the ConvolveFunc parameter list: hands every argument to |
+// vp9_high_convolve8_avg_c unchanged and appends 8 as the final argument |
+// (presumably the bit depth -- confirm against the vp9 prototype). |
+void wrap_convolve8_avg_c_8( |
+    const uint8_t *src, ptrdiff_t src_stride, |
+    uint8_t *dst, ptrdiff_t dst_stride, |
+    const int16_t *filter_x, int filter_x_stride, |
+    const int16_t *filter_y, int filter_y_stride, |
+    int w, int h) { |
+  const int bd = 8; |
+  vp9_high_convolve8_avg_c( |
+      src, src_stride, dst, dst_stride, |
+      filter_x, filter_x_stride, filter_y, filter_y_stride, |
+      w, h, bd); |
+} |
+ |
+// Adapter matching the ConvolveFunc parameter list: hands every argument to |
+// vp9_high_convolve8_horiz_c unchanged and appends 10 as the final argument |
+// (presumably the bit depth -- confirm against the vp9 prototype). |
+void wrap_convolve8_horiz_c_10( |
+    const uint8_t *src, ptrdiff_t src_stride, |
+    uint8_t *dst, ptrdiff_t dst_stride, |
+    const int16_t *filter_x, int filter_x_stride, |
+    const int16_t *filter_y, int filter_y_stride, |
+    int w, int h) { |
+  const int bd = 10; |
+  vp9_high_convolve8_horiz_c( |
+      src, src_stride, dst, dst_stride, |
+      filter_x, filter_x_stride, filter_y, filter_y_stride, |
+      w, h, bd); |
+} |
+ |
+// Adapter matching the ConvolveFunc parameter list: hands every argument to |
+// vp9_high_convolve8_avg_horiz_c unchanged and appends 10 as the final |
+// argument (presumably the bit depth -- confirm against the vp9 prototype). |
+void wrap_convolve8_avg_horiz_c_10( |
+    const uint8_t *src, ptrdiff_t src_stride, |
+    uint8_t *dst, ptrdiff_t dst_stride, |
+    const int16_t *filter_x, int filter_x_stride, |
+    const int16_t *filter_y, int filter_y_stride, |
+    int w, int h) { |
+  const int bd = 10; |
+  vp9_high_convolve8_avg_horiz_c( |
+      src, src_stride, dst, dst_stride, |
+      filter_x, filter_x_stride, filter_y, filter_y_stride, |
+      w, h, bd); |
+} |
+ |
+// Adapter matching the ConvolveFunc parameter list: hands every argument to |
+// vp9_high_convolve8_vert_c unchanged and appends 10 as the final argument |
+// (presumably the bit depth -- confirm against the vp9 prototype). |
+void wrap_convolve8_vert_c_10( |
+    const uint8_t *src, ptrdiff_t src_stride, |
+    uint8_t *dst, ptrdiff_t dst_stride, |
+    const int16_t *filter_x, int filter_x_stride, |
+    const int16_t *filter_y, int filter_y_stride, |
+    int w, int h) { |
+  const int bd = 10; |
+  vp9_high_convolve8_vert_c( |
+      src, src_stride, dst, dst_stride, |
+      filter_x, filter_x_stride, filter_y, filter_y_stride, |
+      w, h, bd); |
+} |
+ |
+// Adapter matching the ConvolveFunc parameter list: hands every argument to |
+// vp9_high_convolve8_avg_vert_c unchanged and appends 10 as the final |
+// argument (presumably the bit depth -- confirm against the vp9 prototype). |
+void wrap_convolve8_avg_vert_c_10( |
+    const uint8_t *src, ptrdiff_t src_stride, |
+    uint8_t *dst, ptrdiff_t dst_stride, |
+    const int16_t *filter_x, int filter_x_stride, |
+    const int16_t *filter_y, int filter_y_stride, |
+    int w, int h) { |
+  const int bd = 10; |
+  vp9_high_convolve8_avg_vert_c( |
+      src, src_stride, dst, dst_stride, |
+      filter_x, filter_x_stride, filter_y, filter_y_stride, |
+      w, h, bd); |
+} |
+ |
+// Adapter matching the ConvolveFunc parameter list: hands every argument to |
+// vp9_high_convolve8_c unchanged and appends 10 as the final argument |
+// (presumably the bit depth -- confirm against the vp9 prototype). |
+void wrap_convolve8_c_10( |
+    const uint8_t *src, ptrdiff_t src_stride, |
+    uint8_t *dst, ptrdiff_t dst_stride, |
+    const int16_t *filter_x, int filter_x_stride, |
+    const int16_t *filter_y, int filter_y_stride, |
+    int w, int h) { |
+  const int bd = 10; |
+  vp9_high_convolve8_c( |
+      src, src_stride, dst, dst_stride, |
+      filter_x, filter_x_stride, filter_y, filter_y_stride, |
+      w, h, bd); |
+} |
+ |
+// Adapter matching the ConvolveFunc parameter list: hands every argument to |
+// vp9_high_convolve8_avg_c unchanged and appends 10 as the final argument |
+// (presumably the bit depth -- confirm against the vp9 prototype). |
+void wrap_convolve8_avg_c_10( |
+    const uint8_t *src, ptrdiff_t src_stride, |
+    uint8_t *dst, ptrdiff_t dst_stride, |
+    const int16_t *filter_x, int filter_x_stride, |
+    const int16_t *filter_y, int filter_y_stride, |
+    int w, int h) { |
+  const int bd = 10; |
+  vp9_high_convolve8_avg_c( |
+      src, src_stride, dst, dst_stride, |
+      filter_x, filter_x_stride, filter_y, filter_y_stride, |
+      w, h, bd); |
+} |
+ |
+// Adapter matching the ConvolveFunc parameter list: hands every argument to |
+// vp9_high_convolve8_horiz_c unchanged and appends 12 as the final argument |
+// (presumably the bit depth -- confirm against the vp9 prototype). |
+void wrap_convolve8_horiz_c_12( |
+    const uint8_t *src, ptrdiff_t src_stride, |
+    uint8_t *dst, ptrdiff_t dst_stride, |
+    const int16_t *filter_x, int filter_x_stride, |
+    const int16_t *filter_y, int filter_y_stride, |
+    int w, int h) { |
+  const int bd = 12; |
+  vp9_high_convolve8_horiz_c( |
+      src, src_stride, dst, dst_stride, |
+      filter_x, filter_x_stride, filter_y, filter_y_stride, |
+      w, h, bd); |
+} |
+ |
+// Adapter matching the ConvolveFunc parameter list: hands every argument to |
+// vp9_high_convolve8_avg_horiz_c unchanged and appends 12 as the final |
+// argument (presumably the bit depth -- confirm against the vp9 prototype). |
+void wrap_convolve8_avg_horiz_c_12( |
+    const uint8_t *src, ptrdiff_t src_stride, |
+    uint8_t *dst, ptrdiff_t dst_stride, |
+    const int16_t *filter_x, int filter_x_stride, |
+    const int16_t *filter_y, int filter_y_stride, |
+    int w, int h) { |
+  const int bd = 12; |
+  vp9_high_convolve8_avg_horiz_c( |
+      src, src_stride, dst, dst_stride, |
+      filter_x, filter_x_stride, filter_y, filter_y_stride, |
+      w, h, bd); |
+} |
+ |
+// Adapter matching the ConvolveFunc parameter list: hands every argument to |
+// vp9_high_convolve8_vert_c unchanged and appends 12 as the final argument |
+// (presumably the bit depth -- confirm against the vp9 prototype). |
+void wrap_convolve8_vert_c_12( |
+    const uint8_t *src, ptrdiff_t src_stride, |
+    uint8_t *dst, ptrdiff_t dst_stride, |
+    const int16_t *filter_x, int filter_x_stride, |
+    const int16_t *filter_y, int filter_y_stride, |
+    int w, int h) { |
+  const int bd = 12; |
+  vp9_high_convolve8_vert_c( |
+      src, src_stride, dst, dst_stride, |
+      filter_x, filter_x_stride, filter_y, filter_y_stride, |
+      w, h, bd); |
+} |
+ |
+// Adapter matching the ConvolveFunc parameter list: hands every argument to |
+// vp9_high_convolve8_avg_vert_c unchanged and appends 12 as the final |
+// argument (presumably the bit depth -- confirm against the vp9 prototype). |
+void wrap_convolve8_avg_vert_c_12( |
+    const uint8_t *src, ptrdiff_t src_stride, |
+    uint8_t *dst, ptrdiff_t dst_stride, |
+    const int16_t *filter_x, int filter_x_stride, |
+    const int16_t *filter_y, int filter_y_stride, |
+    int w, int h) { |
+  const int bd = 12; |
+  vp9_high_convolve8_avg_vert_c( |
+      src, src_stride, dst, dst_stride, |
+      filter_x, filter_x_stride, filter_y, filter_y_stride, |
+      w, h, bd); |
+} |
+ |
+// Adapter matching the ConvolveFunc parameter list: hands every argument to |
+// vp9_high_convolve8_c unchanged and appends 12 as the final argument |
+// (presumably the bit depth -- confirm against the vp9 prototype). |
+void wrap_convolve8_c_12( |
+    const uint8_t *src, ptrdiff_t src_stride, |
+    uint8_t *dst, ptrdiff_t dst_stride, |
+    const int16_t *filter_x, int filter_x_stride, |
+    const int16_t *filter_y, int filter_y_stride, |
+    int w, int h) { |
+  const int bd = 12; |
+  vp9_high_convolve8_c( |
+      src, src_stride, dst, dst_stride, |
+      filter_x, filter_x_stride, filter_y, filter_y_stride, |
+      w, h, bd); |
+} |
+ |
+// Adapter matching the ConvolveFunc parameter list: hands every argument to |
+// vp9_high_convolve8_avg_c unchanged and appends 12 as the final argument |
+// (presumably the bit depth -- confirm against the vp9 prototype). |
+void wrap_convolve8_avg_c_12( |
+    const uint8_t *src, ptrdiff_t src_stride, |
+    uint8_t *dst, ptrdiff_t dst_stride, |
+    const int16_t *filter_x, int filter_x_stride, |
+    const int16_t *filter_y, int filter_y_stride, |
+    int w, int h) { |
+  const int bd = 12; |
+  vp9_high_convolve8_avg_c( |
+      src, src_stride, dst, dst_stride, |
+      filter_x, filter_x_stride, filter_y, filter_y_stride, |
+      w, h, bd); |
+} |
+ |
+// Function table built from the wrap_convolve8_*_c_8 wrappers; the trailing 8 |
+// is the constructor's bd argument, which is stored in use_high_bd_. |
const ConvolveFunctions convolve8_c( |
+    wrap_convolve8_horiz_c_8,      // h8 |
+    wrap_convolve8_avg_horiz_c_8,  // h8_avg |
+    wrap_convolve8_vert_c_8,       // v8 |
+    wrap_convolve8_avg_vert_c_8,   // v8_avg |
+    wrap_convolve8_c_8,            // hv8 |
+    wrap_convolve8_avg_c_8,        // hv8_avg |
+    8);                            // bd |
+// One ConvolveTest parameter per tuple below; the two leading ints are |
+// presumably the block width and height -- confirm against the fixture. |
+INSTANTIATE_TEST_CASE_P( |
+    C_8, ConvolveTest, |
+    ::testing::Values( |
+        make_tuple(4, 4, &convolve8_c), |
+        make_tuple(8, 4, &convolve8_c), |
+        make_tuple(4, 8, &convolve8_c), |
+        make_tuple(8, 8, &convolve8_c), |
+        make_tuple(16, 8, &convolve8_c), |
+        make_tuple(8, 16, &convolve8_c), |
+        make_tuple(16, 16, &convolve8_c), |
+        make_tuple(32, 16, &convolve8_c), |
+        make_tuple(16, 32, &convolve8_c), |
+        make_tuple(32, 32, &convolve8_c), |
+        make_tuple(64, 32, &convolve8_c), |
+        make_tuple(32, 64, &convolve8_c), |
+        make_tuple(64, 64, &convolve8_c))); |
+// Function table built from the wrap_convolve8_*_c_10 wrappers; the trailing |
+// 10 is the constructor's bd argument, which is stored in use_high_bd_. |
+const ConvolveFunctions convolve10_c( |
+    wrap_convolve8_horiz_c_10,      // h8 |
+    wrap_convolve8_avg_horiz_c_10,  // h8_avg |
+    wrap_convolve8_vert_c_10,       // v8 |
+    wrap_convolve8_avg_vert_c_10,   // v8_avg |
+    wrap_convolve8_c_10,            // hv8 |
+    wrap_convolve8_avg_c_10,        // hv8_avg |
+    10);                            // bd |
+// One ConvolveTest parameter per tuple below; the two leading ints are |
+// presumably the block width and height -- confirm against the fixture. |
+INSTANTIATE_TEST_CASE_P( |
+    C_10, ConvolveTest, |
+    ::testing::Values( |
+        make_tuple(4, 4, &convolve10_c), |
+        make_tuple(8, 4, &convolve10_c), |
+        make_tuple(4, 8, &convolve10_c), |
+        make_tuple(8, 8, &convolve10_c), |
+        make_tuple(16, 8, &convolve10_c), |
+        make_tuple(8, 16, &convolve10_c), |
+        make_tuple(16, 16, &convolve10_c), |
+        make_tuple(32, 16, &convolve10_c), |
+        make_tuple(16, 32, &convolve10_c), |
+        make_tuple(32, 32, &convolve10_c), |
+        make_tuple(64, 32, &convolve10_c), |
+        make_tuple(32, 64, &convolve10_c), |
+        make_tuple(64, 64, &convolve10_c))); |
+// Function table built from the wrap_convolve8_*_c_12 wrappers; the trailing |
+// 12 is the constructor's bd argument, which is stored in use_high_bd_. |
+const ConvolveFunctions convolve12_c( |
+    wrap_convolve8_horiz_c_12,      // h8 |
+    wrap_convolve8_avg_horiz_c_12,  // h8_avg |
+    wrap_convolve8_vert_c_12,       // v8 |
+    wrap_convolve8_avg_vert_c_12,   // v8_avg |
+    wrap_convolve8_c_12,            // hv8 |
+    wrap_convolve8_avg_c_12,        // hv8_avg |
+    12);                            // bd |
+// One ConvolveTest parameter per tuple below; the two leading ints are |
+// presumably the block width and height -- confirm against the fixture. |
+INSTANTIATE_TEST_CASE_P( |
+    C_12, ConvolveTest, |
+    ::testing::Values( |
+        make_tuple(4, 4, &convolve12_c), |
+        make_tuple(8, 4, &convolve12_c), |
+        make_tuple(4, 8, &convolve12_c), |
+        make_tuple(8, 8, &convolve12_c), |
+        make_tuple(16, 8, &convolve12_c), |
+        make_tuple(8, 16, &convolve12_c), |
+        make_tuple(16, 16, &convolve12_c), |
+        make_tuple(32, 16, &convolve12_c), |
+        make_tuple(16, 32, &convolve12_c), |
+        make_tuple(32, 32, &convolve12_c), |
+        make_tuple(64, 32, &convolve12_c), |
+        make_tuple(32, 64, &convolve12_c), |
+        make_tuple(64, 64, &convolve12_c))); |
+ |
+#else |
+ |
+const ConvolveFunctions convolve8_c( |
vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c, |
vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c, |
- vp9_convolve8_c, vp9_convolve8_avg_c); |
+ vp9_convolve8_c, vp9_convolve8_avg_c, 0); |
INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values( |
make_tuple(4, 4, &convolve8_c), |
@@ -600,12 +1458,69 @@ |
make_tuple(64, 32, &convolve8_c), |
make_tuple(32, 64, &convolve8_c), |
make_tuple(64, 64, &convolve8_c))); |
+#endif |
-#if HAVE_SSE2 |
+#if HAVE_SSE2 && ARCH_X86_64 |
+#if CONFIG_VP9_HIGHBITDEPTH |
+// Function table built from the wrap_convolve8_*_sse2_8 wrappers; the |
+// trailing 8 is the constructor's bd argument, stored in use_high_bd_. |
const ConvolveFunctions convolve8_sse2( |
+    wrap_convolve8_horiz_sse2_8,      // h8 |
+    wrap_convolve8_avg_horiz_sse2_8,  // h8_avg |
+    wrap_convolve8_vert_sse2_8,       // v8 |
+    wrap_convolve8_avg_vert_sse2_8,   // v8_avg |
+    wrap_convolve8_sse2_8,            // hv8 |
+    wrap_convolve8_avg_sse2_8,        // hv8_avg |
+    8);                               // bd |
+// One ConvolveTest parameter per tuple below; the two leading ints are |
+// presumably the block width and height -- confirm against the fixture. |
+INSTANTIATE_TEST_CASE_P( |
+    SSE2_8, ConvolveTest, |
+    ::testing::Values( |
+        make_tuple(4, 4, &convolve8_sse2), |
+        make_tuple(8, 4, &convolve8_sse2), |
+        make_tuple(4, 8, &convolve8_sse2), |
+        make_tuple(8, 8, &convolve8_sse2), |
+        make_tuple(16, 8, &convolve8_sse2), |
+        make_tuple(8, 16, &convolve8_sse2), |
+        make_tuple(16, 16, &convolve8_sse2), |
+        make_tuple(32, 16, &convolve8_sse2), |
+        make_tuple(16, 32, &convolve8_sse2), |
+        make_tuple(32, 32, &convolve8_sse2), |
+        make_tuple(64, 32, &convolve8_sse2), |
+        make_tuple(32, 64, &convolve8_sse2), |
+        make_tuple(64, 64, &convolve8_sse2))); |
+// Function table built from the wrap_convolve8_*_sse2_10 wrappers; the |
+// trailing 10 is the constructor's bd argument, stored in use_high_bd_. |
+const ConvolveFunctions convolve10_sse2( |
+    wrap_convolve8_horiz_sse2_10,      // h8 |
+    wrap_convolve8_avg_horiz_sse2_10,  // h8_avg |
+    wrap_convolve8_vert_sse2_10,       // v8 |
+    wrap_convolve8_avg_vert_sse2_10,   // v8_avg |
+    wrap_convolve8_sse2_10,            // hv8 |
+    wrap_convolve8_avg_sse2_10,        // hv8_avg |
+    10);                               // bd |
+// One ConvolveTest parameter per tuple below; the two leading ints are |
+// presumably the block width and height -- confirm against the fixture. |
+INSTANTIATE_TEST_CASE_P( |
+    SSE2_10, ConvolveTest, |
+    ::testing::Values( |
+        make_tuple(4, 4, &convolve10_sse2), |
+        make_tuple(8, 4, &convolve10_sse2), |
+        make_tuple(4, 8, &convolve10_sse2), |
+        make_tuple(8, 8, &convolve10_sse2), |
+        make_tuple(16, 8, &convolve10_sse2), |
+        make_tuple(8, 16, &convolve10_sse2), |
+        make_tuple(16, 16, &convolve10_sse2), |
+        make_tuple(32, 16, &convolve10_sse2), |
+        make_tuple(16, 32, &convolve10_sse2), |
+        make_tuple(32, 32, &convolve10_sse2), |
+        make_tuple(64, 32, &convolve10_sse2), |
+        make_tuple(32, 64, &convolve10_sse2), |
+        make_tuple(64, 64, &convolve10_sse2))); |
+// Function table built from the wrap_convolve8_*_sse2_12 wrappers; the |
+// trailing 12 is the constructor's bd argument, stored in use_high_bd_. |
+const ConvolveFunctions convolve12_sse2( |
+    wrap_convolve8_horiz_sse2_12,      // h8 |
+    wrap_convolve8_avg_horiz_sse2_12,  // h8_avg |
+    wrap_convolve8_vert_sse2_12,       // v8 |
+    wrap_convolve8_avg_vert_sse2_12,   // v8_avg |
+    wrap_convolve8_sse2_12,            // hv8 |
+    wrap_convolve8_avg_sse2_12,        // hv8_avg |
+    12);                               // bd |
+// One ConvolveTest parameter per tuple below; the two leading ints are |
+// presumably the block width and height -- confirm against the fixture. |
+INSTANTIATE_TEST_CASE_P( |
+    SSE2_12, ConvolveTest, |
+    ::testing::Values( |
+        make_tuple(4, 4, &convolve12_sse2), |
+        make_tuple(8, 4, &convolve12_sse2), |
+        make_tuple(4, 8, &convolve12_sse2), |
+        make_tuple(8, 8, &convolve12_sse2), |
+        make_tuple(16, 8, &convolve12_sse2), |
+        make_tuple(8, 16, &convolve12_sse2), |
+        make_tuple(16, 16, &convolve12_sse2), |
+        make_tuple(32, 16, &convolve12_sse2), |
+        make_tuple(16, 32, &convolve12_sse2), |
+        make_tuple(32, 32, &convolve12_sse2), |
+        make_tuple(64, 32, &convolve12_sse2), |
+        make_tuple(32, 64, &convolve12_sse2), |
+        make_tuple(64, 64, &convolve12_sse2))); |
+#else |
+const ConvolveFunctions convolve8_sse2( |
vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2, |
vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2, |
- vp9_convolve8_sse2, vp9_convolve8_avg_sse2); |
+ vp9_convolve8_sse2, vp9_convolve8_avg_sse2, 0); |
INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values( |
make_tuple(4, 4, &convolve8_sse2), |
@@ -621,13 +1536,14 @@ |
make_tuple(64, 32, &convolve8_sse2), |
make_tuple(32, 64, &convolve8_sse2), |
make_tuple(64, 64, &convolve8_sse2))); |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
#endif |
#if HAVE_SSSE3 |
const ConvolveFunctions convolve8_ssse3( |
vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3, |
vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3, |
- vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3); |
+ vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3, 0); |
INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values( |
make_tuple(4, 4, &convolve8_ssse3), |
@@ -645,11 +1561,11 @@ |
make_tuple(64, 64, &convolve8_ssse3))); |
#endif |
-#if HAVE_AVX2 |
+#if HAVE_AVX2 && HAVE_SSSE3 |
const ConvolveFunctions convolve8_avx2( |
vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3, |
vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3, |
- vp9_convolve8_avx2, vp9_convolve8_avg_ssse3); |
+ vp9_convolve8_avx2, vp9_convolve8_avg_ssse3, 0); |
INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values( |
make_tuple(4, 4, &convolve8_avx2), |
@@ -665,13 +1581,13 @@ |
make_tuple(64, 32, &convolve8_avx2), |
make_tuple(32, 64, &convolve8_avx2), |
make_tuple(64, 64, &convolve8_avx2))); |
-#endif |
+#endif // HAVE_AVX2 && HAVE_SSSE3 |
#if HAVE_NEON_ASM |
const ConvolveFunctions convolve8_neon( |
vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon, |
vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon, |
- vp9_convolve8_neon, vp9_convolve8_avg_neon); |
+ vp9_convolve8_neon, vp9_convolve8_avg_neon, 0); |
INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values( |
make_tuple(4, 4, &convolve8_neon), |
@@ -693,7 +1609,7 @@ |
const ConvolveFunctions convolve8_dspr2( |
vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2, |
vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2, |
- vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2); |
+ vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2, 0); |
INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values( |
make_tuple(4, 4, &convolve8_dspr2), |