OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <string.h> | 11 #include <string.h> |
12 #include "test/acm_random.h" | 12 #include "test/acm_random.h" |
13 #include "test/register_state_check.h" | 13 #include "test/register_state_check.h" |
14 #include "test/util.h" | 14 #include "test/util.h" |
15 #include "third_party/googletest/src/include/gtest/gtest.h" | 15 #include "third_party/googletest/src/include/gtest/gtest.h" |
16 | 16 |
17 #include "./vpx_config.h" | 17 #include "./vpx_config.h" |
18 #include "./vp9_rtcd.h" | 18 #include "./vp9_rtcd.h" |
19 #include "vp9/common/vp9_filter.h" | 19 #include "vp9/common/vp9_filter.h" |
20 #include "vpx_mem/vpx_mem.h" | 20 #include "vpx_mem/vpx_mem.h" |
21 #include "vpx_ports/mem.h" | 21 #include "vpx_ports/mem.h" |
22 | 22 |
23 namespace { | 23 namespace { |
| 24 |
| 25 static const unsigned int kMaxDimension = 64; |
| 26 |
24 typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride, | 27 typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride, |
25 uint8_t *dst, ptrdiff_t dst_stride, | 28 uint8_t *dst, ptrdiff_t dst_stride, |
26 const int16_t *filter_x, int filter_x_stride, | 29 const int16_t *filter_x, int filter_x_stride, |
27 const int16_t *filter_y, int filter_y_stride, | 30 const int16_t *filter_y, int filter_y_stride, |
28 int w, int h); | 31 int w, int h); |
29 | 32 |
30 struct ConvolveFunctions { | 33 struct ConvolveFunctions { |
31 ConvolveFunctions(ConvolveFunc h8, ConvolveFunc h8_avg, | 34 ConvolveFunctions(ConvolveFunc h8, ConvolveFunc h8_avg, |
32 ConvolveFunc v8, ConvolveFunc v8_avg, | 35 ConvolveFunc v8, ConvolveFunc v8_avg, |
33 ConvolveFunc hv8, ConvolveFunc hv8_avg) | 36 ConvolveFunc hv8, ConvolveFunc hv8_avg, |
| 37 int bd) |
34 : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg), | 38 : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg), |
35 hv8_avg_(hv8_avg) {} | 39 hv8_avg_(hv8_avg), use_high_bd_(bd) {} |
36 | 40 |
37 ConvolveFunc h8_; | 41 ConvolveFunc h8_; |
38 ConvolveFunc v8_; | 42 ConvolveFunc v8_; |
39 ConvolveFunc hv8_; | 43 ConvolveFunc hv8_; |
40 ConvolveFunc h8_avg_; | 44 ConvolveFunc h8_avg_; |
41 ConvolveFunc v8_avg_; | 45 ConvolveFunc v8_avg_; |
42 ConvolveFunc hv8_avg_; | 46 ConvolveFunc hv8_avg_; |
| 47 int use_high_bd_; // 0 if high bitdepth not used, else the actual bit depth. |
43 }; | 48 }; |
44 | 49 |
45 typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam; | 50 typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam; |
46 | 51 |
47 // Reference 8-tap subpixel filter, slightly modified to fit into this test. | 52 // Reference 8-tap subpixel filter, slightly modified to fit into this test. |
48 #define VP9_FILTER_WEIGHT 128 | 53 #define VP9_FILTER_WEIGHT 128 |
49 #define VP9_FILTER_SHIFT 7 | 54 #define VP9_FILTER_SHIFT 7 |
50 uint8_t clip_pixel(int x) { | 55 uint8_t clip_pixel(int x) { |
51 return x < 0 ? 0 : | 56 return x < 0 ? 0 : |
52 x > 255 ? 255 : | 57 x > 255 ? 255 : |
53 x; | 58 x; |
54 } | 59 } |
55 | 60 |
56 void filter_block2d_8_c(const uint8_t *src_ptr, | 61 void filter_block2d_8_c(const uint8_t *src_ptr, |
57 const unsigned int src_stride, | 62 const unsigned int src_stride, |
58 const int16_t *HFilter, | 63 const int16_t *HFilter, |
59 const int16_t *VFilter, | 64 const int16_t *VFilter, |
60 uint8_t *dst_ptr, | 65 uint8_t *dst_ptr, |
61 unsigned int dst_stride, | 66 unsigned int dst_stride, |
62 unsigned int output_width, | 67 unsigned int output_width, |
63 unsigned int output_height) { | 68 unsigned int output_height) { |
64 // Between passes, we use an intermediate buffer whose height is extended to | 69 // Between passes, we use an intermediate buffer whose height is extended to |
65 // have enough horizontally filtered values as input for the vertical pass. | 70 // have enough horizontally filtered values as input for the vertical pass. |
66 // This buffer is allocated to be big enough for the largest block type we | 71 // This buffer is allocated to be big enough for the largest block type we |
67 // support. | 72 // support. |
68 const int kInterp_Extend = 4; | 73 const int kInterp_Extend = 4; |
69 const unsigned int intermediate_height = | 74 const unsigned int intermediate_height = |
70 (kInterp_Extend - 1) + output_height + kInterp_Extend; | 75 (kInterp_Extend - 1) + output_height + kInterp_Extend; |
| 76 unsigned int i, j; |
| 77 |
| 78 // Size of intermediate_buffer is max_intermediate_height * filter_max_width, |
| 79 // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height |
| 80 // + kInterp_Extend |
 | 81 // = 3 + 64 + 4 |
 | 82 // = 71 |
 | 83 // and filter_max_width = kMaxDimension = 64 |
| 84 // |
| 85 uint8_t intermediate_buffer[71 * kMaxDimension]; |
| 86 const int intermediate_next_stride = 1 - intermediate_height * output_width; |
| 87 |
| 88 // Horizontal pass (src -> transposed intermediate). |
| 89 uint8_t *output_ptr = intermediate_buffer; |
| 90 const int src_next_row_stride = src_stride - output_width; |
| 91 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); |
| 92 for (i = 0; i < intermediate_height; ++i) { |
| 93 for (j = 0; j < output_width; ++j) { |
| 94 // Apply filter... |
| 95 const int temp = (src_ptr[0] * HFilter[0]) + |
| 96 (src_ptr[1] * HFilter[1]) + |
| 97 (src_ptr[2] * HFilter[2]) + |
| 98 (src_ptr[3] * HFilter[3]) + |
| 99 (src_ptr[4] * HFilter[4]) + |
| 100 (src_ptr[5] * HFilter[5]) + |
| 101 (src_ptr[6] * HFilter[6]) + |
| 102 (src_ptr[7] * HFilter[7]) + |
| 103 (VP9_FILTER_WEIGHT >> 1); // Rounding |
| 104 |
| 105 // Normalize back to 0-255... |
| 106 *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT); |
| 107 ++src_ptr; |
| 108 output_ptr += intermediate_height; |
| 109 } |
| 110 src_ptr += src_next_row_stride; |
| 111 output_ptr += intermediate_next_stride; |
| 112 } |
| 113 |
| 114 // Vertical pass (transposed intermediate -> dst). |
| 115 src_ptr = intermediate_buffer; |
| 116 const int dst_next_row_stride = dst_stride - output_width; |
| 117 for (i = 0; i < output_height; ++i) { |
| 118 for (j = 0; j < output_width; ++j) { |
| 119 // Apply filter... |
| 120 const int temp = (src_ptr[0] * VFilter[0]) + |
| 121 (src_ptr[1] * VFilter[1]) + |
| 122 (src_ptr[2] * VFilter[2]) + |
| 123 (src_ptr[3] * VFilter[3]) + |
| 124 (src_ptr[4] * VFilter[4]) + |
| 125 (src_ptr[5] * VFilter[5]) + |
| 126 (src_ptr[6] * VFilter[6]) + |
| 127 (src_ptr[7] * VFilter[7]) + |
| 128 (VP9_FILTER_WEIGHT >> 1); // Rounding |
| 129 |
| 130 // Normalize back to 0-255... |
| 131 *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT); |
| 132 src_ptr += intermediate_height; |
| 133 } |
| 134 src_ptr += intermediate_next_stride; |
| 135 dst_ptr += dst_next_row_stride; |
| 136 } |
| 137 } |
| 138 |
| 139 void block2d_average_c(uint8_t *src, |
| 140 unsigned int src_stride, |
| 141 uint8_t *output_ptr, |
| 142 unsigned int output_stride, |
| 143 unsigned int output_width, |
| 144 unsigned int output_height) { |
| 145 unsigned int i, j; |
| 146 for (i = 0; i < output_height; ++i) { |
| 147 for (j = 0; j < output_width; ++j) { |
| 148 output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; |
| 149 } |
| 150 output_ptr += output_stride; |
| 151 } |
| 152 } |
| 153 |
| 154 void filter_average_block2d_8_c(const uint8_t *src_ptr, |
| 155 const unsigned int src_stride, |
| 156 const int16_t *HFilter, |
| 157 const int16_t *VFilter, |
| 158 uint8_t *dst_ptr, |
| 159 unsigned int dst_stride, |
| 160 unsigned int output_width, |
| 161 unsigned int output_height) { |
| 162 uint8_t tmp[kMaxDimension * kMaxDimension]; |
| 163 |
| 164 assert(output_width <= kMaxDimension); |
| 165 assert(output_height <= kMaxDimension); |
| 166 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64, |
| 167 output_width, output_height); |
| 168 block2d_average_c(tmp, 64, dst_ptr, dst_stride, |
| 169 output_width, output_height); |
| 170 } |
| 171 |
| 172 #if CONFIG_VP9_HIGHBITDEPTH |
| 173 void high_filter_block2d_8_c(const uint16_t *src_ptr, |
| 174 const unsigned int src_stride, |
| 175 const int16_t *HFilter, |
| 176 const int16_t *VFilter, |
| 177 uint16_t *dst_ptr, |
| 178 unsigned int dst_stride, |
| 179 unsigned int output_width, |
| 180 unsigned int output_height, |
| 181 int bd) { |
| 182 // Between passes, we use an intermediate buffer whose height is extended to |
| 183 // have enough horizontally filtered values as input for the vertical pass. |
| 184 // This buffer is allocated to be big enough for the largest block type we |
| 185 // support. |
| 186 const int kInterp_Extend = 4; |
| 187 const unsigned int intermediate_height = |
| 188 (kInterp_Extend - 1) + output_height + kInterp_Extend; |
71 | 189 |
72 /* Size of intermediate_buffer is max_intermediate_height * filter_max_width, | 190 /* Size of intermediate_buffer is max_intermediate_height * filter_max_width, |
73 * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height | 191 * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height |
74 * + kInterp_Extend | 192 * + kInterp_Extend |
75 * = 3 + 16 + 4 | 193 * = 3 + 16 + 4 |
76 * = 23 | 193 * = 3 + 64 + 4 |
77 * and filter_max_width = 16 | 194 * = 71 |
78 */ | 195 * and filter_max_width = kMaxDimension = 64 |
79 uint8_t intermediate_buffer[71 * 64]; | 197 uint16_t intermediate_buffer[71 * kMaxDimension]; |
80 const int intermediate_next_stride = 1 - intermediate_height * output_width; | 198 const int intermediate_next_stride = 1 - intermediate_height * output_width; |
81 | 199 |
82 // Horizontal pass (src -> transposed intermediate). | 200 // Horizontal pass (src -> transposed intermediate). |
83 { | 201 { |
84 uint8_t *output_ptr = intermediate_buffer; | 202 uint16_t *output_ptr = intermediate_buffer; |
85 const int src_next_row_stride = src_stride - output_width; | 203 const int src_next_row_stride = src_stride - output_width; |
86 unsigned int i, j; | 204 unsigned int i, j; |
87 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); | 205 src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); |
88 for (i = 0; i < intermediate_height; ++i) { | 206 for (i = 0; i < intermediate_height; ++i) { |
89 for (j = 0; j < output_width; ++j) { | 207 for (j = 0; j < output_width; ++j) { |
90 // Apply filter... | 208 // Apply filter... |
91 const int temp = (src_ptr[0] * HFilter[0]) + | 209 const int temp = (src_ptr[0] * HFilter[0]) + |
92 (src_ptr[1] * HFilter[1]) + | 210 (src_ptr[1] * HFilter[1]) + |
93 (src_ptr[2] * HFilter[2]) + | 211 (src_ptr[2] * HFilter[2]) + |
94 (src_ptr[3] * HFilter[3]) + | 212 (src_ptr[3] * HFilter[3]) + |
95 (src_ptr[4] * HFilter[4]) + | 213 (src_ptr[4] * HFilter[4]) + |
96 (src_ptr[5] * HFilter[5]) + | 214 (src_ptr[5] * HFilter[5]) + |
97 (src_ptr[6] * HFilter[6]) + | 215 (src_ptr[6] * HFilter[6]) + |
98 (src_ptr[7] * HFilter[7]) + | 216 (src_ptr[7] * HFilter[7]) + |
99 (VP9_FILTER_WEIGHT >> 1); // Rounding | 217 (VP9_FILTER_WEIGHT >> 1); // Rounding |
100 | 218 |
101 // Normalize back to 0-255... | 219 // Normalize back to 0-((1 << bd) - 1)... |
102 *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT); | 220 *output_ptr = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd); |
103 ++src_ptr; | 221 ++src_ptr; |
104 output_ptr += intermediate_height; | 222 output_ptr += intermediate_height; |
105 } | 223 } |
106 src_ptr += src_next_row_stride; | 224 src_ptr += src_next_row_stride; |
107 output_ptr += intermediate_next_stride; | 225 output_ptr += intermediate_next_stride; |
108 } | 226 } |
109 } | 227 } |
110 | 228 |
111 // Vertical pass (transposed intermediate -> dst). | 229 // Vertical pass (transposed intermediate -> dst). |
112 { | 230 { |
113 uint8_t *src_ptr = intermediate_buffer; | 231 uint16_t *src_ptr = intermediate_buffer; |
114 const int dst_next_row_stride = dst_stride - output_width; | 232 const int dst_next_row_stride = dst_stride - output_width; |
115 unsigned int i, j; | 233 unsigned int i, j; |
116 for (i = 0; i < output_height; ++i) { | 234 for (i = 0; i < output_height; ++i) { |
117 for (j = 0; j < output_width; ++j) { | 235 for (j = 0; j < output_width; ++j) { |
118 // Apply filter... | 236 // Apply filter... |
119 const int temp = (src_ptr[0] * VFilter[0]) + | 237 const int temp = (src_ptr[0] * VFilter[0]) + |
120 (src_ptr[1] * VFilter[1]) + | 238 (src_ptr[1] * VFilter[1]) + |
121 (src_ptr[2] * VFilter[2]) + | 239 (src_ptr[2] * VFilter[2]) + |
122 (src_ptr[3] * VFilter[3]) + | 240 (src_ptr[3] * VFilter[3]) + |
123 (src_ptr[4] * VFilter[4]) + | 241 (src_ptr[4] * VFilter[4]) + |
124 (src_ptr[5] * VFilter[5]) + | 242 (src_ptr[5] * VFilter[5]) + |
125 (src_ptr[6] * VFilter[6]) + | 243 (src_ptr[6] * VFilter[6]) + |
126 (src_ptr[7] * VFilter[7]) + | 244 (src_ptr[7] * VFilter[7]) + |
127 (VP9_FILTER_WEIGHT >> 1); // Rounding | 245 (VP9_FILTER_WEIGHT >> 1); // Rounding |
128 | 246 |
129 // Normalize back to 0-255... | 247 // Normalize back to 0-((1 << bd) - 1)... |
130 *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT); | 248 *dst_ptr++ = clip_pixel_high(temp >> VP9_FILTER_SHIFT, bd); |
131 src_ptr += intermediate_height; | 249 src_ptr += intermediate_height; |
132 } | 250 } |
133 src_ptr += intermediate_next_stride; | 251 src_ptr += intermediate_next_stride; |
134 dst_ptr += dst_next_row_stride; | 252 dst_ptr += dst_next_row_stride; |
135 } | 253 } |
136 } | 254 } |
137 } | 255 } |
138 | 256 |
139 void block2d_average_c(uint8_t *src, | 257 void high_block2d_average_c(uint16_t *src, |
140 unsigned int src_stride, | 258 unsigned int src_stride, |
141 uint8_t *output_ptr, | 259 uint16_t *output_ptr, |
142 unsigned int output_stride, | 260 unsigned int output_stride, |
143 unsigned int output_width, | 261 unsigned int output_width, |
144 unsigned int output_height) { | 262 unsigned int output_height, |
| 263 int bd) { |
145 unsigned int i, j; | 264 unsigned int i, j; |
146 for (i = 0; i < output_height; ++i) { | 265 for (i = 0; i < output_height; ++i) { |
147 for (j = 0; j < output_width; ++j) { | 266 for (j = 0; j < output_width; ++j) { |
148 output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; | 267 output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; |
149 } | 268 } |
150 output_ptr += output_stride; | 269 output_ptr += output_stride; |
151 } | 270 } |
152 } | 271 } |
153 | 272 |
154 void filter_average_block2d_8_c(const uint8_t *src_ptr, | 273 void high_filter_average_block2d_8_c(const uint16_t *src_ptr, |
155 const unsigned int src_stride, | 274 const unsigned int src_stride, |
156 const int16_t *HFilter, | 275 const int16_t *HFilter, |
157 const int16_t *VFilter, | 276 const int16_t *VFilter, |
158 uint8_t *dst_ptr, | 277 uint16_t *dst_ptr, |
159 unsigned int dst_stride, | 278 unsigned int dst_stride, |
160 unsigned int output_width, | 279 unsigned int output_width, |
161 unsigned int output_height) { | 280 unsigned int output_height, |
162 uint8_t tmp[64 * 64]; | 281 int bd) { |
| 282 uint16_t tmp[kMaxDimension * kMaxDimension]; |
163 | 283 |
164 assert(output_width <= 64); | 284 assert(output_width <= kMaxDimension); |
165 assert(output_height <= 64); | 285 assert(output_height <= kMaxDimension); |
166 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64, | 286 high_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64, |
167 output_width, output_height); | 287 output_width, output_height, bd); |
168 block2d_average_c(tmp, 64, dst_ptr, dst_stride, | 288 high_block2d_average_c(tmp, 64, dst_ptr, dst_stride, |
169 output_width, output_height); | 289 output_width, output_height, bd); |
170 } | 290 } |
| 291 #endif // CONFIG_VP9_HIGHBITDEPTH |
171 | 292 |
172 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> { | 293 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> { |
173 public: | 294 public: |
174 static void SetUpTestCase() { | 295 static void SetUpTestCase() { |
175 // Force input_ to be unaligned, output to be 16 byte aligned. | 296 // Force input_ to be unaligned, output to be 16 byte aligned. |
176 input_ = reinterpret_cast<uint8_t*>( | 297 input_ = reinterpret_cast<uint8_t*>( |
177 vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1; | 298 vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1; |
178 output_ = reinterpret_cast<uint8_t*>( | 299 output_ = reinterpret_cast<uint8_t*>( |
179 vpx_memalign(kDataAlignment, kOutputBufferSize)); | 300 vpx_memalign(kDataAlignment, kOutputBufferSize)); |
| 301 #if CONFIG_VP9_HIGHBITDEPTH |
| 302 input16_ = reinterpret_cast<uint16_t*>( |
| 303 vpx_memalign(kDataAlignment, |
| 304 (kInputBufferSize + 1) * sizeof(uint16_t))) + 1; |
| 305 output16_ = reinterpret_cast<uint16_t*>( |
| 306 vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); |
| 307 #endif |
180 } | 308 } |
181 | 309 |
182 static void TearDownTestCase() { | 310 static void TearDownTestCase() { |
183 vpx_free(input_ - 1); | 311 vpx_free(input_ - 1); |
184 input_ = NULL; | 312 input_ = NULL; |
185 vpx_free(output_); | 313 vpx_free(output_); |
186 output_ = NULL; | 314 output_ = NULL; |
| 315 #if CONFIG_VP9_HIGHBITDEPTH |
| 316 vpx_free(input16_ - 1); |
| 317 input16_ = NULL; |
| 318 vpx_free(output16_); |
| 319 output16_ = NULL; |
| 320 #endif |
187 } | 321 } |
188 | 322 |
189 protected: | 323 protected: |
190 static const int kDataAlignment = 16; | 324 static const int kDataAlignment = 16; |
191 static const int kOuterBlockSize = 256; | 325 static const int kOuterBlockSize = 256; |
192 static const int kInputStride = kOuterBlockSize; | 326 static const int kInputStride = kOuterBlockSize; |
193 static const int kOutputStride = kOuterBlockSize; | 327 static const int kOutputStride = kOuterBlockSize; |
194 static const int kMaxDimension = 64; | |
195 static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize; | 328 static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize; |
196 static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize; | 329 static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize; |
197 | 330 |
198 int Width() const { return GET_PARAM(0); } | 331 int Width() const { return GET_PARAM(0); } |
199 int Height() const { return GET_PARAM(1); } | 332 int Height() const { return GET_PARAM(1); } |
200 int BorderLeft() const { | 333 int BorderLeft() const { |
201 const int center = (kOuterBlockSize - Width()) / 2; | 334 const int center = (kOuterBlockSize - Width()) / 2; |
202 return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1); | 335 return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1); |
203 } | 336 } |
204 int BorderTop() const { return (kOuterBlockSize - Height()) / 2; } | 337 int BorderTop() const { return (kOuterBlockSize - Height()) / 2; } |
205 | 338 |
206 bool IsIndexInBorder(int i) { | 339 bool IsIndexInBorder(int i) { |
207 return (i < BorderTop() * kOuterBlockSize || | 340 return (i < BorderTop() * kOuterBlockSize || |
208 i >= (BorderTop() + Height()) * kOuterBlockSize || | 341 i >= (BorderTop() + Height()) * kOuterBlockSize || |
209 i % kOuterBlockSize < BorderLeft() || | 342 i % kOuterBlockSize < BorderLeft() || |
210 i % kOuterBlockSize >= (BorderLeft() + Width())); | 343 i % kOuterBlockSize >= (BorderLeft() + Width())); |
211 } | 344 } |
212 | 345 |
213 virtual void SetUp() { | 346 virtual void SetUp() { |
214 UUT_ = GET_PARAM(2); | 347 UUT_ = GET_PARAM(2); |
| 348 #if CONFIG_VP9_HIGHBITDEPTH |
| 349 if (UUT_->use_high_bd_ != 0) |
| 350 mask_ = (1 << UUT_->use_high_bd_) - 1; |
| 351 else |
| 352 mask_ = 255; |
| 353 #endif |
215 /* Set up guard blocks for an inner block centered in the outer block */ | 354 /* Set up guard blocks for an inner block centered in the outer block */ |
216 for (int i = 0; i < kOutputBufferSize; ++i) { | 355 for (int i = 0; i < kOutputBufferSize; ++i) { |
217 if (IsIndexInBorder(i)) | 356 if (IsIndexInBorder(i)) |
218 output_[i] = 255; | 357 output_[i] = 255; |
219 else | 358 else |
220 output_[i] = 0; | 359 output_[i] = 0; |
221 } | 360 } |
222 | 361 |
223 ::libvpx_test::ACMRandom prng; | 362 ::libvpx_test::ACMRandom prng; |
224 for (int i = 0; i < kInputBufferSize; ++i) { | 363 for (int i = 0; i < kInputBufferSize; ++i) { |
225 if (i & 1) | 364 if (i & 1) { |
226 input_[i] = 255; | 365 input_[i] = 255; |
227 else | 366 #if CONFIG_VP9_HIGHBITDEPTH |
| 367 input16_[i] = mask_; |
| 368 #endif |
| 369 } else { |
228 input_[i] = prng.Rand8Extremes(); | 370 input_[i] = prng.Rand8Extremes(); |
| 371 #if CONFIG_VP9_HIGHBITDEPTH |
| 372 input16_[i] = prng.Rand16() & mask_; |
| 373 #endif |
| 374 } |
229 } | 375 } |
230 } | 376 } |
231 | 377 |
232 void SetConstantInput(int value) { | 378 void SetConstantInput(int value) { |
233 memset(input_, value, kInputBufferSize); | 379 memset(input_, value, kInputBufferSize); |
| 380 #if CONFIG_VP9_HIGHBITDEPTH |
| 381 vpx_memset16(input16_, value, kInputBufferSize); |
| 382 #endif |
234 } | 383 } |
235 | 384 |
236 void CheckGuardBlocks() { | 385 void CheckGuardBlocks() { |
237 for (int i = 0; i < kOutputBufferSize; ++i) { | 386 for (int i = 0; i < kOutputBufferSize; ++i) { |
238 if (IsIndexInBorder(i)) | 387 if (IsIndexInBorder(i)) |
239 EXPECT_EQ(255, output_[i]); | 388 EXPECT_EQ(255, output_[i]); |
240 } | 389 } |
241 } | 390 } |
242 | 391 |
243 uint8_t* input() const { | 392 uint8_t *input() const { |
| 393 #if CONFIG_VP9_HIGHBITDEPTH |
| 394 if (UUT_->use_high_bd_ == 0) { |
| 395 return input_ + BorderTop() * kOuterBlockSize + BorderLeft(); |
| 396 } else { |
| 397 return CONVERT_TO_BYTEPTR(input16_ + BorderTop() * kOuterBlockSize + |
| 398 BorderLeft()); |
| 399 } |
| 400 #else |
244 return input_ + BorderTop() * kOuterBlockSize + BorderLeft(); | 401 return input_ + BorderTop() * kOuterBlockSize + BorderLeft(); |
| 402 #endif |
245 } | 403 } |
246 | 404 |
247 uint8_t* output() const { | 405 uint8_t *output() const { |
| 406 #if CONFIG_VP9_HIGHBITDEPTH |
| 407 if (UUT_->use_high_bd_ == 0) { |
| 408 return output_ + BorderTop() * kOuterBlockSize + BorderLeft(); |
| 409 } else { |
| 410 return CONVERT_TO_BYTEPTR(output16_ + BorderTop() * kOuterBlockSize + |
| 411 BorderLeft()); |
| 412 } |
| 413 #else |
248 return output_ + BorderTop() * kOuterBlockSize + BorderLeft(); | 414 return output_ + BorderTop() * kOuterBlockSize + BorderLeft(); |
| 415 #endif |
| 416 } |
| 417 |
| 418 uint16_t lookup(uint8_t *list, int index) const { |
| 419 #if CONFIG_VP9_HIGHBITDEPTH |
| 420 if (UUT_->use_high_bd_ == 0) { |
| 421 return list[index]; |
| 422 } else { |
| 423 return CONVERT_TO_SHORTPTR(list)[index]; |
| 424 } |
| 425 #else |
| 426 return list[index]; |
| 427 #endif |
| 428 } |
| 429 |
| 430 void assign_val(uint8_t *list, int index, uint16_t val) const { |
| 431 #if CONFIG_VP9_HIGHBITDEPTH |
| 432 if (UUT_->use_high_bd_ == 0) { |
| 433 list[index] = (uint8_t) val; |
| 434 } else { |
| 435 CONVERT_TO_SHORTPTR(list)[index] = val; |
| 436 } |
| 437 #else |
| 438 list[index] = (uint8_t) val; |
| 439 #endif |
| 440 } |
| 441 |
| 442 void wrapper_filter_average_block2d_8_c(const uint8_t *src_ptr, |
| 443 const unsigned int src_stride, |
| 444 const int16_t *HFilter, |
| 445 const int16_t *VFilter, |
| 446 uint8_t *dst_ptr, |
| 447 unsigned int dst_stride, |
| 448 unsigned int output_width, |
| 449 unsigned int output_height) { |
| 450 #if CONFIG_VP9_HIGHBITDEPTH |
| 451 if (UUT_->use_high_bd_ == 0) { |
| 452 filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, |
| 453 dst_ptr, dst_stride, output_width, |
| 454 output_height); |
| 455 } else { |
| 456 high_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride, |
| 457 HFilter, VFilter, |
| 458 CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, |
| 459 output_width, output_height, |
| 460 UUT_->use_high_bd_); |
| 461 } |
| 462 #else |
| 463 filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, |
| 464 dst_ptr, dst_stride, output_width, |
| 465 output_height); |
| 466 #endif |
| 467 } |
| 468 |
| 469 void wrapper_filter_block2d_8_c(const uint8_t *src_ptr, |
| 470 const unsigned int src_stride, |
| 471 const int16_t *HFilter, |
| 472 const int16_t *VFilter, |
| 473 uint8_t *dst_ptr, |
| 474 unsigned int dst_stride, |
| 475 unsigned int output_width, |
| 476 unsigned int output_height) { |
| 477 #if CONFIG_VP9_HIGHBITDEPTH |
| 478 if (UUT_->use_high_bd_ == 0) { |
| 479 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, |
| 480 dst_ptr, dst_stride, output_width, output_height); |
| 481 } else { |
| 482 high_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride, |
| 483 HFilter, VFilter, |
| 484 CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, |
| 485 output_width, output_height, UUT_->use_high_bd_); |
| 486 } |
| 487 #else |
| 488 filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, |
| 489 dst_ptr, dst_stride, output_width, output_height); |
| 490 #endif |
249 } | 491 } |
250 | 492 |
251 const ConvolveFunctions* UUT_; | 493 const ConvolveFunctions* UUT_; |
252 static uint8_t* input_; | 494 static uint8_t* input_; |
253 static uint8_t* output_; | 495 static uint8_t* output_; |
| 496 #if CONFIG_VP9_HIGHBITDEPTH |
| 497 static uint16_t* input16_; |
| 498 static uint16_t* output16_; |
| 499 int mask_; |
| 500 #endif |
254 }; | 501 }; |
| 502 |
255 uint8_t* ConvolveTest::input_ = NULL; | 503 uint8_t* ConvolveTest::input_ = NULL; |
256 uint8_t* ConvolveTest::output_ = NULL; | 504 uint8_t* ConvolveTest::output_ = NULL; |
| 505 #if CONFIG_VP9_HIGHBITDEPTH |
| 506 uint16_t* ConvolveTest::input16_ = NULL; |
| 507 uint16_t* ConvolveTest::output16_ = NULL; |
| 508 #endif |
257 | 509 |
258 TEST_P(ConvolveTest, GuardBlocks) { | 510 TEST_P(ConvolveTest, GuardBlocks) { |
259 CheckGuardBlocks(); | 511 CheckGuardBlocks(); |
260 } | 512 } |
261 | 513 |
262 TEST_P(ConvolveTest, CopyHoriz) { | 514 TEST_P(ConvolveTest, CopyHoriz) { |
263 uint8_t* const in = input(); | 515 uint8_t* const in = input(); |
264 uint8_t* const out = output(); | 516 uint8_t* const out = output(); |
265 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0}; | 517 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0}; |
266 | 518 |
267 ASM_REGISTER_STATE_CHECK( | 519 ASM_REGISTER_STATE_CHECK( |
268 UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16, | 520 UUT_->h8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16, |
269 Width(), Height())); | 521 Width(), Height())); |
270 | 522 |
271 CheckGuardBlocks(); | 523 CheckGuardBlocks(); |
272 | 524 |
273 for (int y = 0; y < Height(); ++y) | 525 for (int y = 0; y < Height(); ++y) |
274 for (int x = 0; x < Width(); ++x) | 526 for (int x = 0; x < Width(); ++x) |
275 ASSERT_EQ(out[y * kOutputStride + x], in[y * kInputStride + x]) | 527 ASSERT_EQ(lookup(out, y * kOutputStride + x), |
| 528 lookup(in, y * kInputStride + x)) |
276 << "(" << x << "," << y << ")"; | 529 << "(" << x << "," << y << ")"; |
277 } | 530 } |
278 | 531 |
279 TEST_P(ConvolveTest, CopyVert) { | 532 TEST_P(ConvolveTest, CopyVert) { |
280 uint8_t* const in = input(); | 533 uint8_t* const in = input(); |
281 uint8_t* const out = output(); | 534 uint8_t* const out = output(); |
282 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0}; | 535 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0}; |
283 | 536 |
284 ASM_REGISTER_STATE_CHECK( | 537 ASM_REGISTER_STATE_CHECK( |
285 UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16, | 538 UUT_->v8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16, |
286 Width(), Height())); | 539 Width(), Height())); |
287 | 540 |
288 CheckGuardBlocks(); | 541 CheckGuardBlocks(); |
289 | 542 |
290 for (int y = 0; y < Height(); ++y) | 543 for (int y = 0; y < Height(); ++y) |
291 for (int x = 0; x < Width(); ++x) | 544 for (int x = 0; x < Width(); ++x) |
292 ASSERT_EQ(out[y * kOutputStride + x], in[y * kInputStride + x]) | 545 ASSERT_EQ(lookup(out, y * kOutputStride + x), |
| 546 lookup(in, y * kInputStride + x)) |
293 << "(" << x << "," << y << ")"; | 547 << "(" << x << "," << y << ")"; |
294 } | 548 } |
295 | 549 |
296 TEST_P(ConvolveTest, Copy2D) { | 550 TEST_P(ConvolveTest, Copy2D) { |
297 uint8_t* const in = input(); | 551 uint8_t* const in = input(); |
298 uint8_t* const out = output(); | 552 uint8_t* const out = output(); |
299 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0}; | 553 DECLARE_ALIGNED(256, const int16_t, filter8[8]) = {0, 0, 0, 128, 0, 0, 0, 0}; |
300 | 554 |
301 ASM_REGISTER_STATE_CHECK( | 555 ASM_REGISTER_STATE_CHECK( |
302 UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16, | 556 UUT_->hv8_(in, kInputStride, out, kOutputStride, filter8, 16, filter8, 16, |
303 Width(), Height())); | 557 Width(), Height())); |
304 | 558 |
305 CheckGuardBlocks(); | 559 CheckGuardBlocks(); |
306 | 560 |
307 for (int y = 0; y < Height(); ++y) | 561 for (int y = 0; y < Height(); ++y) |
308 for (int x = 0; x < Width(); ++x) | 562 for (int x = 0; x < Width(); ++x) |
309 ASSERT_EQ(out[y * kOutputStride + x], in[y * kInputStride + x]) | 563 ASSERT_EQ(lookup(out, y * kOutputStride + x), |
| 564 lookup(in, y * kInputStride + x)) |
310 << "(" << x << "," << y << ")"; | 565 << "(" << x << "," << y << ")"; |
311 } | 566 } |
312 | 567 |
313 const int kNumFilterBanks = 4; | 568 const int kNumFilterBanks = 4; |
314 const int kNumFilters = 16; | 569 const int kNumFilters = 16; |
315 | 570 |
316 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) { | 571 TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) { |
317 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { | 572 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { |
318 const InterpKernel *filters = | 573 const InterpKernel *filters = |
319 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank)); | 574 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank)); |
(...skipping 12 matching lines...) Expand all Loading... |
332 EXPECT_EQ(p0 + p1 + p2 + p3, 128); | 587 EXPECT_EQ(p0 + p1 + p2 + p3, 128); |
333 } | 588 } |
334 } | 589 } |
335 } | 590 } |
336 | 591 |
337 const int16_t kInvalidFilter[8] = { 0 }; | 592 const int16_t kInvalidFilter[8] = { 0 }; |
338 | 593 |
339 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) { | 594 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) { |
340 uint8_t* const in = input(); | 595 uint8_t* const in = input(); |
341 uint8_t* const out = output(); | 596 uint8_t* const out = output(); |
| 597 #if CONFIG_VP9_HIGHBITDEPTH |
| 598 uint8_t ref8[kOutputStride * kMaxDimension]; |
| 599 uint16_t ref16[kOutputStride * kMaxDimension]; |
| 600 uint8_t* ref; |
| 601 if (UUT_->use_high_bd_ == 0) { |
| 602 ref = ref8; |
| 603 } else { |
| 604 ref = CONVERT_TO_BYTEPTR(ref16); |
| 605 } |
| 606 #else |
342 uint8_t ref[kOutputStride * kMaxDimension]; | 607 uint8_t ref[kOutputStride * kMaxDimension]; |
343 | 608 #endif |
344 | 609 |
345 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { | 610 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { |
346 const InterpKernel *filters = | 611 const InterpKernel *filters = |
347 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank)); | 612 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank)); |
348 const InterpKernel *const eighttap_smooth = | 613 const InterpKernel *const eighttap_smooth = |
349 vp9_get_interp_kernel(EIGHTTAP_SMOOTH); | 614 vp9_get_interp_kernel(EIGHTTAP_SMOOTH); |
350 | 615 |
351 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { | 616 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { |
352 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { | 617 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { |
353 filter_block2d_8_c(in, kInputStride, | 618 wrapper_filter_block2d_8_c(in, kInputStride, |
354 filters[filter_x], filters[filter_y], | 619 filters[filter_x], filters[filter_y], |
355 ref, kOutputStride, | 620 ref, kOutputStride, |
356 Width(), Height()); | 621 Width(), Height()); |
357 | 622 |
358 if (filters == eighttap_smooth || (filter_x && filter_y)) | 623 if (filters == eighttap_smooth || (filter_x && filter_y)) |
359 ASM_REGISTER_STATE_CHECK( | 624 ASM_REGISTER_STATE_CHECK( |
360 UUT_->hv8_(in, kInputStride, out, kOutputStride, | 625 UUT_->hv8_(in, kInputStride, out, kOutputStride, |
361 filters[filter_x], 16, filters[filter_y], 16, | 626 filters[filter_x], 16, filters[filter_y], 16, |
362 Width(), Height())); | 627 Width(), Height())); |
363 else if (filter_y) | 628 else if (filter_y) |
364 ASM_REGISTER_STATE_CHECK( | 629 ASM_REGISTER_STATE_CHECK( |
365 UUT_->v8_(in, kInputStride, out, kOutputStride, | 630 UUT_->v8_(in, kInputStride, out, kOutputStride, |
366 kInvalidFilter, 16, filters[filter_y], 16, | 631 kInvalidFilter, 16, filters[filter_y], 16, |
367 Width(), Height())); | 632 Width(), Height())); |
368 else | 633 else |
369 ASM_REGISTER_STATE_CHECK( | 634 ASM_REGISTER_STATE_CHECK( |
370 UUT_->h8_(in, kInputStride, out, kOutputStride, | 635 UUT_->h8_(in, kInputStride, out, kOutputStride, |
371 filters[filter_x], 16, kInvalidFilter, 16, | 636 filters[filter_x], 16, kInvalidFilter, 16, |
372 Width(), Height())); | 637 Width(), Height())); |
373 | 638 |
374 CheckGuardBlocks(); | 639 CheckGuardBlocks(); |
375 | 640 |
376 for (int y = 0; y < Height(); ++y) | 641 for (int y = 0; y < Height(); ++y) |
377 for (int x = 0; x < Width(); ++x) | 642 for (int x = 0; x < Width(); ++x) |
378 ASSERT_EQ(ref[y * kOutputStride + x], out[y * kOutputStride + x]) | 643 ASSERT_EQ(lookup(ref, y * kOutputStride + x), |
| 644 lookup(out, y * kOutputStride + x)) |
379 << "mismatch at (" << x << "," << y << "), " | 645 << "mismatch at (" << x << "," << y << "), " |
380 << "filters (" << filter_bank << "," | 646 << "filters (" << filter_bank << "," |
381 << filter_x << "," << filter_y << ")"; | 647 << filter_x << "," << filter_y << ")"; |
382 } | 648 } |
383 } | 649 } |
384 } | 650 } |
385 } | 651 } |
386 | 652 |
387 TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) { | 653 TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) { |
388 uint8_t* const in = input(); | 654 uint8_t* const in = input(); |
389 uint8_t* const out = output(); | 655 uint8_t* const out = output(); |
| 656 #if CONFIG_VP9_HIGHBITDEPTH |
| 657 uint8_t ref8[kOutputStride * kMaxDimension]; |
| 658 uint16_t ref16[kOutputStride * kMaxDimension]; |
| 659 uint8_t* ref; |
| 660 if (UUT_->use_high_bd_ == 0) { |
| 661 ref = ref8; |
| 662 } else { |
| 663 ref = CONVERT_TO_BYTEPTR(ref16); |
| 664 } |
| 665 #else |
390 uint8_t ref[kOutputStride * kMaxDimension]; | 666 uint8_t ref[kOutputStride * kMaxDimension]; |
| 667 #endif |
391 | 668 |
392 // Populate ref and out with some random data | 669 // Populate ref and out with some random data |
393 ::libvpx_test::ACMRandom prng; | 670 ::libvpx_test::ACMRandom prng; |
394 for (int y = 0; y < Height(); ++y) { | 671 for (int y = 0; y < Height(); ++y) { |
395 for (int x = 0; x < Width(); ++x) { | 672 for (int x = 0; x < Width(); ++x) { |
396 const uint8_t r = prng.Rand8Extremes(); | 673 uint16_t r; |
| 674 #if CONFIG_VP9_HIGHBITDEPTH |
| 675 if (UUT_->use_high_bd_ == 0 || UUT_->use_high_bd_ == 8) { |
| 676 r = prng.Rand8Extremes(); |
| 677 } else { |
| 678 r = prng.Rand16() & mask_; |
| 679 } |
| 680 #else |
| 681 r = prng.Rand8Extremes(); |
| 682 #endif |
397 | 683 |
398 out[y * kOutputStride + x] = r; | 684 assign_val(out, y * kOutputStride + x, r); |
399 ref[y * kOutputStride + x] = r; | 685 assign_val(ref, y * kOutputStride + x, r); |
400 } | 686 } |
401 } | 687 } |
402 | 688 |
403 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { | 689 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { |
404 const InterpKernel *filters = | 690 const InterpKernel *filters = |
405 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank)); | 691 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank)); |
406 const InterpKernel *const eighttap_smooth = | 692 const InterpKernel *const eighttap_smooth = |
407 vp9_get_interp_kernel(EIGHTTAP_SMOOTH); | 693 vp9_get_interp_kernel(EIGHTTAP_SMOOTH); |
408 | 694 |
409 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { | 695 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { |
410 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { | 696 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { |
411 filter_average_block2d_8_c(in, kInputStride, | 697 wrapper_filter_average_block2d_8_c(in, kInputStride, |
412 filters[filter_x], filters[filter_y], | 698 filters[filter_x], filters[filter_y], |
413 ref, kOutputStride, | 699 ref, kOutputStride, |
414 Width(), Height()); | 700 Width(), Height()); |
415 | 701 |
416 if (filters == eighttap_smooth || (filter_x && filter_y)) | 702 if (filters == eighttap_smooth || (filter_x && filter_y)) |
417 ASM_REGISTER_STATE_CHECK( | 703 ASM_REGISTER_STATE_CHECK( |
418 UUT_->hv8_avg_(in, kInputStride, out, kOutputStride, | 704 UUT_->hv8_avg_(in, kInputStride, out, kOutputStride, |
419 filters[filter_x], 16, filters[filter_y], 16, | 705 filters[filter_x], 16, filters[filter_y], 16, |
420 Width(), Height())); | 706 Width(), Height())); |
421 else if (filter_y) | 707 else if (filter_y) |
422 ASM_REGISTER_STATE_CHECK( | 708 ASM_REGISTER_STATE_CHECK( |
423 UUT_->v8_avg_(in, kInputStride, out, kOutputStride, | 709 UUT_->v8_avg_(in, kInputStride, out, kOutputStride, |
424 filters[filter_x], 16, filters[filter_y], 16, | 710 filters[filter_x], 16, filters[filter_y], 16, |
425 Width(), Height())); | 711 Width(), Height())); |
426 else | 712 else |
427 ASM_REGISTER_STATE_CHECK( | 713 ASM_REGISTER_STATE_CHECK( |
428 UUT_->h8_avg_(in, kInputStride, out, kOutputStride, | 714 UUT_->h8_avg_(in, kInputStride, out, kOutputStride, |
429 filters[filter_x], 16, filters[filter_y], 16, | 715 filters[filter_x], 16, filters[filter_y], 16, |
430 Width(), Height())); | 716 Width(), Height())); |
431 | 717 |
432 CheckGuardBlocks(); | 718 CheckGuardBlocks(); |
433 | 719 |
434 for (int y = 0; y < Height(); ++y) | 720 for (int y = 0; y < Height(); ++y) |
435 for (int x = 0; x < Width(); ++x) | 721 for (int x = 0; x < Width(); ++x) |
436 ASSERT_EQ(ref[y * kOutputStride + x], out[y * kOutputStride + x]) | 722 ASSERT_EQ(lookup(ref, y * kOutputStride + x), |
| 723 lookup(out, y * kOutputStride + x)) |
437 << "mismatch at (" << x << "," << y << "), " | 724 << "mismatch at (" << x << "," << y << "), " |
438 << "filters (" << filter_bank << "," | 725 << "filters (" << filter_bank << "," |
439 << filter_x << "," << filter_y << ")"; | 726 << filter_x << "," << filter_y << ")"; |
440 } | 727 } |
441 } | 728 } |
442 } | 729 } |
443 } | 730 } |
444 | 731 |
| 732 TEST_P(ConvolveTest, FilterExtremes) { |
| 733 uint8_t *const in = input(); |
| 734 uint8_t *const out = output(); |
| 735 #if CONFIG_VP9_HIGHBITDEPTH |
| 736 uint8_t ref8[kOutputStride * kMaxDimension]; |
| 737 uint16_t ref16[kOutputStride * kMaxDimension]; |
| 738 uint8_t *ref; |
| 739 if (UUT_->use_high_bd_ == 0) { |
| 740 ref = ref8; |
| 741 } else { |
| 742 ref = CONVERT_TO_BYTEPTR(ref16); |
| 743 } |
| 744 #else |
| 745 uint8_t ref[kOutputStride * kMaxDimension]; |
| 746 #endif |
| 747 |
| 748 // Populate ref and out with some random data |
| 749 ::libvpx_test::ACMRandom prng; |
| 750 for (int y = 0; y < Height(); ++y) { |
| 751 for (int x = 0; x < Width(); ++x) { |
| 752 uint16_t r; |
| 753 #if CONFIG_VP9_HIGHBITDEPTH |
| 754 if (UUT_->use_high_bd_ == 0 || UUT_->use_high_bd_ == 8) { |
| 755 r = prng.Rand8Extremes(); |
| 756 } else { |
| 757 r = prng.Rand16() & mask_; |
| 758 } |
| 759 #else |
| 760 r = prng.Rand8Extremes(); |
| 761 #endif |
| 762 assign_val(out, y * kOutputStride + x, r); |
| 763 assign_val(ref, y * kOutputStride + x, r); |
| 764 } |
| 765 } |
| 766 |
| 767 for (int axis = 0; axis < 2; axis++) { |
| 768 int seed_val = 0; |
| 769 while (seed_val < 256) { |
| 770 for (int y = 0; y < 8; ++y) { |
| 771 for (int x = 0; x < 8; ++x) { |
| 772 #if CONFIG_VP9_HIGHBITDEPTH |
| 773 assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1, |
| 774 ((seed_val >> (axis ? y : x)) & 1) * mask_); |
| 775 #else |
| 776 assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1, |
| 777 ((seed_val >> (axis ? y : x)) & 1) * 255); |
| 778 #endif |
| 779 if (axis) seed_val++; |
| 780 } |
| 781 if (axis) |
| 782 seed_val-= 8; |
| 783 else |
| 784 seed_val++; |
| 785 } |
| 786 if (axis) seed_val += 8; |
| 787 |
| 788 for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { |
| 789 const InterpKernel *filters = |
| 790 vp9_get_interp_kernel(static_cast<INTERP_FILTER>(filter_bank)); |
| 791 const InterpKernel *const eighttap_smooth = |
| 792 vp9_get_interp_kernel(EIGHTTAP_SMOOTH); |
| 793 for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { |
| 794 for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { |
| 795 wrapper_filter_block2d_8_c(in, kInputStride, |
| 796 filters[filter_x], filters[filter_y], |
| 797 ref, kOutputStride, |
| 798 Width(), Height()); |
| 799 if (filters == eighttap_smooth || (filter_x && filter_y)) |
| 800 ASM_REGISTER_STATE_CHECK( |
| 801 UUT_->hv8_(in, kInputStride, out, kOutputStride, |
| 802 filters[filter_x], 16, filters[filter_y], 16, |
| 803 Width(), Height())); |
| 804 else if (filter_y) |
| 805 ASM_REGISTER_STATE_CHECK( |
| 806 UUT_->v8_(in, kInputStride, out, kOutputStride, |
| 807 kInvalidFilter, 16, filters[filter_y], 16, |
| 808 Width(), Height())); |
| 809 else |
| 810 ASM_REGISTER_STATE_CHECK( |
| 811 UUT_->h8_(in, kInputStride, out, kOutputStride, |
| 812 filters[filter_x], 16, kInvalidFilter, 16, |
| 813 Width(), Height())); |
| 814 |
| 815 for (int y = 0; y < Height(); ++y) |
| 816 for (int x = 0; x < Width(); ++x) |
| 817 ASSERT_EQ(lookup(ref, y * kOutputStride + x), |
| 818 lookup(out, y * kOutputStride + x)) |
| 819 << "mismatch at (" << x << "," << y << "), " |
| 820 << "filters (" << filter_bank << "," |
| 821 << filter_x << "," << filter_y << ")"; |
| 822 } |
| 823 } |
| 824 } |
| 825 } |
| 826 } |
| 827 } |
| 828 |
445 DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = { | 829 DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = { |
446 { 0, 0, 0, 0, 0, 0, 0, 128}, | 830 { 0, 0, 0, 0, 0, 0, 0, 128}, |
447 { 0, 0, 0, 0, 0, 0, 128}, | 831 { 0, 0, 0, 0, 0, 0, 128}, |
448 { 0, 0, 0, 0, 0, 128}, | 832 { 0, 0, 0, 0, 0, 128}, |
449 { 0, 0, 0, 0, 128}, | 833 { 0, 0, 0, 0, 128}, |
450 { 0, 0, 0, 128}, | 834 { 0, 0, 0, 128}, |
451 { 0, 0, 128}, | 835 { 0, 0, 128}, |
452 { 0, 128}, | 836 { 0, 128}, |
453 { 128}, | 837 { 128}, |
454 { 0, 0, 0, 0, 0, 0, 0, 128}, | 838 { 0, 0, 0, 0, 0, 0, 0, 128}, |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
498 UUT_->h8_(in, kInputStride, out, kOutputStride, | 882 UUT_->h8_(in, kInputStride, out, kOutputStride, |
499 kChangeFilters[kInitialSubPelOffset], | 883 kChangeFilters[kInitialSubPelOffset], |
500 kInputPixelStep, NULL, 0, Width(), Height())); | 884 kInputPixelStep, NULL, 0, Width(), Height())); |
501 | 885 |
502 for (int x = 0; x < Width(); ++x) { | 886 for (int x = 0; x < Width(); ++x) { |
503 const int kFilterPeriodAdjust = (x >> 3) << 3; | 887 const int kFilterPeriodAdjust = (x >> 3) << 3; |
504 const int ref_x = | 888 const int ref_x = |
505 kPixelSelected + ((kInitialSubPelOffset | 889 kPixelSelected + ((kInitialSubPelOffset |
506 + kFilterPeriodAdjust * kInputPixelStep) | 890 + kFilterPeriodAdjust * kInputPixelStep) |
507 >> SUBPEL_BITS); | 891 >> SUBPEL_BITS); |
508 ASSERT_EQ(in[ref_x], out[x]) << "x == " << x << "width = " << Width(); | 892 ASSERT_EQ(lookup(in, ref_x), lookup(out, x)) |
| 893 << "x == " << x << "width = " << Width(); |
509 } | 894 } |
510 | 895 |
511 /* Test the vertical filter. */ | 896 /* Test the vertical filter. */ |
512 ASM_REGISTER_STATE_CHECK( | 897 ASM_REGISTER_STATE_CHECK( |
513 UUT_->v8_(in, kInputStride, out, kOutputStride, | 898 UUT_->v8_(in, kInputStride, out, kOutputStride, |
514 NULL, 0, kChangeFilters[kInitialSubPelOffset], | 899 NULL, 0, kChangeFilters[kInitialSubPelOffset], |
515 kInputPixelStep, Width(), Height())); | 900 kInputPixelStep, Width(), Height())); |
516 | 901 |
517 for (int y = 0; y < Height(); ++y) { | 902 for (int y = 0; y < Height(); ++y) { |
518 const int kFilterPeriodAdjust = (y >> 3) << 3; | 903 const int kFilterPeriodAdjust = (y >> 3) << 3; |
519 const int ref_y = | 904 const int ref_y = |
520 kPixelSelected + ((kInitialSubPelOffset | 905 kPixelSelected + ((kInitialSubPelOffset |
521 + kFilterPeriodAdjust * kInputPixelStep) | 906 + kFilterPeriodAdjust * kInputPixelStep) |
522 >> SUBPEL_BITS); | 907 >> SUBPEL_BITS); |
523 ASSERT_EQ(in[ref_y * kInputStride], out[y * kInputStride]) << "y == " << y; | 908 ASSERT_EQ(lookup(in, ref_y * kInputStride), lookup(out, y * kInputStride)) |
| 909 << "y == " << y; |
524 } | 910 } |
525 | 911 |
526 /* Test the horizontal and vertical filters in combination. */ | 912 /* Test the horizontal and vertical filters in combination. */ |
527 ASM_REGISTER_STATE_CHECK( | 913 ASM_REGISTER_STATE_CHECK( |
528 UUT_->hv8_(in, kInputStride, out, kOutputStride, | 914 UUT_->hv8_(in, kInputStride, out, kOutputStride, |
529 kChangeFilters[kInitialSubPelOffset], kInputPixelStep, | 915 kChangeFilters[kInitialSubPelOffset], kInputPixelStep, |
530 kChangeFilters[kInitialSubPelOffset], kInputPixelStep, | 916 kChangeFilters[kInitialSubPelOffset], kInputPixelStep, |
531 Width(), Height())); | 917 Width(), Height())); |
532 | 918 |
533 for (int y = 0; y < Height(); ++y) { | 919 for (int y = 0; y < Height(); ++y) { |
534 const int kFilterPeriodAdjustY = (y >> 3) << 3; | 920 const int kFilterPeriodAdjustY = (y >> 3) << 3; |
535 const int ref_y = | 921 const int ref_y = |
536 kPixelSelected + ((kInitialSubPelOffset | 922 kPixelSelected + ((kInitialSubPelOffset |
537 + kFilterPeriodAdjustY * kInputPixelStep) | 923 + kFilterPeriodAdjustY * kInputPixelStep) |
538 >> SUBPEL_BITS); | 924 >> SUBPEL_BITS); |
539 for (int x = 0; x < Width(); ++x) { | 925 for (int x = 0; x < Width(); ++x) { |
540 const int kFilterPeriodAdjustX = (x >> 3) << 3; | 926 const int kFilterPeriodAdjustX = (x >> 3) << 3; |
541 const int ref_x = | 927 const int ref_x = |
542 kPixelSelected + ((kInitialSubPelOffset | 928 kPixelSelected + ((kInitialSubPelOffset |
543 + kFilterPeriodAdjustX * kInputPixelStep) | 929 + kFilterPeriodAdjustX * kInputPixelStep) |
544 >> SUBPEL_BITS); | 930 >> SUBPEL_BITS); |
545 | 931 |
546 ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x]) | 932 ASSERT_EQ(lookup(in, ref_y * kInputStride + ref_x), |
| 933 lookup(out, y * kOutputStride + x)) |
547 << "x == " << x << ", y == " << y; | 934 << "x == " << x << ", y == " << y; |
548 } | 935 } |
549 } | 936 } |
550 } | 937 } |
551 | 938 |
552 /* This test exercises that enough rows and columns are filtered with every | 939 /* This test exercises that enough rows and columns are filtered with every |
553 possible initial fractional positions and scaling steps. */ | 940 possible initial fractional positions and scaling steps. */ |
554 TEST_P(ConvolveTest, CheckScalingFiltering) { | 941 TEST_P(ConvolveTest, CheckScalingFiltering) { |
555 uint8_t* const in = input(); | 942 uint8_t* const in = input(); |
556 uint8_t* const out = output(); | 943 uint8_t* const out = output(); |
557 const InterpKernel *const eighttap = vp9_get_interp_kernel(EIGHTTAP); | 944 const InterpKernel *const eighttap = vp9_get_interp_kernel(EIGHTTAP); |
558 | 945 |
559 SetConstantInput(127); | 946 SetConstantInput(127); |
560 | 947 |
561 for (int frac = 0; frac < 16; ++frac) { | 948 for (int frac = 0; frac < 16; ++frac) { |
562 for (int step = 1; step <= 32; ++step) { | 949 for (int step = 1; step <= 32; ++step) { |
563 /* Test the horizontal and vertical filters in combination. */ | 950 /* Test the horizontal and vertical filters in combination. */ |
564 ASM_REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride, | 951 ASM_REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride, |
565 eighttap[frac], step, | 952 eighttap[frac], step, |
566 eighttap[frac], step, | 953 eighttap[frac], step, |
567 Width(), Height())); | 954 Width(), Height())); |
568 | 955 |
569 CheckGuardBlocks(); | 956 CheckGuardBlocks(); |
570 | 957 |
571 for (int y = 0; y < Height(); ++y) { | 958 for (int y = 0; y < Height(); ++y) { |
572 for (int x = 0; x < Width(); ++x) { | 959 for (int x = 0; x < Width(); ++x) { |
573 ASSERT_EQ(in[y * kInputStride + x], out[y * kOutputStride + x]) | 960 ASSERT_EQ(lookup(in, y * kInputStride + x), |
| 961 lookup(out, y * kOutputStride + x)) |
574 << "x == " << x << ", y == " << y | 962 << "x == " << x << ", y == " << y |
575 << ", frac == " << frac << ", step == " << step; | 963 << ", frac == " << frac << ", step == " << step; |
576 } | 964 } |
577 } | 965 } |
578 } | 966 } |
579 } | 967 } |
580 } | 968 } |
581 | 969 |
582 using std::tr1::make_tuple; | 970 using std::tr1::make_tuple; |
583 | 971 |
| 972 #if CONFIG_VP9_HIGHBITDEPTH |
| 973 #if HAVE_SSE2 && ARCH_X86_64 |
| 974 void wrap_convolve8_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride, |
| 975 uint8_t *dst, ptrdiff_t dst_stride, |
| 976 const int16_t *filter_x, |
| 977 int filter_x_stride, |
| 978 const int16_t *filter_y, |
| 979 int filter_y_stride, |
| 980 int w, int h) { |
| 981 vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 982 filter_x_stride, filter_y, filter_y_stride, |
| 983 w, h, 8); |
| 984 } |
| 985 |
| 986 void wrap_convolve8_avg_horiz_sse2_8(const uint8_t *src, ptrdiff_t src_stride, |
| 987 uint8_t *dst, ptrdiff_t dst_stride, |
| 988 const int16_t *filter_x, |
| 989 int filter_x_stride, |
| 990 const int16_t *filter_y, |
| 991 int filter_y_stride, |
| 992 int w, int h) { |
| 993 vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 994 filter_x_stride, filter_y, filter_y_stride, w, h, 8); |
| 995 } |
| 996 |
| 997 void wrap_convolve8_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride, |
| 998 uint8_t *dst, ptrdiff_t dst_stride, |
| 999 const int16_t *filter_x, |
| 1000 int filter_x_stride, |
| 1001 const int16_t *filter_y, |
| 1002 int filter_y_stride, |
| 1003 int w, int h) { |
| 1004 vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1005 filter_x_stride, filter_y, filter_y_stride, w, h, 8); |
| 1006 } |
| 1007 |
| 1008 void wrap_convolve8_avg_vert_sse2_8(const uint8_t *src, ptrdiff_t src_stride, |
| 1009 uint8_t *dst, ptrdiff_t dst_stride, |
| 1010 const int16_t *filter_x, |
| 1011 int filter_x_stride, |
| 1012 const int16_t *filter_y, |
| 1013 int filter_y_stride, |
| 1014 int w, int h) { |
| 1015 vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1016 filter_x_stride, filter_y, filter_y_stride, |
| 1017 w, h, 8); |
| 1018 } |
| 1019 |
| 1020 void wrap_convolve8_sse2_8(const uint8_t *src, ptrdiff_t src_stride, |
| 1021 uint8_t *dst, ptrdiff_t dst_stride, |
| 1022 const int16_t *filter_x, |
| 1023 int filter_x_stride, |
| 1024 const int16_t *filter_y, |
| 1025 int filter_y_stride, |
| 1026 int w, int h) { |
| 1027 vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1028 filter_x_stride, filter_y, filter_y_stride, w, h, 8); |
| 1029 } |
| 1030 |
| 1031 void wrap_convolve8_avg_sse2_8(const uint8_t *src, ptrdiff_t src_stride, |
| 1032 uint8_t *dst, ptrdiff_t dst_stride, |
| 1033 const int16_t *filter_x, |
| 1034 int filter_x_stride, |
| 1035 const int16_t *filter_y, |
| 1036 int filter_y_stride, |
| 1037 int w, int h) { |
| 1038 vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1039 filter_x_stride, filter_y, filter_y_stride, w, h, 8); |
| 1040 } |
| 1041 |
| 1042 void wrap_convolve8_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride, |
| 1043 uint8_t *dst, ptrdiff_t dst_stride, |
| 1044 const int16_t *filter_x, |
| 1045 int filter_x_stride, |
| 1046 const int16_t *filter_y, |
| 1047 int filter_y_stride, |
| 1048 int w, int h) { |
| 1049 vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1050 filter_x_stride, filter_y, filter_y_stride, w, h, 10); |
| 1051 } |
| 1052 |
| 1053 void wrap_convolve8_avg_horiz_sse2_10(const uint8_t *src, ptrdiff_t src_stride, |
| 1054 uint8_t *dst, ptrdiff_t dst_stride, |
| 1055 const int16_t *filter_x, |
| 1056 int filter_x_stride, |
| 1057 const int16_t *filter_y, |
| 1058 int filter_y_stride, |
| 1059 int w, int h) { |
| 1060 vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1061 filter_x_stride, filter_y, filter_y_stride, w, h, 10); |
| 1062 } |
| 1063 |
| 1064 void wrap_convolve8_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride, |
| 1065 uint8_t *dst, ptrdiff_t dst_stride, |
| 1066 const int16_t *filter_x, |
| 1067 int filter_x_stride, |
| 1068 const int16_t *filter_y, |
| 1069 int filter_y_stride, |
| 1070 int w, int h) { |
| 1071 vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1072 filter_x_stride, filter_y, filter_y_stride, w, h, 10); |
| 1073 } |
| 1074 |
| 1075 void wrap_convolve8_avg_vert_sse2_10(const uint8_t *src, ptrdiff_t src_stride, |
| 1076 uint8_t *dst, ptrdiff_t dst_stride, |
| 1077 const int16_t *filter_x, |
| 1078 int filter_x_stride, |
| 1079 const int16_t *filter_y, |
| 1080 int filter_y_stride, |
| 1081 int w, int h) { |
| 1082 vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1083 filter_x_stride, filter_y, filter_y_stride, w, h, 10); |
| 1084 } |
| 1085 |
| 1086 void wrap_convolve8_sse2_10(const uint8_t *src, ptrdiff_t src_stride, |
| 1087 uint8_t *dst, ptrdiff_t dst_stride, |
| 1088 const int16_t *filter_x, |
| 1089 int filter_x_stride, |
| 1090 const int16_t *filter_y, |
| 1091 int filter_y_stride, |
| 1092 int w, int h) { |
| 1093 vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1094 filter_x_stride, filter_y, filter_y_stride, w, h, 10); |
| 1095 } |
| 1096 |
| 1097 void wrap_convolve8_avg_sse2_10(const uint8_t *src, ptrdiff_t src_stride, |
| 1098 uint8_t *dst, ptrdiff_t dst_stride, |
| 1099 const int16_t *filter_x, |
| 1100 int filter_x_stride, |
| 1101 const int16_t *filter_y, |
| 1102 int filter_y_stride, |
| 1103 int w, int h) { |
| 1104 vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1105 filter_x_stride, filter_y, filter_y_stride, |
| 1106 w, h, 10); |
| 1107 } |
| 1108 |
| 1109 void wrap_convolve8_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride, |
| 1110 uint8_t *dst, ptrdiff_t dst_stride, |
| 1111 const int16_t *filter_x, |
| 1112 int filter_x_stride, |
| 1113 const int16_t *filter_y, |
| 1114 int filter_y_stride, |
| 1115 int w, int h) { |
| 1116 vp9_high_convolve8_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1117 filter_x_stride, filter_y, filter_y_stride, |
| 1118 w, h, 12); |
| 1119 } |
| 1120 |
| 1121 void wrap_convolve8_avg_horiz_sse2_12(const uint8_t *src, ptrdiff_t src_stride, |
| 1122 uint8_t *dst, ptrdiff_t dst_stride, |
| 1123 const int16_t *filter_x, |
| 1124 int filter_x_stride, |
| 1125 const int16_t *filter_y, |
| 1126 int filter_y_stride, |
| 1127 int w, int h) { |
| 1128 vp9_high_convolve8_avg_horiz_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1129 filter_x_stride, filter_y, filter_y_stride, |
| 1130 w, h, 12); |
| 1131 } |
| 1132 |
| 1133 void wrap_convolve8_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride, |
| 1134 uint8_t *dst, ptrdiff_t dst_stride, |
| 1135 const int16_t *filter_x, |
| 1136 int filter_x_stride, |
| 1137 const int16_t *filter_y, |
| 1138 int filter_y_stride, |
| 1139 int w, int h) { |
| 1140 vp9_high_convolve8_vert_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1141 filter_x_stride, filter_y, filter_y_stride, |
| 1142 w, h, 12); |
| 1143 } |
| 1144 |
| 1145 void wrap_convolve8_avg_vert_sse2_12(const uint8_t *src, ptrdiff_t src_stride, |
| 1146 uint8_t *dst, ptrdiff_t dst_stride, |
| 1147 const int16_t *filter_x, |
| 1148 int filter_x_stride, |
| 1149 const int16_t *filter_y, |
| 1150 int filter_y_stride, |
| 1151 int w, int h) { |
| 1152 vp9_high_convolve8_avg_vert_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1153 filter_x_stride, filter_y, filter_y_stride, w
, h, 12); |
| 1154 } |
| 1155 |
| 1156 void wrap_convolve8_sse2_12(const uint8_t *src, ptrdiff_t src_stride, |
| 1157 uint8_t *dst, ptrdiff_t dst_stride, |
| 1158 const int16_t *filter_x, |
| 1159 int filter_x_stride, |
| 1160 const int16_t *filter_y, |
| 1161 int filter_y_stride, |
| 1162 int w, int h) { |
| 1163 vp9_high_convolve8_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1164 filter_x_stride, filter_y, filter_y_stride, w, h, 12); |
| 1165 } |
| 1166 |
| 1167 void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride, |
| 1168 uint8_t *dst, ptrdiff_t dst_stride, |
| 1169 const int16_t *filter_x, |
| 1170 int filter_x_stride, |
| 1171 const int16_t *filter_y, |
| 1172 int filter_y_stride, |
| 1173 int w, int h) { |
| 1174 vp9_high_convolve8_avg_sse2(src, src_stride, dst, dst_stride, filter_x, |
| 1175 filter_x_stride, filter_y, filter_y_stride, w, h,
12); |
| 1176 } |
| 1177 #endif // HAVE_SSE2 && ARCH_X86_64 |
| 1178 |
| 1179 void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride, |
| 1180 uint8_t *dst, ptrdiff_t dst_stride, |
| 1181 const int16_t *filter_x, |
| 1182 int filter_x_stride, |
| 1183 const int16_t *filter_y, |
| 1184 int filter_y_stride, |
| 1185 int w, int h) { |
| 1186 vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, |
| 1187 filter_x_stride, filter_y, filter_y_stride, w, h, 8
); |
| 1188 } |
| 1189 |
| 1190 void wrap_convolve8_avg_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride, |
| 1191 uint8_t *dst, ptrdiff_t dst_stride, |
| 1192 const int16_t *filter_x, |
| 1193 int filter_x_stride, |
| 1194 const int16_t *filter_y, |
| 1195 int filter_y_stride, |
| 1196 int w, int h) { |
| 1197 vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x, |
| 1198 filter_x_stride, filter_y, filter_y_stride, w,
h, 8); |
| 1199 } |
| 1200 |
| 1201 void wrap_convolve8_vert_c_8(const uint8_t *src, ptrdiff_t src_stride, |
| 1202 uint8_t *dst, ptrdiff_t dst_stride, |
| 1203 const int16_t *filter_x, |
| 1204 int filter_x_stride, |
| 1205 const int16_t *filter_y, |
| 1206 int filter_y_stride, |
| 1207 int w, int h) { |
| 1208 vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, |
| 1209 filter_x_stride, filter_y, filter_y_stride, w, h, 8)
; |
| 1210 } |
| 1211 |
| 1212 void wrap_convolve8_avg_vert_c_8(const uint8_t *src, ptrdiff_t src_stride, |
| 1213 uint8_t *dst, ptrdiff_t dst_stride, |
| 1214 const int16_t *filter_x, |
| 1215 int filter_x_stride, |
| 1216 const int16_t *filter_y, |
| 1217 int filter_y_stride, |
| 1218 int w, int h) { |
| 1219 vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x, |
| 1220 filter_x_stride, filter_y, filter_y_stride, w, h
, 8); |
| 1221 } |
| 1222 |
| 1223 void wrap_convolve8_c_8(const uint8_t *src, ptrdiff_t src_stride, |
| 1224 uint8_t *dst, ptrdiff_t dst_stride, |
| 1225 const int16_t *filter_x, |
| 1226 int filter_x_stride, |
| 1227 const int16_t *filter_y, |
| 1228 int filter_y_stride, |
| 1229 int w, int h) { |
| 1230 vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x, |
| 1231 filter_x_stride, filter_y, filter_y_stride, w, h, 8); |
| 1232 } |
| 1233 |
| 1234 void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride, |
| 1235 uint8_t *dst, ptrdiff_t dst_stride, |
| 1236 const int16_t *filter_x, |
| 1237 int filter_x_stride, |
| 1238 const int16_t *filter_y, |
| 1239 int filter_y_stride, |
| 1240 int w, int h) { |
| 1241 vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, |
| 1242 filter_x_stride, filter_y, filter_y_stride, |
| 1243 w, h, 8); |
| 1244 } |
| 1245 |
| 1246 void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride, |
| 1247 uint8_t *dst, ptrdiff_t dst_stride, |
| 1248 const int16_t *filter_x, |
| 1249 int filter_x_stride, |
| 1250 const int16_t *filter_y, |
| 1251 int filter_y_stride, |
| 1252 int w, int h) { |
| 1253 vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, |
| 1254 filter_x_stride, filter_y, filter_y_stride, w, h, 1
0); |
| 1255 } |
| 1256 |
| 1257 void wrap_convolve8_avg_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride, |
| 1258 uint8_t *dst, ptrdiff_t dst_stride, |
| 1259 const int16_t *filter_x, |
| 1260 int filter_x_stride, |
| 1261 const int16_t *filter_y, |
| 1262 int filter_y_stride, |
| 1263 int w, int h) { |
| 1264 vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x, |
| 1265 filter_x_stride, filter_y, filter_y_stride, |
| 1266 w, h, 10); |
| 1267 } |
| 1268 |
| 1269 void wrap_convolve8_vert_c_10(const uint8_t *src, ptrdiff_t src_stride, |
| 1270 uint8_t *dst, ptrdiff_t dst_stride, |
| 1271 const int16_t *filter_x, |
| 1272 int filter_x_stride, |
| 1273 const int16_t *filter_y, |
| 1274 int filter_y_stride, |
| 1275 int w, int h) { |
| 1276 vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, |
| 1277 filter_x_stride, filter_y, filter_y_stride, w, h, 10
); |
| 1278 } |
| 1279 |
| 1280 void wrap_convolve8_avg_vert_c_10(const uint8_t *src, ptrdiff_t src_stride, |
| 1281 uint8_t *dst, ptrdiff_t dst_stride, |
| 1282 const int16_t *filter_x, |
| 1283 int filter_x_stride, |
| 1284 const int16_t *filter_y, |
| 1285 int filter_y_stride, |
| 1286 int w, int h) { |
| 1287 vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x, |
| 1288 filter_x_stride, filter_y, filter_y_stride, w, h
, 10); |
| 1289 } |
| 1290 |
| 1291 void wrap_convolve8_c_10(const uint8_t *src, ptrdiff_t src_stride, |
| 1292 uint8_t *dst, ptrdiff_t dst_stride, |
| 1293 const int16_t *filter_x, |
| 1294 int filter_x_stride, |
| 1295 const int16_t *filter_y, |
| 1296 int filter_y_stride, |
| 1297 int w, int h) { |
| 1298 vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x, |
| 1299 filter_x_stride, filter_y, filter_y_stride, w, h, 10); |
| 1300 } |
| 1301 |
| 1302 void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride, |
| 1303 uint8_t *dst, ptrdiff_t dst_stride, |
| 1304 const int16_t *filter_x, |
| 1305 int filter_x_stride, |
| 1306 const int16_t *filter_y, |
| 1307 int filter_y_stride, |
| 1308 int w, int h) { |
| 1309 vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, |
| 1310 filter_x_stride, filter_y, filter_y_stride, w, h, 10)
; |
| 1311 } |
| 1312 |
| 1313 void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride, |
| 1314 uint8_t *dst, ptrdiff_t dst_stride, |
| 1315 const int16_t *filter_x, |
| 1316 int filter_x_stride, |
| 1317 const int16_t *filter_y, |
| 1318 int filter_y_stride, |
| 1319 int w, int h) { |
| 1320 vp9_high_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, |
| 1321 filter_x_stride, filter_y, filter_y_stride, |
| 1322 w, h, 12); |
| 1323 } |
| 1324 |
| 1325 void wrap_convolve8_avg_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride, |
| 1326 uint8_t *dst, ptrdiff_t dst_stride, |
| 1327 const int16_t *filter_x, |
| 1328 int filter_x_stride, |
| 1329 const int16_t *filter_y, |
| 1330 int filter_y_stride, |
| 1331 int w, int h) { |
| 1332 vp9_high_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x, |
| 1333 filter_x_stride, filter_y, filter_y_stride, |
| 1334 w, h, 12); |
| 1335 } |
| 1336 |
| 1337 void wrap_convolve8_vert_c_12(const uint8_t *src, ptrdiff_t src_stride, |
| 1338 uint8_t *dst, ptrdiff_t dst_stride, |
| 1339 const int16_t *filter_x, |
| 1340 int filter_x_stride, |
| 1341 const int16_t *filter_y, |
| 1342 int filter_y_stride, |
| 1343 int w, int h) { |
| 1344 vp9_high_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, |
| 1345 filter_x_stride, filter_y, filter_y_stride, |
| 1346 w, h, 12); |
| 1347 } |
| 1348 |
| 1349 void wrap_convolve8_avg_vert_c_12(const uint8_t *src, ptrdiff_t src_stride, |
| 1350 uint8_t *dst, ptrdiff_t dst_stride, |
| 1351 const int16_t *filter_x, |
| 1352 int filter_x_stride, |
| 1353 const int16_t *filter_y, |
| 1354 int filter_y_stride, |
| 1355 int w, int h) { |
| 1356 vp9_high_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x, |
| 1357 filter_x_stride, filter_y, filter_y_stride, |
| 1358 w, h, 12); |
| 1359 } |
| 1360 |
| 1361 void wrap_convolve8_c_12(const uint8_t *src, ptrdiff_t src_stride, |
| 1362 uint8_t *dst, ptrdiff_t dst_stride, |
| 1363 const int16_t *filter_x, |
| 1364 int filter_x_stride, |
| 1365 const int16_t *filter_y, |
| 1366 int filter_y_stride, |
| 1367 int w, int h) { |
| 1368 vp9_high_convolve8_c(src, src_stride, dst, dst_stride, filter_x, |
| 1369 filter_x_stride, filter_y, filter_y_stride, |
| 1370 w, h, 12); |
| 1371 } |
| 1372 |
| 1373 void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride, |
| 1374 uint8_t *dst, ptrdiff_t dst_stride, |
| 1375 const int16_t *filter_x, |
| 1376 int filter_x_stride, |
| 1377 const int16_t *filter_y, |
| 1378 int filter_y_stride, |
| 1379 int w, int h) { |
| 1380 vp9_high_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, |
| 1381 filter_x_stride, filter_y, filter_y_stride, |
| 1382 w, h, 12); |
| 1383 } |
| 1384 |
| 1385 const ConvolveFunctions convolve8_c( |
| 1386 wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8, |
| 1387 wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, |
| 1388 wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8); |
| 1389 INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values( |
| 1390 make_tuple(4, 4, &convolve8_c), |
| 1391 make_tuple(8, 4, &convolve8_c), |
| 1392 make_tuple(4, 8, &convolve8_c), |
| 1393 make_tuple(8, 8, &convolve8_c), |
| 1394 make_tuple(16, 8, &convolve8_c), |
| 1395 make_tuple(8, 16, &convolve8_c), |
| 1396 make_tuple(16, 16, &convolve8_c), |
| 1397 make_tuple(32, 16, &convolve8_c), |
| 1398 make_tuple(16, 32, &convolve8_c), |
| 1399 make_tuple(32, 32, &convolve8_c), |
| 1400 make_tuple(64, 32, &convolve8_c), |
| 1401 make_tuple(32, 64, &convolve8_c), |
| 1402 make_tuple(64, 64, &convolve8_c))); |
| 1403 const ConvolveFunctions convolve10_c( |
| 1404 wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10, |
| 1405 wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, |
| 1406 wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10); |
| 1407 INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values( |
| 1408 make_tuple(4, 4, &convolve10_c), |
| 1409 make_tuple(8, 4, &convolve10_c), |
| 1410 make_tuple(4, 8, &convolve10_c), |
| 1411 make_tuple(8, 8, &convolve10_c), |
| 1412 make_tuple(16, 8, &convolve10_c), |
| 1413 make_tuple(8, 16, &convolve10_c), |
| 1414 make_tuple(16, 16, &convolve10_c), |
| 1415 make_tuple(32, 16, &convolve10_c), |
| 1416 make_tuple(16, 32, &convolve10_c), |
| 1417 make_tuple(32, 32, &convolve10_c), |
| 1418 make_tuple(64, 32, &convolve10_c), |
| 1419 make_tuple(32, 64, &convolve10_c), |
| 1420 make_tuple(64, 64, &convolve10_c))); |
| 1421 const ConvolveFunctions convolve12_c( |
| 1422 wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12, |
| 1423 wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, |
| 1424 wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12); |
| 1425 INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values( |
| 1426 make_tuple(4, 4, &convolve12_c), |
| 1427 make_tuple(8, 4, &convolve12_c), |
| 1428 make_tuple(4, 8, &convolve12_c), |
| 1429 make_tuple(8, 8, &convolve12_c), |
| 1430 make_tuple(16, 8, &convolve12_c), |
| 1431 make_tuple(8, 16, &convolve12_c), |
| 1432 make_tuple(16, 16, &convolve12_c), |
| 1433 make_tuple(32, 16, &convolve12_c), |
| 1434 make_tuple(16, 32, &convolve12_c), |
| 1435 make_tuple(32, 32, &convolve12_c), |
| 1436 make_tuple(64, 32, &convolve12_c), |
| 1437 make_tuple(32, 64, &convolve12_c), |
| 1438 make_tuple(64, 64, &convolve12_c))); |
| 1439 |
| 1440 #else |
| 1441 |
584 const ConvolveFunctions convolve8_c( | 1442 const ConvolveFunctions convolve8_c( |
585 vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c, | 1443 vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c, |
586 vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c, | 1444 vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c, |
587 vp9_convolve8_c, vp9_convolve8_avg_c); | 1445 vp9_convolve8_c, vp9_convolve8_avg_c, 0); |
588 | 1446 |
589 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values( | 1447 INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values( |
590 make_tuple(4, 4, &convolve8_c), | 1448 make_tuple(4, 4, &convolve8_c), |
591 make_tuple(8, 4, &convolve8_c), | 1449 make_tuple(8, 4, &convolve8_c), |
592 make_tuple(4, 8, &convolve8_c), | 1450 make_tuple(4, 8, &convolve8_c), |
593 make_tuple(8, 8, &convolve8_c), | 1451 make_tuple(8, 8, &convolve8_c), |
594 make_tuple(16, 8, &convolve8_c), | 1452 make_tuple(16, 8, &convolve8_c), |
595 make_tuple(8, 16, &convolve8_c), | 1453 make_tuple(8, 16, &convolve8_c), |
596 make_tuple(16, 16, &convolve8_c), | 1454 make_tuple(16, 16, &convolve8_c), |
597 make_tuple(32, 16, &convolve8_c), | 1455 make_tuple(32, 16, &convolve8_c), |
598 make_tuple(16, 32, &convolve8_c), | 1456 make_tuple(16, 32, &convolve8_c), |
599 make_tuple(32, 32, &convolve8_c), | 1457 make_tuple(32, 32, &convolve8_c), |
600 make_tuple(64, 32, &convolve8_c), | 1458 make_tuple(64, 32, &convolve8_c), |
601 make_tuple(32, 64, &convolve8_c), | 1459 make_tuple(32, 64, &convolve8_c), |
602 make_tuple(64, 64, &convolve8_c))); | 1460 make_tuple(64, 64, &convolve8_c))); |
| 1461 #endif |
603 | 1462 |
604 #if HAVE_SSE2 | 1463 #if HAVE_SSE2 && ARCH_X86_64 |
| 1464 #if CONFIG_VP9_HIGHBITDEPTH |
| 1465 const ConvolveFunctions convolve8_sse2( |
| 1466 wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8, |
| 1467 wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8, |
| 1468 wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8); |
| 1469 INSTANTIATE_TEST_CASE_P(SSE2_8, ConvolveTest, ::testing::Values( |
| 1470 make_tuple(4, 4, &convolve8_sse2), |
| 1471 make_tuple(8, 4, &convolve8_sse2), |
| 1472 make_tuple(4, 8, &convolve8_sse2), |
| 1473 make_tuple(8, 8, &convolve8_sse2), |
| 1474 make_tuple(16, 8, &convolve8_sse2), |
| 1475 make_tuple(8, 16, &convolve8_sse2), |
| 1476 make_tuple(16, 16, &convolve8_sse2), |
| 1477 make_tuple(32, 16, &convolve8_sse2), |
| 1478 make_tuple(16, 32, &convolve8_sse2), |
| 1479 make_tuple(32, 32, &convolve8_sse2), |
| 1480 make_tuple(64, 32, &convolve8_sse2), |
| 1481 make_tuple(32, 64, &convolve8_sse2), |
| 1482 make_tuple(64, 64, &convolve8_sse2))); |
| 1483 const ConvolveFunctions convolve10_sse2( |
| 1484 wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10, |
| 1485 wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10, |
| 1486 wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10); |
| 1487 INSTANTIATE_TEST_CASE_P(SSE2_10, ConvolveTest, ::testing::Values( |
| 1488 make_tuple(4, 4, &convolve10_sse2), |
| 1489 make_tuple(8, 4, &convolve10_sse2), |
| 1490 make_tuple(4, 8, &convolve10_sse2), |
| 1491 make_tuple(8, 8, &convolve10_sse2), |
| 1492 make_tuple(16, 8, &convolve10_sse2), |
| 1493 make_tuple(8, 16, &convolve10_sse2), |
| 1494 make_tuple(16, 16, &convolve10_sse2), |
| 1495 make_tuple(32, 16, &convolve10_sse2), |
| 1496 make_tuple(16, 32, &convolve10_sse2), |
| 1497 make_tuple(32, 32, &convolve10_sse2), |
| 1498 make_tuple(64, 32, &convolve10_sse2), |
| 1499 make_tuple(32, 64, &convolve10_sse2), |
| 1500 make_tuple(64, 64, &convolve10_sse2))); |
| 1501 const ConvolveFunctions convolve12_sse2( |
| 1502 wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12, |
| 1503 wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12, |
| 1504 wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12); |
| 1505 INSTANTIATE_TEST_CASE_P(SSE2_12, ConvolveTest, ::testing::Values( |
| 1506 make_tuple(4, 4, &convolve12_sse2), |
| 1507 make_tuple(8, 4, &convolve12_sse2), |
| 1508 make_tuple(4, 8, &convolve12_sse2), |
| 1509 make_tuple(8, 8, &convolve12_sse2), |
| 1510 make_tuple(16, 8, &convolve12_sse2), |
| 1511 make_tuple(8, 16, &convolve12_sse2), |
| 1512 make_tuple(16, 16, &convolve12_sse2), |
| 1513 make_tuple(32, 16, &convolve12_sse2), |
| 1514 make_tuple(16, 32, &convolve12_sse2), |
| 1515 make_tuple(32, 32, &convolve12_sse2), |
| 1516 make_tuple(64, 32, &convolve12_sse2), |
| 1517 make_tuple(32, 64, &convolve12_sse2), |
| 1518 make_tuple(64, 64, &convolve12_sse2))); |
| 1519 #else |
605 const ConvolveFunctions convolve8_sse2( | 1520 const ConvolveFunctions convolve8_sse2( |
606 vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2, | 1521 vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2, |
607 vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2, | 1522 vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2, |
608 vp9_convolve8_sse2, vp9_convolve8_avg_sse2); | 1523 vp9_convolve8_sse2, vp9_convolve8_avg_sse2, 0); |
609 | 1524 |
610 INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values( | 1525 INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values( |
611 make_tuple(4, 4, &convolve8_sse2), | 1526 make_tuple(4, 4, &convolve8_sse2), |
612 make_tuple(8, 4, &convolve8_sse2), | 1527 make_tuple(8, 4, &convolve8_sse2), |
613 make_tuple(4, 8, &convolve8_sse2), | 1528 make_tuple(4, 8, &convolve8_sse2), |
614 make_tuple(8, 8, &convolve8_sse2), | 1529 make_tuple(8, 8, &convolve8_sse2), |
615 make_tuple(16, 8, &convolve8_sse2), | 1530 make_tuple(16, 8, &convolve8_sse2), |
616 make_tuple(8, 16, &convolve8_sse2), | 1531 make_tuple(8, 16, &convolve8_sse2), |
617 make_tuple(16, 16, &convolve8_sse2), | 1532 make_tuple(16, 16, &convolve8_sse2), |
618 make_tuple(32, 16, &convolve8_sse2), | 1533 make_tuple(32, 16, &convolve8_sse2), |
619 make_tuple(16, 32, &convolve8_sse2), | 1534 make_tuple(16, 32, &convolve8_sse2), |
620 make_tuple(32, 32, &convolve8_sse2), | 1535 make_tuple(32, 32, &convolve8_sse2), |
621 make_tuple(64, 32, &convolve8_sse2), | 1536 make_tuple(64, 32, &convolve8_sse2), |
622 make_tuple(32, 64, &convolve8_sse2), | 1537 make_tuple(32, 64, &convolve8_sse2), |
623 make_tuple(64, 64, &convolve8_sse2))); | 1538 make_tuple(64, 64, &convolve8_sse2))); |
| 1539 #endif // CONFIG_VP9_HIGHBITDEPTH |
624 #endif | 1540 #endif |
625 | 1541 |
626 #if HAVE_SSSE3 | 1542 #if HAVE_SSSE3 |
627 const ConvolveFunctions convolve8_ssse3( | 1543 const ConvolveFunctions convolve8_ssse3( |
628 vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3, | 1544 vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3, |
629 vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3, | 1545 vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3, |
630 vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3); | 1546 vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3, 0); |
631 | 1547 |
632 INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values( | 1548 INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values( |
633 make_tuple(4, 4, &convolve8_ssse3), | 1549 make_tuple(4, 4, &convolve8_ssse3), |
634 make_tuple(8, 4, &convolve8_ssse3), | 1550 make_tuple(8, 4, &convolve8_ssse3), |
635 make_tuple(4, 8, &convolve8_ssse3), | 1551 make_tuple(4, 8, &convolve8_ssse3), |
636 make_tuple(8, 8, &convolve8_ssse3), | 1552 make_tuple(8, 8, &convolve8_ssse3), |
637 make_tuple(16, 8, &convolve8_ssse3), | 1553 make_tuple(16, 8, &convolve8_ssse3), |
638 make_tuple(8, 16, &convolve8_ssse3), | 1554 make_tuple(8, 16, &convolve8_ssse3), |
639 make_tuple(16, 16, &convolve8_ssse3), | 1555 make_tuple(16, 16, &convolve8_ssse3), |
640 make_tuple(32, 16, &convolve8_ssse3), | 1556 make_tuple(32, 16, &convolve8_ssse3), |
641 make_tuple(16, 32, &convolve8_ssse3), | 1557 make_tuple(16, 32, &convolve8_ssse3), |
642 make_tuple(32, 32, &convolve8_ssse3), | 1558 make_tuple(32, 32, &convolve8_ssse3), |
643 make_tuple(64, 32, &convolve8_ssse3), | 1559 make_tuple(64, 32, &convolve8_ssse3), |
644 make_tuple(32, 64, &convolve8_ssse3), | 1560 make_tuple(32, 64, &convolve8_ssse3), |
645 make_tuple(64, 64, &convolve8_ssse3))); | 1561 make_tuple(64, 64, &convolve8_ssse3))); |
646 #endif | 1562 #endif |
647 | 1563 |
648 #if HAVE_AVX2 | 1564 #if HAVE_AVX2 && HAVE_SSSE3 |
649 const ConvolveFunctions convolve8_avx2( | 1565 const ConvolveFunctions convolve8_avx2( |
650 vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3, | 1566 vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3, |
651 vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3, | 1567 vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3, |
652 vp9_convolve8_avx2, vp9_convolve8_avg_ssse3); | 1568 vp9_convolve8_avx2, vp9_convolve8_avg_ssse3, 0); |
653 | 1569 |
654 INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values( | 1570 INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values( |
655 make_tuple(4, 4, &convolve8_avx2), | 1571 make_tuple(4, 4, &convolve8_avx2), |
656 make_tuple(8, 4, &convolve8_avx2), | 1572 make_tuple(8, 4, &convolve8_avx2), |
657 make_tuple(4, 8, &convolve8_avx2), | 1573 make_tuple(4, 8, &convolve8_avx2), |
658 make_tuple(8, 8, &convolve8_avx2), | 1574 make_tuple(8, 8, &convolve8_avx2), |
659 make_tuple(8, 16, &convolve8_avx2), | 1575 make_tuple(8, 16, &convolve8_avx2), |
660 make_tuple(16, 8, &convolve8_avx2), | 1576 make_tuple(16, 8, &convolve8_avx2), |
661 make_tuple(16, 16, &convolve8_avx2), | 1577 make_tuple(16, 16, &convolve8_avx2), |
662 make_tuple(32, 16, &convolve8_avx2), | 1578 make_tuple(32, 16, &convolve8_avx2), |
663 make_tuple(16, 32, &convolve8_avx2), | 1579 make_tuple(16, 32, &convolve8_avx2), |
664 make_tuple(32, 32, &convolve8_avx2), | 1580 make_tuple(32, 32, &convolve8_avx2), |
665 make_tuple(64, 32, &convolve8_avx2), | 1581 make_tuple(64, 32, &convolve8_avx2), |
666 make_tuple(32, 64, &convolve8_avx2), | 1582 make_tuple(32, 64, &convolve8_avx2), |
667 make_tuple(64, 64, &convolve8_avx2))); | 1583 make_tuple(64, 64, &convolve8_avx2))); |
668 #endif | 1584 #endif // HAVE_AVX2 && HAVE_SSSE3 |
669 | 1585 |
670 #if HAVE_NEON_ASM | 1586 #if HAVE_NEON_ASM |
671 const ConvolveFunctions convolve8_neon( | 1587 const ConvolveFunctions convolve8_neon( |
672 vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon, | 1588 vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon, |
673 vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon, | 1589 vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon, |
674 vp9_convolve8_neon, vp9_convolve8_avg_neon); | 1590 vp9_convolve8_neon, vp9_convolve8_avg_neon, 0); |
675 | 1591 |
676 INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values( | 1592 INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values( |
677 make_tuple(4, 4, &convolve8_neon), | 1593 make_tuple(4, 4, &convolve8_neon), |
678 make_tuple(8, 4, &convolve8_neon), | 1594 make_tuple(8, 4, &convolve8_neon), |
679 make_tuple(4, 8, &convolve8_neon), | 1595 make_tuple(4, 8, &convolve8_neon), |
680 make_tuple(8, 8, &convolve8_neon), | 1596 make_tuple(8, 8, &convolve8_neon), |
681 make_tuple(16, 8, &convolve8_neon), | 1597 make_tuple(16, 8, &convolve8_neon), |
682 make_tuple(8, 16, &convolve8_neon), | 1598 make_tuple(8, 16, &convolve8_neon), |
683 make_tuple(16, 16, &convolve8_neon), | 1599 make_tuple(16, 16, &convolve8_neon), |
684 make_tuple(32, 16, &convolve8_neon), | 1600 make_tuple(32, 16, &convolve8_neon), |
685 make_tuple(16, 32, &convolve8_neon), | 1601 make_tuple(16, 32, &convolve8_neon), |
686 make_tuple(32, 32, &convolve8_neon), | 1602 make_tuple(32, 32, &convolve8_neon), |
687 make_tuple(64, 32, &convolve8_neon), | 1603 make_tuple(64, 32, &convolve8_neon), |
688 make_tuple(32, 64, &convolve8_neon), | 1604 make_tuple(32, 64, &convolve8_neon), |
689 make_tuple(64, 64, &convolve8_neon))); | 1605 make_tuple(64, 64, &convolve8_neon))); |
690 #endif | 1606 #endif |
691 | 1607 |
692 #if HAVE_DSPR2 | 1608 #if HAVE_DSPR2 |
693 const ConvolveFunctions convolve8_dspr2( | 1609 const ConvolveFunctions convolve8_dspr2( |
694 vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2, | 1610 vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2, |
695 vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2, | 1611 vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2, |
696 vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2); | 1612 vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2, 0); |
697 | 1613 |
698 INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values( | 1614 INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values( |
699 make_tuple(4, 4, &convolve8_dspr2), | 1615 make_tuple(4, 4, &convolve8_dspr2), |
700 make_tuple(8, 4, &convolve8_dspr2), | 1616 make_tuple(8, 4, &convolve8_dspr2), |
701 make_tuple(4, 8, &convolve8_dspr2), | 1617 make_tuple(4, 8, &convolve8_dspr2), |
702 make_tuple(8, 8, &convolve8_dspr2), | 1618 make_tuple(8, 8, &convolve8_dspr2), |
703 make_tuple(16, 8, &convolve8_dspr2), | 1619 make_tuple(16, 8, &convolve8_dspr2), |
704 make_tuple(8, 16, &convolve8_dspr2), | 1620 make_tuple(8, 16, &convolve8_dspr2), |
705 make_tuple(16, 16, &convolve8_dspr2), | 1621 make_tuple(16, 16, &convolve8_dspr2), |
706 make_tuple(32, 16, &convolve8_dspr2), | 1622 make_tuple(32, 16, &convolve8_dspr2), |
707 make_tuple(16, 32, &convolve8_dspr2), | 1623 make_tuple(16, 32, &convolve8_dspr2), |
708 make_tuple(32, 32, &convolve8_dspr2), | 1624 make_tuple(32, 32, &convolve8_dspr2), |
709 make_tuple(64, 32, &convolve8_dspr2), | 1625 make_tuple(64, 32, &convolve8_dspr2), |
710 make_tuple(32, 64, &convolve8_dspr2), | 1626 make_tuple(32, 64, &convolve8_dspr2), |
711 make_tuple(64, 64, &convolve8_dspr2))); | 1627 make_tuple(64, 64, &convolve8_dspr2))); |
712 #endif | 1628 #endif |
713 } // namespace | 1629 } // namespace |
OLD | NEW |