| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "./vp9_rtcd.h" | 11 #include "./vp9_rtcd.h" |
| 12 #include "./vpx_dsp_rtcd.h" |
| 12 | 13 |
| 13 #include "vpx_ports/mem.h" | 14 #include "vpx_ports/mem.h" |
| 14 #include "vpx/vpx_integer.h" | 15 #include "vpx/vpx_integer.h" |
| 15 | 16 |
| 16 #include "vp9/common/vp9_common.h" | 17 #include "vp9/common/vp9_common.h" |
| 17 #include "vp9/common/vp9_filter.h" | 18 #include "vp9/common/vp9_filter.h" |
| 18 | 19 |
| 19 #include "vp9/encoder/vp9_variance.h" | 20 #include "vp9/encoder/vp9_variance.h" |
| 20 | 21 |
| 21 void variance(const uint8_t *a, int a_stride, | |
| 22 const uint8_t *b, int b_stride, | |
| 23 int w, int h, unsigned int *sse, int *sum) { | |
| 24 int i, j; | |
| 25 | |
| 26 *sum = 0; | |
| 27 *sse = 0; | |
| 28 | |
| 29 for (i = 0; i < h; i++) { | |
| 30 for (j = 0; j < w; j++) { | |
| 31 const int diff = a[j] - b[j]; | |
| 32 *sum += diff; | |
| 33 *sse += diff * diff; | |
| 34 } | |
| 35 | |
| 36 a += a_stride; | |
| 37 b += b_stride; | |
| 38 } | |
| 39 } | |
| 40 | |
| 41 // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal | 22 // Applies a 1-D 2-tap bi-linear filter to the source block in either horizontal |
| 42 // or vertical direction to produce the filtered output block. Used to implement | 23 // or vertical direction to produce the filtered output block. Used to implement |
| 43 // first-pass of 2-D separable filter. | 24 // first-pass of 2-D separable filter. |
| 44 // | 25 // |
| 45 // Produces int32_t output to retain precision for next pass. Two filter taps | 26 // Produces int32_t output to retain precision for next pass. Two filter taps |
| 46 // should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is | 27 // should sum to VP9_FILTER_WEIGHT. pixel_step defines whether the filter is |
| 47 // applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It | 28 // applied horizontally (pixel_step=1) or vertically (pixel_step=stride). It |
| 48 // defines the offset required to move from one input to the next. | 29 // defines the offset required to move from one input to the next. |
| 49 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, | 30 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, |
| 50 uint16_t *output_ptr, | 31 uint16_t *output_ptr, |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 93 (int)src_ptr[pixel_step] * vp9_filter[1], | 74 (int)src_ptr[pixel_step] * vp9_filter[1], |
| 94 FILTER_BITS); | 75 FILTER_BITS); |
| 95 src_ptr++; | 76 src_ptr++; |
| 96 } | 77 } |
| 97 | 78 |
| 98 src_ptr += src_pixels_per_line - output_width; | 79 src_ptr += src_pixels_per_line - output_width; |
| 99 output_ptr += output_width; | 80 output_ptr += output_width; |
| 100 } | 81 } |
| 101 } | 82 } |
| 102 | 83 |
| 103 unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) { | |
| 104 unsigned int i, sum = 0; | |
| 105 | |
| 106 for (i = 0; i < 256; ++i) { | |
| 107 sum += src_ptr[i] * src_ptr[i]; | |
| 108 } | |
| 109 | |
| 110 return sum; | |
| 111 } | |
| 112 | |
| 113 #define VAR(W, H) \ | |
| 114 unsigned int vp9_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ | |
| 115 const uint8_t *b, int b_stride, \ | |
| 116 unsigned int *sse) { \ | |
| 117 int sum; \ | |
| 118 variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ | |
| 119 return *sse - (((int64_t)sum * sum) / (W * H)); \ | |
| 120 } | |
| 121 | |
| 122 #define SUBPIX_VAR(W, H) \ | 84 #define SUBPIX_VAR(W, H) \ |
| 123 unsigned int vp9_sub_pixel_variance##W##x##H##_c( \ | 85 unsigned int vp9_sub_pixel_variance##W##x##H##_c( \ |
| 124 const uint8_t *src, int src_stride, \ | 86 const uint8_t *src, int src_stride, \ |
| 125 int xoffset, int yoffset, \ | 87 int xoffset, int yoffset, \ |
| 126 const uint8_t *dst, int dst_stride, \ | 88 const uint8_t *dst, int dst_stride, \ |
| 127 unsigned int *sse) { \ | 89 unsigned int *sse) { \ |
| 128 uint16_t fdata3[(H + 1) * W]; \ | 90 uint16_t fdata3[(H + 1) * W]; \ |
| 129 uint8_t temp2[H * W]; \ | 91 uint8_t temp2[H * W]; \ |
| 130 \ | 92 \ |
| 131 var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ | 93 var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ |
| 132 BILINEAR_FILTERS_2TAP(xoffset)); \ | 94 BILINEAR_FILTERS_2TAP(xoffset)); \ |
| 133 var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ | 95 var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ |
| 134 BILINEAR_FILTERS_2TAP(yoffset)); \ | 96 BILINEAR_FILTERS_2TAP(yoffset)); \ |
| 135 \ | 97 \ |
| 136 return vp9_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \ | 98 return vpx_variance##W##x##H##_c(temp2, W, dst, dst_stride, sse); \ |
| 137 } | 99 } |
| 138 | 100 |
| 139 #define SUBPIX_AVG_VAR(W, H) \ | 101 #define SUBPIX_AVG_VAR(W, H) \ |
| 140 unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \ | 102 unsigned int vp9_sub_pixel_avg_variance##W##x##H##_c( \ |
| 141 const uint8_t *src, int src_stride, \ | 103 const uint8_t *src, int src_stride, \ |
| 142 int xoffset, int yoffset, \ | 104 int xoffset, int yoffset, \ |
| 143 const uint8_t *dst, int dst_stride, \ | 105 const uint8_t *dst, int dst_stride, \ |
| 144 unsigned int *sse, \ | 106 unsigned int *sse, \ |
| 145 const uint8_t *second_pred) { \ | 107 const uint8_t *second_pred) { \ |
| 146 uint16_t fdata3[(H + 1) * W]; \ | 108 uint16_t fdata3[(H + 1) * W]; \ |
| 147 uint8_t temp2[H * W]; \ | 109 uint8_t temp2[H * W]; \ |
| 148 DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \ | 110 DECLARE_ALIGNED(16, uint8_t, temp3[H * W]); \ |
| 149 \ | 111 \ |
| 150 var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ | 112 var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, W, \ |
| 151 BILINEAR_FILTERS_2TAP(xoffset)); \ | 113 BILINEAR_FILTERS_2TAP(xoffset)); \ |
| 152 var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ | 114 var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ |
| 153 BILINEAR_FILTERS_2TAP(yoffset)); \ | 115 BILINEAR_FILTERS_2TAP(yoffset)); \ |
| 154 \ | 116 \ |
| 155 vp9_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \ | 117 vpx_comp_avg_pred(temp3, second_pred, W, H, temp2, W); \ |
| 156 \ | 118 \ |
| 157 return vp9_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ | 119 return vpx_variance##W##x##H##_c(temp3, W, dst, dst_stride, sse); \ |
| 158 } | 120 } |
| 159 | 121 |
| 160 void vp9_get16x16var_c(const uint8_t *src_ptr, int source_stride, | |
| 161 const uint8_t *ref_ptr, int ref_stride, | |
| 162 unsigned int *sse, int *sum) { | |
| 163 variance(src_ptr, source_stride, ref_ptr, ref_stride, 16, 16, sse, sum); | |
| 164 } | |
| 165 | |
| 166 void vp9_get8x8var_c(const uint8_t *src_ptr, int source_stride, | |
| 167 const uint8_t *ref_ptr, int ref_stride, | |
| 168 unsigned int *sse, int *sum) { | |
| 169 variance(src_ptr, source_stride, ref_ptr, ref_stride, 8, 8, sse, sum); | |
| 170 } | |
| 171 | |
| 172 unsigned int vp9_mse16x16_c(const uint8_t *src, int src_stride, | |
| 173 const uint8_t *ref, int ref_stride, | |
| 174 unsigned int *sse) { | |
| 175 int sum; | |
| 176 variance(src, src_stride, ref, ref_stride, 16, 16, sse, &sum); | |
| 177 return *sse; | |
| 178 } | |
| 179 | |
| 180 unsigned int vp9_mse16x8_c(const uint8_t *src, int src_stride, | |
| 181 const uint8_t *ref, int ref_stride, | |
| 182 unsigned int *sse) { | |
| 183 int sum; | |
| 184 variance(src, src_stride, ref, ref_stride, 16, 8, sse, &sum); | |
| 185 return *sse; | |
| 186 } | |
| 187 | |
| 188 unsigned int vp9_mse8x16_c(const uint8_t *src, int src_stride, | |
| 189 const uint8_t *ref, int ref_stride, | |
| 190 unsigned int *sse) { | |
| 191 int sum; | |
| 192 variance(src, src_stride, ref, ref_stride, 8, 16, sse, &sum); | |
| 193 return *sse; | |
| 194 } | |
| 195 | |
| 196 unsigned int vp9_mse8x8_c(const uint8_t *src, int src_stride, | |
| 197 const uint8_t *ref, int ref_stride, | |
| 198 unsigned int *sse) { | |
| 199 int sum; | |
| 200 variance(src, src_stride, ref, ref_stride, 8, 8, sse, &sum); | |
| 201 return *sse; | |
| 202 } | |
| 203 | |
| 204 VAR(4, 4) | |
| 205 SUBPIX_VAR(4, 4) | 122 SUBPIX_VAR(4, 4) |
| 206 SUBPIX_AVG_VAR(4, 4) | 123 SUBPIX_AVG_VAR(4, 4) |
| 207 | 124 |
| 208 VAR(4, 8) | |
| 209 SUBPIX_VAR(4, 8) | 125 SUBPIX_VAR(4, 8) |
| 210 SUBPIX_AVG_VAR(4, 8) | 126 SUBPIX_AVG_VAR(4, 8) |
| 211 | 127 |
| 212 VAR(8, 4) | |
| 213 SUBPIX_VAR(8, 4) | 128 SUBPIX_VAR(8, 4) |
| 214 SUBPIX_AVG_VAR(8, 4) | 129 SUBPIX_AVG_VAR(8, 4) |
| 215 | 130 |
| 216 VAR(8, 8) | |
| 217 SUBPIX_VAR(8, 8) | 131 SUBPIX_VAR(8, 8) |
| 218 SUBPIX_AVG_VAR(8, 8) | 132 SUBPIX_AVG_VAR(8, 8) |
| 219 | 133 |
| 220 VAR(8, 16) | |
| 221 SUBPIX_VAR(8, 16) | 134 SUBPIX_VAR(8, 16) |
| 222 SUBPIX_AVG_VAR(8, 16) | 135 SUBPIX_AVG_VAR(8, 16) |
| 223 | 136 |
| 224 VAR(16, 8) | |
| 225 SUBPIX_VAR(16, 8) | 137 SUBPIX_VAR(16, 8) |
| 226 SUBPIX_AVG_VAR(16, 8) | 138 SUBPIX_AVG_VAR(16, 8) |
| 227 | 139 |
| 228 VAR(16, 16) | |
| 229 SUBPIX_VAR(16, 16) | 140 SUBPIX_VAR(16, 16) |
| 230 SUBPIX_AVG_VAR(16, 16) | 141 SUBPIX_AVG_VAR(16, 16) |
| 231 | 142 |
| 232 VAR(16, 32) | |
| 233 SUBPIX_VAR(16, 32) | 143 SUBPIX_VAR(16, 32) |
| 234 SUBPIX_AVG_VAR(16, 32) | 144 SUBPIX_AVG_VAR(16, 32) |
| 235 | 145 |
| 236 VAR(32, 16) | |
| 237 SUBPIX_VAR(32, 16) | 146 SUBPIX_VAR(32, 16) |
| 238 SUBPIX_AVG_VAR(32, 16) | 147 SUBPIX_AVG_VAR(32, 16) |
| 239 | 148 |
| 240 VAR(32, 32) | |
| 241 SUBPIX_VAR(32, 32) | 149 SUBPIX_VAR(32, 32) |
| 242 SUBPIX_AVG_VAR(32, 32) | 150 SUBPIX_AVG_VAR(32, 32) |
| 243 | 151 |
| 244 VAR(32, 64) | |
| 245 SUBPIX_VAR(32, 64) | 152 SUBPIX_VAR(32, 64) |
| 246 SUBPIX_AVG_VAR(32, 64) | 153 SUBPIX_AVG_VAR(32, 64) |
| 247 | 154 |
| 248 VAR(64, 32) | |
| 249 SUBPIX_VAR(64, 32) | 155 SUBPIX_VAR(64, 32) |
| 250 SUBPIX_AVG_VAR(64, 32) | 156 SUBPIX_AVG_VAR(64, 32) |
| 251 | 157 |
| 252 VAR(64, 64) | |
| 253 SUBPIX_VAR(64, 64) | 158 SUBPIX_VAR(64, 64) |
| 254 SUBPIX_AVG_VAR(64, 64) | 159 SUBPIX_AVG_VAR(64, 64) |
| 255 | 160 |
| 256 void vp9_comp_avg_pred(uint8_t *comp_pred, const uint8_t *pred, int width, | |
| 257 int height, const uint8_t *ref, int ref_stride) { | |
| 258 int i, j; | |
| 259 | |
| 260 for (i = 0; i < height; i++) { | |
| 261 for (j = 0; j < width; j++) { | |
| 262 const int tmp = pred[j] + ref[j]; | |
| 263 comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); | |
| 264 } | |
| 265 comp_pred += width; | |
| 266 pred += width; | |
| 267 ref += ref_stride; | |
| 268 } | |
| 269 } | |
| 270 | |
| 271 #if CONFIG_VP9_HIGHBITDEPTH | 161 #if CONFIG_VP9_HIGHBITDEPTH |
| 272 void highbd_variance64(const uint8_t *a8, int a_stride, | |
| 273 const uint8_t *b8, int b_stride, | |
| 274 int w, int h, uint64_t *sse, | |
| 275 uint64_t *sum) { | |
| 276 int i, j; | |
| 277 | |
| 278 uint16_t *a = CONVERT_TO_SHORTPTR(a8); | |
| 279 uint16_t *b = CONVERT_TO_SHORTPTR(b8); | |
| 280 *sum = 0; | |
| 281 *sse = 0; | |
| 282 | |
| 283 for (i = 0; i < h; i++) { | |
| 284 for (j = 0; j < w; j++) { | |
| 285 const int diff = a[j] - b[j]; | |
| 286 *sum += diff; | |
| 287 *sse += diff * diff; | |
| 288 } | |
| 289 a += a_stride; | |
| 290 b += b_stride; | |
| 291 } | |
| 292 } | |
| 293 | |
| 294 void highbd_variance(const uint8_t *a8, int a_stride, | |
| 295 const uint8_t *b8, int b_stride, | |
| 296 int w, int h, unsigned int *sse, | |
| 297 int *sum) { | |
| 298 uint64_t sse_long = 0; | |
| 299 uint64_t sum_long = 0; | |
| 300 highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); | |
| 301 *sse = (unsigned int)sse_long; | |
| 302 *sum = (int)sum_long; | |
| 303 } | |
| 304 | |
| 305 void highbd_10_variance(const uint8_t *a8, int a_stride, | |
| 306 const uint8_t *b8, int b_stride, | |
| 307 int w, int h, unsigned int *sse, | |
| 308 int *sum) { | |
| 309 uint64_t sse_long = 0; | |
| 310 uint64_t sum_long = 0; | |
| 311 highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); | |
| 312 *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2); | |
| 313 *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 4); | |
| 314 } | |
| 315 | |
| 316 void highbd_12_variance(const uint8_t *a8, int a_stride, | |
| 317 const uint8_t *b8, int b_stride, | |
| 318 int w, int h, unsigned int *sse, | |
| 319 int *sum) { | |
| 320 uint64_t sse_long = 0; | |
| 321 uint64_t sum_long = 0; | |
| 322 highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); | |
| 323 *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4); | |
| 324 *sse = (unsigned int)ROUND_POWER_OF_TWO(sse_long, 8); | |
| 325 } | |
| 326 | |
| 327 static void highbd_var_filter_block2d_bil_first_pass( | 162 static void highbd_var_filter_block2d_bil_first_pass( |
| 328 const uint8_t *src_ptr8, | 163 const uint8_t *src_ptr8, |
| 329 uint16_t *output_ptr, | 164 uint16_t *output_ptr, |
| 330 unsigned int src_pixels_per_line, | 165 unsigned int src_pixels_per_line, |
| 331 int pixel_step, | 166 int pixel_step, |
| 332 unsigned int output_height, | 167 unsigned int output_height, |
| 333 unsigned int output_width, | 168 unsigned int output_width, |
| 334 const int16_t *vp9_filter) { | 169 const int16_t *vp9_filter) { |
| 335 unsigned int i, j; | 170 unsigned int i, j; |
| 336 uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8); | 171 uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8); |
| (...skipping 30 matching lines...) Expand all Loading... |
| 367 (int)src_ptr[pixel_step] * vp9_filter[1], | 202 (int)src_ptr[pixel_step] * vp9_filter[1], |
| 368 FILTER_BITS); | 203 FILTER_BITS); |
| 369 src_ptr++; | 204 src_ptr++; |
| 370 } | 205 } |
| 371 | 206 |
| 372 src_ptr += src_pixels_per_line - output_width; | 207 src_ptr += src_pixels_per_line - output_width; |
| 373 output_ptr += output_width; | 208 output_ptr += output_width; |
| 374 } | 209 } |
| 375 } | 210 } |
| 376 | 211 |
| 377 #define HIGHBD_VAR(W, H) \ | |
| 378 unsigned int vp9_highbd_variance##W##x##H##_c(const uint8_t *a, int a_stride, \ | |
| 379 const uint8_t *b, int b_stride, \ | |
| 380 unsigned int *sse) { \ | |
| 381 int sum; \ | |
| 382 highbd_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ | |
| 383 return *sse - (((int64_t)sum * sum) / (W * H)); \ | |
| 384 } \ | |
| 385 \ | |
| 386 unsigned int vp9_highbd_10_variance##W##x##H##_c(const uint8_t *a, \ | |
| 387 int a_stride, \ | |
| 388 const uint8_t *b, \ | |
| 389 int b_stride, \ | |
| 390 unsigned int *sse) { \ | |
| 391 int sum; \ | |
| 392 highbd_10_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ | |
| 393 return *sse - (((int64_t)sum * sum) / (W * H)); \ | |
| 394 } \ | |
| 395 \ | |
| 396 unsigned int vp9_highbd_12_variance##W##x##H##_c(const uint8_t *a, \ | |
| 397 int a_stride, \ | |
| 398 const uint8_t *b, \ | |
| 399 int b_stride, \ | |
| 400 unsigned int *sse) { \ | |
| 401 int sum; \ | |
| 402 highbd_12_variance(a, a_stride, b, b_stride, W, H, sse, &sum); \ | |
| 403 return *sse - (((int64_t)sum * sum) / (W * H)); \ | |
| 404 } | |
| 405 | |
| 406 #define HIGHBD_SUBPIX_VAR(W, H) \ | 212 #define HIGHBD_SUBPIX_VAR(W, H) \ |
| 407 unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \ | 213 unsigned int vp9_highbd_sub_pixel_variance##W##x##H##_c( \ |
| 408 const uint8_t *src, int src_stride, \ | 214 const uint8_t *src, int src_stride, \ |
| 409 int xoffset, int yoffset, \ | 215 int xoffset, int yoffset, \ |
| 410 const uint8_t *dst, int dst_stride, \ | 216 const uint8_t *dst, int dst_stride, \ |
| 411 unsigned int *sse) { \ | 217 unsigned int *sse) { \ |
| 412 uint16_t fdata3[(H + 1) * W]; \ | 218 uint16_t fdata3[(H + 1) * W]; \ |
| 413 uint16_t temp2[H * W]; \ | 219 uint16_t temp2[H * W]; \ |
| 414 \ | 220 \ |
| 415 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ | 221 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ |
| 416 W, BILINEAR_FILTERS_2TAP(xoffset)); \ | 222 W, BILINEAR_FILTERS_2TAP(xoffset)); \ |
| 417 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ | 223 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ |
| 418 BILINEAR_FILTERS_2TAP(yoffset)); \ | 224 BILINEAR_FILTERS_2TAP(yoffset)); \ |
| 419 \ | 225 \ |
| 420 return vp9_highbd_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ | 226 return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, dst, \ |
| 421 dst_stride, sse); \ | 227 dst_stride, sse); \ |
| 422 } \ | 228 } \ |
| 423 \ | 229 \ |
| 424 unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \ | 230 unsigned int vp9_highbd_10_sub_pixel_variance##W##x##H##_c( \ |
| 425 const uint8_t *src, int src_stride, \ | 231 const uint8_t *src, int src_stride, \ |
| 426 int xoffset, int yoffset, \ | 232 int xoffset, int yoffset, \ |
| 427 const uint8_t *dst, int dst_stride, \ | 233 const uint8_t *dst, int dst_stride, \ |
| 428 unsigned int *sse) { \ | 234 unsigned int *sse) { \ |
| 429 uint16_t fdata3[(H + 1) * W]; \ | 235 uint16_t fdata3[(H + 1) * W]; \ |
| 430 uint16_t temp2[H * W]; \ | 236 uint16_t temp2[H * W]; \ |
| 431 \ | 237 \ |
| 432 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ | 238 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ |
| 433 W, BILINEAR_FILTERS_2TAP(xoffset)); \ | 239 W, BILINEAR_FILTERS_2TAP(xoffset)); \ |
| 434 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ | 240 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ |
| 435 BILINEAR_FILTERS_2TAP(yoffset)); \ | 241 BILINEAR_FILTERS_2TAP(yoffset)); \ |
| 436 \ | 242 \ |
| 437 return vp9_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ | 243 return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ |
| 438 W, dst, dst_stride, sse); \ | 244 W, dst, dst_stride, sse); \ |
| 439 } \ | 245 } \ |
| 440 \ | 246 \ |
| 441 unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \ | 247 unsigned int vp9_highbd_12_sub_pixel_variance##W##x##H##_c( \ |
| 442 const uint8_t *src, int src_stride, \ | 248 const uint8_t *src, int src_stride, \ |
| 443 int xoffset, int yoffset, \ | 249 int xoffset, int yoffset, \ |
| 444 const uint8_t *dst, int dst_stride, \ | 250 const uint8_t *dst, int dst_stride, \ |
| 445 unsigned int *sse) { \ | 251 unsigned int *sse) { \ |
| 446 uint16_t fdata3[(H + 1) * W]; \ | 252 uint16_t fdata3[(H + 1) * W]; \ |
| 447 uint16_t temp2[H * W]; \ | 253 uint16_t temp2[H * W]; \ |
| 448 \ | 254 \ |
| 449 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ | 255 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ |
| 450 W, BILINEAR_FILTERS_2TAP(xoffset)); \ | 256 W, BILINEAR_FILTERS_2TAP(xoffset)); \ |
| 451 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ | 257 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ |
| 452 BILINEAR_FILTERS_2TAP(yoffset)); \ | 258 BILINEAR_FILTERS_2TAP(yoffset)); \ |
| 453 \ | 259 \ |
| 454 return vp9_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ | 260 return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), \ |
| 455 W, dst, dst_stride, sse); \ | 261 W, dst, dst_stride, sse); \ |
| 456 } | 262 } |
| 457 | 263 |
| 458 #define HIGHBD_SUBPIX_AVG_VAR(W, H) \ | 264 #define HIGHBD_SUBPIX_AVG_VAR(W, H) \ |
| 459 unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \ | 265 unsigned int vp9_highbd_sub_pixel_avg_variance##W##x##H##_c( \ |
| 460 const uint8_t *src, int src_stride, \ | 266 const uint8_t *src, int src_stride, \ |
| 461 int xoffset, int yoffset, \ | 267 int xoffset, int yoffset, \ |
| 462 const uint8_t *dst, int dst_stride, \ | 268 const uint8_t *dst, int dst_stride, \ |
| 463 unsigned int *sse, \ | 269 unsigned int *sse, \ |
| 464 const uint8_t *second_pred) { \ | 270 const uint8_t *second_pred) { \ |
| 465 uint16_t fdata3[(H + 1) * W]; \ | 271 uint16_t fdata3[(H + 1) * W]; \ |
| 466 uint16_t temp2[H * W]; \ | 272 uint16_t temp2[H * W]; \ |
| 467 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ | 273 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ |
| 468 \ | 274 \ |
| 469 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ | 275 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ |
| 470 W, BILINEAR_FILTERS_2TAP(xoffset)); \ | 276 W, BILINEAR_FILTERS_2TAP(xoffset)); \ |
| 471 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ | 277 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ |
| 472 BILINEAR_FILTERS_2TAP(yoffset)); \ | 278 BILINEAR_FILTERS_2TAP(yoffset)); \ |
| 473 \ | 279 \ |
| 474 vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ | 280 vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ |
| 475 CONVERT_TO_BYTEPTR(temp2), W); \ | 281 CONVERT_TO_BYTEPTR(temp2), W); \ |
| 476 \ | 282 \ |
| 477 return vp9_highbd_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ | 283 return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, dst, \ |
| 478 dst_stride, sse); \ | 284 dst_stride, sse); \ |
| 479 } \ | 285 } \ |
| 480 \ | 286 \ |
| 481 unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ | 287 unsigned int vp9_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ |
| 482 const uint8_t *src, int src_stride, \ | 288 const uint8_t *src, int src_stride, \ |
| 483 int xoffset, int yoffset, \ | 289 int xoffset, int yoffset, \ |
| 484 const uint8_t *dst, int dst_stride, \ | 290 const uint8_t *dst, int dst_stride, \ |
| 485 unsigned int *sse, \ | 291 unsigned int *sse, \ |
| 486 const uint8_t *second_pred) { \ | 292 const uint8_t *second_pred) { \ |
| 487 uint16_t fdata3[(H + 1) * W]; \ | 293 uint16_t fdata3[(H + 1) * W]; \ |
| 488 uint16_t temp2[H * W]; \ | 294 uint16_t temp2[H * W]; \ |
| 489 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ | 295 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ |
| 490 \ | 296 \ |
| 491 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ | 297 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ |
| 492 W, BILINEAR_FILTERS_2TAP(xoffset)); \ | 298 W, BILINEAR_FILTERS_2TAP(xoffset)); \ |
| 493 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ | 299 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ |
| 494 BILINEAR_FILTERS_2TAP(yoffset)); \ | 300 BILINEAR_FILTERS_2TAP(yoffset)); \ |
| 495 \ | 301 \ |
| 496 vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ | 302 vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ |
| 497 CONVERT_TO_BYTEPTR(temp2), W); \ | 303 CONVERT_TO_BYTEPTR(temp2), W); \ |
| 498 \ | 304 \ |
| 499 return vp9_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ | 305 return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ |
| 500 W, dst, dst_stride, sse); \ | 306 W, dst, dst_stride, sse); \ |
| 501 } \ | 307 } \ |
| 502 \ | 308 \ |
| 503 unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \ | 309 unsigned int vp9_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \ |
| 504 const uint8_t *src, int src_stride, \ | 310 const uint8_t *src, int src_stride, \ |
| 505 int xoffset, int yoffset, \ | 311 int xoffset, int yoffset, \ |
| 506 const uint8_t *dst, int dst_stride, \ | 312 const uint8_t *dst, int dst_stride, \ |
| 507 unsigned int *sse, \ | 313 unsigned int *sse, \ |
| 508 const uint8_t *second_pred) { \ | 314 const uint8_t *second_pred) { \ |
| 509 uint16_t fdata3[(H + 1) * W]; \ | 315 uint16_t fdata3[(H + 1) * W]; \ |
| 510 uint16_t temp2[H * W]; \ | 316 uint16_t temp2[H * W]; \ |
| 511 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ | 317 DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ |
| 512 \ | 318 \ |
| 513 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ | 319 highbd_var_filter_block2d_bil_first_pass(src, fdata3, src_stride, 1, H + 1, \ |
| 514 W, BILINEAR_FILTERS_2TAP(xoffset)); \ | 320 W, BILINEAR_FILTERS_2TAP(xoffset)); \ |
| 515 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ | 321 highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ |
| 516 BILINEAR_FILTERS_2TAP(yoffset)); \ | 322 BILINEAR_FILTERS_2TAP(yoffset)); \ |
| 517 \ | 323 \ |
| 518 vp9_highbd_comp_avg_pred(temp3, second_pred, W, H, \ | 324 vpx_highbd_comp_avg_pred(temp3, second_pred, W, H, \ |
| 519 CONVERT_TO_BYTEPTR(temp2), W); \ | 325 CONVERT_TO_BYTEPTR(temp2), W); \ |
| 520 \ | 326 \ |
| 521 return vp9_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ | 327 return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), \ |
| 522 W, dst, dst_stride, sse); \ | 328 W, dst, dst_stride, sse); \ |
| 523 } | 329 } |
| 524 | 330 |
| 525 #define HIGHBD_GET_VAR(S) \ | |
| 526 void vp9_highbd_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ | |
| 527 const uint8_t *ref, int ref_stride, \ | |
| 528 unsigned int *sse, int *sum) { \ | |
| 529 highbd_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ | |
| 530 } \ | |
| 531 \ | |
| 532 void vp9_highbd_10_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ | |
| 533 const uint8_t *ref, int ref_stride, \ | |
| 534 unsigned int *sse, int *sum) { \ | |
| 535 highbd_10_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ | |
| 536 } \ | |
| 537 \ | |
| 538 void vp9_highbd_12_get##S##x##S##var_c(const uint8_t *src, int src_stride, \ | |
| 539 const uint8_t *ref, int ref_stride, \ | |
| 540 unsigned int *sse, int *sum) { \ | |
| 541 highbd_12_variance(src, src_stride, ref, ref_stride, S, S, sse, sum); \ | |
| 542 } | |
| 543 | |
| 544 #define HIGHBD_MSE(W, H) \ | |
| 545 unsigned int vp9_highbd_mse##W##x##H##_c(const uint8_t *src, \ | |
| 546 int src_stride, \ | |
| 547 const uint8_t *ref, \ | |
| 548 int ref_stride, \ | |
| 549 unsigned int *sse) { \ | |
| 550 int sum; \ | |
| 551 highbd_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ | |
| 552 return *sse; \ | |
| 553 } \ | |
| 554 \ | |
| 555 unsigned int vp9_highbd_10_mse##W##x##H##_c(const uint8_t *src, \ | |
| 556 int src_stride, \ | |
| 557 const uint8_t *ref, \ | |
| 558 int ref_stride, \ | |
| 559 unsigned int *sse) { \ | |
| 560 int sum; \ | |
| 561 highbd_10_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ | |
| 562 return *sse; \ | |
| 563 } \ | |
| 564 \ | |
| 565 unsigned int vp9_highbd_12_mse##W##x##H##_c(const uint8_t *src, \ | |
| 566 int src_stride, \ | |
| 567 const uint8_t *ref, \ | |
| 568 int ref_stride, \ | |
| 569 unsigned int *sse) { \ | |
| 570 int sum; \ | |
| 571 highbd_12_variance(src, src_stride, ref, ref_stride, W, H, sse, &sum); \ | |
| 572 return *sse; \ | |
| 573 } | |
| 574 | |
| 575 HIGHBD_GET_VAR(8) | |
| 576 HIGHBD_GET_VAR(16) | |
| 577 | |
| 578 HIGHBD_MSE(16, 16) | |
| 579 HIGHBD_MSE(16, 8) | |
| 580 HIGHBD_MSE(8, 16) | |
| 581 HIGHBD_MSE(8, 8) | |
| 582 | |
| 583 HIGHBD_VAR(4, 4) | |
| 584 HIGHBD_SUBPIX_VAR(4, 4) | 331 HIGHBD_SUBPIX_VAR(4, 4) |
| 585 HIGHBD_SUBPIX_AVG_VAR(4, 4) | 332 HIGHBD_SUBPIX_AVG_VAR(4, 4) |
| 586 | 333 |
| 587 HIGHBD_VAR(4, 8) | |
| 588 HIGHBD_SUBPIX_VAR(4, 8) | 334 HIGHBD_SUBPIX_VAR(4, 8) |
| 589 HIGHBD_SUBPIX_AVG_VAR(4, 8) | 335 HIGHBD_SUBPIX_AVG_VAR(4, 8) |
| 590 | 336 |
| 591 HIGHBD_VAR(8, 4) | |
| 592 HIGHBD_SUBPIX_VAR(8, 4) | 337 HIGHBD_SUBPIX_VAR(8, 4) |
| 593 HIGHBD_SUBPIX_AVG_VAR(8, 4) | 338 HIGHBD_SUBPIX_AVG_VAR(8, 4) |
| 594 | 339 |
| 595 HIGHBD_VAR(8, 8) | |
| 596 HIGHBD_SUBPIX_VAR(8, 8) | 340 HIGHBD_SUBPIX_VAR(8, 8) |
| 597 HIGHBD_SUBPIX_AVG_VAR(8, 8) | 341 HIGHBD_SUBPIX_AVG_VAR(8, 8) |
| 598 | 342 |
| 599 HIGHBD_VAR(8, 16) | |
| 600 HIGHBD_SUBPIX_VAR(8, 16) | 343 HIGHBD_SUBPIX_VAR(8, 16) |
| 601 HIGHBD_SUBPIX_AVG_VAR(8, 16) | 344 HIGHBD_SUBPIX_AVG_VAR(8, 16) |
| 602 | 345 |
| 603 HIGHBD_VAR(16, 8) | |
| 604 HIGHBD_SUBPIX_VAR(16, 8) | 346 HIGHBD_SUBPIX_VAR(16, 8) |
| 605 HIGHBD_SUBPIX_AVG_VAR(16, 8) | 347 HIGHBD_SUBPIX_AVG_VAR(16, 8) |
| 606 | 348 |
| 607 HIGHBD_VAR(16, 16) | |
| 608 HIGHBD_SUBPIX_VAR(16, 16) | 349 HIGHBD_SUBPIX_VAR(16, 16) |
| 609 HIGHBD_SUBPIX_AVG_VAR(16, 16) | 350 HIGHBD_SUBPIX_AVG_VAR(16, 16) |
| 610 | 351 |
| 611 HIGHBD_VAR(16, 32) | |
| 612 HIGHBD_SUBPIX_VAR(16, 32) | 352 HIGHBD_SUBPIX_VAR(16, 32) |
| 613 HIGHBD_SUBPIX_AVG_VAR(16, 32) | 353 HIGHBD_SUBPIX_AVG_VAR(16, 32) |
| 614 | 354 |
| 615 HIGHBD_VAR(32, 16) | |
| 616 HIGHBD_SUBPIX_VAR(32, 16) | 355 HIGHBD_SUBPIX_VAR(32, 16) |
| 617 HIGHBD_SUBPIX_AVG_VAR(32, 16) | 356 HIGHBD_SUBPIX_AVG_VAR(32, 16) |
| 618 | 357 |
| 619 HIGHBD_VAR(32, 32) | |
| 620 HIGHBD_SUBPIX_VAR(32, 32) | 358 HIGHBD_SUBPIX_VAR(32, 32) |
| 621 HIGHBD_SUBPIX_AVG_VAR(32, 32) | 359 HIGHBD_SUBPIX_AVG_VAR(32, 32) |
| 622 | 360 |
| 623 HIGHBD_VAR(32, 64) | |
| 624 HIGHBD_SUBPIX_VAR(32, 64) | 361 HIGHBD_SUBPIX_VAR(32, 64) |
| 625 HIGHBD_SUBPIX_AVG_VAR(32, 64) | 362 HIGHBD_SUBPIX_AVG_VAR(32, 64) |
| 626 | 363 |
| 627 HIGHBD_VAR(64, 32) | |
| 628 HIGHBD_SUBPIX_VAR(64, 32) | 364 HIGHBD_SUBPIX_VAR(64, 32) |
| 629 HIGHBD_SUBPIX_AVG_VAR(64, 32) | 365 HIGHBD_SUBPIX_AVG_VAR(64, 32) |
| 630 | 366 |
| 631 HIGHBD_VAR(64, 64) | |
| 632 HIGHBD_SUBPIX_VAR(64, 64) | 367 HIGHBD_SUBPIX_VAR(64, 64) |
| 633 HIGHBD_SUBPIX_AVG_VAR(64, 64) | 368 HIGHBD_SUBPIX_AVG_VAR(64, 64) |
| 634 | |
| 635 void vp9_highbd_comp_avg_pred(uint16_t *comp_pred, const uint8_t *pred8, | |
| 636 int width, int height, const uint8_t *ref8, | |
| 637 int ref_stride) { | |
| 638 int i, j; | |
| 639 uint16_t *pred = CONVERT_TO_SHORTPTR(pred8); | |
| 640 uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); | |
| 641 for (i = 0; i < height; i++) { | |
| 642 for (j = 0; j < width; j++) { | |
| 643 const int tmp = pred[j] + ref[j]; | |
| 644 comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); | |
| 645 } | |
| 646 comp_pred += width; | |
| 647 pred += width; | |
| 648 ref += ref_stride; | |
| 649 } | |
| 650 } | |
| 651 #endif // CONFIG_VP9_HIGHBITDEPTH | 369 #endif // CONFIG_VP9_HIGHBITDEPTH |
| OLD | NEW |