OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
| 11 #include "./vp9_rtcd.h" |
| 12 |
| 13 #include "vpx_ports/mem.h" |
| 14 #include "vpx/vpx_integer.h" |
| 15 |
| 16 #include "vp9/common/vp9_common.h" |
| 17 #include "vp9/common/vp9_filter.h" |
| 18 |
| 19 #include "vp9/encoder/vp9_variance.h" |
| 20 |
/* Accumulates the sum of differences and the sum of squared differences
 * between a w x h block of src_ptr and the co-located block of ref_ptr.
 *
 * src_ptr / source_stride : first block and the distance between its rows.
 * ref_ptr / recon_stride  : second block and the distance between its rows.
 * w, h                    : block width and height in pixels.
 * sse                     : receives the sum of squared differences.
 * sum                     : receives the signed sum of differences.
 *
 * Both outputs are set to zero when w or h is not positive.
 */
void variance(const uint8_t *src_ptr,
              int source_stride,
              const uint8_t *ref_ptr,
              int recon_stride,
              int w,
              int h,
              unsigned int *sse,
              int *sum) {
  int total = 0;
  unsigned int sq_total = 0;
  int row;

  for (row = 0; row < h; ++row) {
    int col;

    for (col = 0; col < w; ++col) {
      const int delta = src_ptr[col] - ref_ptr[col];
      total += delta;
      sq_total += delta * delta;
    }

    // Step both blocks down one row.
    src_ptr += source_stride;
    ref_ptr += recon_stride;
  }

  *sum = total;
  *sse = sq_total;
}
| 46 |
| 47 /**************************************************************************** |
| 48 * |
| 49 * ROUTINE : filter_block2d_bil_first_pass |
| 50 * |
| 51 * INPUTS : uint8_t *src_ptr : Pointer to source block. |
| 52 * uint32_t src_pixels_per_line : Stride of input block. |
| 53 * uint32_t pixel_step : Offset between filter input |
| 54 * samples (see notes). |
| 55 * uint32_t output_height : Input block height. |
| 56 * uint32_t output_width : Input block width. |
| 57 * int32_t *vp9_filter : Array of 2 bi-linear filter |
| 58 * taps. |
| 59 * |
| 60 * OUTPUTS : int32_t *output_ptr : Pointer to filtered block. |
| 61 * |
| 62 * RETURNS : void |
| 63 * |
| 64 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in |
| 65 * either horizontal or vertical direction to produce the |
| 66 * filtered output block. Used to implement first-pass |
| 67 * of 2-D separable filter. |
| 68 * |
| 69 * SPECIAL NOTES : Produces int32_t output to retain precision for next pass. |
| 70 * Two filter taps should sum to VP9_FILTER_WEIGHT. |
| 71 * pixel_step defines whether the filter is applied |
| 72 * horizontally (pixel_step=1) or vertically (pixel_step= |
| 73 * stride). |
| 74 * It defines the offset required to move from one input |
| 75 * to the next. |
| 76 * |
| 77 ****************************************************************************/ |
| 78 static void var_filter_block2d_bil_first_pass(const uint8_t *src_ptr, |
| 79 uint16_t *output_ptr, |
| 80 unsigned int src_pixels_per_line, |
| 81 int pixel_step, |
| 82 unsigned int output_height, |
| 83 unsigned int output_width, |
| 84 const int16_t *vp9_filter) { |
| 85 unsigned int i, j; |
| 86 |
| 87 for (i = 0; i < output_height; i++) { |
| 88 for (j = 0; j < output_width; j++) { |
| 89 output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + |
| 90 (int)src_ptr[pixel_step] * vp9_filter[1], |
| 91 FILTER_BITS); |
| 92 |
| 93 src_ptr++; |
| 94 } |
| 95 |
| 96 // Next row... |
| 97 src_ptr += src_pixels_per_line - output_width; |
| 98 output_ptr += output_width; |
| 99 } |
| 100 } |
| 101 |
| 102 /**************************************************************************** |
| 103 * |
| 104 * ROUTINE : filter_block2d_bil_second_pass |
| 105 * |
| 106 * INPUTS : int32_t *src_ptr : Pointer to source block. |
| 107 * uint32_t src_pixels_per_line : Stride of input block. |
| 108 * uint32_t pixel_step : Offset between filter input |
| 109 * samples (see notes). |
| 110 * uint32_t output_height : Input block height. |
| 111 * uint32_t output_width : Input block width. |
| 112 * int32_t *vp9_filter : Array of 2 bi-linear filter |
| 113 * taps. |
| 114 * |
| 115 * OUTPUTS : uint16_t *output_ptr : Pointer to filtered block. |
| 116 * |
| 117 * RETURNS : void |
| 118 * |
| 119 * FUNCTION : Applies a 1-D 2-tap bi-linear filter to the source block in |
| 120 * either horizontal or vertical direction to produce the |
| 121 * filtered output block. Used to implement second-pass |
| 122 * of 2-D separable filter. |
| 123 * |
| 124 * SPECIAL NOTES : Requires 32-bit input as produced by |
| 125 * filter_block2d_bil_first_pass. |
| 126 * Two filter taps should sum to VP9_FILTER_WEIGHT. |
| 127 * pixel_step defines whether the filter is applied |
| 128 * horizontally (pixel_step=1) or vertically (pixel_step= |
| 129 * stride). |
| 130 * It defines the offset required to move from one input |
| 131 * to the next. |
| 132 * |
| 133 ****************************************************************************/ |
| 134 static void var_filter_block2d_bil_second_pass(const uint16_t *src_ptr, |
| 135 uint8_t *output_ptr, |
| 136 unsigned int src_pixels_per_line, |
| 137 unsigned int pixel_step, |
| 138 unsigned int output_height, |
| 139 unsigned int output_width, |
| 140 const int16_t *vp9_filter) { |
| 141 unsigned int i, j; |
| 142 |
| 143 for (i = 0; i < output_height; i++) { |
| 144 for (j = 0; j < output_width; j++) { |
| 145 output_ptr[j] = ROUND_POWER_OF_TWO((int)src_ptr[0] * vp9_filter[0] + |
| 146 (int)src_ptr[pixel_step] * vp9_filter[1], |
| 147 FILTER_BITS); |
| 148 src_ptr++; |
| 149 } |
| 150 |
| 151 src_ptr += src_pixels_per_line - output_width; |
| 152 output_ptr += output_width; |
| 153 } |
| 154 } |
| 155 |
/* Returns the sum of the squares of the first 256 values at src_ptr. */
unsigned int vp9_get_mb_ss_c(const int16_t *src_ptr) {
  const int16_t *cur = src_ptr;
  const int16_t *const end = src_ptr + 256;
  unsigned int total = 0;

  while (cur != end) {
    total += (*cur) * (*cur);
    ++cur;
  }

  return total;
}
| 165 |
/* 64x32 block: stores the sum of squared differences in *sse and returns
 * it minus (sum of differences)^2 / (64 * 32). */
unsigned int vp9_variance64x32_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq_scaled;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 32,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 64 * 32 == 2^11, so the shift divides by the pixel count.
  sum_sq_scaled = ((int64_t)diff_sum * diff_sum) >> 11;
  return (unsigned int)(sq_err - sum_sq_scaled);
}
| 178 |
/* 64x32 variance of dst_ptr against src_ptr after 2-tap filtering of
 * src_ptr; xoffset and yoffset select the horizontal and vertical taps. */
unsigned int vp9_sub_pixel_variance64x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass_out[65 * 64];  // Horizontally filtered rows
  uint8_t filtered[68 * 64];         // Fully filtered block

  // 33 rows: the vertical pass reads one row below each of its 32 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass_out,
                                    src_pixels_per_line, 1, 33, 64,
                                    horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass_out, filtered,
                                     64, 64, 32, 64, vert_taps);

  return vp9_variance64x32(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
| 199 |
| 200 unsigned int vp9_sub_pixel_avg_variance64x32_c(const uint8_t *src_ptr, |
| 201 int src_pixels_per_line, |
| 202 int xoffset, |
| 203 int yoffset, |
| 204 const uint8_t *dst_ptr, |
| 205 int dst_pixels_per_line, |
| 206 unsigned int *sse, |
| 207 const uint8_t *second_pred) { |
| 208 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering |
| 209 uint8_t temp2[68 * 64]; |
| 210 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer |
| 211 const int16_t *hfilter, *vfilter; |
| 212 |
| 213 hfilter = BILINEAR_FILTERS_2TAP(xoffset); |
| 214 vfilter = BILINEAR_FILTERS_2TAP(yoffset); |
| 215 |
| 216 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, |
| 217 1, 33, 64, hfilter); |
| 218 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 32, 64, vfilter); |
| 219 comp_avg_pred(temp3, second_pred, 64, 32, temp2, 64); |
| 220 return vp9_variance64x32(temp3, 64, dst_ptr, dst_pixels_per_line, sse); |
| 221 } |
| 222 |
/* 32x64 block: stores the sum of squared differences in *sse and returns
 * it minus (sum of differences)^2 / (32 * 64). */
unsigned int vp9_variance32x64_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq_scaled;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 64,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 32 * 64 == 2^11, so the shift divides by the pixel count.
  sum_sq_scaled = ((int64_t)diff_sum * diff_sum) >> 11;
  return (unsigned int)(sq_err - sum_sq_scaled);
}
| 235 |
/* 32x64 variance of dst_ptr against src_ptr after 2-tap filtering of
 * src_ptr; xoffset and yoffset select the horizontal and vertical taps. */
unsigned int vp9_sub_pixel_variance32x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass_out[65 * 64];  // Horizontally filtered rows
  uint8_t filtered[68 * 64];         // Fully filtered block

  // 65 rows: the vertical pass reads one row below each of its 64 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass_out,
                                    src_pixels_per_line, 1, 65, 32,
                                    horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass_out, filtered,
                                     32, 32, 64, 32, vert_taps);

  return vp9_variance32x64(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
| 256 |
| 257 unsigned int vp9_sub_pixel_avg_variance32x64_c(const uint8_t *src_ptr, |
| 258 int src_pixels_per_line, |
| 259 int xoffset, |
| 260 int yoffset, |
| 261 const uint8_t *dst_ptr, |
| 262 int dst_pixels_per_line, |
| 263 unsigned int *sse, |
| 264 const uint8_t *second_pred) { |
| 265 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering |
| 266 uint8_t temp2[68 * 64]; |
| 267 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 64); // compound pred buffer |
| 268 const int16_t *hfilter, *vfilter; |
| 269 |
| 270 hfilter = BILINEAR_FILTERS_2TAP(xoffset); |
| 271 vfilter = BILINEAR_FILTERS_2TAP(yoffset); |
| 272 |
| 273 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, |
| 274 1, 65, 32, hfilter); |
| 275 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 64, 32, vfilter); |
| 276 comp_avg_pred(temp3, second_pred, 32, 64, temp2, 32); |
| 277 return vp9_variance32x64(temp3, 32, dst_ptr, dst_pixels_per_line, sse); |
| 278 } |
| 279 |
/* 32x16 block: stores the sum of squared differences in *sse and returns
 * it minus (sum of differences)^2 / (32 * 16). */
unsigned int vp9_variance32x16_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq_scaled;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 32 * 16 == 2^9, so the shift divides by the pixel count.
  sum_sq_scaled = ((int64_t)diff_sum * diff_sum) >> 9;
  return (unsigned int)(sq_err - sum_sq_scaled);
}
| 292 |
/* 32x16 variance of dst_ptr against src_ptr after 2-tap filtering of
 * src_ptr; xoffset and yoffset select the horizontal and vertical taps. */
unsigned int vp9_sub_pixel_variance32x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass_out[33 * 32];  // Horizontally filtered rows
  uint8_t filtered[36 * 32];         // Fully filtered block

  // 17 rows: the vertical pass reads one row below each of its 16 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass_out,
                                    src_pixels_per_line, 1, 17, 32,
                                    horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass_out, filtered,
                                     32, 32, 16, 32, vert_taps);

  return vp9_variance32x16(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
| 313 |
| 314 unsigned int vp9_sub_pixel_avg_variance32x16_c(const uint8_t *src_ptr, |
| 315 int src_pixels_per_line, |
| 316 int xoffset, |
| 317 int yoffset, |
| 318 const uint8_t *dst_ptr, |
| 319 int dst_pixels_per_line, |
| 320 unsigned int *sse, |
| 321 const uint8_t *second_pred) { |
| 322 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering |
| 323 uint8_t temp2[36 * 32]; |
| 324 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 16); // compound pred buffer |
| 325 const int16_t *hfilter, *vfilter; |
| 326 |
| 327 hfilter = BILINEAR_FILTERS_2TAP(xoffset); |
| 328 vfilter = BILINEAR_FILTERS_2TAP(yoffset); |
| 329 |
| 330 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, |
| 331 1, 17, 32, hfilter); |
| 332 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 16, 32, vfilter); |
| 333 comp_avg_pred(temp3, second_pred, 32, 16, temp2, 32); |
| 334 return vp9_variance32x16(temp3, 32, dst_ptr, dst_pixels_per_line, sse); |
| 335 } |
| 336 |
/* 16x32 block: stores the sum of squared differences in *sse and returns
 * it minus (sum of differences)^2 / (16 * 32). */
unsigned int vp9_variance16x32_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq_scaled;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 32,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 16 * 32 == 2^9, so the shift divides by the pixel count.
  sum_sq_scaled = ((int64_t)diff_sum * diff_sum) >> 9;
  return (unsigned int)(sq_err - sum_sq_scaled);
}
| 349 |
/* 16x32 variance of dst_ptr against src_ptr after 2-tap filtering of
 * src_ptr; xoffset and yoffset select the horizontal and vertical taps. */
unsigned int vp9_sub_pixel_variance16x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass_out[33 * 32];  // Horizontally filtered rows
  uint8_t filtered[36 * 32];         // Fully filtered block

  // 33 rows: the vertical pass reads one row below each of its 32 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass_out,
                                    src_pixels_per_line, 1, 33, 16,
                                    horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass_out, filtered,
                                     16, 16, 32, 16, vert_taps);

  return vp9_variance16x32(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
| 370 |
| 371 unsigned int vp9_sub_pixel_avg_variance16x32_c(const uint8_t *src_ptr, |
| 372 int src_pixels_per_line, |
| 373 int xoffset, |
| 374 int yoffset, |
| 375 const uint8_t *dst_ptr, |
| 376 int dst_pixels_per_line, |
| 377 unsigned int *sse, |
| 378 const uint8_t *second_pred) { |
| 379 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering |
| 380 uint8_t temp2[36 * 32]; |
| 381 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 32); // compound pred buffer |
| 382 const int16_t *hfilter, *vfilter; |
| 383 |
| 384 hfilter = BILINEAR_FILTERS_2TAP(xoffset); |
| 385 vfilter = BILINEAR_FILTERS_2TAP(yoffset); |
| 386 |
| 387 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, |
| 388 1, 33, 16, hfilter); |
| 389 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 32, 16, vfilter); |
| 390 comp_avg_pred(temp3, second_pred, 16, 32, temp2, 16); |
| 391 return vp9_variance16x32(temp3, 16, dst_ptr, dst_pixels_per_line, sse); |
| 392 } |
| 393 |
/* 64x64 block: stores the sum of squared differences in *sse and returns
 * it minus (sum of differences)^2 / (64 * 64). */
unsigned int vp9_variance64x64_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq_scaled;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 64, 64,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 64 * 64 == 2^12, so the shift divides by the pixel count.
  sum_sq_scaled = ((int64_t)diff_sum * diff_sum) >> 12;
  return (unsigned int)(sq_err - sum_sq_scaled);
}
| 406 |
/* 32x32 block: stores the sum of squared differences in *sse and returns
 * it minus (sum of differences)^2 / (32 * 32). */
unsigned int vp9_variance32x32_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  int64_t sum_sq_scaled;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 32, 32,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 32 * 32 == 2^10, so the shift divides by the pixel count.
  sum_sq_scaled = ((int64_t)diff_sum * diff_sum) >> 10;
  return (unsigned int)(sq_err - sum_sq_scaled);
}
| 419 |
/* 16x16 block: stores the sum of squared differences in *sse and returns
 * it minus (sum of differences)^2 / (16 * 16). */
unsigned int vp9_variance16x16_c(const uint8_t *src_ptr,
                                 int source_stride,
                                 const uint8_t *ref_ptr,
                                 int recon_stride,
                                 unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int sum_sq_scaled;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 16 * 16 == 2^8, so the shift divides by the pixel count.
  sum_sq_scaled = ((unsigned int)diff_sum * diff_sum) >> 8;
  return sq_err - sum_sq_scaled;
}
| 432 |
/* 8x16 block: stores the sum of squared differences in *sse and returns
 * it minus (sum of differences)^2 / (8 * 16). */
unsigned int vp9_variance8x16_c(const uint8_t *src_ptr,
                                int source_stride,
                                const uint8_t *ref_ptr,
                                int recon_stride,
                                unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int sum_sq_scaled;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 8 * 16 == 2^7, so the shift divides by the pixel count.
  sum_sq_scaled = ((unsigned int)diff_sum * diff_sum) >> 7;
  return sq_err - sum_sq_scaled;
}
| 445 |
/* 16x8 block: stores the sum of squared differences in *sse and returns
 * it minus (sum of differences)^2 / (16 * 8). */
unsigned int vp9_variance16x8_c(const uint8_t *src_ptr,
                                int source_stride,
                                const uint8_t *ref_ptr,
                                int recon_stride,
                                unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int sum_sq_scaled;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 16 * 8 == 2^7, so the shift divides by the pixel count.
  sum_sq_scaled = ((unsigned int)diff_sum * diff_sum) >> 7;
  return sq_err - sum_sq_scaled;
}
| 458 |
/* Writes the sum of squared differences (*sse) and the sum of differences
 * (*sum) between two 8x8 blocks by forwarding to variance(). */
void vp9_get_sse_sum_8x8_c(const uint8_t *src_ptr, int source_stride,
                           const uint8_t *ref_ptr, int ref_stride,
                           unsigned int *sse, int *sum) {
  variance(src_ptr, source_stride,
           ref_ptr, ref_stride,
           8, 8,
           sse, sum);
}
| 464 |
/* 8x8 block: stores the sum of squared differences in *sse and returns
 * it minus (sum of differences)^2 / (8 * 8). */
unsigned int vp9_variance8x8_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int sum_sq_scaled;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 8 * 8 == 2^6, so the shift divides by the pixel count.
  sum_sq_scaled = ((unsigned int)diff_sum * diff_sum) >> 6;
  return sq_err - sum_sq_scaled;
}
| 477 |
/* 8x4 block: stores the sum of squared differences in *sse and returns
 * it minus (sum of differences)^2 / (8 * 4). */
unsigned int vp9_variance8x4_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int sum_sq_scaled;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 4,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 8 * 4 == 2^5, so the shift divides by the pixel count.
  sum_sq_scaled = ((unsigned int)diff_sum * diff_sum) >> 5;
  return sq_err - sum_sq_scaled;
}
| 490 |
/* 4x8 block: stores the sum of squared differences in *sse and returns
 * it minus (sum of differences)^2 / (4 * 8). */
unsigned int vp9_variance4x8_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int sum_sq_scaled;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 8,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 4 * 8 == 2^5, so the shift divides by the pixel count.
  sum_sq_scaled = ((unsigned int)diff_sum * diff_sum) >> 5;
  return sq_err - sum_sq_scaled;
}
| 503 |
/* 4x4 block: stores the sum of squared differences in *sse and returns
 * it minus (sum of differences)^2 / (4 * 4). */
unsigned int vp9_variance4x4_c(const uint8_t *src_ptr,
                               int source_stride,
                               const uint8_t *ref_ptr,
                               int recon_stride,
                               unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;
  unsigned int sum_sq_scaled;

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 4, 4,
           &sq_err, &diff_sum);
  *sse = sq_err;

  // 4 * 4 == 2^4, so the shift divides by the pixel count.
  sum_sq_scaled = ((unsigned int)diff_sum * diff_sum) >> 4;
  return sq_err - sum_sq_scaled;
}
| 516 |
| 517 |
/* 16x16 block: stores the sum of squared differences in *sse and also
 * returns it. No mean correction is applied. */
unsigned int vp9_mse16x16_c(const uint8_t *src_ptr,
                            int source_stride,
                            const uint8_t *ref_ptr,
                            int recon_stride,
                            unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;  // Required by variance(); not used here.

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  return sq_err;
}
| 530 |
/* 16x8 block: stores the sum of squared differences in *sse and also
 * returns it. No mean correction is applied. */
unsigned int vp9_mse16x8_c(const uint8_t *src_ptr,
                           int source_stride,
                           const uint8_t *ref_ptr,
                           int recon_stride,
                           unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;  // Required by variance(); not used here.

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 16, 8,
           &sq_err, &diff_sum);
  *sse = sq_err;

  return sq_err;
}
| 543 |
/* 8x16 block: stores the sum of squared differences in *sse and also
 * returns it. No mean correction is applied. */
unsigned int vp9_mse8x16_c(const uint8_t *src_ptr,
                           int source_stride,
                           const uint8_t *ref_ptr,
                           int recon_stride,
                           unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;  // Required by variance(); not used here.

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 16,
           &sq_err, &diff_sum);
  *sse = sq_err;

  return sq_err;
}
| 556 |
/* 8x8 block: stores the sum of squared differences in *sse and also
 * returns it. No mean correction is applied. */
unsigned int vp9_mse8x8_c(const uint8_t *src_ptr,
                          int source_stride,
                          const uint8_t *ref_ptr,
                          int recon_stride,
                          unsigned int *sse) {
  unsigned int sq_err;
  int diff_sum;  // Required by variance(); not used here.

  variance(src_ptr, source_stride, ref_ptr, recon_stride, 8, 8,
           &sq_err, &diff_sum);
  *sse = sq_err;

  return sq_err;
}
| 569 |
| 570 |
/* 4x4 variance of dst_ptr against src_ptr after 2-tap filtering of
 * src_ptr; xoffset and yoffset select the horizontal and vertical taps. */
unsigned int vp9_sub_pixel_variance4x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint8_t filtered[20 * 16];        // Fully filtered block
  uint16_t first_pass_out[5 * 4];   // Horizontally filtered rows

  // Horizontal pass. 5 rows: the vertical pass reads one row below each of
  // its 4 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass_out,
                                    src_pixels_per_line, 1, 5, 4,
                                    horiz_taps);

  // Vertical pass.
  var_filter_block2d_bil_second_pass(first_pass_out, filtered,
                                     4, 4, 4, 4, vert_taps);

  return vp9_variance4x4(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
| 594 |
| 595 unsigned int vp9_sub_pixel_avg_variance4x4_c(const uint8_t *src_ptr, |
| 596 int src_pixels_per_line, |
| 597 int xoffset, |
| 598 int yoffset, |
| 599 const uint8_t *dst_ptr, |
| 600 int dst_pixels_per_line, |
| 601 unsigned int *sse, |
| 602 const uint8_t *second_pred) { |
| 603 uint8_t temp2[20 * 16]; |
| 604 const int16_t *hfilter, *vfilter; |
| 605 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 4); // compound pred buffer |
| 606 uint16_t fdata3[5 * 4]; // Temp data buffer used in filtering |
| 607 |
| 608 hfilter = BILINEAR_FILTERS_2TAP(xoffset); |
| 609 vfilter = BILINEAR_FILTERS_2TAP(yoffset); |
| 610 |
| 611 // First filter 1d Horizontal |
| 612 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, |
| 613 1, 5, 4, hfilter); |
| 614 |
| 615 // Now filter Verticaly |
| 616 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 4, 4, vfilter); |
| 617 comp_avg_pred(temp3, second_pred, 4, 4, temp2, 4); |
| 618 return vp9_variance4x4(temp3, 4, dst_ptr, dst_pixels_per_line, sse); |
| 619 } |
| 620 |
/* 8x8 variance of dst_ptr against src_ptr after 2-tap filtering of
 * src_ptr; xoffset and yoffset select the horizontal and vertical taps. */
unsigned int vp9_sub_pixel_variance8x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass_out[9 * 8];  // Horizontally filtered rows
  uint8_t filtered[20 * 16];       // Fully filtered block

  // 9 rows: the vertical pass reads one row below each of its 8 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass_out,
                                    src_pixels_per_line, 1, 9, 8,
                                    horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass_out, filtered,
                                     8, 8, 8, 8, vert_taps);

  return vp9_variance8x8(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
| 641 |
| 642 unsigned int vp9_sub_pixel_avg_variance8x8_c(const uint8_t *src_ptr, |
| 643 int src_pixels_per_line, |
| 644 int xoffset, |
| 645 int yoffset, |
| 646 const uint8_t *dst_ptr, |
| 647 int dst_pixels_per_line, |
| 648 unsigned int *sse, |
| 649 const uint8_t *second_pred) { |
| 650 uint16_t fdata3[9 * 8]; // Temp data buffer used in filtering |
| 651 uint8_t temp2[20 * 16]; |
| 652 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 8); // compound pred buffer |
| 653 const int16_t *hfilter, *vfilter; |
| 654 |
| 655 hfilter = BILINEAR_FILTERS_2TAP(xoffset); |
| 656 vfilter = BILINEAR_FILTERS_2TAP(yoffset); |
| 657 |
| 658 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, |
| 659 1, 9, 8, hfilter); |
| 660 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 8, 8, vfilter); |
| 661 comp_avg_pred(temp3, second_pred, 8, 8, temp2, 8); |
| 662 return vp9_variance8x8(temp3, 8, dst_ptr, dst_pixels_per_line, sse); |
| 663 } |
| 664 |
/* 16x16 variance of dst_ptr against src_ptr after 2-tap filtering of
 * src_ptr; xoffset and yoffset select the horizontal and vertical taps. */
unsigned int vp9_sub_pixel_variance16x16_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass_out[17 * 16];  // Horizontally filtered rows
  uint8_t filtered[20 * 16];         // Fully filtered block

  // 17 rows: the vertical pass reads one row below each of its 16 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass_out,
                                    src_pixels_per_line, 1, 17, 16,
                                    horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass_out, filtered,
                                     16, 16, 16, 16, vert_taps);

  return vp9_variance16x16(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
| 685 |
| 686 unsigned int vp9_sub_pixel_avg_variance16x16_c(const uint8_t *src_ptr, |
| 687 int src_pixels_per_line, |
| 688 int xoffset, |
| 689 int yoffset, |
| 690 const uint8_t *dst_ptr, |
| 691 int dst_pixels_per_line, |
| 692 unsigned int *sse, |
| 693 const uint8_t *second_pred) { |
| 694 uint16_t fdata3[17 * 16]; |
| 695 uint8_t temp2[20 * 16]; |
| 696 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 16); // compound pred buffer |
| 697 const int16_t *hfilter, *vfilter; |
| 698 |
| 699 hfilter = BILINEAR_FILTERS_2TAP(xoffset); |
| 700 vfilter = BILINEAR_FILTERS_2TAP(yoffset); |
| 701 |
| 702 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, |
| 703 1, 17, 16, hfilter); |
| 704 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 16, 16, vfilter); |
| 705 |
| 706 comp_avg_pred(temp3, second_pred, 16, 16, temp2, 16); |
| 707 return vp9_variance16x16(temp3, 16, dst_ptr, dst_pixels_per_line, sse); |
| 708 } |
| 709 |
/* 64x64 variance of dst_ptr against src_ptr after 2-tap filtering of
 * src_ptr; xoffset and yoffset select the horizontal and vertical taps. */
unsigned int vp9_sub_pixel_variance64x64_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass_out[65 * 64];  // Horizontally filtered rows
  uint8_t filtered[68 * 64];         // Fully filtered block

  // 65 rows: the vertical pass reads one row below each of its 64 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass_out,
                                    src_pixels_per_line, 1, 65, 64,
                                    horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass_out, filtered,
                                     64, 64, 64, 64, vert_taps);

  return vp9_variance64x64(filtered, 64, dst_ptr, dst_pixels_per_line, sse);
}
| 730 |
| 731 unsigned int vp9_sub_pixel_avg_variance64x64_c(const uint8_t *src_ptr, |
| 732 int src_pixels_per_line, |
| 733 int xoffset, |
| 734 int yoffset, |
| 735 const uint8_t *dst_ptr, |
| 736 int dst_pixels_per_line, |
| 737 unsigned int *sse, |
| 738 const uint8_t *second_pred) { |
| 739 uint16_t fdata3[65 * 64]; // Temp data buffer used in filtering |
| 740 uint8_t temp2[68 * 64]; |
| 741 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 64 * 64); // compound pred buffer |
| 742 const int16_t *hfilter, *vfilter; |
| 743 |
| 744 hfilter = BILINEAR_FILTERS_2TAP(xoffset); |
| 745 vfilter = BILINEAR_FILTERS_2TAP(yoffset); |
| 746 |
| 747 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, |
| 748 1, 65, 64, hfilter); |
| 749 var_filter_block2d_bil_second_pass(fdata3, temp2, 64, 64, 64, 64, vfilter); |
| 750 comp_avg_pred(temp3, second_pred, 64, 64, temp2, 64); |
| 751 return vp9_variance64x64(temp3, 64, dst_ptr, dst_pixels_per_line, sse); |
| 752 } |
| 753 |
/* 32x32 variance of dst_ptr against src_ptr after 2-tap filtering of
 * src_ptr; xoffset and yoffset select the horizontal and vertical taps. */
unsigned int vp9_sub_pixel_variance32x32_c(const uint8_t *src_ptr,
                                           int src_pixels_per_line,
                                           int xoffset,
                                           int yoffset,
                                           const uint8_t *dst_ptr,
                                           int dst_pixels_per_line,
                                           unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass_out[33 * 32];  // Horizontally filtered rows
  uint8_t filtered[36 * 32];         // Fully filtered block

  // 33 rows: the vertical pass reads one row below each of its 32 outputs.
  var_filter_block2d_bil_first_pass(src_ptr, first_pass_out,
                                    src_pixels_per_line, 1, 33, 32,
                                    horiz_taps);
  var_filter_block2d_bil_second_pass(first_pass_out, filtered,
                                     32, 32, 32, 32, vert_taps);

  return vp9_variance32x32(filtered, 32, dst_ptr, dst_pixels_per_line, sse);
}
| 774 |
| 775 unsigned int vp9_sub_pixel_avg_variance32x32_c(const uint8_t *src_ptr, |
| 776 int src_pixels_per_line, |
| 777 int xoffset, |
| 778 int yoffset, |
| 779 const uint8_t *dst_ptr, |
| 780 int dst_pixels_per_line, |
| 781 unsigned int *sse, |
| 782 const uint8_t *second_pred) { |
| 783 uint16_t fdata3[33 * 32]; // Temp data buffer used in filtering |
| 784 uint8_t temp2[36 * 32]; |
| 785 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 32 * 32); // compound pred buffer |
| 786 const int16_t *hfilter, *vfilter; |
| 787 |
| 788 hfilter = BILINEAR_FILTERS_2TAP(xoffset); |
| 789 vfilter = BILINEAR_FILTERS_2TAP(yoffset); |
| 790 |
| 791 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, |
| 792 1, 33, 32, hfilter); |
| 793 var_filter_block2d_bil_second_pass(fdata3, temp2, 32, 32, 32, 32, vfilter); |
| 794 comp_avg_pred(temp3, second_pred, 32, 32, temp2, 32); |
| 795 return vp9_variance32x32(temp3, 32, dst_ptr, dst_pixels_per_line, sse); |
| 796 } |
| 797 |
/* 16x16 sub-pixel variance at the fixed offset (xoffset 8, yoffset 0).
 * NOTE(review): 8 is presumably the half-pel position - confirm. */
unsigned int vp9_variance_halfpixvar16x16_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const unsigned int result =
      vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 0,
                                    ref_ptr, recon_stride, sse);
  return result;
}
| 806 |
/* 32x32 sub-pixel variance at the fixed offset (xoffset 8, yoffset 0).
 * NOTE(review): 8 is presumably the half-pel position - confirm. */
unsigned int vp9_variance_halfpixvar32x32_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const unsigned int result =
      vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 0,
                                    ref_ptr, recon_stride, sse);
  return result;
}
| 815 |
/* 64x64 sub-pixel variance at the fixed offset (xoffset 8, yoffset 0).
 * NOTE(review): 8 is presumably the half-pel position - confirm. */
unsigned int vp9_variance_halfpixvar64x64_h_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const unsigned int result =
      vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 0,
                                    ref_ptr, recon_stride, sse);
  return result;
}
| 824 |
/* 16x16 sub-pixel variance at the fixed offset (xoffset 0, yoffset 8).
 * NOTE(review): 8 is presumably the half-pel position - confirm. */
unsigned int vp9_variance_halfpixvar16x16_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const unsigned int result =
      vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 8,
                                    ref_ptr, recon_stride, sse);
  return result;
}
| 833 |
/* 32x32 sub-pixel variance at the fixed offset (xoffset 0, yoffset 8).
 * NOTE(review): 8 is presumably the half-pel position - confirm. */
unsigned int vp9_variance_halfpixvar32x32_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const unsigned int result =
      vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 0, 8,
                                    ref_ptr, recon_stride, sse);
  return result;
}
| 842 |
/* 64x64 sub-pixel variance at the fixed offset (xoffset 0, yoffset 8).
 * NOTE(review): 8 is presumably the half-pel position - confirm. */
unsigned int vp9_variance_halfpixvar64x64_v_c(const uint8_t *src_ptr,
                                              int source_stride,
                                              const uint8_t *ref_ptr,
                                              int recon_stride,
                                              unsigned int *sse) {
  const unsigned int result =
      vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 0, 8,
                                    ref_ptr, recon_stride, sse);
  return result;
}
| 851 |
/* 16x16 sub-pixel variance at the fixed offset (xoffset 8, yoffset 8).
 * NOTE(review): 8 is presumably the half-pel position - confirm. */
unsigned int vp9_variance_halfpixvar16x16_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const unsigned int result =
      vp9_sub_pixel_variance16x16_c(src_ptr, source_stride, 8, 8,
                                    ref_ptr, recon_stride, sse);
  return result;
}
| 860 |
/* 32x32 sub-pixel variance at the fixed offset (xoffset 8, yoffset 8).
 * NOTE(review): 8 is presumably the half-pel position - confirm. */
unsigned int vp9_variance_halfpixvar32x32_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const unsigned int result =
      vp9_sub_pixel_variance32x32_c(src_ptr, source_stride, 8, 8,
                                    ref_ptr, recon_stride, sse);
  return result;
}
| 869 |
/* 64x64 sub-pixel variance at the fixed offset (xoffset 8, yoffset 8).
 * NOTE(review): 8 is presumably the half-pel position - confirm. */
unsigned int vp9_variance_halfpixvar64x64_hv_c(const uint8_t *src_ptr,
                                               int source_stride,
                                               const uint8_t *ref_ptr,
                                               int recon_stride,
                                               unsigned int *sse) {
  const unsigned int result =
      vp9_sub_pixel_variance64x64_c(src_ptr, source_stride, 8, 8,
                                    ref_ptr, recon_stride, sse);
  return result;
}
| 878 |
/* Runs the 16x16 sub-pixel variance to fill *sse, discards the variance it
 * returns, and returns the value left in *sse. */
unsigned int vp9_sub_pixel_mse16x16_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // Called only for its write to *sse.
  (void)vp9_sub_pixel_variance16x16_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line, sse);

  return *sse;
}
| 891 |
// Sub-pixel MSE-style metric for 32x32 blocks. Runs the 32x32 sub-pixel
// variance only for its side effect on *sse, discards the value that call
// returns, and returns the value left in *sse.
unsigned int vp9_sub_pixel_mse32x32_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // The return value is intentionally ignored; only *sse is of interest here.
  (void)vp9_sub_pixel_variance32x32_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line,
                                      sse);

  return *sse;
}
| 904 |
// Sub-pixel MSE-style metric for 64x64 blocks. Runs the 64x64 sub-pixel
// variance only for its side effect on *sse, discards the value that call
// returns, and returns the value left in *sse.
unsigned int vp9_sub_pixel_mse64x64_c(const uint8_t *src_ptr,
                                      int src_pixels_per_line,
                                      int xoffset,
                                      int yoffset,
                                      const uint8_t *dst_ptr,
                                      int dst_pixels_per_line,
                                      unsigned int *sse) {
  // The return value is intentionally ignored; only *sse is of interest here.
  (void)vp9_sub_pixel_variance64x64_c(src_ptr, src_pixels_per_line,
                                      xoffset, yoffset,
                                      dst_ptr, dst_pixels_per_line,
                                      sse);

  return *sse;
}
| 917 |
// Sub-pixel variance for a 16-wide, 8-high block. The source is run through
// the two-pass bilinear filter (taps chosen by xoffset for the first pass and
// by yoffset for the second) and the filtered block is then compared against
// dst_ptr by vp9_variance16x8(). Returns that call's result; sse is forwarded
// to it.
unsigned int vp9_sub_pixel_variance16x8_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass[16 * 9];  // first-pass output (9 rows x 16 requested)
  uint8_t filtered[20 * 16];    // second-pass output, consumed with stride 16

  // First pass: one row more than the block height is requested (9 vs. 8).
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 9, 16, horiz_taps);
  // Second pass: 8 rows x 16 columns, stepping 16 entries between inputs.
  var_filter_block2d_bil_second_pass(first_pass, filtered, 16, 16, 8, 16,
                                     vert_taps);

  return vp9_variance16x8(filtered, 16, dst_ptr, dst_pixels_per_line, sse);
}
| 938 |
| 939 unsigned int vp9_sub_pixel_avg_variance16x8_c(const uint8_t *src_ptr, |
| 940 int src_pixels_per_line, |
| 941 int xoffset, |
| 942 int yoffset, |
| 943 const uint8_t *dst_ptr, |
| 944 int dst_pixels_per_line, |
| 945 unsigned int *sse, |
| 946 const uint8_t *second_pred) { |
| 947 uint16_t fdata3[16 * 9]; // Temp data buffer used in filtering |
| 948 uint8_t temp2[20 * 16]; |
| 949 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 16 * 8); // compound pred buffer |
| 950 const int16_t *hfilter, *vfilter; |
| 951 |
| 952 hfilter = BILINEAR_FILTERS_2TAP(xoffset); |
| 953 vfilter = BILINEAR_FILTERS_2TAP(yoffset); |
| 954 |
| 955 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, |
| 956 1, 9, 16, hfilter); |
| 957 var_filter_block2d_bil_second_pass(fdata3, temp2, 16, 16, 8, 16, vfilter); |
| 958 comp_avg_pred(temp3, second_pred, 16, 8, temp2, 16); |
| 959 return vp9_variance16x8(temp3, 16, dst_ptr, dst_pixels_per_line, sse); |
| 960 } |
| 961 |
// Sub-pixel variance for an 8-wide, 16-high block. The source is run through
// the two-pass bilinear filter (taps chosen by xoffset for the first pass and
// by yoffset for the second) and the filtered block is then compared against
// dst_ptr by vp9_variance8x16(). Returns that call's result; sse is forwarded
// to it.
unsigned int vp9_sub_pixel_variance8x16_c(const uint8_t *src_ptr,
                                          int src_pixels_per_line,
                                          int xoffset,
                                          int yoffset,
                                          const uint8_t *dst_ptr,
                                          int dst_pixels_per_line,
                                          unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  // NOTE(review): declared as 9 * 16 (144 entries) while the first pass below
  // is asked for 17 rows x 8 (136 entries); that fits, but the dimensions in
  // the declaration do not describe the layout - confirm against the helper.
  uint16_t first_pass[9 * 16];
  uint8_t filtered[20 * 16];  // second-pass output, consumed with stride 8

  // First pass: one row more than the block height is requested (17 vs. 16).
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 17, 8, horiz_taps);
  // Second pass: 16 rows x 8 columns, stepping 8 entries between inputs.
  var_filter_block2d_bil_second_pass(first_pass, filtered, 8, 8, 16, 8,
                                     vert_taps);

  return vp9_variance8x16(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
| 982 |
| 983 unsigned int vp9_sub_pixel_avg_variance8x16_c(const uint8_t *src_ptr, |
| 984 int src_pixels_per_line, |
| 985 int xoffset, |
| 986 int yoffset, |
| 987 const uint8_t *dst_ptr, |
| 988 int dst_pixels_per_line, |
| 989 unsigned int *sse, |
| 990 const uint8_t *second_pred) { |
| 991 uint16_t fdata3[9 * 16]; // Temp data buffer used in filtering |
| 992 uint8_t temp2[20 * 16]; |
| 993 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 16); // compound pred buffer |
| 994 const int16_t *hfilter, *vfilter; |
| 995 |
| 996 hfilter = BILINEAR_FILTERS_2TAP(xoffset); |
| 997 vfilter = BILINEAR_FILTERS_2TAP(yoffset); |
| 998 |
| 999 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, |
| 1000 1, 17, 8, hfilter); |
| 1001 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 16, 8, vfilter); |
| 1002 comp_avg_pred(temp3, second_pred, 8, 16, temp2, 8); |
| 1003 return vp9_variance8x16(temp3, 8, dst_ptr, dst_pixels_per_line, sse); |
| 1004 } |
| 1005 |
// Sub-pixel variance for an 8-wide, 4-high block. The source is run through
// the two-pass bilinear filter (taps chosen by xoffset for the first pass and
// by yoffset for the second) and the filtered block is then compared against
// dst_ptr by vp9_variance8x4(). Returns that call's result; sse is forwarded
// to it.
unsigned int vp9_sub_pixel_variance8x4_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  uint16_t first_pass[8 * 5];  // first-pass output (5 rows x 8 requested)
  uint8_t filtered[20 * 16];   // second-pass output, consumed with stride 8

  // First pass: one row more than the block height is requested (5 vs. 4).
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 5, 8, horiz_taps);
  // Second pass: 4 rows x 8 columns, stepping 8 entries between inputs.
  var_filter_block2d_bil_second_pass(first_pass, filtered, 8, 8, 4, 8,
                                     vert_taps);

  return vp9_variance8x4(filtered, 8, dst_ptr, dst_pixels_per_line, sse);
}
| 1026 |
| 1027 unsigned int vp9_sub_pixel_avg_variance8x4_c(const uint8_t *src_ptr, |
| 1028 int src_pixels_per_line, |
| 1029 int xoffset, |
| 1030 int yoffset, |
| 1031 const uint8_t *dst_ptr, |
| 1032 int dst_pixels_per_line, |
| 1033 unsigned int *sse, |
| 1034 const uint8_t *second_pred) { |
| 1035 uint16_t fdata3[8 * 5]; // Temp data buffer used in filtering |
| 1036 uint8_t temp2[20 * 16]; |
| 1037 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 8 * 4); // compound pred buffer |
| 1038 const int16_t *hfilter, *vfilter; |
| 1039 |
| 1040 hfilter = BILINEAR_FILTERS_2TAP(xoffset); |
| 1041 vfilter = BILINEAR_FILTERS_2TAP(yoffset); |
| 1042 |
| 1043 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, |
| 1044 1, 5, 8, hfilter); |
| 1045 var_filter_block2d_bil_second_pass(fdata3, temp2, 8, 8, 4, 8, vfilter); |
| 1046 comp_avg_pred(temp3, second_pred, 8, 4, temp2, 8); |
| 1047 return vp9_variance8x4(temp3, 8, dst_ptr, dst_pixels_per_line, sse); |
| 1048 } |
| 1049 |
// Sub-pixel variance for a 4-wide, 8-high block. The source is run through
// the two-pass bilinear filter (taps chosen by xoffset for the first pass and
// by yoffset for the second) and the filtered block is then compared against
// dst_ptr by vp9_variance4x8(). Returns that call's result; sse is forwarded
// to it.
unsigned int vp9_sub_pixel_variance4x8_c(const uint8_t *src_ptr,
                                         int src_pixels_per_line,
                                         int xoffset,
                                         int yoffset,
                                         const uint8_t *dst_ptr,
                                         int dst_pixels_per_line,
                                         unsigned int *sse) {
  const int16_t *const horiz_taps = BILINEAR_FILTERS_2TAP(xoffset);
  const int16_t *const vert_taps = BILINEAR_FILTERS_2TAP(yoffset);
  // First-pass output; 9 rows x 4 (36 entries) are requested below, which
  // fits in the 40 entries declared here.
  uint16_t first_pass[5 * 8];
  // FIXME(jingning,rbultje): this buffer probably does not need to be this
  // big; the same issue appears in all the other block size settings.
  uint8_t filtered[20 * 16];

  // First pass: one row more than the block height is requested (9 vs. 8).
  var_filter_block2d_bil_first_pass(src_ptr, first_pass, src_pixels_per_line,
                                    1, 9, 4, horiz_taps);
  // Second pass: 8 rows x 4 columns, stepping 4 entries between inputs.
  var_filter_block2d_bil_second_pass(first_pass, filtered, 4, 4, 8, 4,
                                     vert_taps);

  return vp9_variance4x8(filtered, 4, dst_ptr, dst_pixels_per_line, sse);
}
| 1072 |
| 1073 unsigned int vp9_sub_pixel_avg_variance4x8_c(const uint8_t *src_ptr, |
| 1074 int src_pixels_per_line, |
| 1075 int xoffset, |
| 1076 int yoffset, |
| 1077 const uint8_t *dst_ptr, |
| 1078 int dst_pixels_per_line, |
| 1079 unsigned int *sse, |
| 1080 const uint8_t *second_pred) { |
| 1081 uint16_t fdata3[5 * 8]; // Temp data buffer used in filtering |
| 1082 uint8_t temp2[20 * 16]; |
| 1083 DECLARE_ALIGNED_ARRAY(16, uint8_t, temp3, 4 * 8); // compound pred buffer |
| 1084 const int16_t *hfilter, *vfilter; |
| 1085 |
| 1086 hfilter = BILINEAR_FILTERS_2TAP(xoffset); |
| 1087 vfilter = BILINEAR_FILTERS_2TAP(yoffset); |
| 1088 |
| 1089 var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_pixels_per_line, |
| 1090 1, 9, 4, hfilter); |
| 1091 var_filter_block2d_bil_second_pass(fdata3, temp2, 4, 4, 8, 4, vfilter); |
| 1092 comp_avg_pred(temp3, second_pred, 4, 8, temp2, 4); |
| 1093 return vp9_variance4x8(temp3, 4, dst_ptr, dst_pixels_per_line, sse); |
| 1094 } |
OLD | NEW |