OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 264 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
275 (void)filter_y; (void)filter_y_stride; | 275 (void)filter_y; (void)filter_y_stride; |
276 | 276 |
277 for (y = 0; y < h; ++y) { | 277 for (y = 0; y < h; ++y) { |
278 for (x = 0; x < w; ++x) | 278 for (x = 0; x < w; ++x) |
279 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); | 279 dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); |
280 | 280 |
281 src += src_stride; | 281 src += src_stride; |
282 dst += dst_stride; | 282 dst += dst_stride; |
283 } | 283 } |
284 } | 284 } |
| 285 |
| 286 #if CONFIG_VP9_HIGHBITDEPTH |
| 287 static void high_convolve_horiz(const uint8_t *src8, ptrdiff_t src_stride, |
| 288 uint8_t *dst8, ptrdiff_t dst_stride, |
| 289 const InterpKernel *x_filters, |
| 290 int x0_q4, int x_step_q4, |
| 291 int w, int h, int bd) { |
| 292 int x, y; |
| 293 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
| 294 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
| 295 src -= SUBPEL_TAPS / 2 - 1; |
| 296 for (y = 0; y < h; ++y) { |
| 297 int x_q4 = x0_q4; |
| 298 for (x = 0; x < w; ++x) { |
| 299 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; |
| 300 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; |
| 301 int k, sum = 0; |
| 302 for (k = 0; k < SUBPEL_TAPS; ++k) |
| 303 sum += src_x[k] * x_filter[k]; |
| 304 dst[x] = clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); |
| 305 x_q4 += x_step_q4; |
| 306 } |
| 307 src += src_stride; |
| 308 dst += dst_stride; |
| 309 } |
| 310 } |
| 311 |
| 312 static void high_convolve_avg_horiz(const uint8_t *src8, ptrdiff_t src_stride, |
| 313 uint8_t *dst8, ptrdiff_t dst_stride, |
| 314 const InterpKernel *x_filters, |
| 315 int x0_q4, int x_step_q4, |
| 316 int w, int h, int bd) { |
| 317 int x, y; |
| 318 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
| 319 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
| 320 src -= SUBPEL_TAPS / 2 - 1; |
| 321 for (y = 0; y < h; ++y) { |
| 322 int x_q4 = x0_q4; |
| 323 for (x = 0; x < w; ++x) { |
| 324 const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; |
| 325 const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; |
| 326 int k, sum = 0; |
| 327 for (k = 0; k < SUBPEL_TAPS; ++k) |
| 328 sum += src_x[k] * x_filter[k]; |
| 329 dst[x] = ROUND_POWER_OF_TWO(dst[x] + |
| 330 clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); |
| 331 x_q4 += x_step_q4; |
| 332 } |
| 333 src += src_stride; |
| 334 dst += dst_stride; |
| 335 } |
| 336 } |
| 337 |
| 338 static void high_convolve_vert(const uint8_t *src8, ptrdiff_t src_stride, |
| 339 uint8_t *dst8, ptrdiff_t dst_stride, |
| 340 const InterpKernel *y_filters, |
| 341 int y0_q4, int y_step_q4, int w, int h, |
| 342 int bd) { |
| 343 int x, y; |
| 344 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
| 345 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
| 346 src -= src_stride * (SUBPEL_TAPS / 2 - 1); |
| 347 for (x = 0; x < w; ++x) { |
| 348 int y_q4 = y0_q4; |
| 349 for (y = 0; y < h; ++y) { |
| 350 const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; |
| 351 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; |
| 352 int k, sum = 0; |
| 353 for (k = 0; k < SUBPEL_TAPS; ++k) |
| 354 sum += src_y[k * src_stride] * y_filter[k]; |
| 355 dst[y * dst_stride] = clip_pixel_high( |
| 356 ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); |
| 357 y_q4 += y_step_q4; |
| 358 } |
| 359 ++src; |
| 360 ++dst; |
| 361 } |
| 362 } |
| 363 |
| 364 static void high_convolve_avg_vert(const uint8_t *src8, ptrdiff_t src_stride, |
| 365 uint8_t *dst8, ptrdiff_t dst_stride, |
| 366 const InterpKernel *y_filters, |
| 367 int y0_q4, int y_step_q4, int w, int h, |
| 368 int bd) { |
| 369 int x, y; |
| 370 uint16_t *src = CONVERT_TO_SHORTPTR(src8); |
| 371 uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); |
| 372 src -= src_stride * (SUBPEL_TAPS / 2 - 1); |
| 373 for (x = 0; x < w; ++x) { |
| 374 int y_q4 = y0_q4; |
| 375 for (y = 0; y < h; ++y) { |
| 376 const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; |
| 377 const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; |
| 378 int k, sum = 0; |
| 379 for (k = 0; k < SUBPEL_TAPS; ++k) |
| 380 sum += src_y[k * src_stride] * y_filter[k]; |
| 381 dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + |
| 382 clip_pixel_high(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); |
| 383 y_q4 += y_step_q4; |
| 384 } |
| 385 ++src; |
| 386 ++dst; |
| 387 } |
| 388 } |
| 389 |
| 390 static void high_convolve(const uint8_t *src, ptrdiff_t src_stride, |
| 391 uint8_t *dst, ptrdiff_t dst_stride, |
| 392 const InterpKernel *const x_filters, |
| 393 int x0_q4, int x_step_q4, |
| 394 const InterpKernel *const y_filters, |
| 395 int y0_q4, int y_step_q4, |
| 396 int w, int h, int bd) { |
| 397 // Note: Fixed size intermediate buffer, temp, places limits on parameters. |
| 398 // 2d filtering proceeds in 2 steps: |
| 399 // (1) Interpolate horizontally into an intermediate buffer, temp. |
| 400 // (2) Interpolate temp vertically to derive the sub-pixel result. |
| 401 // Deriving the maximum number of rows in the temp buffer (135): |
| 402 // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). |
| 403 // --Largest block size is 64x64 pixels. |
| 404 // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the |
| 405 // original frame (in 1/16th pixel units). |
| 406 // --Must round-up because block may be located at sub-pixel position. |
| 407 // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. |
| 408 // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. |
| 409 uint16_t temp[64 * 135]; |
| 410 int intermediate_height = (((h - 1) * y_step_q4 + 15) >> 4) + SUBPEL_TAPS; |
| 411 |
| 412 assert(w <= 64); |
| 413 assert(h <= 64); |
| 414 assert(y_step_q4 <= 32); |
| 415 assert(x_step_q4 <= 32); |
| 416 |
| 417 if (intermediate_height < h) |
| 418 intermediate_height = h; |
| 419 |
| 420 high_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), |
| 421 src_stride, CONVERT_TO_BYTEPTR(temp), 64, |
| 422 x_filters, x0_q4, x_step_q4, w, |
| 423 intermediate_height, bd); |
| 424 high_convolve_vert(CONVERT_TO_BYTEPTR(temp) + 64 * (SUBPEL_TAPS / 2 - 1), |
| 425 64, dst, dst_stride, y_filters, y0_q4, y_step_q4, |
| 426 w, h, bd); |
| 427 } |
| 428 |
| 429 |
| 430 void vp9_high_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride, |
| 431 uint8_t *dst, ptrdiff_t dst_stride, |
| 432 const int16_t *filter_x, int x_step_q4, |
| 433 const int16_t *filter_y, int y_step_q4, |
| 434 int w, int h, int bd) { |
| 435 const InterpKernel *const filters_x = get_filter_base(filter_x); |
| 436 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
| 437 (void)filter_y; |
| 438 (void)y_step_q4; |
| 439 |
| 440 high_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, |
| 441 x0_q4, x_step_q4, w, h, bd); |
| 442 } |
| 443 |
| 444 void vp9_high_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, |
| 445 uint8_t *dst, ptrdiff_t dst_stride, |
| 446 const int16_t *filter_x, int x_step_q4, |
| 447 const int16_t *filter_y, int y_step_q4, |
| 448 int w, int h, int bd) { |
| 449 const InterpKernel *const filters_x = get_filter_base(filter_x); |
| 450 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
| 451 (void)filter_y; |
| 452 (void)y_step_q4; |
| 453 |
| 454 high_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, |
| 455 x0_q4, x_step_q4, w, h, bd); |
| 456 } |
| 457 |
| 458 void vp9_high_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride, |
| 459 uint8_t *dst, ptrdiff_t dst_stride, |
| 460 const int16_t *filter_x, int x_step_q4, |
| 461 const int16_t *filter_y, int y_step_q4, |
| 462 int w, int h, int bd) { |
| 463 const InterpKernel *const filters_y = get_filter_base(filter_y); |
| 464 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
| 465 (void)filter_x; |
| 466 (void)x_step_q4; |
| 467 |
| 468 high_convolve_vert(src, src_stride, dst, dst_stride, filters_y, |
| 469 y0_q4, y_step_q4, w, h, bd); |
| 470 } |
| 471 |
| 472 void vp9_high_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, |
| 473 uint8_t *dst, ptrdiff_t dst_stride, |
| 474 const int16_t *filter_x, int x_step_q4, |
| 475 const int16_t *filter_y, int y_step_q4, |
| 476 int w, int h, int bd) { |
| 477 const InterpKernel *const filters_y = get_filter_base(filter_y); |
| 478 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
| 479 (void)filter_x; |
| 480 (void)x_step_q4; |
| 481 |
| 482 high_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, |
| 483 y0_q4, y_step_q4, w, h, bd); |
| 484 } |
| 485 |
| 486 void vp9_high_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, |
| 487 uint8_t *dst, ptrdiff_t dst_stride, |
| 488 const int16_t *filter_x, int x_step_q4, |
| 489 const int16_t *filter_y, int y_step_q4, |
| 490 int w, int h, int bd) { |
| 491 const InterpKernel *const filters_x = get_filter_base(filter_x); |
| 492 const int x0_q4 = get_filter_offset(filter_x, filters_x); |
| 493 |
| 494 const InterpKernel *const filters_y = get_filter_base(filter_y); |
| 495 const int y0_q4 = get_filter_offset(filter_y, filters_y); |
| 496 |
| 497 high_convolve(src, src_stride, dst, dst_stride, |
| 498 filters_x, x0_q4, x_step_q4, |
| 499 filters_y, y0_q4, y_step_q4, w, h, bd); |
| 500 } |
| 501 |
| 502 void vp9_high_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, |
| 503 uint8_t *dst, ptrdiff_t dst_stride, |
| 504 const int16_t *filter_x, int x_step_q4, |
| 505 const int16_t *filter_y, int y_step_q4, |
| 506 int w, int h, int bd) { |
| 507 // Fixed size intermediate buffer places limits on parameters. |
| 508 DECLARE_ALIGNED_ARRAY(16, uint16_t, temp, 64 * 64); |
| 509 assert(w <= 64); |
| 510 assert(h <= 64); |
| 511 |
| 512 vp9_high_convolve8_c(src, src_stride, CONVERT_TO_BYTEPTR(temp), 64, |
| 513 filter_x, x_step_q4, filter_y, y_step_q4, w, h, bd); |
| 514 vp9_high_convolve_avg_c(CONVERT_TO_BYTEPTR(temp), 64, dst, dst_stride, |
| 515 NULL, 0, NULL, 0, w, h, bd); |
| 516 } |
| 517 |
// Copies a w x h block of 16-bit samples from src8 to dst8 without filtering.
//
// Both pointers are converted with CONVERT_TO_SHORTPTR; strides are applied
// in units of uint16_t elements. The filter arguments and bd are part of the
// signature but are not used.
void vp9_high_convolve_copy_c(const uint8_t *src8, ptrdiff_t src_stride,
                              uint8_t *dst8, ptrdiff_t dst_stride,
                              const int16_t *filter_x, int filter_x_stride,
                              const int16_t *filter_y, int filter_y_stride,
                              int w, int h, int bd) {
  uint16_t *src_row = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst_row = CONVERT_TO_SHORTPTR(dst8);
  int row;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;
  (void)bd;

  // One memcpy per row; each row holds w samples of sizeof(uint16_t) bytes.
  for (row = 0; row < h; ++row) {
    vpx_memcpy(dst_row, src_row, w * sizeof(uint16_t));
    src_row += src_stride;
    dst_row += dst_stride;
  }
}
| 538 |
// Averages a w x h block of 16-bit samples from src8 into dst8.
//
// Each destination sample becomes ROUND_POWER_OF_TWO(dst + src, 1). Both
// pointers are converted with CONVERT_TO_SHORTPTR; strides are applied in
// units of uint16_t elements. The filter arguments and bd are part of the
// signature but are not used.
void vp9_high_convolve_avg_c(const uint8_t *src8, ptrdiff_t src_stride,
                             uint8_t *dst8, ptrdiff_t dst_stride,
                             const int16_t *filter_x, int filter_x_stride,
                             const int16_t *filter_y, int filter_y_stride,
                             int w, int h, int bd) {
  uint16_t *src_row = CONVERT_TO_SHORTPTR(src8);
  uint16_t *dst_row = CONVERT_TO_SHORTPTR(dst8);
  int row;

  (void)filter_x;
  (void)filter_x_stride;
  (void)filter_y;
  (void)filter_y_stride;
  (void)bd;

  for (row = 0; row < h; ++row) {
    int col;

    for (col = 0; col < w; ++col) {
      dst_row[col] = ROUND_POWER_OF_TWO(dst_row[col] + src_row[col], 1);
    }

    src_row += src_stride;
    dst_row += dst_stride;
  }
}
| 561 #endif |
OLD | NEW |