OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "./vpx_config.h" | 11 #include "./vpx_config.h" |
12 #include "vp9/common/vp9_common.h" | 12 #include "vp9/common/vp9_common.h" |
13 #include "vp9/common/vp9_loopfilter.h" | 13 #include "vp9/common/vp9_loopfilter.h" |
14 #include "vp9/common/vp9_onyxc_int.h" | 14 #include "vp9/common/vp9_onyxc_int.h" |
15 | 15 |
16 static INLINE int8_t signed_char_clamp(int t) { | 16 static INLINE int8_t signed_char_clamp(int t) { |
17 return (int8_t)clamp(t, -128, 127); | 17 return (int8_t)clamp(t, -128, 127); |
18 } | 18 } |
19 | 19 |
| 20 #if CONFIG_VP9_HIGHBITDEPTH |
| 21 static INLINE int16_t signed_char_clamp_high(int t, int bd) { |
| 22 switch (bd) { |
| 23 case 10: |
| 24 return (int16_t)clamp(t, -128*4, 128*4-1); |
| 25 case 12: |
| 26 return (int16_t)clamp(t, -128*16, 128*16-1); |
| 27 case 8: |
| 28 default: |
| 29 return (int16_t)clamp(t, -128, 128-1); |
| 30 } |
| 31 } |
| 32 #endif |
| 33 |
20 // should we apply any filter at all: 11111111 yes, 00000000 no | 34 // should we apply any filter at all: 11111111 yes, 00000000 no |
21 static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit, | 35 static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit, |
22 uint8_t p3, uint8_t p2, | 36 uint8_t p3, uint8_t p2, |
23 uint8_t p1, uint8_t p0, | 37 uint8_t p1, uint8_t p0, |
24 uint8_t q0, uint8_t q1, | 38 uint8_t q0, uint8_t q1, |
25 uint8_t q2, uint8_t q3) { | 39 uint8_t q2, uint8_t q3) { |
26 int8_t mask = 0; | 40 int8_t mask = 0; |
27 mask |= (abs(p3 - p2) > limit) * -1; | 41 mask |= (abs(p3 - p2) > limit) * -1; |
28 mask |= (abs(p2 - p1) > limit) * -1; | 42 mask |= (abs(p2 - p1) > limit) * -1; |
29 mask |= (abs(p1 - p0) > limit) * -1; | 43 mask |= (abs(p1 - p0) > limit) * -1; |
(...skipping 300 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
330 | 344 |
// Forwards to mb_lpf_vertical_edge_w() with a count of 8.
void vp9_lpf_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
                           const uint8_t *limit, const uint8_t *thresh) {
  const int count = 8;
  mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, count);
}
335 | 349 |
// Forwards to mb_lpf_vertical_edge_w() with a count of 16.
void vp9_lpf_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
                                const uint8_t *limit, const uint8_t *thresh) {
  const int count = 16;
  mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, count);
}
| 354 |
| 355 #if CONFIG_VP9_HIGHBITDEPTH |
| 356 // Should we apply any filter at all: 11111111 yes, 00000000 no ? |
| 357 static INLINE int8_t highbd_filter_mask(uint8_t limit, uint8_t blimit, |
| 358 uint16_t p3, uint16_t p2, |
| 359 uint16_t p1, uint16_t p0, |
| 360 uint16_t q0, uint16_t q1, |
| 361 uint16_t q2, uint16_t q3, int bd) { |
| 362 int8_t mask = 0; |
| 363 int16_t limit16 = (uint16_t)limit << (bd - 8); |
| 364 int16_t blimit16 = (uint16_t)blimit << (bd - 8); |
| 365 mask |= (abs(p3 - p2) > limit16) * -1; |
| 366 mask |= (abs(p2 - p1) > limit16) * -1; |
| 367 mask |= (abs(p1 - p0) > limit16) * -1; |
| 368 mask |= (abs(q1 - q0) > limit16) * -1; |
| 369 mask |= (abs(q2 - q1) > limit16) * -1; |
| 370 mask |= (abs(q3 - q2) > limit16) * -1; |
| 371 mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1; |
| 372 return ~mask; |
| 373 } |
| 374 |
| 375 static INLINE int8_t highbd_flat_mask4(uint8_t thresh, |
| 376 uint16_t p3, uint16_t p2, |
| 377 uint16_t p1, uint16_t p0, |
| 378 uint16_t q0, uint16_t q1, |
| 379 uint16_t q2, uint16_t q3, int bd) { |
| 380 int8_t mask = 0; |
| 381 int16_t thresh16 = (uint16_t)thresh << (bd - 8); |
| 382 mask |= (abs(p1 - p0) > thresh16) * -1; |
| 383 mask |= (abs(q1 - q0) > thresh16) * -1; |
| 384 mask |= (abs(p2 - p0) > thresh16) * -1; |
| 385 mask |= (abs(q2 - q0) > thresh16) * -1; |
| 386 mask |= (abs(p3 - p0) > thresh16) * -1; |
| 387 mask |= (abs(q3 - q0) > thresh16) * -1; |
| 388 return ~mask; |
| 389 } |
| 390 |
| 391 static INLINE int8_t highbd_flat_mask5(uint8_t thresh, |
| 392 uint16_t p4, uint16_t p3, |
| 393 uint16_t p2, uint16_t p1, |
| 394 uint16_t p0, uint16_t q0, |
| 395 uint16_t q1, uint16_t q2, |
| 396 uint16_t q3, uint16_t q4, int bd) { |
| 397 int8_t mask = ~highbd_flat_mask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3, bd); |
| 398 int16_t thresh16 = (uint16_t)thresh << (bd - 8); |
| 399 mask |= (abs(p4 - p0) > thresh16) * -1; |
| 400 mask |= (abs(q4 - q0) > thresh16) * -1; |
| 401 return ~mask; |
| 402 } |
| 403 |
| 404 // Is there high edge variance internal edge: |
| 405 // 11111111_11111111 yes, 00000000_00000000 no ? |
| 406 static INLINE int16_t highbd_hev_mask(uint8_t thresh, uint16_t p1, uint16_t p0, |
| 407 uint16_t q0, uint16_t q1, int bd) { |
| 408 int16_t hev = 0; |
| 409 int16_t thresh16 = (uint16_t)thresh << (bd - 8); |
| 410 hev |= (abs(p1 - p0) > thresh16) * -1; |
| 411 hev |= (abs(q1 - q0) > thresh16) * -1; |
| 412 return hev; |
| 413 } |
| 414 |
| 415 static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1, |
| 416 uint16_t *op0, uint16_t *oq0, uint16_t *oq1, |
| 417 int bd) { |
| 418 int16_t filter1, filter2; |
| 419 // ^0x80 equivalent to subtracting 0x80 from the values to turn them |
| 420 // into -128 to +127 instead of 0 to 255. |
| 421 int shift = bd - 8; |
| 422 const int16_t ps1 = (int16_t)*op1 - (0x80 << shift); |
| 423 const int16_t ps0 = (int16_t)*op0 - (0x80 << shift); |
| 424 const int16_t qs0 = (int16_t)*oq0 - (0x80 << shift); |
| 425 const int16_t qs1 = (int16_t)*oq1 - (0x80 << shift); |
| 426 const uint16_t hev = highbd_hev_mask(thresh, *op1, *op0, *oq0, *oq1, bd); |
| 427 |
| 428 // Add outer taps if we have high edge variance. |
| 429 int16_t filter = signed_char_clamp_high(ps1 - qs1, bd) & hev; |
| 430 |
| 431 // Inner taps. |
| 432 filter = signed_char_clamp_high(filter + 3 * (qs0 - ps0), bd) & mask; |
| 433 |
| 434 // Save bottom 3 bits so that we round one side +4 and the other +3 |
| 435 // if it equals 4 we'll set to adjust by -1 to account for the fact |
| 436 // we'd round 3 the other way. |
| 437 filter1 = signed_char_clamp_high(filter + 4, bd) >> 3; |
| 438 filter2 = signed_char_clamp_high(filter + 3, bd) >> 3; |
| 439 |
| 440 *oq0 = signed_char_clamp_high(qs0 - filter1, bd) + (0x80 << shift); |
| 441 *op0 = signed_char_clamp_high(ps0 + filter2, bd) + (0x80 << shift); |
| 442 |
| 443 // Outer tap adjustments. |
| 444 filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; |
| 445 |
| 446 *oq1 = signed_char_clamp_high(qs1 - filter, bd) + (0x80 << shift); |
| 447 *op1 = signed_char_clamp_high(ps1 + filter, bd) + (0x80 << shift); |
| 448 } |
| 449 |
// Runs the 4-tap filter along a horizontal edge of 16-bit samples.
// s points at q0; the taps p3..p0 / q0..q3 are read at multiples of the
// pitch p around s. 8 * count consecutive positions are processed.
void vp9_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
                                   const uint8_t *blimit, const uint8_t *limit,
                                   const uint8_t *thresh, int count, int bd) {
  const int width = 8 * count;
  int col;

  for (col = 0; col < width; ++col, ++s) {
    const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
    const uint16_t q0 = s[0], q1 = s[p], q2 = s[2 * p], q3 = s[3 * p];
    const int8_t mask = highbd_filter_mask(*limit, *blimit,
                                           p3, p2, p1, p0, q0, q1, q2, q3, bd);

    highbd_filter4(mask, *thresh, s - 2 * p, s - p, s, s + p, bd);
  }
}
| 472 |
// Filters two adjacent 8-sample segments of a horizontal edge, each with its
// own blimit/limit/thresh set; the second segment starts at s + 8.
void vp9_highbd_lpf_horizontal_4_dual_c(uint16_t *s, int p,
                                        const uint8_t *blimit0,
                                        const uint8_t *limit0,
                                        const uint8_t *thresh0,
                                        const uint8_t *blimit1,
                                        const uint8_t *limit1,
                                        const uint8_t *thresh1,
                                        int bd) {
  uint16_t *const second = s + 8;

  vp9_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd);
  vp9_highbd_lpf_horizontal_4_c(second, p, blimit1, limit1, thresh1, 1, bd);
}
| 484 |
// Runs the 4-tap filter along a vertical edge of 16-bit samples.
// s points at q0; the taps p3..p0 / q0..q3 are adjacent samples around s.
// 8 * count positions are processed, advancing s by pitch each time.
void vp9_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
                                 const uint8_t *limit, const uint8_t *thresh,
                                 int count, int bd) {
  const int height = 8 * count;
  int row;

  for (row = 0; row < height; ++row, s += pitch) {
    const uint16_t p3 = s[-4];
    const uint16_t p2 = s[-3];
    const uint16_t p1 = s[-2];
    const uint16_t p0 = s[-1];
    const uint16_t q0 = s[0];
    const uint16_t q1 = s[1];
    const uint16_t q2 = s[2];
    const uint16_t q3 = s[3];
    const int8_t mask = highbd_filter_mask(*limit, *blimit,
                                           p3, p2, p1, p0, q0, q1, q2, q3, bd);

    highbd_filter4(mask, *thresh, s - 2, s - 1, s, s + 1, bd);
  }
}
| 501 |
// Filters two consecutive 8-position segments of a vertical edge, each with
// its own blimit/limit/thresh set; the second segment starts 8 * pitch
// samples after s.
void vp9_highbd_lpf_vertical_4_dual_c(uint16_t *s, int pitch,
                                      const uint8_t *blimit0,
                                      const uint8_t *limit0,
                                      const uint8_t *thresh0,
                                      const uint8_t *blimit1,
                                      const uint8_t *limit1,
                                      const uint8_t *thresh1,
                                      int bd) {
  uint16_t *const second = s + 8 * pitch;

  vp9_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
  vp9_highbd_lpf_vertical_4_c(second, pitch, blimit1, limit1, thresh1, 1, bd);
}
| 514 |
| 515 static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat, |
| 516 uint16_t *op3, uint16_t *op2, |
| 517 uint16_t *op1, uint16_t *op0, |
| 518 uint16_t *oq0, uint16_t *oq1, |
| 519 uint16_t *oq2, uint16_t *oq3, int bd) { |
| 520 if (flat && mask) { |
| 521 const uint16_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; |
| 522 const uint16_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; |
| 523 |
| 524 // 7-tap filter [1, 1, 1, 2, 1, 1, 1] |
| 525 *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0, 3); |
| 526 *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1, 3); |
| 527 *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3); |
| 528 *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3); |
| 529 *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3, 3); |
| 530 *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3, 3); |
| 531 } else { |
| 532 highbd_filter4(mask, thresh, op1, op0, oq0, oq1, bd); |
| 533 } |
| 534 } |
| 535 |
// Runs highbd_filter8() along a horizontal edge of 16-bit samples.
// s points at q0; taps are read at multiples of the pitch p around s.
// The flat decision uses highbd_flat_mask4() with a threshold of 1.
// 8 * count consecutive positions are processed.
void vp9_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
                                   const uint8_t *limit, const uint8_t *thresh,
                                   int count, int bd) {
  const int width = 8 * count;
  int col;

  for (col = 0; col < width; ++col, ++s) {
    const uint16_t p3 = s[-4 * p];
    const uint16_t p2 = s[-3 * p];
    const uint16_t p1 = s[-2 * p];
    const uint16_t p0 = s[-p];
    const uint16_t q0 = s[0];
    const uint16_t q1 = s[p];
    const uint16_t q2 = s[2 * p];
    const uint16_t q3 = s[3 * p];
    const int8_t mask = highbd_filter_mask(*limit, *blimit,
                                           p3, p2, p1, p0, q0, q1, q2, q3, bd);
    const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3,
                                          bd);

    highbd_filter8(mask, *thresh, flat,
                   s - 4 * p, s - 3 * p, s - 2 * p, s - p,
                   s, s + p, s + 2 * p, s + 3 * p, bd);
  }
}
| 557 |
// Filters two adjacent 8-sample segments of a horizontal edge with
// vp9_highbd_lpf_horizontal_8_c(), each with its own blimit/limit/thresh
// set; the second segment starts at s + 8.
void vp9_highbd_lpf_horizontal_8_dual_c(uint16_t *s, int p,
                                        const uint8_t *blimit0,
                                        const uint8_t *limit0,
                                        const uint8_t *thresh0,
                                        const uint8_t *blimit1,
                                        const uint8_t *limit1,
                                        const uint8_t *thresh1,
                                        int bd) {
  uint16_t *const second = s + 8;

  vp9_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd);
  vp9_highbd_lpf_horizontal_8_c(second, p, blimit1, limit1, thresh1, 1, bd);
}
| 569 |
// Runs highbd_filter8() along a vertical edge of 16-bit samples.
// s points at q0; the taps are adjacent samples around s. The flat decision
// uses highbd_flat_mask4() with a threshold of 1. 8 * count positions are
// processed, advancing s by pitch each time.
void vp9_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
                                 const uint8_t *limit, const uint8_t *thresh,
                                 int count, int bd) {
  const int height = 8 * count;
  int row;

  for (row = 0; row < height; ++row, s += pitch) {
    const uint16_t p3 = s[-4];
    const uint16_t p2 = s[-3];
    const uint16_t p1 = s[-2];
    const uint16_t p0 = s[-1];
    const uint16_t q0 = s[0];
    const uint16_t q1 = s[1];
    const uint16_t q2 = s[2];
    const uint16_t q3 = s[3];
    const int8_t mask = highbd_filter_mask(*limit, *blimit,
                                           p3, p2, p1, p0, q0, q1, q2, q3, bd);
    const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3,
                                          bd);

    highbd_filter8(mask, *thresh, flat,
                   s - 4, s - 3, s - 2, s - 1,
                   s, s + 1, s + 2, s + 3,
                   bd);
  }
}
| 589 |
// Filters two consecutive 8-position segments of a vertical edge with
// vp9_highbd_lpf_vertical_8_c(), each with its own blimit/limit/thresh set;
// the second segment starts 8 * pitch samples after s.
void vp9_highbd_lpf_vertical_8_dual_c(uint16_t *s, int pitch,
                                      const uint8_t *blimit0,
                                      const uint8_t *limit0,
                                      const uint8_t *thresh0,
                                      const uint8_t *blimit1,
                                      const uint8_t *limit1,
                                      const uint8_t *thresh1,
                                      int bd) {
  uint16_t *const second = s + 8 * pitch;

  vp9_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
  vp9_highbd_lpf_vertical_8_c(second, pitch, blimit1, limit1, thresh1, 1, bd);
}
| 602 |
| 603 static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, |
| 604 uint8_t flat, uint8_t flat2, |
| 605 uint16_t *op7, uint16_t *op6, |
| 606 uint16_t *op5, uint16_t *op4, |
| 607 uint16_t *op3, uint16_t *op2, |
| 608 uint16_t *op1, uint16_t *op0, |
| 609 uint16_t *oq0, uint16_t *oq1, |
| 610 uint16_t *oq2, uint16_t *oq3, |
| 611 uint16_t *oq4, uint16_t *oq5, |
| 612 uint16_t *oq6, uint16_t *oq7, int bd) { |
| 613 if (flat2 && flat && mask) { |
| 614 const uint16_t p7 = *op7; |
| 615 const uint16_t p6 = *op6; |
| 616 const uint16_t p5 = *op5; |
| 617 const uint16_t p4 = *op4; |
| 618 const uint16_t p3 = *op3; |
| 619 const uint16_t p2 = *op2; |
| 620 const uint16_t p1 = *op1; |
| 621 const uint16_t p0 = *op0; |
| 622 const uint16_t q0 = *oq0; |
| 623 const uint16_t q1 = *oq1; |
| 624 const uint16_t q2 = *oq2; |
| 625 const uint16_t q3 = *oq3; |
| 626 const uint16_t q4 = *oq4; |
| 627 const uint16_t q5 = *oq5; |
| 628 const uint16_t q6 = *oq6; |
| 629 const uint16_t q7 = *oq7; |
| 630 |
| 631 // 15-tap filter [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] |
| 632 *op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 + p1 + p0 + |
| 633 q0, 4); |
| 634 *op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 + p2 + p1 + p0 + |
| 635 q0 + q1, 4); |
| 636 *op4 = ROUND_POWER_OF_TWO(p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 + p1 + p0 + |
| 637 q0 + q1 + q2, 4); |
| 638 *op3 = ROUND_POWER_OF_TWO(p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 + p1 + p0 + |
| 639 q0 + q1 + q2 + q3, 4); |
| 640 *op2 = ROUND_POWER_OF_TWO(p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 + p0 + |
| 641 q0 + q1 + q2 + q3 + q4, 4); |
| 642 *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 + |
| 643 q0 + q1 + q2 + q3 + q4 + q5, 4); |
| 644 *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 + |
| 645 q0 + q1 + q2 + q3 + q4 + q5 + q6, 4); |
| 646 *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + |
| 647 q0 * 2 + q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); |
| 648 *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + |
| 649 q0 + q1 * 2 + q2 + q3 + q4 + q5 + q6 + q7 * 2, 4); |
| 650 *oq2 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + |
| 651 q0 + q1 + q2 * 2 + q3 + q4 + q5 + q6 + q7 * 3, 4); |
| 652 *oq3 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + |
| 653 q0 + q1 + q2 + q3 * 2 + q4 + q5 + q6 + q7 * 4, 4); |
| 654 *oq4 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + |
| 655 q0 + q1 + q2 + q3 + q4 * 2 + q5 + q6 + q7 * 5, 4); |
| 656 *oq5 = ROUND_POWER_OF_TWO(p1 + p0 + |
| 657 q0 + q1 + q2 + q3 + q4 + q5 * 2 + q6 + q7 * 6, 4); |
| 658 *oq6 = ROUND_POWER_OF_TWO(p0 + |
| 659 q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7, 4); |
| 660 } else { |
| 661 highbd_filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3, |
| 662 bd); |
| 663 } |
| 664 } |
| 665 |
// Runs highbd_filter16() along a horizontal edge of 16-bit samples.
// s points at q0; eight taps on each side are read at multiples of the pitch
// p around s. Both flatness decisions use a threshold of 1: flat covers
// p3..q3, flat2 covers p7..p4 and q4..q7 relative to p0 / q0.
// 8 * count consecutive positions are processed.
void vp9_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
                                    const uint8_t *limit, const uint8_t *thresh,
                                    int count, int bd) {
  const int width = 8 * count;
  int col;

  for (col = 0; col < width; ++col, ++s) {
    const uint16_t p7 = s[-8 * p], p6 = s[-7 * p], p5 = s[-6 * p];
    const uint16_t p4 = s[-5 * p], p3 = s[-4 * p], p2 = s[-3 * p];
    const uint16_t p1 = s[-2 * p], p0 = s[-p];
    const uint16_t q0 = s[0], q1 = s[p], q2 = s[2 * p], q3 = s[3 * p];
    const uint16_t q4 = s[4 * p], q5 = s[5 * p], q6 = s[6 * p], q7 = s[7 * p];
    const int8_t mask = highbd_filter_mask(*limit, *blimit,
                                           p3, p2, p1, p0, q0, q1, q2, q3, bd);
    const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3,
                                          bd);
    const int8_t flat2 = highbd_flat_mask5(1, p7, p6, p5, p4, p0,
                                           q0, q4, q5, q6, q7, bd);

    highbd_filter16(mask, *thresh, flat, flat2,
                    s - 8 * p, s - 7 * p, s - 6 * p, s - 5 * p,
                    s - 4 * p, s - 3 * p, s - 2 * p, s - p,
                    s, s + p, s + 2 * p, s + 3 * p,
                    s + 4 * p, s + 5 * p, s + 6 * p, s + 7 * p,
                    bd);
  }
}
| 699 |
// Runs highbd_filter16() along a vertical edge of 16-bit samples.
// s points at q0; eight adjacent samples on each side are used. Both
// flatness decisions use a threshold of 1. Exactly count positions are
// processed (count is not multiplied by 8 here), advancing s by p each time.
static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
                                          const uint8_t *blimit,
                                          const uint8_t *limit,
                                          const uint8_t *thresh,
                                          int count, int bd) {
  int row;

  for (row = 0; row < count; ++row, s += p) {
    const uint16_t p7 = s[-8], p6 = s[-7], p5 = s[-6], p4 = s[-5];
    const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
    const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
    const uint16_t q4 = s[4], q5 = s[5], q6 = s[6], q7 = s[7];
    const int8_t mask = highbd_filter_mask(*limit, *blimit,
                                           p3, p2, p1, p0, q0, q1, q2, q3, bd);
    const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3,
                                          bd);
    const int8_t flat2 = highbd_flat_mask5(1, p7, p6, p5, p4, p0,
                                           q0, q4, q5, q6, q7, bd);

    highbd_filter16(mask, *thresh, flat, flat2,
                    s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1,
                    s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7,
                    bd);
  }
}
| 730 |
// Forwards to highbd_mb_lpf_vertical_edge_w() with a count of 8.
void vp9_highbd_lpf_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
                                  const uint8_t *limit, const uint8_t *thresh,
                                  int bd) {
  const int count = 8;
  highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, count, bd);
}
| 736 |
// Forwards to highbd_mb_lpf_vertical_edge_w() with a count of 16.
void vp9_highbd_lpf_vertical_16_dual_c(uint16_t *s, int p,
                                       const uint8_t *blimit,
                                       const uint8_t *limit,
                                       const uint8_t *thresh,
                                       int bd) {
  const int count = 16;
  highbd_mb_lpf_vertical_edge_w(s, p, blimit, limit, thresh, count, bd);
}
| 744 #endif // CONFIG_VP9_HIGHBITDEPTH |
OLD | NEW |