| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2013 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include <assert.h> | 11 #include <assert.h> |
| 12 #include <stdio.h> | 12 #include <stdio.h> |
| 13 | 13 |
| 14 #include "./vpx_config.h" | 14 #include "./vpx_config.h" |
| 15 #include "./vp9_rtcd.h" | 15 #include "./vp9_rtcd.h" |
| 16 #include "vp9/common/vp9_common.h" | 16 #include "vp9/common/vp9_common.h" |
| 17 #include "vp9/common/vp9_blockd.h" | 17 #include "vp9/common/vp9_blockd.h" |
| 18 #include "vp9/common/vp9_idct.h" | 18 #include "vp9/common/vp9_idct.h" |
| 19 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h" | 19 #include "vp9/common/mips/dspr2/vp9_common_dspr2.h" |
| 20 | 20 |
| 21 #if HAVE_DSPR2 | 21 #if HAVE_DSPR2 |
| 22 static void idct8_1d_rows_dspr2(const int16_t *input, int16_t *output, | 22 static void idct8_rows_dspr2(const int16_t *input, int16_t *output, |
| 23 uint32_t no_rows) { | 23 uint32_t no_rows) { |
| 24 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; | 24 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; |
| 25 const int const_2_power_13 = 8192; | 25 const int const_2_power_13 = 8192; |
| 26 int Temp0, Temp1, Temp2, Temp3, Temp4; | 26 int Temp0, Temp1, Temp2, Temp3, Temp4; |
| 27 int i; | 27 int i; |
| 28 | 28 |
| 29 for (i = no_rows; i--; ) { | 29 for (i = no_rows; i--; ) { |
| 30 __asm__ __volatile__ ( | 30 __asm__ __volatile__ ( |
| 31 /* | 31 /* |
| 32 temp_1 = (input[0] + input[4]) * cospi_16_64; | 32 temp_1 = (input[0] + input[4]) * cospi_16_64; |
| 33 step2_0 = dct_const_round_shift(temp_1); | 33 step2_0 = dct_const_round_shift(temp_1); |
| (...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 193 [cospi_20_64] "r" (cospi_20_64), [cospi_8_64] "r" (cospi_8_64), | 193 [cospi_20_64] "r" (cospi_20_64), [cospi_8_64] "r" (cospi_8_64), |
| 194 [cospi_24_64] "r" (cospi_24_64), | 194 [cospi_24_64] "r" (cospi_24_64), |
| 195 [output] "r" (output), [input] "r" (input) | 195 [output] "r" (output), [input] "r" (input) |
| 196 ); | 196 ); |
| 197 | 197 |
| 198 input += 8; | 198 input += 8; |
| 199 output += 1; | 199 output += 1; |
| 200 } | 200 } |
| 201 } | 201 } |
| 202 | 202 |
| 203 static void idct8_1d_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, | 203 static void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, |
| 204 int dest_stride) { | 204 int dest_stride) { |
| 205 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; | 205 int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; |
| 206 int Temp0, Temp1, Temp2, Temp3; | 206 int Temp0, Temp1, Temp2, Temp3; |
| 207 int i; | 207 int i; |
| 208 const int const_2_power_13 = 8192; | 208 const int const_2_power_13 = 8192; |
| 209 uint8_t *dest_pix; | 209 uint8_t *dest_pix; |
| 210 uint8_t *cm = vp9_ff_cropTbl; | 210 uint8_t *cm = vp9_ff_cropTbl; |
| 211 | 211 |
| 212 /* prefetch vp9_ff_cropTbl */ | 212 /* prefetch vp9_ff_cropTbl */ |
| 213 vp9_prefetch_load(vp9_ff_cropTbl); | 213 vp9_prefetch_load(vp9_ff_cropTbl); |
| 214 vp9_prefetch_load(vp9_ff_cropTbl + 32); | 214 vp9_prefetch_load(vp9_ff_cropTbl + 32); |
| (...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 455 uint32_t pos = 45; | 455 uint32_t pos = 45; |
| 456 | 456 |
| 457 /* bit position for extract from acc */ | 457 /* bit position for extract from acc */ |
| 458 __asm__ __volatile__ ( | 458 __asm__ __volatile__ ( |
| 459 "wrdsp %[pos], 1 \n\t" | 459 "wrdsp %[pos], 1 \n\t" |
| 460 : | 460 : |
| 461 : [pos] "r" (pos) | 461 : [pos] "r" (pos) |
| 462 ); | 462 ); |
| 463 | 463 |
| 464 // First transform rows | 464 // First transform rows |
| 465 idct8_1d_rows_dspr2(input, outptr, 8); | 465 idct8_rows_dspr2(input, outptr, 8); |
| 466 | 466 |
| 467 // Then transform columns and add to dest | 467 // Then transform columns and add to dest |
| 468 idct8_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride); | 468 idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride); |
| 469 } | 469 } |
| 470 | 470 |
| 471 static void iadst8_1d_dspr2(const int16_t *input, int16_t *output) { | 471 static void iadst8_dspr2(const int16_t *input, int16_t *output) { |
| 472 int s0, s1, s2, s3, s4, s5, s6, s7; | 472 int s0, s1, s2, s3, s4, s5, s6, s7; |
| 473 int x0, x1, x2, x3, x4, x5, x6, x7; | 473 int x0, x1, x2, x3, x4, x5, x6, x7; |
| 474 | 474 |
| 475 x0 = input[7]; | 475 x0 = input[7]; |
| 476 x1 = input[0]; | 476 x1 = input[0]; |
| 477 x2 = input[5]; | 477 x2 = input[5]; |
| 478 x3 = input[2]; | 478 x3 = input[2]; |
| 479 x4 = input[3]; | 479 x4 = input[3]; |
| 480 x5 = input[4]; | 480 x5 = input[4]; |
| 481 x6 = input[1]; | 481 x6 = input[1]; |
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 556 | 556 |
| 557 /* bit position for extract from acc */ | 557 /* bit position for extract from acc */ |
| 558 __asm__ __volatile__ ( | 558 __asm__ __volatile__ ( |
| 559 "wrdsp %[pos], 1 \n\t" | 559 "wrdsp %[pos], 1 \n\t" |
| 560 : | 560 : |
| 561 : [pos] "r" (pos) | 561 : [pos] "r" (pos) |
| 562 ); | 562 ); |
| 563 | 563 |
| 564 switch (tx_type) { | 564 switch (tx_type) { |
| 565 case DCT_DCT: // DCT in both horizontal and vertical | 565 case DCT_DCT: // DCT in both horizontal and vertical |
| 566 idct8_1d_rows_dspr2(input, outptr, 8); | 566 idct8_rows_dspr2(input, outptr, 8); |
| 567 idct8_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride); | 567 idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride); |
| 568 break; | 568 break; |
| 569 case ADST_DCT: // ADST in vertical, DCT in horizontal | 569 case ADST_DCT: // ADST in vertical, DCT in horizontal |
| 570 idct8_1d_rows_dspr2(input, outptr, 8); | 570 idct8_rows_dspr2(input, outptr, 8); |
| 571 | 571 |
| 572 for (i = 0; i < 8; ++i) { | 572 for (i = 0; i < 8; ++i) { |
| 573 iadst8_1d_dspr2(&out[i * 8], temp_out); | 573 iadst8_dspr2(&out[i * 8], temp_out); |
| 574 | 574 |
| 575 for (j = 0; j < 8; ++j) | 575 for (j = 0; j < 8; ++j) |
| 576 dest[j * dest_stride + i] = | 576 dest[j * dest_stride + i] = |
| 577 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) | 577 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) |
| 578 + dest[j * dest_stride + i]); | 578 + dest[j * dest_stride + i]); |
| 579 } | 579 } |
| 580 break; | 580 break; |
| 581 case DCT_ADST: // DCT in vertical, ADST in horizontal | 581 case DCT_ADST: // DCT in vertical, ADST in horizontal |
| 582 for (i = 0; i < 8; ++i) { | 582 for (i = 0; i < 8; ++i) { |
| 583 iadst8_1d_dspr2(input, outptr); | 583 iadst8_dspr2(input, outptr); |
| 584 input += 8; | 584 input += 8; |
| 585 outptr += 8; | 585 outptr += 8; |
| 586 } | 586 } |
| 587 | 587 |
| 588 for (i = 0; i < 8; ++i) { | 588 for (i = 0; i < 8; ++i) { |
| 589 for (j = 0; j < 8; ++j) { | 589 for (j = 0; j < 8; ++j) { |
| 590 temp_in[i * 8 + j] = out[j * 8 + i]; | 590 temp_in[i * 8 + j] = out[j * 8 + i]; |
| 591 } | 591 } |
| 592 } | 592 } |
| 593 idct8_1d_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride); | 593 idct8_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride); |
| 594 break; | 594 break; |
| 595 case ADST_ADST: // ADST in both directions | 595 case ADST_ADST: // ADST in both directions |
| 596 for (i = 0; i < 8; ++i) { | 596 for (i = 0; i < 8; ++i) { |
| 597 iadst8_1d_dspr2(input, outptr); | 597 iadst8_dspr2(input, outptr); |
| 598 input += 8; | 598 input += 8; |
| 599 outptr += 8; | 599 outptr += 8; |
| 600 } | 600 } |
| 601 | 601 |
| 602 for (i = 0; i < 8; ++i) { | 602 for (i = 0; i < 8; ++i) { |
| 603 for (j = 0; j < 8; ++j) | 603 for (j = 0; j < 8; ++j) |
| 604 temp_in[j] = out[j * 8 + i]; | 604 temp_in[j] = out[j * 8 + i]; |
| 605 | 605 |
| 606 iadst8_1d_dspr2(temp_in, temp_out); | 606 iadst8_dspr2(temp_in, temp_out); |
| 607 | 607 |
| 608 for (j = 0; j < 8; ++j) | 608 for (j = 0; j < 8; ++j) |
| 609 dest[j * dest_stride + i] = | 609 dest[j * dest_stride + i] = |
| 610 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) | 610 clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) |
| 611 + dest[j * dest_stride + i]); | 611 + dest[j * dest_stride + i]); |
| 612 } | 612 } |
| 613 break; | 613 break; |
| 614 default: | 614 default: |
| 615 printf("vp9_short_iht8x8_add_dspr2 : Invalid tx_type\n"); | 615 printf("vp9_short_iht8x8_add_dspr2 : Invalid tx_type\n"); |
| 616 break; | 616 break; |
| 617 } | 617 } |
| 618 } | 618 } |
| 619 | 619 |
| 620 /* Reduced 8x8 inverse transform: runs the row pass on 4 rows only, zero-fills elements 4..7 of every row of the intermediate block, then runs the column pass that adds into dest. */ void vp9_idct8x8_10_add_dspr2(const int16_t *input, uint8_t *dest, | 620 /* Reduced 8x8 inverse transform: runs the row pass on 4 rows only, zero-fills elements 4..7 of every row of the intermediate block, then runs the column pass that adds into dest. */ void vp9_idct8x8_10_add_dspr2(const int16_t *input, uint8_t *dest, |
| 621 int dest_stride) { /* input: coefficients handed to the row pass; dest and dest_stride: forwarded unchanged to the column pass */ | 621 int dest_stride) { /* input: coefficients handed to the row pass; dest and dest_stride: forwarded unchanged to the column pass */ |
| 622 DECLARE_ALIGNED(32, int16_t, out[8 * 8]); /* intermediate 8x8 block shared by the two passes */ | 622 DECLARE_ALIGNED(32, int16_t, out[8 * 8]); /* intermediate 8x8 block shared by the two passes */ |
| 623 int16_t *outptr = out; | 623 int16_t *outptr = out; |
| 624 uint32_t pos = 45; | 624 uint32_t pos = 45; |
| 625 | 625 |
| 626 /* bit position for extract from acc: pos (45) is loaded with wrdsp using mask 1 before any transform code runs */ | 626 /* bit position for extract from acc: pos (45) is loaded with wrdsp using mask 1 before any transform code runs */ |
| 627 __asm__ __volatile__ ( | 627 __asm__ __volatile__ ( |
| 628 "wrdsp %[pos], 1 \n\t" | 628 "wrdsp %[pos], 1 \n\t" |
| 629 : | 629 : |
| 630 : [pos] "r" (pos) | 630 : [pos] "r" (pos) |
| 631 ); | 631 ); |
| 632 | 632 |
| 633 // First transform rows: only 4 are requested here; the row helper advances input by 8 and output by 1 per row | 633 // First transform rows: only 4 are requested here; the row helper advances input by 8 and output by 1 per row |
| 634 idct8_1d_rows_dspr2(input, outptr, 4); | 634 idct8_rows_dspr2(input, outptr, 4); |
| 635 | 635 |
| 636 outptr += 4; /* now points at element 4 of row 0 of out (byte offset 8) */ | 636 outptr += 4; /* now points at element 4 of row 0 of out (byte offset 8) */ |
| 637 | 637 |
| 638 __asm__ __volatile__ ( /* each sw clears two int16_t; offsets 16*r and 16*r+4 cover elements 4..7 of rows r = 0..7 of out. Presumably the 4-row pass fills only elements 0..3 of each row; confirm against the row-pass stores, which are not visible here. */ | 638 __asm__ __volatile__ ( /* each sw clears two int16_t; offsets 16*r and 16*r+4 cover elements 4..7 of rows r = 0..7 of out. Presumably the 4-row pass fills only elements 0..3 of each row; confirm against the row-pass stores, which are not visible here. */ |
| 639 "sw $zero, 0(%[outptr]) \n\t" | 639 "sw $zero, 0(%[outptr]) \n\t" |
| 640 "sw $zero, 4(%[outptr]) \n\t" | 640 "sw $zero, 4(%[outptr]) \n\t" |
| 641 "sw $zero, 16(%[outptr]) \n\t" | 641 "sw $zero, 16(%[outptr]) \n\t" |
| 642 "sw $zero, 20(%[outptr]) \n\t" | 642 "sw $zero, 20(%[outptr]) \n\t" |
| 643 "sw $zero, 32(%[outptr]) \n\t" | 643 "sw $zero, 32(%[outptr]) \n\t" |
| 644 "sw $zero, 36(%[outptr]) \n\t" | 644 "sw $zero, 36(%[outptr]) \n\t" |
| 645 "sw $zero, 48(%[outptr]) \n\t" | 645 "sw $zero, 48(%[outptr]) \n\t" |
| 646 "sw $zero, 52(%[outptr]) \n\t" | 646 "sw $zero, 52(%[outptr]) \n\t" |
| 647 "sw $zero, 64(%[outptr]) \n\t" | 647 "sw $zero, 64(%[outptr]) \n\t" |
| 648 "sw $zero, 68(%[outptr]) \n\t" | 648 "sw $zero, 68(%[outptr]) \n\t" |
| 649 "sw $zero, 80(%[outptr]) \n\t" | 649 "sw $zero, 80(%[outptr]) \n\t" |
| 650 "sw $zero, 84(%[outptr]) \n\t" | 650 "sw $zero, 84(%[outptr]) \n\t" |
| 651 "sw $zero, 96(%[outptr]) \n\t" | 651 "sw $zero, 96(%[outptr]) \n\t" |
| 652 "sw $zero, 100(%[outptr]) \n\t" | 652 "sw $zero, 100(%[outptr]) \n\t" |
| 653 "sw $zero, 112(%[outptr]) \n\t" | 653 "sw $zero, 112(%[outptr]) \n\t" |
| 654 "sw $zero, 116(%[outptr]) \n\t" | 654 "sw $zero, 116(%[outptr]) \n\t" |
| 655 | 655 |
| 656 : | 656 : |
| 657 : [outptr] "r" (outptr) /* NOTE(review): this statement stores through outptr but lists no output operand and no "memory" clobber; confirm the compiler cannot move accesses to out across it */ | 657 : [outptr] "r" (outptr) /* NOTE(review): this statement stores through outptr but lists no output operand and no "memory" clobber; confirm the compiler cannot move accesses to out across it */ |
| 658 ); | 658 ); |
| 659 | 659 |
| 660 | 660 |
| 661 // Then transform columns and add to dest (reads the whole block starting at out[0]) | 661 // Then transform columns and add to dest (reads the whole block starting at out[0]) |
| 662 idct8_1d_columns_add_blk_dspr2(&out[0], dest, dest_stride); | 662 idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride); |
| 663 } | 663 } |
| 664 | 664 |
| 665 void vp9_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest, | 665 void vp9_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest, |
| 666 int dest_stride) { | 666 int dest_stride) { |
| 667 uint32_t pos = 45; | 667 uint32_t pos = 45; |
| 668 int32_t out; | 668 int32_t out; |
| 669 int32_t r; | 669 int32_t r; |
| 670 int32_t a1, absa1; | 670 int32_t a1, absa1; |
| 671 int32_t t1, t2, vector_a1, vector_1, vector_2; | 671 int32_t t1, t2, vector_a1, vector_1, vector_2; |
| 672 | 672 |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 736 | 736 |
| 737 : [t1] "=&r" (t1), [t2] "=&r" (t2), | 737 : [t1] "=&r" (t1), [t2] "=&r" (t2), |
| 738 [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2), | 738 [vector_1] "=&r" (vector_1), [vector_2] "=&r" (vector_2), |
| 739 [dest] "+r" (dest) | 739 [dest] "+r" (dest) |
| 740 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1) | 740 : [dest_stride] "r" (dest_stride), [vector_a1] "r" (vector_a1) |
| 741 ); | 741 ); |
| 742 } | 742 } |
| 743 } | 743 } |
| 744 } | 744 } |
| 745 #endif // #if HAVE_DSPR2 | 745 #endif // #if HAVE_DSPR2 |
| OLD | NEW |