| Index: source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c
|
| ===================================================================
|
| --- source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c (revision 240950)
|
| +++ source/libvpx/vp9/common/mips/dspr2/vp9_itrans32_dspr2.c (working copy)
|
| @@ -19,7 +19,8 @@
|
| #include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
|
|
|
| #if HAVE_DSPR2
|
| -static void idct32_1d_rows_dspr2(const int16_t *input, int16_t *output) {
|
| +static void idct32_1d_rows_dspr2(const int16_t *input, int16_t *output,
|
| + uint32_t no_rows) {
|
| int16_t step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6;
|
| int16_t step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13;
|
| int16_t step1_14, step1_15, step1_16, step1_17, step1_18, step1_19, step1_20;
|
| @@ -42,7 +43,7 @@
|
| const int const_2_power_13 = 8192;
|
| const int32_t *input_int;
|
|
|
| - for (i = 32; i--; ) {
|
| + for (i = no_rows; i--; ) {
|
| input_int = (const int32_t *)input;
|
|
|
| if (!(input_int[0] | input_int[1] | input_int[2] | input_int[3] |
|
| @@ -881,12 +882,74 @@
|
| );
|
|
|
| // Rows
|
| - idct32_1d_rows_dspr2(input, outptr);
|
| + idct32_1d_rows_dspr2(input, outptr, 32);
|
|
|
| // Columns
|
| vp9_idct32_1d_cols_add_blk_dspr2(out, dest, dest_stride);
|
| }
|
|
|
| +// 32x32 inverse transform + add that runs the row pass on only the first 8
|
| +// input rows; out[r * 32 + 8..31] is zero-filled for each of the 32 rows r.
|
| +// NOTE(review): presumably the row pass stores output transposed -- confirm.
|
| +void vp9_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest,
|
| +                                int stride) {
|
| +  DECLARE_ALIGNED(32, int16_t, out[32 * 32]);
|
| +  int16_t *outptr = out;
|
| +  uint32_t i;
|
| +  uint32_t pos = 45;
|
| +  /* bit position for extract from acc */
|
| +  __asm__ __volatile__ (
|
| +      "wrdsp %[pos], 1 \n\t"
|
| +      :
|
| +      : [pos] "r" (pos)
|
| +  );
|
| +
|
| +  // Rows
|
| +  idct32_1d_rows_dspr2(input, outptr, 8);
|
| +  // Zero int16 elements 8..31 of row 0 (12 word stores = 48 bytes). "memory"
|
| +  // tells the compiler that the asm writes through outptr.
|
| +  outptr += 8;
|
| +  __asm__ __volatile__ (
|
| +      "sw $zero, 0(%[outptr]) \n\t"
|
| +      "sw $zero, 4(%[outptr]) \n\t"
|
| +      "sw $zero, 8(%[outptr]) \n\t"
|
| +      "sw $zero, 12(%[outptr]) \n\t"
|
| +      "sw $zero, 16(%[outptr]) \n\t"
|
| +      "sw $zero, 20(%[outptr]) \n\t"
|
| +      "sw $zero, 24(%[outptr]) \n\t"
|
| +      "sw $zero, 28(%[outptr]) \n\t"
|
| +      "sw $zero, 32(%[outptr]) \n\t"
|
| +      "sw $zero, 36(%[outptr]) \n\t"
|
| +      "sw $zero, 40(%[outptr]) \n\t"
|
| +      "sw $zero, 44(%[outptr]) \n\t"
|
| +      :
|
| +      : [outptr] "r" (outptr)
|
| +      : "memory"
|
| +  );
|
| +  for (i = 0; i < 31; ++i) {
|
| +    outptr += 32;  // same 24 elements in each of the remaining 31 rows
|
| +    __asm__ __volatile__ (
|
| +        "sw $zero, 0(%[outptr]) \n\t"
|
| +        "sw $zero, 4(%[outptr]) \n\t"
|
| +        "sw $zero, 8(%[outptr]) \n\t"
|
| +        "sw $zero, 12(%[outptr]) \n\t"
|
| +        "sw $zero, 16(%[outptr]) \n\t"
|
| +        "sw $zero, 20(%[outptr]) \n\t"
|
| +        "sw $zero, 24(%[outptr]) \n\t"
|
| +        "sw $zero, 28(%[outptr]) \n\t"
|
| +        "sw $zero, 32(%[outptr]) \n\t"
|
| +        "sw $zero, 36(%[outptr]) \n\t"
|
| +        "sw $zero, 40(%[outptr]) \n\t"
|
| +        "sw $zero, 44(%[outptr]) \n\t"
|
| +        :
|
| +        : [outptr] "r" (outptr)
|
| +        : "memory"
|
| +    );
|
| +  }
|
| +  // Columns
|
| +  vp9_idct32_1d_cols_add_blk_dspr2(out, dest, stride);
|
| +}
|
| +
|
| void vp9_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest,
|
| int stride) {
|
| int r, out;
|
|
|