| Index: source/libvpx/vpx_dsp/mips/itrans8_dspr2.c
|
| diff --git a/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c b/source/libvpx/vpx_dsp/mips/itrans8_dspr2.c
|
| similarity index 91%
|
| copy from source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
|
| copy to source/libvpx/vpx_dsp/mips/itrans8_dspr2.c
|
| index 5270fa17f76df0a52b34359d74bb498ed425debe..823e845d59d5618396990d4b18c5e0ae962df131 100644
|
| --- a/source/libvpx/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c
|
| +++ b/source/libvpx/vpx_dsp/mips/itrans8_dspr2.c
|
| @@ -1,5 +1,5 @@
|
| /*
|
| - * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
|
| + * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
|
| *
|
| * Use of this source code is governed by a BSD-style license
|
| * that can be found in the LICENSE file in the root of the source
|
| @@ -8,20 +8,13 @@
|
| * be found in the AUTHORS file in the root of the source tree.
|
| */
|
|
|
| -#include <assert.h>
|
| -#include <stdio.h>
|
| -
|
| #include "./vpx_config.h"
|
| -#include "./vp9_rtcd.h"
|
| -#include "vp9/common/vp9_common.h"
|
| -#include "vp9/common/vp9_blockd.h"
|
| -#include "vp9/common/mips/dspr2/vp9_common_dspr2.h"
|
| +#include "./vpx_dsp_rtcd.h"
|
| +#include "vpx_dsp/mips/inv_txfm_dspr2.h"
|
| #include "vpx_dsp/txfm_common.h"
|
| -#include "vpx_ports/mem.h"
|
|
|
| #if HAVE_DSPR2
|
| -static void idct8_rows_dspr2(const int16_t *input, int16_t *output,
|
| - uint32_t no_rows) {
|
| +void idct8_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows) {
|
| int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
|
| const int const_2_power_13 = 8192;
|
| int Temp0, Temp1, Temp2, Temp3, Temp4;
|
| @@ -201,8 +194,8 @@ static void idct8_rows_dspr2(const int16_t *input, int16_t *output,
|
| }
|
| }
|
|
|
| -static void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
|
| - int dest_stride) {
|
| +void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
|
| + int dest_stride) {
|
| int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7;
|
| int Temp0, Temp1, Temp2, Temp3;
|
| int i;
|
| @@ -449,7 +442,7 @@ static void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest,
|
| }
|
| }
|
|
|
| -void vp9_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
|
| +void vpx_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
|
| int dest_stride) {
|
| DECLARE_ALIGNED(32, int16_t, out[8 * 8]);
|
| int16_t *outptr = out;
|
| @@ -469,156 +462,7 @@ void vp9_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
|
| idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
|
| }
|
|
|
| -static void iadst8_dspr2(const int16_t *input, int16_t *output) {
|
| - int s0, s1, s2, s3, s4, s5, s6, s7;
|
| - int x0, x1, x2, x3, x4, x5, x6, x7;
|
| -
|
| - x0 = input[7];
|
| - x1 = input[0];
|
| - x2 = input[5];
|
| - x3 = input[2];
|
| - x4 = input[3];
|
| - x5 = input[4];
|
| - x6 = input[1];
|
| - x7 = input[6];
|
| -
|
| - if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
|
| - output[0] = output[1] = output[2] = output[3] = output[4]
|
| - = output[5] = output[6] = output[7] = 0;
|
| - return;
|
| - }
|
| -
|
| - // stage 1
|
| - s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
|
| - s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
|
| - s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
|
| - s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
|
| - s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
|
| - s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
|
| - s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
|
| - s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
|
| -
|
| - x0 = ROUND_POWER_OF_TWO((s0 + s4), DCT_CONST_BITS);
|
| - x1 = ROUND_POWER_OF_TWO((s1 + s5), DCT_CONST_BITS);
|
| - x2 = ROUND_POWER_OF_TWO((s2 + s6), DCT_CONST_BITS);
|
| - x3 = ROUND_POWER_OF_TWO((s3 + s7), DCT_CONST_BITS);
|
| - x4 = ROUND_POWER_OF_TWO((s0 - s4), DCT_CONST_BITS);
|
| - x5 = ROUND_POWER_OF_TWO((s1 - s5), DCT_CONST_BITS);
|
| - x6 = ROUND_POWER_OF_TWO((s2 - s6), DCT_CONST_BITS);
|
| - x7 = ROUND_POWER_OF_TWO((s3 - s7), DCT_CONST_BITS);
|
| -
|
| - // stage 2
|
| - s0 = x0;
|
| - s1 = x1;
|
| - s2 = x2;
|
| - s3 = x3;
|
| - s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
|
| - s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
|
| - s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
|
| - s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
|
| -
|
| - x0 = s0 + s2;
|
| - x1 = s1 + s3;
|
| - x2 = s0 - s2;
|
| - x3 = s1 - s3;
|
| - x4 = ROUND_POWER_OF_TWO((s4 + s6), DCT_CONST_BITS);
|
| - x5 = ROUND_POWER_OF_TWO((s5 + s7), DCT_CONST_BITS);
|
| - x6 = ROUND_POWER_OF_TWO((s4 - s6), DCT_CONST_BITS);
|
| - x7 = ROUND_POWER_OF_TWO((s5 - s7), DCT_CONST_BITS);
|
| -
|
| - // stage 3
|
| - s2 = cospi_16_64 * (x2 + x3);
|
| - s3 = cospi_16_64 * (x2 - x3);
|
| - s6 = cospi_16_64 * (x6 + x7);
|
| - s7 = cospi_16_64 * (x6 - x7);
|
| -
|
| - x2 = ROUND_POWER_OF_TWO((s2), DCT_CONST_BITS);
|
| - x3 = ROUND_POWER_OF_TWO((s3), DCT_CONST_BITS);
|
| - x6 = ROUND_POWER_OF_TWO((s6), DCT_CONST_BITS);
|
| - x7 = ROUND_POWER_OF_TWO((s7), DCT_CONST_BITS);
|
| -
|
| - output[0] = x0;
|
| - output[1] = -x4;
|
| - output[2] = x6;
|
| - output[3] = -x2;
|
| - output[4] = x3;
|
| - output[5] = -x7;
|
| - output[6] = x5;
|
| - output[7] = -x1;
|
| -}
|
| -
|
| -void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest,
|
| - int dest_stride, int tx_type) {
|
| - int i, j;
|
| - DECLARE_ALIGNED(32, int16_t, out[8 * 8]);
|
| - int16_t *outptr = out;
|
| - int16_t temp_in[8 * 8], temp_out[8];
|
| - uint32_t pos = 45;
|
| -
|
| - /* bit positon for extract from acc */
|
| - __asm__ __volatile__ (
|
| - "wrdsp %[pos], 1 \n\t"
|
| - :
|
| - : [pos] "r" (pos)
|
| - );
|
| -
|
| - switch (tx_type) {
|
| - case DCT_DCT: // DCT in both horizontal and vertical
|
| - idct8_rows_dspr2(input, outptr, 8);
|
| - idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
|
| - break;
|
| - case ADST_DCT: // ADST in vertical, DCT in horizontal
|
| - idct8_rows_dspr2(input, outptr, 8);
|
| -
|
| - for (i = 0; i < 8; ++i) {
|
| - iadst8_dspr2(&out[i * 8], temp_out);
|
| -
|
| - for (j = 0; j < 8; ++j)
|
| - dest[j * dest_stride + i] =
|
| - clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
|
| - + dest[j * dest_stride + i]);
|
| - }
|
| - break;
|
| - case DCT_ADST: // DCT in vertical, ADST in horizontal
|
| - for (i = 0; i < 8; ++i) {
|
| - iadst8_dspr2(input, outptr);
|
| - input += 8;
|
| - outptr += 8;
|
| - }
|
| -
|
| - for (i = 0; i < 8; ++i) {
|
| - for (j = 0; j < 8; ++j) {
|
| - temp_in[i * 8 + j] = out[j * 8 + i];
|
| - }
|
| - }
|
| - idct8_columns_add_blk_dspr2(&temp_in[0], dest, dest_stride);
|
| - break;
|
| - case ADST_ADST: // ADST in both directions
|
| - for (i = 0; i < 8; ++i) {
|
| - iadst8_dspr2(input, outptr);
|
| - input += 8;
|
| - outptr += 8;
|
| - }
|
| -
|
| - for (i = 0; i < 8; ++i) {
|
| - for (j = 0; j < 8; ++j)
|
| - temp_in[j] = out[j * 8 + i];
|
| -
|
| - iadst8_dspr2(temp_in, temp_out);
|
| -
|
| - for (j = 0; j < 8; ++j)
|
| - dest[j * dest_stride + i] =
|
| - clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
|
| - + dest[j * dest_stride + i]);
|
| - }
|
| - break;
|
| - default:
|
| - printf("vp9_short_iht8x8_add_dspr2 : Invalid tx_type\n");
|
| - break;
|
| - }
|
| -}
|
| -
|
| -void vp9_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest,
|
| +void vpx_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest,
|
| int dest_stride) {
|
| DECLARE_ALIGNED(32, int16_t, out[8 * 8]);
|
| int16_t *outptr = out;
|
| @@ -663,7 +507,7 @@ void vp9_idct8x8_12_add_dspr2(const int16_t *input, uint8_t *dest,
|
| idct8_columns_add_blk_dspr2(&out[0], dest, dest_stride);
|
| }
|
|
|
| -void vp9_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest,
|
| +void vpx_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest,
|
| int dest_stride) {
|
| uint32_t pos = 45;
|
| int32_t out;
|
| @@ -743,4 +587,82 @@ void vp9_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest,
|
| }
|
| }
|
| }
|
| -#endif // #if HAVE_DSPR2
|
| +
|
| +void iadst8_dspr2(const int16_t *input, int16_t *output) {  // 8-point inverse ADST (per its name): reads input[0..7], writes output[0..7]
|
| + int s0, s1, s2, s3, s4, s5, s6, s7;  // s*: products / pre-rounding intermediates of each stage
|
| + int x0, x1, x2, x3, x4, x5, x6, x7;  // x*: working values carried between stages
|
| +
|
| + x0 = input[7];  // inputs are loaded in the permuted order 7, 0, 5, 2, 3, 4, 1, 6
|
| + x1 = input[0];
|
| + x2 = input[5];
|
| + x3 = input[2];
|
| + x4 = input[3];
|
| + x5 = input[4];
|
| + x6 = input[1];
|
| + x7 = input[6];
|
| +
|
| + if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {  // every input is zero: emit zeros and skip the arithmetic
|
| + output[0] = output[1] = output[2] = output[3] = output[4]
|
| + = output[5] = output[6] = output[7] = 0;
|
| + return;
|
| + }
|
| +
|
| + // stage 1: multiply each input pair (x0,x1), (x2,x3), (x4,x5), (x6,x7) by a pair of cospi_* constants
|
| + s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
|
| + s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
|
| + s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
|
| + s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
|
| + s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
|
| + s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
|
| + s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
|
| + s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
|
| +
|
| + x0 = ROUND_POWER_OF_TWO((s0 + s4), DCT_CONST_BITS);  // combine s[k] with s[k+4]: sums first, then differences. NOTE(review): ROUND_POWER_OF_TWO is defined outside this diff - presumably a rounding right shift by DCT_CONST_BITS; confirm
|
| + x1 = ROUND_POWER_OF_TWO((s1 + s5), DCT_CONST_BITS);
|
| + x2 = ROUND_POWER_OF_TWO((s2 + s6), DCT_CONST_BITS);
|
| + x3 = ROUND_POWER_OF_TWO((s3 + s7), DCT_CONST_BITS);
|
| + x4 = ROUND_POWER_OF_TWO((s0 - s4), DCT_CONST_BITS);
|
| + x5 = ROUND_POWER_OF_TWO((s1 - s5), DCT_CONST_BITS);
|
| + x6 = ROUND_POWER_OF_TWO((s2 - s6), DCT_CONST_BITS);
|
| + x7 = ROUND_POWER_OF_TWO((s3 - s7), DCT_CONST_BITS);
|
| +
|
| + // stage 2: x0..x3 pass through unchanged; x4..x7 get another cospi_* multiply
|
| + s0 = x0;
|
| + s1 = x1;
|
| + s2 = x2;
|
| + s3 = x3;
|
| + s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
|
| + s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
|
| + s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
|
| + s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
|
| +
|
| + x0 = s0 + s2;  // s0..s3 were not multiplied in this stage, so they are combined without rounding
|
| + x1 = s1 + s3;
|
| + x2 = s0 - s2;
|
| + x3 = s1 - s3;
|
| + x4 = ROUND_POWER_OF_TWO((s4 + s6), DCT_CONST_BITS);  // s4..s7 carry a cospi_* factor, so they go through ROUND_POWER_OF_TWO again
|
| + x5 = ROUND_POWER_OF_TWO((s5 + s7), DCT_CONST_BITS);
|
| + x6 = ROUND_POWER_OF_TWO((s4 - s6), DCT_CONST_BITS);
|
| + x7 = ROUND_POWER_OF_TWO((s5 - s7), DCT_CONST_BITS);
|
| +
|
| + // stage 3: scale (x2 +/- x3) and (x6 +/- x7) by cospi_16_64; x0, x1, x4, x5 are left untouched
|
| + s2 = cospi_16_64 * (x2 + x3);
|
| + s3 = cospi_16_64 * (x2 - x3);
|
| + s6 = cospi_16_64 * (x6 + x7);
|
| + s7 = cospi_16_64 * (x6 - x7);
|
| +
|
| + x2 = ROUND_POWER_OF_TWO((s2), DCT_CONST_BITS);
|
| + x3 = ROUND_POWER_OF_TWO((s3), DCT_CONST_BITS);
|
| + x6 = ROUND_POWER_OF_TWO((s6), DCT_CONST_BITS);
|
| + x7 = ROUND_POWER_OF_TWO((s7), DCT_CONST_BITS);
|
| +
|
| + output[0] = x0;  // results are stored in a permuted order, with the odd-indexed outputs negated
|
| + output[1] = -x4;
|
| + output[2] = x6;
|
| + output[3] = -x2;
|
| + output[4] = x3;
|
| + output[5] = -x7;
|
| + output[6] = x5;
|
| + output[7] = -x1;
|
| +}
|
| +#endif // HAVE_DSPR2
|
|
|