| Index: source/libvpx/vp9/common/vp9_idct.c
|
| ===================================================================
|
| --- source/libvpx/vp9/common/vp9_idct.c (revision 293081)
|
| +++ source/libvpx/vp9/common/vp9_idct.c (working copy)
|
| @@ -34,7 +34,7 @@
|
| // bd of x uses trans_low with 8+x bits, need to remove 24-x bits
|
| #define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd))
|
| #else
|
| -#define WRAPLOW(x, bd) (x)
|
| +#define WRAPLOW(x, bd) ((int32_t)(x))
|
| #endif // CONFIG_EMULATE_HARDWARE
|
|
|
| #if CONFIG_VP9_HIGHBITDEPTH
|
| @@ -276,10 +276,10 @@
|
| static void iadst4(const tran_low_t *input, tran_low_t *output) {
|
| tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
|
|
|
| - tran_high_t x0 = input[0];
|
| - tran_high_t x1 = input[1];
|
| - tran_high_t x2 = input[2];
|
| - tran_high_t x3 = input[3];
|
| + tran_low_t x0 = input[0];
|
| + tran_low_t x1 = input[1];
|
| + tran_low_t x2 = input[2];
|
| + tran_low_t x3 = input[3];
|
|
|
| if (!(x0 | x1 | x2 | x3)) {
|
| output[0] = output[1] = output[2] = output[3] = 0;
|
| @@ -295,24 +295,19 @@
|
| s6 = sinpi_4_9 * x3;
|
| s7 = x0 - x2 + x3;
|
|
|
| - x0 = s0 + s3 + s5;
|
| - x1 = s1 - s4 - s6;
|
| - x2 = sinpi_3_9 * s7;
|
| - x3 = s2;
|
| + s0 = s0 + s3 + s5;
|
| + s1 = s1 - s4 - s6;
|
| + s3 = s2;
|
| + s2 = sinpi_3_9 * s7;
|
|
|
| - s0 = x0 + x3;
|
| - s1 = x1 + x3;
|
| - s2 = x2;
|
| - s3 = x0 + x1 - x3;
|
| -
|
| // 1-D transform scaling factor is sqrt(2).
|
| // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
|
| // + 1b (addition) = 29b.
|
| // Hence the output bit depth is 15b.
|
| - output[0] = WRAPLOW(dct_const_round_shift(s0), 8);
|
| - output[1] = WRAPLOW(dct_const_round_shift(s1), 8);
|
| + output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), 8);
|
| + output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), 8);
|
| output[2] = WRAPLOW(dct_const_round_shift(s2), 8);
|
| - output[3] = WRAPLOW(dct_const_round_shift(s3), 8);
|
| + output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), 8);
|
| }
|
|
|
| void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
|
| @@ -367,14 +362,14 @@
|
| }
|
|
|
| // stage 1
|
| - s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
|
| - s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
|
| - s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
|
| - s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
|
| - s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
|
| - s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
|
| - s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
|
| - s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
|
| + s0 = (int)(cospi_2_64 * x0 + cospi_30_64 * x1);
|
| + s1 = (int)(cospi_30_64 * x0 - cospi_2_64 * x1);
|
| + s2 = (int)(cospi_10_64 * x2 + cospi_22_64 * x3);
|
| + s3 = (int)(cospi_22_64 * x2 - cospi_10_64 * x3);
|
| + s4 = (int)(cospi_18_64 * x4 + cospi_14_64 * x5);
|
| + s5 = (int)(cospi_14_64 * x4 - cospi_18_64 * x5);
|
| + s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7);
|
| + s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7);
|
|
|
| x0 = WRAPLOW(dct_const_round_shift(s0 + s4), 8);
|
| x1 = WRAPLOW(dct_const_round_shift(s1 + s5), 8);
|
| @@ -386,14 +381,14 @@
|
| x7 = WRAPLOW(dct_const_round_shift(s3 - s7), 8);
|
|
|
| // stage 2
|
| - s0 = x0;
|
| - s1 = x1;
|
| - s2 = x2;
|
| - s3 = x3;
|
| - s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
|
| - s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
|
| - s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
|
| - s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
|
| + s0 = (int)x0;
|
| + s1 = (int)x1;
|
| + s2 = (int)x2;
|
| + s3 = (int)x3;
|
| + s4 = (int)(cospi_8_64 * x4 + cospi_24_64 * x5);
|
| + s5 = (int)(cospi_24_64 * x4 - cospi_8_64 * x5);
|
| + s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7);
|
| + s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7);
|
|
|
| x0 = WRAPLOW(s0 + s2, 8);
|
| x1 = WRAPLOW(s1 + s3, 8);
|
| @@ -405,10 +400,10 @@
|
| x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8);
|
|
|
| // stage 3
|
| - s2 = cospi_16_64 * (x2 + x3);
|
| - s3 = cospi_16_64 * (x2 - x3);
|
| - s6 = cospi_16_64 * (x6 + x7);
|
| - s7 = cospi_16_64 * (x6 - x7);
|
| + s2 = (int)(cospi_16_64 * (x2 + x3));
|
| + s3 = (int)(cospi_16_64 * (x2 - x3));
|
| + s6 = (int)(cospi_16_64 * (x6 + x7));
|
| + s7 = (int)(cospi_16_64 * (x6 - x7));
|
|
|
| x2 = WRAPLOW(dct_const_round_shift(s2), 8);
|
| x3 = WRAPLOW(dct_const_round_shift(s3), 8);
|
| @@ -1702,10 +1697,10 @@
|
| static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) {
|
| tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
|
|
|
| - tran_high_t x0 = input[0];
|
| - tran_high_t x1 = input[1];
|
| - tran_high_t x2 = input[2];
|
| - tran_high_t x3 = input[3];
|
| + tran_low_t x0 = input[0];
|
| + tran_low_t x1 = input[1];
|
| + tran_low_t x2 = input[2];
|
| + tran_low_t x3 = input[3];
|
| (void) bd;
|
|
|
| if (!(x0 | x1 | x2 | x3)) {
|
| @@ -1720,26 +1715,21 @@
|
| s4 = sinpi_1_9 * x2;
|
| s5 = sinpi_2_9 * x3;
|
| s6 = sinpi_4_9 * x3;
|
| - s7 = x0 - x2 + x3;
|
| + s7 = (tran_high_t)(x0 - x2 + x3);
|
|
|
| - x0 = s0 + s3 + s5;
|
| - x1 = s1 - s4 - s6;
|
| - x2 = sinpi_3_9 * s7;
|
| - x3 = s2;
|
| + s0 = s0 + s3 + s5;
|
| + s1 = s1 - s4 - s6;
|
| + s3 = s2;
|
| + s2 = sinpi_3_9 * s7;
|
|
|
| - s0 = x0 + x3;
|
| - s1 = x1 + x3;
|
| - s2 = x2;
|
| - s3 = x0 + x1 - x3;
|
| -
|
| // 1-D transform scaling factor is sqrt(2).
|
| // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
|
| // + 1b (addition) = 29b.
|
| // Hence the output bit depth is 15b.
|
| - output[0] = WRAPLOW(dct_const_round_shift(s0), bd);
|
| - output[1] = WRAPLOW(dct_const_round_shift(s1), bd);
|
| + output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), bd);
|
| + output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), bd);
|
| output[2] = WRAPLOW(dct_const_round_shift(s2), bd);
|
| - output[3] = WRAPLOW(dct_const_round_shift(s3), bd);
|
| + output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), bd);
|
| }
|
|
|
| void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
|
| @@ -1779,14 +1769,14 @@
|
| static void highbd_iadst8(const tran_low_t *input, tran_low_t *output, int bd) {
|
| tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
|
|
|
| - tran_high_t x0 = input[7];
|
| - tran_high_t x1 = input[0];
|
| - tran_high_t x2 = input[5];
|
| - tran_high_t x3 = input[2];
|
| - tran_high_t x4 = input[3];
|
| - tran_high_t x5 = input[4];
|
| - tran_high_t x6 = input[1];
|
| - tran_high_t x7 = input[6];
|
| + tran_low_t x0 = input[7];
|
| + tran_low_t x1 = input[0];
|
| + tran_low_t x2 = input[5];
|
| + tran_low_t x3 = input[2];
|
| + tran_low_t x4 = input[3];
|
| + tran_low_t x5 = input[4];
|
| + tran_low_t x6 = input[1];
|
| + tran_low_t x7 = input[6];
|
| (void) bd;
|
|
|
| if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
|
| @@ -2113,22 +2103,22 @@
|
| tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
|
| tran_high_t s9, s10, s11, s12, s13, s14, s15;
|
|
|
| - tran_high_t x0 = input[15];
|
| - tran_high_t x1 = input[0];
|
| - tran_high_t x2 = input[13];
|
| - tran_high_t x3 = input[2];
|
| - tran_high_t x4 = input[11];
|
| - tran_high_t x5 = input[4];
|
| - tran_high_t x6 = input[9];
|
| - tran_high_t x7 = input[6];
|
| - tran_high_t x8 = input[7];
|
| - tran_high_t x9 = input[8];
|
| - tran_high_t x10 = input[5];
|
| - tran_high_t x11 = input[10];
|
| - tran_high_t x12 = input[3];
|
| - tran_high_t x13 = input[12];
|
| - tran_high_t x14 = input[1];
|
| - tran_high_t x15 = input[14];
|
| + tran_low_t x0 = input[15];
|
| + tran_low_t x1 = input[0];
|
| + tran_low_t x2 = input[13];
|
| + tran_low_t x3 = input[2];
|
| + tran_low_t x4 = input[11];
|
| + tran_low_t x5 = input[4];
|
| + tran_low_t x6 = input[9];
|
| + tran_low_t x7 = input[6];
|
| + tran_low_t x8 = input[7];
|
| + tran_low_t x9 = input[8];
|
| + tran_low_t x10 = input[5];
|
| + tran_low_t x11 = input[10];
|
| + tran_low_t x12 = input[3];
|
| + tran_low_t x13 = input[12];
|
| + tran_low_t x14 = input[1];
|
| + tran_low_t x15 = input[14];
|
| (void) bd;
|
|
|
| if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8
|
|
|