| Index: source/libvpx/vp9/common/vp9_idct.c
|
| ===================================================================
|
| --- source/libvpx/vp9/common/vp9_idct.c (revision 292608)
|
| +++ source/libvpx/vp9/common/vp9_idct.c (working copy)
|
| @@ -8,48 +8,47 @@
|
| * be found in the AUTHORS file in the root of the source tree.
|
| */
|
|
|
| -#include <assert.h>
|
| #include <math.h>
|
|
|
| -#include "./vpx_config.h"
|
| #include "./vp9_rtcd.h"
|
| #include "vp9/common/vp9_systemdependent.h"
|
| #include "vp9/common/vp9_blockd.h"
|
| -#include "vp9/common/vp9_common.h"
|
| #include "vp9/common/vp9_idct.h"
|
|
|
| -#if CONFIG_EMULATE_HARDWARE_HIGHBITDEPTH
|
| -// When CONFIG_EMULATE_HW_HIGHBITDEPTH is 1 the transform performs strict
|
| -// overflow wrapping to match expected hardware implementations.
|
| +#if CONFIG_EMULATE_HARDWARE
|
| +// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
|
| +// non-normative method to handle overflows. A stream that causes
|
| +// overflows in the inverse transform is considered invalid in VP9,
|
| +// and a hardware implementer is free to choose any reasonable
|
| +// method to handle overflows. However to aid in hardware
|
| +// verification they can use a specific implementation of the
|
| +// WRAPLOW() macro below that is identical to their intended
|
| +// hardware implementation (and also use configure options to trigger
|
| +// the C-implementation of the transform).
|
| +//
|
| +// The particular WRAPLOW implementation below performs strict
|
| +// overflow wrapping to match common hardware implementations.
|
| // bd of 8 uses trans_low with 16bits, need to remove 16bits
|
| // bd of 10 uses trans_low with 18bits, need to remove 14bits
|
| // bd of 12 uses trans_low with 20bits, need to remove 12bits
|
| // bd of x uses trans_low with 8+x bits, need to remove 24-x bits
|
| -#define WRAPLOW(x) ((((int32_t)x) << (24 - bd)) >> (24 - bd))
|
| +#define WRAPLOW(x, bd) ((int32_t)((uint32_t)(x) << (24 - (bd))) >> (24 - (bd)))
|
| #else
|
| -#define WRAPLOW(x) (x)
|
| -#endif // CONFIG_EMULATE_HARDWARE_HIGHBITDEPTH
|
| +#define WRAPLOW(x, bd) (x)
|
| +#endif // CONFIG_EMULATE_HARDWARE
|
|
|
| #if CONFIG_VP9_HIGHBITDEPTH
|
| -static INLINE tran_low_t clamp_high(tran_high_t value, tran_low_t low,
|
| - tran_low_t high) {
|
| - return value < low ? low : (value > high ? high : value);
|
| +static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans,
|
| + int bd) {
|
| + trans = WRAPLOW(trans, bd);
|
| + return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd);
|
| }
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
|
| -static INLINE tran_low_t clip_pixel_bd_high(tran_high_t dest,
|
| - tran_high_t trans, int bd) {
|
| - trans = WRAPLOW(trans);
|
| - switch (bd) {
|
| - case 8:
|
| - default:
|
| - return clamp_high(WRAPLOW(dest + trans), 0, 255);
|
| - case 10:
|
| - return clamp_high(WRAPLOW(dest + trans), 0, 1023);
|
| - case 12:
|
| - return clamp_high(WRAPLOW(dest + trans), 0, 4095);
|
| - }
|
| +static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) {
|
| + trans = WRAPLOW(trans, 8);
|
| + return clip_pixel(WRAPLOW(dest + trans, 8));
|
| }
|
| -#endif // CONFIG_VP9_HIGHBITDEPTH
|
|
|
| void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
|
| /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
|
| @@ -72,10 +71,10 @@
|
| c1 = e1 - c1;
|
| a1 -= b1;
|
| d1 += c1;
|
| - op[0] = a1;
|
| - op[1] = b1;
|
| - op[2] = c1;
|
| - op[3] = d1;
|
| + op[0] = WRAPLOW(a1, 8);
|
| + op[1] = WRAPLOW(b1, 8);
|
| + op[2] = WRAPLOW(c1, 8);
|
| + op[3] = WRAPLOW(d1, 8);
|
| ip += 4;
|
| op += 4;
|
| }
|
| @@ -93,10 +92,10 @@
|
| c1 = e1 - c1;
|
| a1 -= b1;
|
| d1 += c1;
|
| - dest[stride * 0] = clip_pixel(dest[stride * 0] + a1);
|
| - dest[stride * 1] = clip_pixel(dest[stride * 1] + b1);
|
| - dest[stride * 2] = clip_pixel(dest[stride * 2] + c1);
|
| - dest[stride * 3] = clip_pixel(dest[stride * 3] + d1);
|
| + dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1);
|
| + dest[stride * 1] = clip_pixel_add(dest[stride * 1], b1);
|
| + dest[stride * 2] = clip_pixel_add(dest[stride * 2], c1);
|
| + dest[stride * 3] = clip_pixel_add(dest[stride * 3], d1);
|
|
|
| ip++;
|
| dest++;
|
| @@ -113,17 +112,17 @@
|
| a1 = ip[0] >> UNIT_QUANT_SHIFT;
|
| e1 = a1 >> 1;
|
| a1 -= e1;
|
| - op[0] = a1;
|
| - op[1] = op[2] = op[3] = e1;
|
| + op[0] = WRAPLOW(a1, 8);
|
| + op[1] = op[2] = op[3] = WRAPLOW(e1, 8);
|
|
|
| ip = tmp;
|
| for (i = 0; i < 4; i++) {
|
| e1 = ip[0] >> 1;
|
| a1 = ip[0] - e1;
|
| - dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1);
|
| - dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + e1);
|
| - dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + e1);
|
| - dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + e1);
|
| + dest[dest_stride * 0] = clip_pixel_add(dest[dest_stride * 0], a1);
|
| + dest[dest_stride * 1] = clip_pixel_add(dest[dest_stride * 1], e1);
|
| + dest[dest_stride * 2] = clip_pixel_add(dest[dest_stride * 2], e1);
|
| + dest[dest_stride * 3] = clip_pixel_add(dest[dest_stride * 3], e1);
|
| ip++;
|
| dest++;
|
| }
|
| @@ -135,18 +134,18 @@
|
| // stage 1
|
| temp1 = (input[0] + input[2]) * cospi_16_64;
|
| temp2 = (input[0] - input[2]) * cospi_16_64;
|
| - step[0] = dct_const_round_shift(temp1);
|
| - step[1] = dct_const_round_shift(temp2);
|
| + step[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
|
| temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
|
| - step[2] = dct_const_round_shift(temp1);
|
| - step[3] = dct_const_round_shift(temp2);
|
| + step[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| // stage 2
|
| - output[0] = step[0] + step[3];
|
| - output[1] = step[1] + step[2];
|
| - output[2] = step[1] - step[2];
|
| - output[3] = step[0] - step[3];
|
| + output[0] = WRAPLOW(step[0] + step[3], 8);
|
| + output[1] = WRAPLOW(step[1] + step[2], 8);
|
| + output[2] = WRAPLOW(step[1] - step[2], 8);
|
| + output[3] = WRAPLOW(step[0] - step[3], 8);
|
| }
|
|
|
| void vp9_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
|
| @@ -167,9 +166,10 @@
|
| for (j = 0; j < 4; ++j)
|
| temp_in[j] = out[j * 4 + i];
|
| idct4(temp_in, temp_out);
|
| - for (j = 0; j < 4; ++j)
|
| - dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
|
| - + dest[j * stride + i]);
|
| + for (j = 0; j < 4; ++j) {
|
| + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
|
| + ROUND_POWER_OF_TWO(temp_out[j], 4));
|
| + }
|
| }
|
| }
|
|
|
| @@ -177,15 +177,15 @@
|
| int dest_stride) {
|
| int i;
|
| tran_high_t a1;
|
| - tran_low_t out = dct_const_round_shift(input[0] * cospi_16_64);
|
| - out = dct_const_round_shift(out * cospi_16_64);
|
| + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
|
| + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8);
|
| a1 = ROUND_POWER_OF_TWO(out, 4);
|
|
|
| for (i = 0; i < 4; i++) {
|
| - dest[0] = clip_pixel(dest[0] + a1);
|
| - dest[1] = clip_pixel(dest[1] + a1);
|
| - dest[2] = clip_pixel(dest[2] + a1);
|
| - dest[3] = clip_pixel(dest[3] + a1);
|
| + dest[0] = clip_pixel_add(dest[0], a1);
|
| + dest[1] = clip_pixel_add(dest[1], a1);
|
| + dest[2] = clip_pixel_add(dest[2], a1);
|
| + dest[3] = clip_pixel_add(dest[3], a1);
|
| dest += dest_stride;
|
| }
|
| }
|
| @@ -200,39 +200,39 @@
|
| step1[3] = input[6];
|
| temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
|
| temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
|
| - step1[4] = dct_const_round_shift(temp1);
|
| - step1[7] = dct_const_round_shift(temp2);
|
| + step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
|
| temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
|
| - step1[5] = dct_const_round_shift(temp1);
|
| - step1[6] = dct_const_round_shift(temp2);
|
| + step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| // stage 2 & stage 3 - even half
|
| idct4(step1, step1);
|
|
|
| // stage 2 - odd half
|
| - step2[4] = step1[4] + step1[5];
|
| - step2[5] = step1[4] - step1[5];
|
| - step2[6] = -step1[6] + step1[7];
|
| - step2[7] = step1[6] + step1[7];
|
| + step2[4] = WRAPLOW(step1[4] + step1[5], 8);
|
| + step2[5] = WRAPLOW(step1[4] - step1[5], 8);
|
| + step2[6] = WRAPLOW(-step1[6] + step1[7], 8);
|
| + step2[7] = WRAPLOW(step1[6] + step1[7], 8);
|
|
|
| // stage 3 -odd half
|
| step1[4] = step2[4];
|
| temp1 = (step2[6] - step2[5]) * cospi_16_64;
|
| temp2 = (step2[5] + step2[6]) * cospi_16_64;
|
| - step1[5] = dct_const_round_shift(temp1);
|
| - step1[6] = dct_const_round_shift(temp2);
|
| + step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| step1[7] = step2[7];
|
|
|
| // stage 4
|
| - output[0] = step1[0] + step1[7];
|
| - output[1] = step1[1] + step1[6];
|
| - output[2] = step1[2] + step1[5];
|
| - output[3] = step1[3] + step1[4];
|
| - output[4] = step1[3] - step1[4];
|
| - output[5] = step1[2] - step1[5];
|
| - output[6] = step1[1] - step1[6];
|
| - output[7] = step1[0] - step1[7];
|
| + output[0] = WRAPLOW(step1[0] + step1[7], 8);
|
| + output[1] = WRAPLOW(step1[1] + step1[6], 8);
|
| + output[2] = WRAPLOW(step1[2] + step1[5], 8);
|
| + output[3] = WRAPLOW(step1[3] + step1[4], 8);
|
| + output[4] = WRAPLOW(step1[3] - step1[4], 8);
|
| + output[5] = WRAPLOW(step1[2] - step1[5], 8);
|
| + output[6] = WRAPLOW(step1[1] - step1[6], 8);
|
| + output[7] = WRAPLOW(step1[0] - step1[7], 8);
|
| }
|
|
|
| void vp9_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
|
| @@ -253,9 +253,10 @@
|
| for (j = 0; j < 8; ++j)
|
| temp_in[j] = out[j * 8 + i];
|
| idct8(temp_in, temp_out);
|
| - for (j = 0; j < 8; ++j)
|
| - dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
|
| - + dest[j * stride + i]);
|
| + for (j = 0; j < 8; ++j) {
|
| + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
|
| + ROUND_POWER_OF_TWO(temp_out[j], 5));
|
| + }
|
| }
|
| }
|
|
|
| @@ -262,12 +263,12 @@
|
| void vp9_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
|
| int i, j;
|
| tran_high_t a1;
|
| - tran_low_t out = dct_const_round_shift(input[0] * cospi_16_64);
|
| - out = dct_const_round_shift(out * cospi_16_64);
|
| + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
|
| + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8);
|
| a1 = ROUND_POWER_OF_TWO(out, 5);
|
| for (j = 0; j < 8; ++j) {
|
| for (i = 0; i < 8; ++i)
|
| - dest[i] = clip_pixel(dest[i] + a1);
|
| + dest[i] = clip_pixel_add(dest[i], a1);
|
| dest += stride;
|
| }
|
| }
|
| @@ -308,10 +309,10 @@
|
| // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
|
| // + 1b (addition) = 29b.
|
| // Hence the output bit depth is 15b.
|
| - output[0] = dct_const_round_shift(s0);
|
| - output[1] = dct_const_round_shift(s1);
|
| - output[2] = dct_const_round_shift(s2);
|
| - output[3] = dct_const_round_shift(s3);
|
| + output[0] = WRAPLOW(dct_const_round_shift(s0), 8);
|
| + output[1] = WRAPLOW(dct_const_round_shift(s1), 8);
|
| + output[2] = WRAPLOW(dct_const_round_shift(s2), 8);
|
| + output[3] = WRAPLOW(dct_const_round_shift(s3), 8);
|
| }
|
|
|
| void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
|
| @@ -340,11 +341,13 @@
|
| for (j = 0; j < 4; ++j)
|
| temp_in[j] = out[j * 4 + i];
|
| IHT_4[tx_type].cols(temp_in, temp_out);
|
| - for (j = 0; j < 4; ++j)
|
| - dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
|
| - + dest[j * stride + i]);
|
| + for (j = 0; j < 4; ++j) {
|
| + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
|
| + ROUND_POWER_OF_TWO(temp_out[j], 4));
|
| + }
|
| }
|
| }
|
| +
|
| static void iadst8(const tran_low_t *input, tran_low_t *output) {
|
| int s0, s1, s2, s3, s4, s5, s6, s7;
|
|
|
| @@ -373,14 +376,14 @@
|
| s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
|
| s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
|
|
|
| - x0 = dct_const_round_shift(s0 + s4);
|
| - x1 = dct_const_round_shift(s1 + s5);
|
| - x2 = dct_const_round_shift(s2 + s6);
|
| - x3 = dct_const_round_shift(s3 + s7);
|
| - x4 = dct_const_round_shift(s0 - s4);
|
| - x5 = dct_const_round_shift(s1 - s5);
|
| - x6 = dct_const_round_shift(s2 - s6);
|
| - x7 = dct_const_round_shift(s3 - s7);
|
| + x0 = WRAPLOW(dct_const_round_shift(s0 + s4), 8);
|
| + x1 = WRAPLOW(dct_const_round_shift(s1 + s5), 8);
|
| + x2 = WRAPLOW(dct_const_round_shift(s2 + s6), 8);
|
| + x3 = WRAPLOW(dct_const_round_shift(s3 + s7), 8);
|
| + x4 = WRAPLOW(dct_const_round_shift(s0 - s4), 8);
|
| + x5 = WRAPLOW(dct_const_round_shift(s1 - s5), 8);
|
| + x6 = WRAPLOW(dct_const_round_shift(s2 - s6), 8);
|
| + x7 = WRAPLOW(dct_const_round_shift(s3 - s7), 8);
|
|
|
| // stage 2
|
| s0 = x0;
|
| @@ -392,14 +395,14 @@
|
| s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
|
| s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
|
|
|
| - x0 = s0 + s2;
|
| - x1 = s1 + s3;
|
| - x2 = s0 - s2;
|
| - x3 = s1 - s3;
|
| - x4 = dct_const_round_shift(s4 + s6);
|
| - x5 = dct_const_round_shift(s5 + s7);
|
| - x6 = dct_const_round_shift(s4 - s6);
|
| - x7 = dct_const_round_shift(s5 - s7);
|
| + x0 = WRAPLOW(s0 + s2, 8);
|
| + x1 = WRAPLOW(s1 + s3, 8);
|
| + x2 = WRAPLOW(s0 - s2, 8);
|
| + x3 = WRAPLOW(s1 - s3, 8);
|
| + x4 = WRAPLOW(dct_const_round_shift(s4 + s6), 8);
|
| + x5 = WRAPLOW(dct_const_round_shift(s5 + s7), 8);
|
| + x6 = WRAPLOW(dct_const_round_shift(s4 - s6), 8);
|
| + x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8);
|
|
|
| // stage 3
|
| s2 = cospi_16_64 * (x2 + x3);
|
| @@ -407,19 +410,19 @@
|
| s6 = cospi_16_64 * (x6 + x7);
|
| s7 = cospi_16_64 * (x6 - x7);
|
|
|
| - x2 = dct_const_round_shift(s2);
|
| - x3 = dct_const_round_shift(s3);
|
| - x6 = dct_const_round_shift(s6);
|
| - x7 = dct_const_round_shift(s7);
|
| + x2 = WRAPLOW(dct_const_round_shift(s2), 8);
|
| + x3 = WRAPLOW(dct_const_round_shift(s3), 8);
|
| + x6 = WRAPLOW(dct_const_round_shift(s6), 8);
|
| + x7 = WRAPLOW(dct_const_round_shift(s7), 8);
|
|
|
| - output[0] = x0;
|
| - output[1] = -x4;
|
| - output[2] = x6;
|
| - output[3] = -x2;
|
| - output[4] = x3;
|
| - output[5] = -x7;
|
| - output[6] = x5;
|
| - output[7] = -x1;
|
| + output[0] = WRAPLOW(x0, 8);
|
| + output[1] = WRAPLOW(-x4, 8);
|
| + output[2] = WRAPLOW(x6, 8);
|
| + output[3] = WRAPLOW(-x2, 8);
|
| + output[4] = WRAPLOW(x3, 8);
|
| + output[5] = WRAPLOW(-x7, 8);
|
| + output[6] = WRAPLOW(x5, 8);
|
| + output[7] = WRAPLOW(-x1, 8);
|
| }
|
|
|
| static const transform_2d IHT_8[] = {
|
| @@ -449,9 +452,10 @@
|
| for (j = 0; j < 8; ++j)
|
| temp_in[j] = out[j * 8 + i];
|
| ht.cols(temp_in, temp_out);
|
| - for (j = 0; j < 8; ++j)
|
| - dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
|
| - + dest[j * stride + i]);
|
| + for (j = 0; j < 8; ++j) {
|
| + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
|
| + ROUND_POWER_OF_TWO(temp_out[j], 5));
|
| + }
|
| }
|
| }
|
|
|
| @@ -474,9 +478,10 @@
|
| for (j = 0; j < 8; ++j)
|
| temp_in[j] = out[j * 8 + i];
|
| idct8(temp_in, temp_out);
|
| - for (j = 0; j < 8; ++j)
|
| - dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
|
| - + dest[j * stride + i]);
|
| + for (j = 0; j < 8; ++j) {
|
| + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
|
| + ROUND_POWER_OF_TWO(temp_out[j], 5));
|
| + }
|
| }
|
| }
|
|
|
| @@ -514,23 +519,23 @@
|
|
|
| temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
|
| temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
|
| - step2[8] = dct_const_round_shift(temp1);
|
| - step2[15] = dct_const_round_shift(temp2);
|
| + step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
|
| temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
|
| - step2[9] = dct_const_round_shift(temp1);
|
| - step2[14] = dct_const_round_shift(temp2);
|
| + step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
|
| temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
|
| - step2[10] = dct_const_round_shift(temp1);
|
| - step2[13] = dct_const_round_shift(temp2);
|
| + step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
|
| temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
|
| - step2[11] = dct_const_round_shift(temp1);
|
| - step2[12] = dct_const_round_shift(temp2);
|
| + step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| // stage 3
|
| step1[0] = step2[0];
|
| @@ -540,109 +545,109 @@
|
|
|
| temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
|
| temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
|
| - step1[4] = dct_const_round_shift(temp1);
|
| - step1[7] = dct_const_round_shift(temp2);
|
| + step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
|
| temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
|
| - step1[5] = dct_const_round_shift(temp1);
|
| - step1[6] = dct_const_round_shift(temp2);
|
| + step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| - step1[8] = step2[8] + step2[9];
|
| - step1[9] = step2[8] - step2[9];
|
| - step1[10] = -step2[10] + step2[11];
|
| - step1[11] = step2[10] + step2[11];
|
| - step1[12] = step2[12] + step2[13];
|
| - step1[13] = step2[12] - step2[13];
|
| - step1[14] = -step2[14] + step2[15];
|
| - step1[15] = step2[14] + step2[15];
|
| + step1[8] = WRAPLOW(step2[8] + step2[9], 8);
|
| + step1[9] = WRAPLOW(step2[8] - step2[9], 8);
|
| + step1[10] = WRAPLOW(-step2[10] + step2[11], 8);
|
| + step1[11] = WRAPLOW(step2[10] + step2[11], 8);
|
| + step1[12] = WRAPLOW(step2[12] + step2[13], 8);
|
| + step1[13] = WRAPLOW(step2[12] - step2[13], 8);
|
| + step1[14] = WRAPLOW(-step2[14] + step2[15], 8);
|
| + step1[15] = WRAPLOW(step2[14] + step2[15], 8);
|
|
|
| // stage 4
|
| temp1 = (step1[0] + step1[1]) * cospi_16_64;
|
| temp2 = (step1[0] - step1[1]) * cospi_16_64;
|
| - step2[0] = dct_const_round_shift(temp1);
|
| - step2[1] = dct_const_round_shift(temp2);
|
| + step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
|
| temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
|
| - step2[2] = dct_const_round_shift(temp1);
|
| - step2[3] = dct_const_round_shift(temp2);
|
| - step2[4] = step1[4] + step1[5];
|
| - step2[5] = step1[4] - step1[5];
|
| - step2[6] = -step1[6] + step1[7];
|
| - step2[7] = step1[6] + step1[7];
|
| + step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| + step2[4] = WRAPLOW(step1[4] + step1[5], 8);
|
| + step2[5] = WRAPLOW(step1[4] - step1[5], 8);
|
| + step2[6] = WRAPLOW(-step1[6] + step1[7], 8);
|
| + step2[7] = WRAPLOW(step1[6] + step1[7], 8);
|
|
|
| step2[8] = step1[8];
|
| step2[15] = step1[15];
|
| temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
|
| temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
|
| - step2[9] = dct_const_round_shift(temp1);
|
| - step2[14] = dct_const_round_shift(temp2);
|
| + step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
|
| temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
|
| - step2[10] = dct_const_round_shift(temp1);
|
| - step2[13] = dct_const_round_shift(temp2);
|
| + step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| step2[11] = step1[11];
|
| step2[12] = step1[12];
|
|
|
| // stage 5
|
| - step1[0] = step2[0] + step2[3];
|
| - step1[1] = step2[1] + step2[2];
|
| - step1[2] = step2[1] - step2[2];
|
| - step1[3] = step2[0] - step2[3];
|
| + step1[0] = WRAPLOW(step2[0] + step2[3], 8);
|
| + step1[1] = WRAPLOW(step2[1] + step2[2], 8);
|
| + step1[2] = WRAPLOW(step2[1] - step2[2], 8);
|
| + step1[3] = WRAPLOW(step2[0] - step2[3], 8);
|
| step1[4] = step2[4];
|
| temp1 = (step2[6] - step2[5]) * cospi_16_64;
|
| temp2 = (step2[5] + step2[6]) * cospi_16_64;
|
| - step1[5] = dct_const_round_shift(temp1);
|
| - step1[6] = dct_const_round_shift(temp2);
|
| + step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| step1[7] = step2[7];
|
|
|
| - step1[8] = step2[8] + step2[11];
|
| - step1[9] = step2[9] + step2[10];
|
| - step1[10] = step2[9] - step2[10];
|
| - step1[11] = step2[8] - step2[11];
|
| - step1[12] = -step2[12] + step2[15];
|
| - step1[13] = -step2[13] + step2[14];
|
| - step1[14] = step2[13] + step2[14];
|
| - step1[15] = step2[12] + step2[15];
|
| + step1[8] = WRAPLOW(step2[8] + step2[11], 8);
|
| + step1[9] = WRAPLOW(step2[9] + step2[10], 8);
|
| + step1[10] = WRAPLOW(step2[9] - step2[10], 8);
|
| + step1[11] = WRAPLOW(step2[8] - step2[11], 8);
|
| + step1[12] = WRAPLOW(-step2[12] + step2[15], 8);
|
| + step1[13] = WRAPLOW(-step2[13] + step2[14], 8);
|
| + step1[14] = WRAPLOW(step2[13] + step2[14], 8);
|
| + step1[15] = WRAPLOW(step2[12] + step2[15], 8);
|
|
|
| // stage 6
|
| - step2[0] = step1[0] + step1[7];
|
| - step2[1] = step1[1] + step1[6];
|
| - step2[2] = step1[2] + step1[5];
|
| - step2[3] = step1[3] + step1[4];
|
| - step2[4] = step1[3] - step1[4];
|
| - step2[5] = step1[2] - step1[5];
|
| - step2[6] = step1[1] - step1[6];
|
| - step2[7] = step1[0] - step1[7];
|
| + step2[0] = WRAPLOW(step1[0] + step1[7], 8);
|
| + step2[1] = WRAPLOW(step1[1] + step1[6], 8);
|
| + step2[2] = WRAPLOW(step1[2] + step1[5], 8);
|
| + step2[3] = WRAPLOW(step1[3] + step1[4], 8);
|
| + step2[4] = WRAPLOW(step1[3] - step1[4], 8);
|
| + step2[5] = WRAPLOW(step1[2] - step1[5], 8);
|
| + step2[6] = WRAPLOW(step1[1] - step1[6], 8);
|
| + step2[7] = WRAPLOW(step1[0] - step1[7], 8);
|
| step2[8] = step1[8];
|
| step2[9] = step1[9];
|
| temp1 = (-step1[10] + step1[13]) * cospi_16_64;
|
| temp2 = (step1[10] + step1[13]) * cospi_16_64;
|
| - step2[10] = dct_const_round_shift(temp1);
|
| - step2[13] = dct_const_round_shift(temp2);
|
| + step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = (-step1[11] + step1[12]) * cospi_16_64;
|
| temp2 = (step1[11] + step1[12]) * cospi_16_64;
|
| - step2[11] = dct_const_round_shift(temp1);
|
| - step2[12] = dct_const_round_shift(temp2);
|
| + step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| step2[14] = step1[14];
|
| step2[15] = step1[15];
|
|
|
| // stage 7
|
| - output[0] = step2[0] + step2[15];
|
| - output[1] = step2[1] + step2[14];
|
| - output[2] = step2[2] + step2[13];
|
| - output[3] = step2[3] + step2[12];
|
| - output[4] = step2[4] + step2[11];
|
| - output[5] = step2[5] + step2[10];
|
| - output[6] = step2[6] + step2[9];
|
| - output[7] = step2[7] + step2[8];
|
| - output[8] = step2[7] - step2[8];
|
| - output[9] = step2[6] - step2[9];
|
| - output[10] = step2[5] - step2[10];
|
| - output[11] = step2[4] - step2[11];
|
| - output[12] = step2[3] - step2[12];
|
| - output[13] = step2[2] - step2[13];
|
| - output[14] = step2[1] - step2[14];
|
| - output[15] = step2[0] - step2[15];
|
| + output[0] = WRAPLOW(step2[0] + step2[15], 8);
|
| + output[1] = WRAPLOW(step2[1] + step2[14], 8);
|
| + output[2] = WRAPLOW(step2[2] + step2[13], 8);
|
| + output[3] = WRAPLOW(step2[3] + step2[12], 8);
|
| + output[4] = WRAPLOW(step2[4] + step2[11], 8);
|
| + output[5] = WRAPLOW(step2[5] + step2[10], 8);
|
| + output[6] = WRAPLOW(step2[6] + step2[9], 8);
|
| + output[7] = WRAPLOW(step2[7] + step2[8], 8);
|
| + output[8] = WRAPLOW(step2[7] - step2[8], 8);
|
| + output[9] = WRAPLOW(step2[6] - step2[9], 8);
|
| + output[10] = WRAPLOW(step2[5] - step2[10], 8);
|
| + output[11] = WRAPLOW(step2[4] - step2[11], 8);
|
| + output[12] = WRAPLOW(step2[3] - step2[12], 8);
|
| + output[13] = WRAPLOW(step2[2] - step2[13], 8);
|
| + output[14] = WRAPLOW(step2[1] - step2[14], 8);
|
| + output[15] = WRAPLOW(step2[0] - step2[15], 8);
|
| }
|
|
|
| void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest,
|
| @@ -664,9 +669,10 @@
|
| for (j = 0; j < 16; ++j)
|
| temp_in[j] = out[j * 16 + i];
|
| idct16(temp_in, temp_out);
|
| - for (j = 0; j < 16; ++j)
|
| - dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
|
| - + dest[j * stride + i]);
|
| + for (j = 0; j < 16; ++j) {
|
| + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
|
| + ROUND_POWER_OF_TWO(temp_out[j], 6));
|
| + }
|
| }
|
| }
|
|
|
| @@ -718,22 +724,22 @@
|
| s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
|
| s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
|
|
|
| - x0 = dct_const_round_shift(s0 + s8);
|
| - x1 = dct_const_round_shift(s1 + s9);
|
| - x2 = dct_const_round_shift(s2 + s10);
|
| - x3 = dct_const_round_shift(s3 + s11);
|
| - x4 = dct_const_round_shift(s4 + s12);
|
| - x5 = dct_const_round_shift(s5 + s13);
|
| - x6 = dct_const_round_shift(s6 + s14);
|
| - x7 = dct_const_round_shift(s7 + s15);
|
| - x8 = dct_const_round_shift(s0 - s8);
|
| - x9 = dct_const_round_shift(s1 - s9);
|
| - x10 = dct_const_round_shift(s2 - s10);
|
| - x11 = dct_const_round_shift(s3 - s11);
|
| - x12 = dct_const_round_shift(s4 - s12);
|
| - x13 = dct_const_round_shift(s5 - s13);
|
| - x14 = dct_const_round_shift(s6 - s14);
|
| - x15 = dct_const_round_shift(s7 - s15);
|
| + x0 = WRAPLOW(dct_const_round_shift(s0 + s8), 8);
|
| + x1 = WRAPLOW(dct_const_round_shift(s1 + s9), 8);
|
| + x2 = WRAPLOW(dct_const_round_shift(s2 + s10), 8);
|
| + x3 = WRAPLOW(dct_const_round_shift(s3 + s11), 8);
|
| + x4 = WRAPLOW(dct_const_round_shift(s4 + s12), 8);
|
| + x5 = WRAPLOW(dct_const_round_shift(s5 + s13), 8);
|
| + x6 = WRAPLOW(dct_const_round_shift(s6 + s14), 8);
|
| + x7 = WRAPLOW(dct_const_round_shift(s7 + s15), 8);
|
| + x8 = WRAPLOW(dct_const_round_shift(s0 - s8), 8);
|
| + x9 = WRAPLOW(dct_const_round_shift(s1 - s9), 8);
|
| + x10 = WRAPLOW(dct_const_round_shift(s2 - s10), 8);
|
| + x11 = WRAPLOW(dct_const_round_shift(s3 - s11), 8);
|
| + x12 = WRAPLOW(dct_const_round_shift(s4 - s12), 8);
|
| + x13 = WRAPLOW(dct_const_round_shift(s5 - s13), 8);
|
| + x14 = WRAPLOW(dct_const_round_shift(s6 - s14), 8);
|
| + x15 = WRAPLOW(dct_const_round_shift(s7 - s15), 8);
|
|
|
| // stage 2
|
| s0 = x0;
|
| @@ -753,22 +759,22 @@
|
| s14 = - x14 * cospi_12_64 + x15 * cospi_20_64;
|
| s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
|
|
|
| - x0 = s0 + s4;
|
| - x1 = s1 + s5;
|
| - x2 = s2 + s6;
|
| - x3 = s3 + s7;
|
| - x4 = s0 - s4;
|
| - x5 = s1 - s5;
|
| - x6 = s2 - s6;
|
| - x7 = s3 - s7;
|
| - x8 = dct_const_round_shift(s8 + s12);
|
| - x9 = dct_const_round_shift(s9 + s13);
|
| - x10 = dct_const_round_shift(s10 + s14);
|
| - x11 = dct_const_round_shift(s11 + s15);
|
| - x12 = dct_const_round_shift(s8 - s12);
|
| - x13 = dct_const_round_shift(s9 - s13);
|
| - x14 = dct_const_round_shift(s10 - s14);
|
| - x15 = dct_const_round_shift(s11 - s15);
|
| + x0 = WRAPLOW(s0 + s4, 8);
|
| + x1 = WRAPLOW(s1 + s5, 8);
|
| + x2 = WRAPLOW(s2 + s6, 8);
|
| + x3 = WRAPLOW(s3 + s7, 8);
|
| + x4 = WRAPLOW(s0 - s4, 8);
|
| + x5 = WRAPLOW(s1 - s5, 8);
|
| + x6 = WRAPLOW(s2 - s6, 8);
|
| + x7 = WRAPLOW(s3 - s7, 8);
|
| + x8 = WRAPLOW(dct_const_round_shift(s8 + s12), 8);
|
| + x9 = WRAPLOW(dct_const_round_shift(s9 + s13), 8);
|
| + x10 = WRAPLOW(dct_const_round_shift(s10 + s14), 8);
|
| + x11 = WRAPLOW(dct_const_round_shift(s11 + s15), 8);
|
| + x12 = WRAPLOW(dct_const_round_shift(s8 - s12), 8);
|
| + x13 = WRAPLOW(dct_const_round_shift(s9 - s13), 8);
|
| + x14 = WRAPLOW(dct_const_round_shift(s10 - s14), 8);
|
| + x15 = WRAPLOW(dct_const_round_shift(s11 - s15), 8);
|
|
|
| // stage 3
|
| s0 = x0;
|
| @@ -788,22 +794,22 @@
|
| s14 = - x14 * cospi_24_64 + x15 * cospi_8_64;
|
| s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
|
|
|
| - x0 = s0 + s2;
|
| - x1 = s1 + s3;
|
| - x2 = s0 - s2;
|
| - x3 = s1 - s3;
|
| - x4 = dct_const_round_shift(s4 + s6);
|
| - x5 = dct_const_round_shift(s5 + s7);
|
| - x6 = dct_const_round_shift(s4 - s6);
|
| - x7 = dct_const_round_shift(s5 - s7);
|
| - x8 = s8 + s10;
|
| - x9 = s9 + s11;
|
| - x10 = s8 - s10;
|
| - x11 = s9 - s11;
|
| - x12 = dct_const_round_shift(s12 + s14);
|
| - x13 = dct_const_round_shift(s13 + s15);
|
| - x14 = dct_const_round_shift(s12 - s14);
|
| - x15 = dct_const_round_shift(s13 - s15);
|
| + x0 = WRAPLOW(check_range(s0 + s2), 8);
|
| + x1 = WRAPLOW(check_range(s1 + s3), 8);
|
| + x2 = WRAPLOW(check_range(s0 - s2), 8);
|
| + x3 = WRAPLOW(check_range(s1 - s3), 8);
|
| + x4 = WRAPLOW(dct_const_round_shift(s4 + s6), 8);
|
| + x5 = WRAPLOW(dct_const_round_shift(s5 + s7), 8);
|
| + x6 = WRAPLOW(dct_const_round_shift(s4 - s6), 8);
|
| + x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8);
|
| + x8 = WRAPLOW(check_range(s8 + s10), 8);
|
| + x9 = WRAPLOW(check_range(s9 + s11), 8);
|
| + x10 = WRAPLOW(check_range(s8 - s10), 8);
|
| + x11 = WRAPLOW(check_range(s9 - s11), 8);
|
| + x12 = WRAPLOW(dct_const_round_shift(s12 + s14), 8);
|
| + x13 = WRAPLOW(dct_const_round_shift(s13 + s15), 8);
|
| + x14 = WRAPLOW(dct_const_round_shift(s12 - s14), 8);
|
| + x15 = WRAPLOW(dct_const_round_shift(s13 - s15), 8);
|
|
|
| // stage 4
|
| s2 = (- cospi_16_64) * (x2 + x3);
|
| @@ -815,31 +821,31 @@
|
| s14 = (- cospi_16_64) * (x14 + x15);
|
| s15 = cospi_16_64 * (x14 - x15);
|
|
|
| - x2 = dct_const_round_shift(s2);
|
| - x3 = dct_const_round_shift(s3);
|
| - x6 = dct_const_round_shift(s6);
|
| - x7 = dct_const_round_shift(s7);
|
| - x10 = dct_const_round_shift(s10);
|
| - x11 = dct_const_round_shift(s11);
|
| - x14 = dct_const_round_shift(s14);
|
| - x15 = dct_const_round_shift(s15);
|
| + x2 = WRAPLOW(dct_const_round_shift(s2), 8);
|
| + x3 = WRAPLOW(dct_const_round_shift(s3), 8);
|
| + x6 = WRAPLOW(dct_const_round_shift(s6), 8);
|
| + x7 = WRAPLOW(dct_const_round_shift(s7), 8);
|
| + x10 = WRAPLOW(dct_const_round_shift(s10), 8);
|
| + x11 = WRAPLOW(dct_const_round_shift(s11), 8);
|
| + x14 = WRAPLOW(dct_const_round_shift(s14), 8);
|
| + x15 = WRAPLOW(dct_const_round_shift(s15), 8);
|
|
|
| - output[0] = x0;
|
| - output[1] = -x8;
|
| - output[2] = x12;
|
| - output[3] = -x4;
|
| - output[4] = x6;
|
| - output[5] = x14;
|
| - output[6] = x10;
|
| - output[7] = x2;
|
| - output[8] = x3;
|
| - output[9] = x11;
|
| - output[10] = x15;
|
| - output[11] = x7;
|
| - output[12] = x5;
|
| - output[13] = -x13;
|
| - output[14] = x9;
|
| - output[15] = -x1;
|
| + output[0] = WRAPLOW(x0, 8);
|
| + output[1] = WRAPLOW(-x8, 8);
|
| + output[2] = WRAPLOW(x12, 8);
|
| + output[3] = WRAPLOW(-x4, 8);
|
| + output[4] = WRAPLOW(x6, 8);
|
| + output[5] = WRAPLOW(x14, 8);
|
| + output[6] = WRAPLOW(x10, 8);
|
| + output[7] = WRAPLOW(x2, 8);
|
| + output[8] = WRAPLOW(x3, 8);
|
| + output[9] = WRAPLOW(x11, 8);
|
| + output[10] = WRAPLOW(x15, 8);
|
| + output[11] = WRAPLOW(x7, 8);
|
| + output[12] = WRAPLOW(x5, 8);
|
| + output[13] = WRAPLOW(-x13, 8);
|
| + output[14] = WRAPLOW(x9, 8);
|
| + output[15] = WRAPLOW(-x1, 8);
|
| }
|
|
|
| static const transform_2d IHT_16[] = {
|
| @@ -869,9 +875,10 @@
|
| for (j = 0; j < 16; ++j)
|
| temp_in[j] = out[j * 16 + i];
|
| ht.cols(temp_in, temp_out);
|
| - for (j = 0; j < 16; ++j)
|
| - dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
|
| - + dest[j * stride + i]);
|
| + for (j = 0; j < 16; ++j) {
|
| + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
|
| + ROUND_POWER_OF_TWO(temp_out[j], 6));
|
| + }
|
| }
|
| }
|
|
|
| @@ -895,9 +902,10 @@
|
| for (j = 0; j < 16; ++j)
|
| temp_in[j] = out[j*16 + i];
|
| idct16(temp_in, temp_out);
|
| - for (j = 0; j < 16; ++j)
|
| - dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
|
| - + dest[j * stride + i]);
|
| + for (j = 0; j < 16; ++j) {
|
| + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
|
| + ROUND_POWER_OF_TWO(temp_out[j], 6));
|
| + }
|
| }
|
| }
|
|
|
| @@ -904,12 +912,12 @@
|
| void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
|
| int i, j;
|
| tran_high_t a1;
|
| - tran_low_t out = dct_const_round_shift(input[0] * cospi_16_64);
|
| - out = dct_const_round_shift(out * cospi_16_64);
|
| + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
|
| + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8);
|
| a1 = ROUND_POWER_OF_TWO(out, 6);
|
| for (j = 0; j < 16; ++j) {
|
| for (i = 0; i < 16; ++i)
|
| - dest[i] = clip_pixel(dest[i] + a1);
|
| + dest[i] = clip_pixel_add(dest[i], a1);
|
| dest += stride;
|
| }
|
| }
|
| @@ -938,43 +946,43 @@
|
|
|
| temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;
|
| temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
|
| - step1[16] = dct_const_round_shift(temp1);
|
| - step1[31] = dct_const_round_shift(temp2);
|
| + step1[16] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[31] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
|
| temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
|
| - step1[17] = dct_const_round_shift(temp1);
|
| - step1[30] = dct_const_round_shift(temp2);
|
| + step1[17] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[30] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
|
| temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
|
| - step1[18] = dct_const_round_shift(temp1);
|
| - step1[29] = dct_const_round_shift(temp2);
|
| + step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
|
| temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
|
| - step1[19] = dct_const_round_shift(temp1);
|
| - step1[28] = dct_const_round_shift(temp2);
|
| + step1[19] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[28] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
|
| temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
|
| - step1[20] = dct_const_round_shift(temp1);
|
| - step1[27] = dct_const_round_shift(temp2);
|
| + step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
|
| temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
|
| - step1[21] = dct_const_round_shift(temp1);
|
| - step1[26] = dct_const_round_shift(temp2);
|
| + step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
|
| temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
|
| - step1[22] = dct_const_round_shift(temp1);
|
| - step1[25] = dct_const_round_shift(temp2);
|
| + step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
|
| temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
|
| - step1[23] = dct_const_round_shift(temp1);
|
| - step1[24] = dct_const_round_shift(temp2);
|
| + step1[23] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[24] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| // stage 2
|
| step2[0] = step1[0];
|
| @@ -988,40 +996,40 @@
|
|
|
| temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
|
| temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
|
| - step2[8] = dct_const_round_shift(temp1);
|
| - step2[15] = dct_const_round_shift(temp2);
|
| + step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
|
| temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
|
| - step2[9] = dct_const_round_shift(temp1);
|
| - step2[14] = dct_const_round_shift(temp2);
|
| + step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
|
| temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
|
| - step2[10] = dct_const_round_shift(temp1);
|
| - step2[13] = dct_const_round_shift(temp2);
|
| + step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
|
| temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
|
| - step2[11] = dct_const_round_shift(temp1);
|
| - step2[12] = dct_const_round_shift(temp2);
|
| + step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| - step2[16] = step1[16] + step1[17];
|
| - step2[17] = step1[16] - step1[17];
|
| - step2[18] = -step1[18] + step1[19];
|
| - step2[19] = step1[18] + step1[19];
|
| - step2[20] = step1[20] + step1[21];
|
| - step2[21] = step1[20] - step1[21];
|
| - step2[22] = -step1[22] + step1[23];
|
| - step2[23] = step1[22] + step1[23];
|
| - step2[24] = step1[24] + step1[25];
|
| - step2[25] = step1[24] - step1[25];
|
| - step2[26] = -step1[26] + step1[27];
|
| - step2[27] = step1[26] + step1[27];
|
| - step2[28] = step1[28] + step1[29];
|
| - step2[29] = step1[28] - step1[29];
|
| - step2[30] = -step1[30] + step1[31];
|
| - step2[31] = step1[30] + step1[31];
|
| + step2[16] = WRAPLOW(step1[16] + step1[17], 8);
|
| + step2[17] = WRAPLOW(step1[16] - step1[17], 8);
|
| + step2[18] = WRAPLOW(-step1[18] + step1[19], 8);
|
| + step2[19] = WRAPLOW(step1[18] + step1[19], 8);
|
| + step2[20] = WRAPLOW(step1[20] + step1[21], 8);
|
| + step2[21] = WRAPLOW(step1[20] - step1[21], 8);
|
| + step2[22] = WRAPLOW(-step1[22] + step1[23], 8);
|
| + step2[23] = WRAPLOW(step1[22] + step1[23], 8);
|
| + step2[24] = WRAPLOW(step1[24] + step1[25], 8);
|
| + step2[25] = WRAPLOW(step1[24] - step1[25], 8);
|
| + step2[26] = WRAPLOW(-step1[26] + step1[27], 8);
|
| + step2[27] = WRAPLOW(step1[26] + step1[27], 8);
|
| + step2[28] = WRAPLOW(step1[28] + step1[29], 8);
|
| + step2[29] = WRAPLOW(step1[28] - step1[29], 8);
|
| + step2[30] = WRAPLOW(-step1[30] + step1[31], 8);
|
| + step2[31] = WRAPLOW(step1[30] + step1[31], 8);
|
|
|
| // stage 3
|
| step1[0] = step2[0];
|
| @@ -1031,42 +1039,42 @@
|
|
|
| temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
|
| temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
|
| - step1[4] = dct_const_round_shift(temp1);
|
| - step1[7] = dct_const_round_shift(temp2);
|
| + step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
|
| temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
|
| - step1[5] = dct_const_round_shift(temp1);
|
| - step1[6] = dct_const_round_shift(temp2);
|
| + step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
|
|
| - step1[8] = step2[8] + step2[9];
|
| - step1[9] = step2[8] - step2[9];
|
| - step1[10] = -step2[10] + step2[11];
|
| - step1[11] = step2[10] + step2[11];
|
| - step1[12] = step2[12] + step2[13];
|
| - step1[13] = step2[12] - step2[13];
|
| - step1[14] = -step2[14] + step2[15];
|
| - step1[15] = step2[14] + step2[15];
|
| + step1[8] = WRAPLOW(step2[8] + step2[9], 8);
|
| + step1[9] = WRAPLOW(step2[8] - step2[9], 8);
|
| + step1[10] = WRAPLOW(-step2[10] + step2[11], 8);
|
| + step1[11] = WRAPLOW(step2[10] + step2[11], 8);
|
| + step1[12] = WRAPLOW(step2[12] + step2[13], 8);
|
| + step1[13] = WRAPLOW(step2[12] - step2[13], 8);
|
| + step1[14] = WRAPLOW(-step2[14] + step2[15], 8);
|
| + step1[15] = WRAPLOW(step2[14] + step2[15], 8);
|
|
|
| step1[16] = step2[16];
|
| step1[31] = step2[31];
|
| temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
|
| temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
|
| - step1[17] = dct_const_round_shift(temp1);
|
| - step1[30] = dct_const_round_shift(temp2);
|
| + step1[17] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[30] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
|
| temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
|
| - step1[18] = dct_const_round_shift(temp1);
|
| - step1[29] = dct_const_round_shift(temp2);
|
| + step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| step1[19] = step2[19];
|
| step1[20] = step2[20];
|
| temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
|
| temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
|
| - step1[21] = dct_const_round_shift(temp1);
|
| - step1[26] = dct_const_round_shift(temp2);
|
| + step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
|
| temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
|
| - step1[22] = dct_const_round_shift(temp1);
|
| - step1[25] = dct_const_round_shift(temp2);
|
| + step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| step1[23] = step2[23];
|
| step1[24] = step2[24];
|
| step1[27] = step2[27];
|
| @@ -1075,87 +1083,87 @@
|
| // stage 4
|
| temp1 = (step1[0] + step1[1]) * cospi_16_64;
|
| temp2 = (step1[0] - step1[1]) * cospi_16_64;
|
| - step2[0] = dct_const_round_shift(temp1);
|
| - step2[1] = dct_const_round_shift(temp2);
|
| + step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
|
| temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
|
| - step2[2] = dct_const_round_shift(temp1);
|
| - step2[3] = dct_const_round_shift(temp2);
|
| - step2[4] = step1[4] + step1[5];
|
| - step2[5] = step1[4] - step1[5];
|
| - step2[6] = -step1[6] + step1[7];
|
| - step2[7] = step1[6] + step1[7];
|
| + step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| + step2[4] = WRAPLOW(step1[4] + step1[5], 8);
|
| + step2[5] = WRAPLOW(step1[4] - step1[5], 8);
|
| + step2[6] = WRAPLOW(-step1[6] + step1[7], 8);
|
| + step2[7] = WRAPLOW(step1[6] + step1[7], 8);
|
|
|
| step2[8] = step1[8];
|
| step2[15] = step1[15];
|
| temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
|
| temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
|
| - step2[9] = dct_const_round_shift(temp1);
|
| - step2[14] = dct_const_round_shift(temp2);
|
| + step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
|
| temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
|
| - step2[10] = dct_const_round_shift(temp1);
|
| - step2[13] = dct_const_round_shift(temp2);
|
| + step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| step2[11] = step1[11];
|
| step2[12] = step1[12];
|
|
|
| - step2[16] = step1[16] + step1[19];
|
| - step2[17] = step1[17] + step1[18];
|
| - step2[18] = step1[17] - step1[18];
|
| - step2[19] = step1[16] - step1[19];
|
| - step2[20] = -step1[20] + step1[23];
|
| - step2[21] = -step1[21] + step1[22];
|
| - step2[22] = step1[21] + step1[22];
|
| - step2[23] = step1[20] + step1[23];
|
| + step2[16] = WRAPLOW(step1[16] + step1[19], 8);
|
| + step2[17] = WRAPLOW(step1[17] + step1[18], 8);
|
| + step2[18] = WRAPLOW(step1[17] - step1[18], 8);
|
| + step2[19] = WRAPLOW(step1[16] - step1[19], 8);
|
| + step2[20] = WRAPLOW(-step1[20] + step1[23], 8);
|
| + step2[21] = WRAPLOW(-step1[21] + step1[22], 8);
|
| + step2[22] = WRAPLOW(step1[21] + step1[22], 8);
|
| + step2[23] = WRAPLOW(step1[20] + step1[23], 8);
|
|
|
| - step2[24] = step1[24] + step1[27];
|
| - step2[25] = step1[25] + step1[26];
|
| - step2[26] = step1[25] - step1[26];
|
| - step2[27] = step1[24] - step1[27];
|
| - step2[28] = -step1[28] + step1[31];
|
| - step2[29] = -step1[29] + step1[30];
|
| - step2[30] = step1[29] + step1[30];
|
| - step2[31] = step1[28] + step1[31];
|
| + step2[24] = WRAPLOW(step1[24] + step1[27], 8);
|
| + step2[25] = WRAPLOW(step1[25] + step1[26], 8);
|
| + step2[26] = WRAPLOW(step1[25] - step1[26], 8);
|
| + step2[27] = WRAPLOW(step1[24] - step1[27], 8);
|
| + step2[28] = WRAPLOW(-step1[28] + step1[31], 8);
|
| + step2[29] = WRAPLOW(-step1[29] + step1[30], 8);
|
| + step2[30] = WRAPLOW(step1[29] + step1[30], 8);
|
| + step2[31] = WRAPLOW(step1[28] + step1[31], 8);
|
|
|
| // stage 5
|
| - step1[0] = step2[0] + step2[3];
|
| - step1[1] = step2[1] + step2[2];
|
| - step1[2] = step2[1] - step2[2];
|
| - step1[3] = step2[0] - step2[3];
|
| + step1[0] = WRAPLOW(step2[0] + step2[3], 8);
|
| + step1[1] = WRAPLOW(step2[1] + step2[2], 8);
|
| + step1[2] = WRAPLOW(step2[1] - step2[2], 8);
|
| + step1[3] = WRAPLOW(step2[0] - step2[3], 8);
|
| step1[4] = step2[4];
|
| temp1 = (step2[6] - step2[5]) * cospi_16_64;
|
| temp2 = (step2[5] + step2[6]) * cospi_16_64;
|
| - step1[5] = dct_const_round_shift(temp1);
|
| - step1[6] = dct_const_round_shift(temp2);
|
| + step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| step1[7] = step2[7];
|
|
|
| - step1[8] = step2[8] + step2[11];
|
| - step1[9] = step2[9] + step2[10];
|
| - step1[10] = step2[9] - step2[10];
|
| - step1[11] = step2[8] - step2[11];
|
| - step1[12] = -step2[12] + step2[15];
|
| - step1[13] = -step2[13] + step2[14];
|
| - step1[14] = step2[13] + step2[14];
|
| - step1[15] = step2[12] + step2[15];
|
| + step1[8] = WRAPLOW(step2[8] + step2[11], 8);
|
| + step1[9] = WRAPLOW(step2[9] + step2[10], 8);
|
| + step1[10] = WRAPLOW(step2[9] - step2[10], 8);
|
| + step1[11] = WRAPLOW(step2[8] - step2[11], 8);
|
| + step1[12] = WRAPLOW(-step2[12] + step2[15], 8);
|
| + step1[13] = WRAPLOW(-step2[13] + step2[14], 8);
|
| + step1[14] = WRAPLOW(step2[13] + step2[14], 8);
|
| + step1[15] = WRAPLOW(step2[12] + step2[15], 8);
|
|
|
| step1[16] = step2[16];
|
| step1[17] = step2[17];
|
| temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
|
| temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
|
| - step1[18] = dct_const_round_shift(temp1);
|
| - step1[29] = dct_const_round_shift(temp2);
|
| + step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
|
| temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
|
| - step1[19] = dct_const_round_shift(temp1);
|
| - step1[28] = dct_const_round_shift(temp2);
|
| + step1[19] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[28] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
|
| temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
|
| - step1[20] = dct_const_round_shift(temp1);
|
| - step1[27] = dct_const_round_shift(temp2);
|
| + step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
|
| temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
|
| - step1[21] = dct_const_round_shift(temp1);
|
| - step1[26] = dct_const_round_shift(temp2);
|
| + step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| step1[22] = step2[22];
|
| step1[23] = step2[23];
|
| step1[24] = step2[24];
|
| @@ -1164,62 +1172,62 @@
|
| step1[31] = step2[31];
|
|
|
| // stage 6
|
| - step2[0] = step1[0] + step1[7];
|
| - step2[1] = step1[1] + step1[6];
|
| - step2[2] = step1[2] + step1[5];
|
| - step2[3] = step1[3] + step1[4];
|
| - step2[4] = step1[3] - step1[4];
|
| - step2[5] = step1[2] - step1[5];
|
| - step2[6] = step1[1] - step1[6];
|
| - step2[7] = step1[0] - step1[7];
|
| + step2[0] = WRAPLOW(step1[0] + step1[7], 8);
|
| + step2[1] = WRAPLOW(step1[1] + step1[6], 8);
|
| + step2[2] = WRAPLOW(step1[2] + step1[5], 8);
|
| + step2[3] = WRAPLOW(step1[3] + step1[4], 8);
|
| + step2[4] = WRAPLOW(step1[3] - step1[4], 8);
|
| + step2[5] = WRAPLOW(step1[2] - step1[5], 8);
|
| + step2[6] = WRAPLOW(step1[1] - step1[6], 8);
|
| + step2[7] = WRAPLOW(step1[0] - step1[7], 8);
|
| step2[8] = step1[8];
|
| step2[9] = step1[9];
|
| temp1 = (-step1[10] + step1[13]) * cospi_16_64;
|
| temp2 = (step1[10] + step1[13]) * cospi_16_64;
|
| - step2[10] = dct_const_round_shift(temp1);
|
| - step2[13] = dct_const_round_shift(temp2);
|
| + step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = (-step1[11] + step1[12]) * cospi_16_64;
|
| temp2 = (step1[11] + step1[12]) * cospi_16_64;
|
| - step2[11] = dct_const_round_shift(temp1);
|
| - step2[12] = dct_const_round_shift(temp2);
|
| + step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| step2[14] = step1[14];
|
| step2[15] = step1[15];
|
|
|
| - step2[16] = step1[16] + step1[23];
|
| - step2[17] = step1[17] + step1[22];
|
| - step2[18] = step1[18] + step1[21];
|
| - step2[19] = step1[19] + step1[20];
|
| - step2[20] = step1[19] - step1[20];
|
| - step2[21] = step1[18] - step1[21];
|
| - step2[22] = step1[17] - step1[22];
|
| - step2[23] = step1[16] - step1[23];
|
| + step2[16] = WRAPLOW(step1[16] + step1[23], 8);
|
| + step2[17] = WRAPLOW(step1[17] + step1[22], 8);
|
| + step2[18] = WRAPLOW(step1[18] + step1[21], 8);
|
| + step2[19] = WRAPLOW(step1[19] + step1[20], 8);
|
| + step2[20] = WRAPLOW(step1[19] - step1[20], 8);
|
| + step2[21] = WRAPLOW(step1[18] - step1[21], 8);
|
| + step2[22] = WRAPLOW(step1[17] - step1[22], 8);
|
| + step2[23] = WRAPLOW(step1[16] - step1[23], 8);
|
|
|
| - step2[24] = -step1[24] + step1[31];
|
| - step2[25] = -step1[25] + step1[30];
|
| - step2[26] = -step1[26] + step1[29];
|
| - step2[27] = -step1[27] + step1[28];
|
| - step2[28] = step1[27] + step1[28];
|
| - step2[29] = step1[26] + step1[29];
|
| - step2[30] = step1[25] + step1[30];
|
| - step2[31] = step1[24] + step1[31];
|
| + step2[24] = WRAPLOW(-step1[24] + step1[31], 8);
|
| + step2[25] = WRAPLOW(-step1[25] + step1[30], 8);
|
| + step2[26] = WRAPLOW(-step1[26] + step1[29], 8);
|
| + step2[27] = WRAPLOW(-step1[27] + step1[28], 8);
|
| + step2[28] = WRAPLOW(step1[27] + step1[28], 8);
|
| + step2[29] = WRAPLOW(step1[26] + step1[29], 8);
|
| + step2[30] = WRAPLOW(step1[25] + step1[30], 8);
|
| + step2[31] = WRAPLOW(step1[24] + step1[31], 8);
|
|
|
| // stage 7
|
| - step1[0] = step2[0] + step2[15];
|
| - step1[1] = step2[1] + step2[14];
|
| - step1[2] = step2[2] + step2[13];
|
| - step1[3] = step2[3] + step2[12];
|
| - step1[4] = step2[4] + step2[11];
|
| - step1[5] = step2[5] + step2[10];
|
| - step1[6] = step2[6] + step2[9];
|
| - step1[7] = step2[7] + step2[8];
|
| - step1[8] = step2[7] - step2[8];
|
| - step1[9] = step2[6] - step2[9];
|
| - step1[10] = step2[5] - step2[10];
|
| - step1[11] = step2[4] - step2[11];
|
| - step1[12] = step2[3] - step2[12];
|
| - step1[13] = step2[2] - step2[13];
|
| - step1[14] = step2[1] - step2[14];
|
| - step1[15] = step2[0] - step2[15];
|
| + step1[0] = WRAPLOW(step2[0] + step2[15], 8);
|
| + step1[1] = WRAPLOW(step2[1] + step2[14], 8);
|
| + step1[2] = WRAPLOW(step2[2] + step2[13], 8);
|
| + step1[3] = WRAPLOW(step2[3] + step2[12], 8);
|
| + step1[4] = WRAPLOW(step2[4] + step2[11], 8);
|
| + step1[5] = WRAPLOW(step2[5] + step2[10], 8);
|
| + step1[6] = WRAPLOW(step2[6] + step2[9], 8);
|
| + step1[7] = WRAPLOW(step2[7] + step2[8], 8);
|
| + step1[8] = WRAPLOW(step2[7] - step2[8], 8);
|
| + step1[9] = WRAPLOW(step2[6] - step2[9], 8);
|
| + step1[10] = WRAPLOW(step2[5] - step2[10], 8);
|
| + step1[11] = WRAPLOW(step2[4] - step2[11], 8);
|
| + step1[12] = WRAPLOW(step2[3] - step2[12], 8);
|
| + step1[13] = WRAPLOW(step2[2] - step2[13], 8);
|
| + step1[14] = WRAPLOW(step2[1] - step2[14], 8);
|
| + step1[15] = WRAPLOW(step2[0] - step2[15], 8);
|
|
|
| step1[16] = step2[16];
|
| step1[17] = step2[17];
|
| @@ -1227,20 +1235,20 @@
|
| step1[19] = step2[19];
|
| temp1 = (-step2[20] + step2[27]) * cospi_16_64;
|
| temp2 = (step2[20] + step2[27]) * cospi_16_64;
|
| - step1[20] = dct_const_round_shift(temp1);
|
| - step1[27] = dct_const_round_shift(temp2);
|
| + step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = (-step2[21] + step2[26]) * cospi_16_64;
|
| temp2 = (step2[21] + step2[26]) * cospi_16_64;
|
| - step1[21] = dct_const_round_shift(temp1);
|
| - step1[26] = dct_const_round_shift(temp2);
|
| + step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = (-step2[22] + step2[25]) * cospi_16_64;
|
| temp2 = (step2[22] + step2[25]) * cospi_16_64;
|
| - step1[22] = dct_const_round_shift(temp1);
|
| - step1[25] = dct_const_round_shift(temp2);
|
| + step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| temp1 = (-step2[23] + step2[24]) * cospi_16_64;
|
| temp2 = (step2[23] + step2[24]) * cospi_16_64;
|
| - step1[23] = dct_const_round_shift(temp1);
|
| - step1[24] = dct_const_round_shift(temp2);
|
| + step1[23] = WRAPLOW(dct_const_round_shift(temp1), 8);
|
| + step1[24] = WRAPLOW(dct_const_round_shift(temp2), 8);
|
| step1[28] = step2[28];
|
| step1[29] = step2[29];
|
| step1[30] = step2[30];
|
| @@ -1247,38 +1255,38 @@
|
| step1[31] = step2[31];
|
|
|
| // final stage
|
| - output[0] = step1[0] + step1[31];
|
| - output[1] = step1[1] + step1[30];
|
| - output[2] = step1[2] + step1[29];
|
| - output[3] = step1[3] + step1[28];
|
| - output[4] = step1[4] + step1[27];
|
| - output[5] = step1[5] + step1[26];
|
| - output[6] = step1[6] + step1[25];
|
| - output[7] = step1[7] + step1[24];
|
| - output[8] = step1[8] + step1[23];
|
| - output[9] = step1[9] + step1[22];
|
| - output[10] = step1[10] + step1[21];
|
| - output[11] = step1[11] + step1[20];
|
| - output[12] = step1[12] + step1[19];
|
| - output[13] = step1[13] + step1[18];
|
| - output[14] = step1[14] + step1[17];
|
| - output[15] = step1[15] + step1[16];
|
| - output[16] = step1[15] - step1[16];
|
| - output[17] = step1[14] - step1[17];
|
| - output[18] = step1[13] - step1[18];
|
| - output[19] = step1[12] - step1[19];
|
| - output[20] = step1[11] - step1[20];
|
| - output[21] = step1[10] - step1[21];
|
| - output[22] = step1[9] - step1[22];
|
| - output[23] = step1[8] - step1[23];
|
| - output[24] = step1[7] - step1[24];
|
| - output[25] = step1[6] - step1[25];
|
| - output[26] = step1[5] - step1[26];
|
| - output[27] = step1[4] - step1[27];
|
| - output[28] = step1[3] - step1[28];
|
| - output[29] = step1[2] - step1[29];
|
| - output[30] = step1[1] - step1[30];
|
| - output[31] = step1[0] - step1[31];
|
| + output[0] = WRAPLOW(step1[0] + step1[31], 8);
|
| + output[1] = WRAPLOW(step1[1] + step1[30], 8);
|
| + output[2] = WRAPLOW(step1[2] + step1[29], 8);
|
| + output[3] = WRAPLOW(step1[3] + step1[28], 8);
|
| + output[4] = WRAPLOW(step1[4] + step1[27], 8);
|
| + output[5] = WRAPLOW(step1[5] + step1[26], 8);
|
| + output[6] = WRAPLOW(step1[6] + step1[25], 8);
|
| + output[7] = WRAPLOW(step1[7] + step1[24], 8);
|
| + output[8] = WRAPLOW(step1[8] + step1[23], 8);
|
| + output[9] = WRAPLOW(step1[9] + step1[22], 8);
|
| + output[10] = WRAPLOW(step1[10] + step1[21], 8);
|
| + output[11] = WRAPLOW(step1[11] + step1[20], 8);
|
| + output[12] = WRAPLOW(step1[12] + step1[19], 8);
|
| + output[13] = WRAPLOW(step1[13] + step1[18], 8);
|
| + output[14] = WRAPLOW(step1[14] + step1[17], 8);
|
| + output[15] = WRAPLOW(step1[15] + step1[16], 8);
|
| + output[16] = WRAPLOW(step1[15] - step1[16], 8);
|
| + output[17] = WRAPLOW(step1[14] - step1[17], 8);
|
| + output[18] = WRAPLOW(step1[13] - step1[18], 8);
|
| + output[19] = WRAPLOW(step1[12] - step1[19], 8);
|
| + output[20] = WRAPLOW(step1[11] - step1[20], 8);
|
| + output[21] = WRAPLOW(step1[10] - step1[21], 8);
|
| + output[22] = WRAPLOW(step1[9] - step1[22], 8);
|
| + output[23] = WRAPLOW(step1[8] - step1[23], 8);
|
| + output[24] = WRAPLOW(step1[7] - step1[24], 8);
|
| + output[25] = WRAPLOW(step1[6] - step1[25], 8);
|
| + output[26] = WRAPLOW(step1[5] - step1[26], 8);
|
| + output[27] = WRAPLOW(step1[4] - step1[27], 8);
|
| + output[28] = WRAPLOW(step1[3] - step1[28], 8);
|
| + output[29] = WRAPLOW(step1[2] - step1[29], 8);
|
| + output[30] = WRAPLOW(step1[1] - step1[30], 8);
|
| + output[31] = WRAPLOW(step1[0] - step1[31], 8);
|
| }
|
|
|
| void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest,
|
| @@ -1313,9 +1321,10 @@
|
| for (j = 0; j < 32; ++j)
|
| temp_in[j] = out[j * 32 + i];
|
| idct32(temp_in, temp_out);
|
| - for (j = 0; j < 32; ++j)
|
| - dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
|
| - + dest[j * stride + i]);
|
| + for (j = 0; j < 32; ++j) {
|
| + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
|
| + ROUND_POWER_OF_TWO(temp_out[j], 6));
|
| + }
|
| }
|
| }
|
|
|
| @@ -1339,9 +1348,10 @@
|
| for (j = 0; j < 32; ++j)
|
| temp_in[j] = out[j * 32 + i];
|
| idct32(temp_in, temp_out);
|
| - for (j = 0; j < 32; ++j)
|
| - dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
|
| - + dest[j * stride + i]);
|
| + for (j = 0; j < 32; ++j) {
|
| + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
|
| + ROUND_POWER_OF_TWO(temp_out[j], 6));
|
| + }
|
| }
|
| }
|
|
|
| @@ -1349,13 +1359,13 @@
|
| int i, j;
|
| tran_high_t a1;
|
|
|
| - tran_low_t out = dct_const_round_shift(input[0] * cospi_16_64);
|
| - out = dct_const_round_shift(out * cospi_16_64);
|
| + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8);
|
| + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8);
|
| a1 = ROUND_POWER_OF_TWO(out, 6);
|
|
|
| for (j = 0; j < 32; ++j) {
|
| for (i = 0; i < 32; ++i)
|
| - dest[i] = clip_pixel(dest[i] + a1);
|
| + dest[i] = clip_pixel_add(dest[i], a1);
|
| dest += stride;
|
| }
|
| }
|
| @@ -1448,8 +1458,8 @@
|
| }
|
|
|
| #if CONFIG_VP9_HIGHBITDEPTH
|
| -void vp9_high_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int stride, int bd) {
|
| +void vp9_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int stride, int bd) {
|
| /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
|
| 0.5 shifts per pixel. */
|
| int i;
|
| @@ -1471,10 +1481,10 @@
|
| c1 = e1 - c1;
|
| a1 -= b1;
|
| d1 += c1;
|
| - op[0] = WRAPLOW(a1);
|
| - op[1] = WRAPLOW(b1);
|
| - op[2] = WRAPLOW(c1);
|
| - op[3] = WRAPLOW(d1);
|
| + op[0] = WRAPLOW(a1, bd);
|
| + op[1] = WRAPLOW(b1, bd);
|
| + op[2] = WRAPLOW(c1, bd);
|
| + op[3] = WRAPLOW(d1, bd);
|
| ip += 4;
|
| op += 4;
|
| }
|
| @@ -1492,10 +1502,10 @@
|
| c1 = e1 - c1;
|
| a1 -= b1;
|
| d1 += c1;
|
| - dest[stride * 0] = clip_pixel_bd_high(dest[stride * 0], a1, bd);
|
| - dest[stride * 1] = clip_pixel_bd_high(dest[stride * 1], b1, bd);
|
| - dest[stride * 2] = clip_pixel_bd_high(dest[stride * 2], c1, bd);
|
| - dest[stride * 3] = clip_pixel_bd_high(dest[stride * 3], d1, bd);
|
| + dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd);
|
| + dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd);
|
| + dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd);
|
| + dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd);
|
|
|
| ip++;
|
| dest++;
|
| @@ -1502,29 +1512,8 @@
|
| }
|
| }
|
|
|
| -static void high_idct4(const tran_low_t *input, tran_low_t *output, int bd) {
|
| - tran_low_t step[4];
|
| - tran_high_t temp1, temp2;
|
| - (void) bd;
|
| - // stage 1
|
| - temp1 = (input[0] + input[2]) * cospi_16_64;
|
| - temp2 = (input[0] - input[2]) * cospi_16_64;
|
| - step[0] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step[1] = WRAPLOW(dct_const_round_shift(temp2));
|
| - temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
|
| - temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
|
| - step[2] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step[3] = WRAPLOW(dct_const_round_shift(temp2));
|
| -
|
| - // stage 2
|
| - output[0] = WRAPLOW(step[0] + step[3]);
|
| - output[1] = WRAPLOW(step[1] + step[2]);
|
| - output[2] = WRAPLOW(step[1] - step[2]);
|
| - output[3] = WRAPLOW(step[0] - step[3]);
|
| -}
|
| -
|
| -void vp9_high_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
|
| - int dest_stride, int bd) {
|
| +void vp9_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8,
|
| + int dest_stride, int bd) {
|
| int i;
|
| tran_high_t a1, e1;
|
| tran_low_t tmp[4];
|
| @@ -1536,24 +1525,49 @@
|
| a1 = ip[0] >> UNIT_QUANT_SHIFT;
|
| e1 = a1 >> 1;
|
| a1 -= e1;
|
| - op[0] = WRAPLOW(a1);
|
| - op[1] = op[2] = op[3] = WRAPLOW(e1);
|
| + op[0] = WRAPLOW(a1, bd);
|
| + op[1] = op[2] = op[3] = WRAPLOW(e1, bd);
|
|
|
| ip = tmp;
|
| for (i = 0; i < 4; i++) {
|
| e1 = ip[0] >> 1;
|
| a1 = ip[0] - e1;
|
| - dest[dest_stride * 0] = clip_pixel_bd_high(dest[dest_stride * 0], a1, bd);
|
| - dest[dest_stride * 1] = clip_pixel_bd_high(dest[dest_stride * 1], e1, bd);
|
| - dest[dest_stride * 2] = clip_pixel_bd_high(dest[dest_stride * 2], e1, bd);
|
| - dest[dest_stride * 3] = clip_pixel_bd_high(dest[dest_stride * 3], e1, bd);
|
| + dest[dest_stride * 0] = highbd_clip_pixel_add(
|
| + dest[dest_stride * 0], a1, bd);
|
| + dest[dest_stride * 1] = highbd_clip_pixel_add(
|
| + dest[dest_stride * 1], e1, bd);
|
| + dest[dest_stride * 2] = highbd_clip_pixel_add(
|
| + dest[dest_stride * 2], e1, bd);
|
| + dest[dest_stride * 3] = highbd_clip_pixel_add(
|
| + dest[dest_stride * 3], e1, bd);
|
| ip++;
|
| dest++;
|
| }
|
| }
|
|
|
| -void vp9_high_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int stride, int bd) {
|
| +static void highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd) {
|
| + tran_low_t step[4];
|
| + tran_high_t temp1, temp2;
|
| + (void) bd;
|
| + // stage 1
|
| + temp1 = (input[0] + input[2]) * cospi_16_64;
|
| + temp2 = (input[0] - input[2]) * cospi_16_64;
|
| + step[0] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step[1] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| + temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
|
| + temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
|
| + step[2] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step[3] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| +
|
| + // stage 2
|
| + output[0] = WRAPLOW(step[0] + step[3], bd);
|
| + output[1] = WRAPLOW(step[1] + step[2], bd);
|
| + output[2] = WRAPLOW(step[1] - step[2], bd);
|
| + output[3] = WRAPLOW(step[0] - step[3], bd);
|
| +}
|
| +
|
| +void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int stride, int bd) {
|
| tran_low_t out[4 * 4];
|
| tran_low_t *outptr = out;
|
| int i, j;
|
| @@ -1562,7 +1576,7 @@
|
|
|
| // Rows
|
| for (i = 0; i < 4; ++i) {
|
| - high_idct4(input, outptr, bd);
|
| + highbd_idct4(input, outptr, bd);
|
| input += 4;
|
| outptr += 4;
|
| }
|
| @@ -1571,33 +1585,34 @@
|
| for (i = 0; i < 4; ++i) {
|
| for (j = 0; j < 4; ++j)
|
| temp_in[j] = out[j * 4 + i];
|
| - high_idct4(temp_in, temp_out, bd);
|
| - for (j = 0; j < 4; ++j)
|
| - dest[j * stride + i] = clip_pixel_bd_high(
|
| + highbd_idct4(temp_in, temp_out, bd);
|
| + for (j = 0; j < 4; ++j) {
|
| + dest[j * stride + i] = highbd_clip_pixel_add(
|
| dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
|
| + }
|
| }
|
| }
|
|
|
| -void vp9_high_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int dest_stride, int bd) {
|
| +void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int dest_stride, int bd) {
|
| int i;
|
| tran_high_t a1;
|
| - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
|
| + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
|
| uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
|
|
| - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
|
| + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
|
| a1 = ROUND_POWER_OF_TWO(out, 4);
|
|
|
| for (i = 0; i < 4; i++) {
|
| - dest[0] = clip_pixel_bd_high(dest[0], a1, bd);
|
| - dest[1] = clip_pixel_bd_high(dest[1], a1, bd);
|
| - dest[2] = clip_pixel_bd_high(dest[2], a1, bd);
|
| - dest[3] = clip_pixel_bd_high(dest[3], a1, bd);
|
| + dest[0] = highbd_clip_pixel_add(dest[0], a1, bd);
|
| + dest[1] = highbd_clip_pixel_add(dest[1], a1, bd);
|
| + dest[2] = highbd_clip_pixel_add(dest[2], a1, bd);
|
| + dest[3] = highbd_clip_pixel_add(dest[3], a1, bd);
|
| dest += dest_stride;
|
| }
|
| }
|
|
|
| -static void high_idct8(const tran_low_t *input, tran_low_t *output, int bd) {
|
| +static void highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) {
|
| tran_low_t step1[8], step2[8];
|
| tran_high_t temp1, temp2;
|
| // stage 1
|
| @@ -1607,43 +1622,43 @@
|
| step1[3] = input[6];
|
| temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
|
| temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
|
| - step1[4] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[7] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
|
| temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
|
| - step1[5] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[6] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| // stage 2 & stage 3 - even half
|
| - high_idct4(step1, step1, bd);
|
| + highbd_idct4(step1, step1, bd);
|
|
|
| // stage 2 - odd half
|
| - step2[4] = WRAPLOW(step1[4] + step1[5]);
|
| - step2[5] = WRAPLOW(step1[4] - step1[5]);
|
| - step2[6] = WRAPLOW(-step1[6] + step1[7]);
|
| - step2[7] = WRAPLOW(step1[6] + step1[7]);
|
| + step2[4] = WRAPLOW(step1[4] + step1[5], bd);
|
| + step2[5] = WRAPLOW(step1[4] - step1[5], bd);
|
| + step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
|
| + step2[7] = WRAPLOW(step1[6] + step1[7], bd);
|
|
|
| // stage 3 - odd half
|
| step1[4] = step2[4];
|
| temp1 = (step2[6] - step2[5]) * cospi_16_64;
|
| temp2 = (step2[5] + step2[6]) * cospi_16_64;
|
| - step1[5] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[6] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| step1[7] = step2[7];
|
|
|
| // stage 4
|
| - output[0] = WRAPLOW(step1[0] + step1[7]);
|
| - output[1] = WRAPLOW(step1[1] + step1[6]);
|
| - output[2] = WRAPLOW(step1[2] + step1[5]);
|
| - output[3] = WRAPLOW(step1[3] + step1[4]);
|
| - output[4] = WRAPLOW(step1[3] - step1[4]);
|
| - output[5] = WRAPLOW(step1[2] - step1[5]);
|
| - output[6] = WRAPLOW(step1[1] - step1[6]);
|
| - output[7] = WRAPLOW(step1[0] - step1[7]);
|
| + output[0] = WRAPLOW(step1[0] + step1[7], bd);
|
| + output[1] = WRAPLOW(step1[1] + step1[6], bd);
|
| + output[2] = WRAPLOW(step1[2] + step1[5], bd);
|
| + output[3] = WRAPLOW(step1[3] + step1[4], bd);
|
| + output[4] = WRAPLOW(step1[3] - step1[4], bd);
|
| + output[5] = WRAPLOW(step1[2] - step1[5], bd);
|
| + output[6] = WRAPLOW(step1[1] - step1[6], bd);
|
| + output[7] = WRAPLOW(step1[0] - step1[7], bd);
|
| }
|
|
|
| -void vp9_high_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int stride, int bd) {
|
| +void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int stride, int bd) {
|
| tran_low_t out[8 * 8];
|
| tran_low_t *outptr = out;
|
| int i, j;
|
| @@ -1652,7 +1667,7 @@
|
|
|
| // First transform rows.
|
| for (i = 0; i < 8; ++i) {
|
| - high_idct8(input, outptr, bd);
|
| + highbd_idct8(input, outptr, bd);
|
| input += 8;
|
| outptr += 8;
|
| }
|
| @@ -1661,30 +1676,30 @@
|
| for (i = 0; i < 8; ++i) {
|
| for (j = 0; j < 8; ++j)
|
| temp_in[j] = out[j * 8 + i];
|
| - high_idct8(temp_in, temp_out, bd);
|
| - for (j = 0; j < 8; ++j)
|
| - dest[j * stride + i] = clip_pixel_bd_high(dest[j * stride + i],
|
| - ROUND_POWER_OF_TWO(temp_out[j], 5),
|
| - bd);
|
| + highbd_idct8(temp_in, temp_out, bd);
|
| + for (j = 0; j < 8; ++j) {
|
| + dest[j * stride + i] = highbd_clip_pixel_add(
|
| + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
|
| + }
|
| }
|
| }
|
|
|
| -void vp9_high_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int stride, int bd) {
|
| +void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int stride, int bd) {
|
| int i, j;
|
| tran_high_t a1;
|
| - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
|
| + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
|
| uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
| - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
|
| + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
|
| a1 = ROUND_POWER_OF_TWO(out, 5);
|
| for (j = 0; j < 8; ++j) {
|
| for (i = 0; i < 8; ++i)
|
| - dest[i] = clip_pixel_bd_high(dest[i], a1, bd);
|
| + dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
|
| dest += stride;
|
| }
|
| }
|
|
|
| -static void high_iadst4(const tran_low_t *input, tran_low_t *output, int bd) {
|
| +static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) {
|
| tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
|
|
|
| tran_high_t x0 = input[0];
|
| @@ -1721,19 +1736,19 @@
|
| // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
|
| // + 1b (addition) = 29b.
|
| // Hence the output bit depth is 15b.
|
| - output[0] = WRAPLOW(dct_const_round_shift(s0));
|
| - output[1] = WRAPLOW(dct_const_round_shift(s1));
|
| - output[2] = WRAPLOW(dct_const_round_shift(s2));
|
| - output[3] = WRAPLOW(dct_const_round_shift(s3));
|
| + output[0] = WRAPLOW(dct_const_round_shift(s0), bd);
|
| + output[1] = WRAPLOW(dct_const_round_shift(s1), bd);
|
| + output[2] = WRAPLOW(dct_const_round_shift(s2), bd);
|
| + output[3] = WRAPLOW(dct_const_round_shift(s3), bd);
|
| }
|
|
|
| -void vp9_high_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int stride, int tx_type, int bd) {
|
| - const high_transform_2d IHT_4[] = {
|
| - { high_idct4, high_idct4 }, // DCT_DCT = 0
|
| - { high_iadst4, high_idct4 }, // ADST_DCT = 1
|
| - { high_idct4, high_iadst4 }, // DCT_ADST = 2
|
| - { high_iadst4, high_iadst4 } // ADST_ADST = 3
|
| +void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int stride, int tx_type, int bd) {
|
| + const highbd_transform_2d IHT_4[] = {
|
| + { highbd_idct4, highbd_idct4 }, // DCT_DCT = 0
|
| + { highbd_iadst4, highbd_idct4 }, // ADST_DCT = 1
|
| + { highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2
|
| + { highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3
|
| };
|
| uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
|
|
| @@ -1754,13 +1769,14 @@
|
| for (j = 0; j < 4; ++j)
|
| temp_in[j] = out[j * 4 + i];
|
| IHT_4[tx_type].cols(temp_in, temp_out, bd);
|
| - for (j = 0; j < 4; ++j)
|
| - dest[j * stride + i] = clip_pixel_bd_high(
|
| + for (j = 0; j < 4; ++j) {
|
| + dest[j * stride + i] = highbd_clip_pixel_add(
|
| dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
|
| + }
|
| }
|
| }
|
|
|
| -static void high_iadst8(const tran_low_t *input, tran_low_t *output, int bd) {
|
| +static void highbd_iadst8(const tran_low_t *input, tran_low_t *output, int bd) {
|
| tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
|
|
|
| tran_high_t x0 = input[7];
|
| @@ -1788,14 +1804,14 @@
|
| s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
|
| s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
|
|
|
| - x0 = WRAPLOW(dct_const_round_shift(s0 + s4));
|
| - x1 = WRAPLOW(dct_const_round_shift(s1 + s5));
|
| - x2 = WRAPLOW(dct_const_round_shift(s2 + s6));
|
| - x3 = WRAPLOW(dct_const_round_shift(s3 + s7));
|
| - x4 = WRAPLOW(dct_const_round_shift(s0 - s4));
|
| - x5 = WRAPLOW(dct_const_round_shift(s1 - s5));
|
| - x6 = WRAPLOW(dct_const_round_shift(s2 - s6));
|
| - x7 = WRAPLOW(dct_const_round_shift(s3 - s7));
|
| + x0 = WRAPLOW(dct_const_round_shift(s0 + s4), bd);
|
| + x1 = WRAPLOW(dct_const_round_shift(s1 + s5), bd);
|
| + x2 = WRAPLOW(dct_const_round_shift(s2 + s6), bd);
|
| + x3 = WRAPLOW(dct_const_round_shift(s3 + s7), bd);
|
| + x4 = WRAPLOW(dct_const_round_shift(s0 - s4), bd);
|
| + x5 = WRAPLOW(dct_const_round_shift(s1 - s5), bd);
|
| + x6 = WRAPLOW(dct_const_round_shift(s2 - s6), bd);
|
| + x7 = WRAPLOW(dct_const_round_shift(s3 - s7), bd);
|
|
|
| // stage 2
|
| s0 = x0;
|
| @@ -1807,14 +1823,14 @@
|
| s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
|
| s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
|
|
|
| - x0 = s0 + s2;
|
| - x1 = s1 + s3;
|
| - x2 = s0 - s2;
|
| - x3 = s1 - s3;
|
| - x4 = WRAPLOW(dct_const_round_shift(s4 + s6));
|
| - x5 = WRAPLOW(dct_const_round_shift(s5 + s7));
|
| - x6 = WRAPLOW(dct_const_round_shift(s4 - s6));
|
| - x7 = WRAPLOW(dct_const_round_shift(s5 - s7));
|
| + x0 = WRAPLOW(s0 + s2, bd);
|
| + x1 = WRAPLOW(s1 + s3, bd);
|
| + x2 = WRAPLOW(s0 - s2, bd);
|
| + x3 = WRAPLOW(s1 - s3, bd);
|
| + x4 = WRAPLOW(dct_const_round_shift(s4 + s6), bd);
|
| + x5 = WRAPLOW(dct_const_round_shift(s5 + s7), bd);
|
| + x6 = WRAPLOW(dct_const_round_shift(s4 - s6), bd);
|
| + x7 = WRAPLOW(dct_const_round_shift(s5 - s7), bd);
|
|
|
| // stage 3
|
| s2 = cospi_16_64 * (x2 + x3);
|
| @@ -1822,35 +1838,35 @@
|
| s6 = cospi_16_64 * (x6 + x7);
|
| s7 = cospi_16_64 * (x6 - x7);
|
|
|
| - x2 = WRAPLOW(dct_const_round_shift(s2));
|
| - x3 = WRAPLOW(dct_const_round_shift(s3));
|
| - x6 = WRAPLOW(dct_const_round_shift(s6));
|
| - x7 = WRAPLOW(dct_const_round_shift(s7));
|
| + x2 = WRAPLOW(dct_const_round_shift(s2), bd);
|
| + x3 = WRAPLOW(dct_const_round_shift(s3), bd);
|
| + x6 = WRAPLOW(dct_const_round_shift(s6), bd);
|
| + x7 = WRAPLOW(dct_const_round_shift(s7), bd);
|
|
|
| - output[0] = WRAPLOW(x0);
|
| - output[1] = WRAPLOW(-x4);
|
| - output[2] = WRAPLOW(x6);
|
| - output[3] = WRAPLOW(-x2);
|
| - output[4] = WRAPLOW(x3);
|
| - output[5] = WRAPLOW(-x7);
|
| - output[6] = WRAPLOW(x5);
|
| - output[7] = WRAPLOW(-x1);
|
| + output[0] = WRAPLOW(x0, bd);
|
| + output[1] = WRAPLOW(-x4, bd);
|
| + output[2] = WRAPLOW(x6, bd);
|
| + output[3] = WRAPLOW(-x2, bd);
|
| + output[4] = WRAPLOW(x3, bd);
|
| + output[5] = WRAPLOW(-x7, bd);
|
| + output[6] = WRAPLOW(x5, bd);
|
| + output[7] = WRAPLOW(-x1, bd);
|
| }
|
|
|
| -static const high_transform_2d HIGH_IHT_8[] = {
|
| - { high_idct8, high_idct8 }, // DCT_DCT = 0
|
| - { high_iadst8, high_idct8 }, // ADST_DCT = 1
|
| - { high_idct8, high_iadst8 }, // DCT_ADST = 2
|
| - { high_iadst8, high_iadst8 } // ADST_ADST = 3
|
| +static const highbd_transform_2d HIGH_IHT_8[] = {
|
| + { highbd_idct8, highbd_idct8 }, // DCT_DCT = 0
|
| + { highbd_iadst8, highbd_idct8 }, // ADST_DCT = 1
|
| + { highbd_idct8, highbd_iadst8 }, // DCT_ADST = 2
|
| + { highbd_iadst8, highbd_iadst8 } // ADST_ADST = 3
|
| };
|
|
|
| -void vp9_high_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int stride, int tx_type, int bd) {
|
| +void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int stride, int tx_type, int bd) {
|
| int i, j;
|
| tran_low_t out[8 * 8];
|
| tran_low_t *outptr = out;
|
| tran_low_t temp_in[8], temp_out[8];
|
| - const high_transform_2d ht = HIGH_IHT_8[tx_type];
|
| + const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
|
| uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
|
|
| // Inverse transform row vectors.
|
| @@ -1865,14 +1881,15 @@
|
| for (j = 0; j < 8; ++j)
|
| temp_in[j] = out[j * 8 + i];
|
| ht.cols(temp_in, temp_out, bd);
|
| - for (j = 0; j < 8; ++j)
|
| - dest[j * stride + i] = clip_pixel_bd_high(
|
| + for (j = 0; j < 8; ++j) {
|
| + dest[j * stride + i] = highbd_clip_pixel_add(
|
| dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
|
| + }
|
| }
|
| }
|
|
|
| -void vp9_high_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int stride, int bd) {
|
| +void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int stride, int bd) {
|
| tran_low_t out[8 * 8] = { 0 };
|
| tran_low_t *outptr = out;
|
| int i, j;
|
| @@ -1882,7 +1899,7 @@
|
| // First transform rows.
|
| // Only first 4 row has non-zero coefs.
|
| for (i = 0; i < 4; ++i) {
|
| - high_idct8(input, outptr, bd);
|
| + highbd_idct8(input, outptr, bd);
|
| input += 8;
|
| outptr += 8;
|
| }
|
| @@ -1890,14 +1907,15 @@
|
| for (i = 0; i < 8; ++i) {
|
| for (j = 0; j < 8; ++j)
|
| temp_in[j] = out[j * 8 + i];
|
| - high_idct8(temp_in, temp_out, bd);
|
| - for (j = 0; j < 8; ++j)
|
| - dest[j * stride + i] = clip_pixel_bd_high(
|
| + highbd_idct8(temp_in, temp_out, bd);
|
| + for (j = 0; j < 8; ++j) {
|
| + dest[j * stride + i] = highbd_clip_pixel_add(
|
| dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
|
| + }
|
| }
|
| }
|
|
|
| -static void high_idct16(const tran_low_t *input, tran_low_t *output, int bd) {
|
| +static void highbd_idct16(const tran_low_t *input, tran_low_t *output, int bd) {
|
| tran_low_t step1[16], step2[16];
|
| tran_high_t temp1, temp2;
|
| (void) bd;
|
| @@ -1932,23 +1950,23 @@
|
|
|
| temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
|
| temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
|
| - step2[8] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[15] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[8] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[15] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
|
| temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
|
| - step2[9] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[14] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
|
| temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
|
| - step2[10] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[13] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
|
| temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
|
| - step2[11] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[12] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| // stage 3
|
| step1[0] = step2[0];
|
| @@ -1958,113 +1976,113 @@
|
|
|
| temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
|
| temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
|
| - step1[4] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[7] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
|
| temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
|
| - step1[5] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[6] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| - step1[8] = WRAPLOW(step2[8] + step2[9]);
|
| - step1[9] = WRAPLOW(step2[8] - step2[9]);
|
| - step1[10] = WRAPLOW(-step2[10] + step2[11]);
|
| - step1[11] = WRAPLOW(step2[10] + step2[11]);
|
| - step1[12] = WRAPLOW(step2[12] + step2[13]);
|
| - step1[13] = WRAPLOW(step2[12] - step2[13]);
|
| - step1[14] = WRAPLOW(-step2[14] + step2[15]);
|
| - step1[15] = WRAPLOW(step2[14] + step2[15]);
|
| + step1[8] = WRAPLOW(step2[8] + step2[9], bd);
|
| + step1[9] = WRAPLOW(step2[8] - step2[9], bd);
|
| + step1[10] = WRAPLOW(-step2[10] + step2[11], bd);
|
| + step1[11] = WRAPLOW(step2[10] + step2[11], bd);
|
| + step1[12] = WRAPLOW(step2[12] + step2[13], bd);
|
| + step1[13] = WRAPLOW(step2[12] - step2[13], bd);
|
| + step1[14] = WRAPLOW(-step2[14] + step2[15], bd);
|
| + step1[15] = WRAPLOW(step2[14] + step2[15], bd);
|
|
|
| // stage 4
|
| temp1 = (step1[0] + step1[1]) * cospi_16_64;
|
| temp2 = (step1[0] - step1[1]) * cospi_16_64;
|
| - step2[0] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[1] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[0] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[1] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
|
| temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
|
| - step2[2] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[3] = WRAPLOW(dct_const_round_shift(temp2));
|
| - step2[4] = WRAPLOW(step1[4] + step1[5]);
|
| - step2[5] = WRAPLOW(step1[4] - step1[5]);
|
| - step2[6] = WRAPLOW(-step1[6] + step1[7]);
|
| - step2[7] = WRAPLOW(step1[6] + step1[7]);
|
| + step2[2] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[3] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| + step2[4] = WRAPLOW(step1[4] + step1[5], bd);
|
| + step2[5] = WRAPLOW(step1[4] - step1[5], bd);
|
| + step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
|
| + step2[7] = WRAPLOW(step1[6] + step1[7], bd);
|
|
|
| step2[8] = step1[8];
|
| step2[15] = step1[15];
|
| temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
|
| temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
|
| - step2[9] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[14] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
|
| temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
|
| - step2[10] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[13] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| step2[11] = step1[11];
|
| step2[12] = step1[12];
|
|
|
| // stage 5
|
| - step1[0] = WRAPLOW(step2[0] + step2[3]);
|
| - step1[1] = WRAPLOW(step2[1] + step2[2]);
|
| - step1[2] = WRAPLOW(step2[1] - step2[2]);
|
| - step1[3] = WRAPLOW(step2[0] - step2[3]);
|
| + step1[0] = WRAPLOW(step2[0] + step2[3], bd);
|
| + step1[1] = WRAPLOW(step2[1] + step2[2], bd);
|
| + step1[2] = WRAPLOW(step2[1] - step2[2], bd);
|
| + step1[3] = WRAPLOW(step2[0] - step2[3], bd);
|
| step1[4] = step2[4];
|
| temp1 = (step2[6] - step2[5]) * cospi_16_64;
|
| temp2 = (step2[5] + step2[6]) * cospi_16_64;
|
| - step1[5] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[6] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| step1[7] = step2[7];
|
|
|
| - step1[8] = WRAPLOW(step2[8] + step2[11]);
|
| - step1[9] = WRAPLOW(step2[9] + step2[10]);
|
| - step1[10] = WRAPLOW(step2[9] - step2[10]);
|
| - step1[11] = WRAPLOW(step2[8] - step2[11]);
|
| - step1[12] = WRAPLOW(-step2[12] + step2[15]);
|
| - step1[13] = WRAPLOW(-step2[13] + step2[14]);
|
| - step1[14] = WRAPLOW(step2[13] + step2[14]);
|
| - step1[15] = WRAPLOW(step2[12] + step2[15]);
|
| + step1[8] = WRAPLOW(step2[8] + step2[11], bd);
|
| + step1[9] = WRAPLOW(step2[9] + step2[10], bd);
|
| + step1[10] = WRAPLOW(step2[9] - step2[10], bd);
|
| + step1[11] = WRAPLOW(step2[8] - step2[11], bd);
|
| + step1[12] = WRAPLOW(-step2[12] + step2[15], bd);
|
| + step1[13] = WRAPLOW(-step2[13] + step2[14], bd);
|
| + step1[14] = WRAPLOW(step2[13] + step2[14], bd);
|
| + step1[15] = WRAPLOW(step2[12] + step2[15], bd);
|
|
|
| // stage 6
|
| - step2[0] = WRAPLOW(step1[0] + step1[7]);
|
| - step2[1] = WRAPLOW(step1[1] + step1[6]);
|
| - step2[2] = WRAPLOW(step1[2] + step1[5]);
|
| - step2[3] = WRAPLOW(step1[3] + step1[4]);
|
| - step2[4] = WRAPLOW(step1[3] - step1[4]);
|
| - step2[5] = WRAPLOW(step1[2] - step1[5]);
|
| - step2[6] = WRAPLOW(step1[1] - step1[6]);
|
| - step2[7] = WRAPLOW(step1[0] - step1[7]);
|
| + step2[0] = WRAPLOW(step1[0] + step1[7], bd);
|
| + step2[1] = WRAPLOW(step1[1] + step1[6], bd);
|
| + step2[2] = WRAPLOW(step1[2] + step1[5], bd);
|
| + step2[3] = WRAPLOW(step1[3] + step1[4], bd);
|
| + step2[4] = WRAPLOW(step1[3] - step1[4], bd);
|
| + step2[5] = WRAPLOW(step1[2] - step1[5], bd);
|
| + step2[6] = WRAPLOW(step1[1] - step1[6], bd);
|
| + step2[7] = WRAPLOW(step1[0] - step1[7], bd);
|
| step2[8] = step1[8];
|
| step2[9] = step1[9];
|
| temp1 = (-step1[10] + step1[13]) * cospi_16_64;
|
| temp2 = (step1[10] + step1[13]) * cospi_16_64;
|
| - step2[10] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[13] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = (-step1[11] + step1[12]) * cospi_16_64;
|
| temp2 = (step1[11] + step1[12]) * cospi_16_64;
|
| - step2[11] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[12] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| step2[14] = step1[14];
|
| step2[15] = step1[15];
|
|
|
| // stage 7
|
| - output[0] = WRAPLOW(step2[0] + step2[15]);
|
| - output[1] = WRAPLOW(step2[1] + step2[14]);
|
| - output[2] = WRAPLOW(step2[2] + step2[13]);
|
| - output[3] = WRAPLOW(step2[3] + step2[12]);
|
| - output[4] = WRAPLOW(step2[4] + step2[11]);
|
| - output[5] = WRAPLOW(step2[5] + step2[10]);
|
| - output[6] = WRAPLOW(step2[6] + step2[9]);
|
| - output[7] = WRAPLOW(step2[7] + step2[8]);
|
| - output[8] = WRAPLOW(step2[7] - step2[8]);
|
| - output[9] = WRAPLOW(step2[6] - step2[9]);
|
| - output[10] = WRAPLOW(step2[5] - step2[10]);
|
| - output[11] = WRAPLOW(step2[4] - step2[11]);
|
| - output[12] = WRAPLOW(step2[3] - step2[12]);
|
| - output[13] = WRAPLOW(step2[2] - step2[13]);
|
| - output[14] = WRAPLOW(step2[1] - step2[14]);
|
| - output[15] = WRAPLOW(step2[0] - step2[15]);
|
| + output[0] = WRAPLOW(step2[0] + step2[15], bd);
|
| + output[1] = WRAPLOW(step2[1] + step2[14], bd);
|
| + output[2] = WRAPLOW(step2[2] + step2[13], bd);
|
| + output[3] = WRAPLOW(step2[3] + step2[12], bd);
|
| + output[4] = WRAPLOW(step2[4] + step2[11], bd);
|
| + output[5] = WRAPLOW(step2[5] + step2[10], bd);
|
| + output[6] = WRAPLOW(step2[6] + step2[9], bd);
|
| + output[7] = WRAPLOW(step2[7] + step2[8], bd);
|
| + output[8] = WRAPLOW(step2[7] - step2[8], bd);
|
| + output[9] = WRAPLOW(step2[6] - step2[9], bd);
|
| + output[10] = WRAPLOW(step2[5] - step2[10], bd);
|
| + output[11] = WRAPLOW(step2[4] - step2[11], bd);
|
| + output[12] = WRAPLOW(step2[3] - step2[12], bd);
|
| + output[13] = WRAPLOW(step2[2] - step2[13], bd);
|
| + output[14] = WRAPLOW(step2[1] - step2[14], bd);
|
| + output[15] = WRAPLOW(step2[0] - step2[15], bd);
|
| }
|
|
|
| -void vp9_high_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int stride, int bd) {
|
| +void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int stride, int bd) {
|
| tran_low_t out[16 * 16];
|
| tran_low_t *outptr = out;
|
| int i, j;
|
| @@ -2073,7 +2091,7 @@
|
|
|
| // First transform rows.
|
| for (i = 0; i < 16; ++i) {
|
| - high_idct16(input, outptr, bd);
|
| + highbd_idct16(input, outptr, bd);
|
| input += 16;
|
| outptr += 16;
|
| }
|
| @@ -2082,14 +2100,16 @@
|
| for (i = 0; i < 16; ++i) {
|
| for (j = 0; j < 16; ++j)
|
| temp_in[j] = out[j * 16 + i];
|
| - high_idct16(temp_in, temp_out, bd);
|
| - for (j = 0; j < 16; ++j)
|
| - dest[j * stride + i] = clip_pixel_bd_high(
|
| + highbd_idct16(temp_in, temp_out, bd);
|
| + for (j = 0; j < 16; ++j) {
|
| + dest[j * stride + i] = highbd_clip_pixel_add(
|
| dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
|
| + }
|
| }
|
| }
|
|
|
| -static void high_iadst16(const tran_low_t *input, tran_low_t *output, int bd) {
|
| +static void highbd_iadst16(const tran_low_t *input, tran_low_t *output,
|
| + int bd) {
|
| tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8;
|
| tran_high_t s9, s10, s11, s12, s13, s14, s15;
|
|
|
| @@ -2135,22 +2155,22 @@
|
| s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
|
| s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
|
|
|
| - x0 = WRAPLOW(dct_const_round_shift(s0 + s8));
|
| - x1 = WRAPLOW(dct_const_round_shift(s1 + s9));
|
| - x2 = WRAPLOW(dct_const_round_shift(s2 + s10));
|
| - x3 = WRAPLOW(dct_const_round_shift(s3 + s11));
|
| - x4 = WRAPLOW(dct_const_round_shift(s4 + s12));
|
| - x5 = WRAPLOW(dct_const_round_shift(s5 + s13));
|
| - x6 = WRAPLOW(dct_const_round_shift(s6 + s14));
|
| - x7 = WRAPLOW(dct_const_round_shift(s7 + s15));
|
| - x8 = WRAPLOW(dct_const_round_shift(s0 - s8));
|
| - x9 = WRAPLOW(dct_const_round_shift(s1 - s9));
|
| - x10 = WRAPLOW(dct_const_round_shift(s2 - s10));
|
| - x11 = WRAPLOW(dct_const_round_shift(s3 - s11));
|
| - x12 = WRAPLOW(dct_const_round_shift(s4 - s12));
|
| - x13 = WRAPLOW(dct_const_round_shift(s5 - s13));
|
| - x14 = WRAPLOW(dct_const_round_shift(s6 - s14));
|
| - x15 = WRAPLOW(dct_const_round_shift(s7 - s15));
|
| + x0 = WRAPLOW(dct_const_round_shift(s0 + s8), bd);
|
| + x1 = WRAPLOW(dct_const_round_shift(s1 + s9), bd);
|
| + x2 = WRAPLOW(dct_const_round_shift(s2 + s10), bd);
|
| + x3 = WRAPLOW(dct_const_round_shift(s3 + s11), bd);
|
| + x4 = WRAPLOW(dct_const_round_shift(s4 + s12), bd);
|
| + x5 = WRAPLOW(dct_const_round_shift(s5 + s13), bd);
|
| + x6 = WRAPLOW(dct_const_round_shift(s6 + s14), bd);
|
| + x7 = WRAPLOW(dct_const_round_shift(s7 + s15), bd);
|
| + x8 = WRAPLOW(dct_const_round_shift(s0 - s8), bd);
|
| + x9 = WRAPLOW(dct_const_round_shift(s1 - s9), bd);
|
| + x10 = WRAPLOW(dct_const_round_shift(s2 - s10), bd);
|
| + x11 = WRAPLOW(dct_const_round_shift(s3 - s11), bd);
|
| + x12 = WRAPLOW(dct_const_round_shift(s4 - s12), bd);
|
| + x13 = WRAPLOW(dct_const_round_shift(s5 - s13), bd);
|
| + x14 = WRAPLOW(dct_const_round_shift(s6 - s14), bd);
|
| + x15 = WRAPLOW(dct_const_round_shift(s7 - s15), bd);
|
|
|
| // stage 2
|
| s0 = x0;
|
| @@ -2170,22 +2190,22 @@
|
| s14 = -x14 * cospi_12_64 + x15 * cospi_20_64;
|
| s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
|
|
|
| - x0 = WRAPLOW(s0 + s4);
|
| - x1 = WRAPLOW(s1 + s5);
|
| - x2 = WRAPLOW(s2 + s6);
|
| - x3 = WRAPLOW(s3 + s7);
|
| - x4 = WRAPLOW(s0 - s4);
|
| - x5 = WRAPLOW(s1 - s5);
|
| - x6 = WRAPLOW(s2 - s6);
|
| - x7 = WRAPLOW(s3 - s7);
|
| - x8 = WRAPLOW(dct_const_round_shift(s8 + s12));
|
| - x9 = WRAPLOW(dct_const_round_shift(s9 + s13));
|
| - x10 = WRAPLOW(dct_const_round_shift(s10 + s14));
|
| - x11 = WRAPLOW(dct_const_round_shift(s11 + s15));
|
| - x12 = WRAPLOW(dct_const_round_shift(s8 - s12));
|
| - x13 = WRAPLOW(dct_const_round_shift(s9 - s13));
|
| - x14 = WRAPLOW(dct_const_round_shift(s10 - s14));
|
| - x15 = WRAPLOW(dct_const_round_shift(s11 - s15));
|
| + x0 = WRAPLOW(s0 + s4, bd);
|
| + x1 = WRAPLOW(s1 + s5, bd);
|
| + x2 = WRAPLOW(s2 + s6, bd);
|
| + x3 = WRAPLOW(s3 + s7, bd);
|
| + x4 = WRAPLOW(s0 - s4, bd);
|
| + x5 = WRAPLOW(s1 - s5, bd);
|
| + x6 = WRAPLOW(s2 - s6, bd);
|
| + x7 = WRAPLOW(s3 - s7, bd);
|
| + x8 = WRAPLOW(dct_const_round_shift(s8 + s12), bd);
|
| + x9 = WRAPLOW(dct_const_round_shift(s9 + s13), bd);
|
| + x10 = WRAPLOW(dct_const_round_shift(s10 + s14), bd);
|
| + x11 = WRAPLOW(dct_const_round_shift(s11 + s15), bd);
|
| + x12 = WRAPLOW(dct_const_round_shift(s8 - s12), bd);
|
| + x13 = WRAPLOW(dct_const_round_shift(s9 - s13), bd);
|
| + x14 = WRAPLOW(dct_const_round_shift(s10 - s14), bd);
|
| + x15 = WRAPLOW(dct_const_round_shift(s11 - s15), bd);
|
|
|
| // stage 3
|
| s0 = x0;
|
| @@ -2205,22 +2225,22 @@
|
| s14 = -x14 * cospi_24_64 + x15 * cospi_8_64;
|
| s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
|
|
|
| - x0 = WRAPLOW(s0 + s2);
|
| - x1 = WRAPLOW(s1 + s3);
|
| - x2 = WRAPLOW(s0 - s2);
|
| - x3 = WRAPLOW(s1 - s3);
|
| - x4 = WRAPLOW(dct_const_round_shift(s4 + s6));
|
| - x5 = WRAPLOW(dct_const_round_shift(s5 + s7));
|
| - x6 = WRAPLOW(dct_const_round_shift(s4 - s6));
|
| - x7 = WRAPLOW(dct_const_round_shift(s5 - s7));
|
| - x8 = WRAPLOW(s8 + s10);
|
| - x9 = WRAPLOW(s9 + s11);
|
| - x10 = WRAPLOW(s8 - s10);
|
| - x11 = WRAPLOW(s9 - s11);
|
| - x12 = WRAPLOW(dct_const_round_shift(s12 + s14));
|
| - x13 = WRAPLOW(dct_const_round_shift(s13 + s15));
|
| - x14 = WRAPLOW(dct_const_round_shift(s12 - s14));
|
| - x15 = WRAPLOW(dct_const_round_shift(s13 - s15));
|
| + x0 = WRAPLOW(s0 + s2, bd);
|
| + x1 = WRAPLOW(s1 + s3, bd);
|
| + x2 = WRAPLOW(s0 - s2, bd);
|
| + x3 = WRAPLOW(s1 - s3, bd);
|
| + x4 = WRAPLOW(dct_const_round_shift(s4 + s6), bd);
|
| + x5 = WRAPLOW(dct_const_round_shift(s5 + s7), bd);
|
| + x6 = WRAPLOW(dct_const_round_shift(s4 - s6), bd);
|
| + x7 = WRAPLOW(dct_const_round_shift(s5 - s7), bd);
|
| + x8 = WRAPLOW(s8 + s10, bd);
|
| + x9 = WRAPLOW(s9 + s11, bd);
|
| + x10 = WRAPLOW(s8 - s10, bd);
|
| + x11 = WRAPLOW(s9 - s11, bd);
|
| + x12 = WRAPLOW(dct_const_round_shift(s12 + s14), bd);
|
| + x13 = WRAPLOW(dct_const_round_shift(s13 + s15), bd);
|
| + x14 = WRAPLOW(dct_const_round_shift(s12 - s14), bd);
|
| + x15 = WRAPLOW(dct_const_round_shift(s13 - s15), bd);
|
|
|
| // stage 4
|
| s2 = (- cospi_16_64) * (x2 + x3);
|
| @@ -2232,47 +2252,47 @@
|
| s14 = (- cospi_16_64) * (x14 + x15);
|
| s15 = cospi_16_64 * (x14 - x15);
|
|
|
| - x2 = WRAPLOW(dct_const_round_shift(s2));
|
| - x3 = WRAPLOW(dct_const_round_shift(s3));
|
| - x6 = WRAPLOW(dct_const_round_shift(s6));
|
| - x7 = WRAPLOW(dct_const_round_shift(s7));
|
| - x10 = WRAPLOW(dct_const_round_shift(s10));
|
| - x11 = WRAPLOW(dct_const_round_shift(s11));
|
| - x14 = WRAPLOW(dct_const_round_shift(s14));
|
| - x15 = WRAPLOW(dct_const_round_shift(s15));
|
| + x2 = WRAPLOW(dct_const_round_shift(s2), bd);
|
| + x3 = WRAPLOW(dct_const_round_shift(s3), bd);
|
| + x6 = WRAPLOW(dct_const_round_shift(s6), bd);
|
| + x7 = WRAPLOW(dct_const_round_shift(s7), bd);
|
| + x10 = WRAPLOW(dct_const_round_shift(s10), bd);
|
| + x11 = WRAPLOW(dct_const_round_shift(s11), bd);
|
| + x14 = WRAPLOW(dct_const_round_shift(s14), bd);
|
| + x15 = WRAPLOW(dct_const_round_shift(s15), bd);
|
|
|
| - output[0] = WRAPLOW(x0);
|
| - output[1] = WRAPLOW(-x8);
|
| - output[2] = WRAPLOW(x12);
|
| - output[3] = WRAPLOW(-x4);
|
| - output[4] = WRAPLOW(x6);
|
| - output[5] = WRAPLOW(x14);
|
| - output[6] = WRAPLOW(x10);
|
| - output[7] = WRAPLOW(x2);
|
| - output[8] = WRAPLOW(x3);
|
| - output[9] = WRAPLOW(x11);
|
| - output[10] = WRAPLOW(x15);
|
| - output[11] = WRAPLOW(x7);
|
| - output[12] = WRAPLOW(x5);
|
| - output[13] = WRAPLOW(-x13);
|
| - output[14] = WRAPLOW(x9);
|
| - output[15] = WRAPLOW(-x1);
|
| + output[0] = WRAPLOW(x0, bd);
|
| + output[1] = WRAPLOW(-x8, bd);
|
| + output[2] = WRAPLOW(x12, bd);
|
| + output[3] = WRAPLOW(-x4, bd);
|
| + output[4] = WRAPLOW(x6, bd);
|
| + output[5] = WRAPLOW(x14, bd);
|
| + output[6] = WRAPLOW(x10, bd);
|
| + output[7] = WRAPLOW(x2, bd);
|
| + output[8] = WRAPLOW(x3, bd);
|
| + output[9] = WRAPLOW(x11, bd);
|
| + output[10] = WRAPLOW(x15, bd);
|
| + output[11] = WRAPLOW(x7, bd);
|
| + output[12] = WRAPLOW(x5, bd);
|
| + output[13] = WRAPLOW(-x13, bd);
|
| + output[14] = WRAPLOW(x9, bd);
|
| + output[15] = WRAPLOW(-x1, bd);
|
| }
|
|
|
| -static const high_transform_2d HIGH_IHT_16[] = {
|
| - { high_idct16, high_idct16 }, // DCT_DCT = 0
|
| - { high_iadst16, high_idct16 }, // ADST_DCT = 1
|
| - { high_idct16, high_iadst16 }, // DCT_ADST = 2
|
| - { high_iadst16, high_iadst16 } // ADST_ADST = 3
|
| +static const highbd_transform_2d HIGH_IHT_16[] = {
|
| + { highbd_idct16, highbd_idct16 }, // DCT_DCT = 0
|
| + { highbd_iadst16, highbd_idct16 }, // ADST_DCT = 1
|
| + { highbd_idct16, highbd_iadst16 }, // DCT_ADST = 2
|
| + { highbd_iadst16, highbd_iadst16 } // ADST_ADST = 3
|
| };
|
|
|
| -void vp9_high_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int stride, int tx_type, int bd) {
|
| +void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int stride, int tx_type, int bd) {
|
| int i, j;
|
| tran_low_t out[16 * 16];
|
| tran_low_t *outptr = out;
|
| tran_low_t temp_in[16], temp_out[16];
|
| - const high_transform_2d ht = HIGH_IHT_16[tx_type];
|
| + const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
|
| uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
|
|
| // Rows
|
| @@ -2287,14 +2307,15 @@
|
| for (j = 0; j < 16; ++j)
|
| temp_in[j] = out[j * 16 + i];
|
| ht.cols(temp_in, temp_out, bd);
|
| - for (j = 0; j < 16; ++j)
|
| - dest[j * stride + i] = clip_pixel_bd_high(
|
| + for (j = 0; j < 16; ++j) {
|
| + dest[j * stride + i] = highbd_clip_pixel_add(
|
| dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
|
| + }
|
| }
|
| }
|
|
|
| -void vp9_high_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int stride, int bd) {
|
| +void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int stride, int bd) {
|
| tran_low_t out[16 * 16] = { 0 };
|
| tran_low_t *outptr = out;
|
| int i, j;
|
| @@ -2304,7 +2325,7 @@
|
| // First transform rows. Since all non-zero dct coefficients are in
|
| // upper-left 4x4 area, we only need to calculate first 4 rows here.
|
| for (i = 0; i < 4; ++i) {
|
| - high_idct16(input, outptr, bd);
|
| + highbd_idct16(input, outptr, bd);
|
| input += 16;
|
| outptr += 16;
|
| }
|
| @@ -2313,30 +2334,31 @@
|
| for (i = 0; i < 16; ++i) {
|
| for (j = 0; j < 16; ++j)
|
| temp_in[j] = out[j*16 + i];
|
| - high_idct16(temp_in, temp_out, bd);
|
| - for (j = 0; j < 16; ++j)
|
| - dest[j * stride + i] = clip_pixel_bd_high(
|
| + highbd_idct16(temp_in, temp_out, bd);
|
| + for (j = 0; j < 16; ++j) {
|
| + dest[j * stride + i] = highbd_clip_pixel_add(
|
| dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
|
| + }
|
| }
|
| }
|
|
|
| -void vp9_high_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int stride, int bd) {
|
| +void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int stride, int bd) {
|
| int i, j;
|
| tran_high_t a1;
|
| - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
|
| + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
|
| uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
|
|
| - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
|
| + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
|
| a1 = ROUND_POWER_OF_TWO(out, 6);
|
| for (j = 0; j < 16; ++j) {
|
| for (i = 0; i < 16; ++i)
|
| - dest[i] = clip_pixel_bd_high(dest[i], a1, bd);
|
| + dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
|
| dest += stride;
|
| }
|
| }
|
|
|
| -static void high_idct32(const tran_low_t *input, tran_low_t *output, int bd) {
|
| +static void highbd_idct32(const tran_low_t *input, tran_low_t *output, int bd) {
|
| tran_low_t step1[32], step2[32];
|
| tran_high_t temp1, temp2;
|
| (void) bd;
|
| @@ -2361,43 +2383,43 @@
|
|
|
| temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;
|
| temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
|
| - step1[16] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[31] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[16] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[31] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
|
| temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
|
| - step1[17] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[30] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[17] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[30] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
|
| temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
|
| - step1[18] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[29] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
|
| temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
|
| - step1[19] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[28] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[19] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[28] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
|
| temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
|
| - step1[20] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[27] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
|
| temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
|
| - step1[21] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[26] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
|
| temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
|
| - step1[22] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[25] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
|
| temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
|
| - step1[23] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[24] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[23] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[24] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| // stage 2
|
| step2[0] = step1[0];
|
| @@ -2411,40 +2433,40 @@
|
|
|
| temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
|
| temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
|
| - step2[8] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[15] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[8] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[15] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
|
| temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
|
| - step2[9] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[14] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
|
| temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
|
| - step2[10] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[13] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
|
| temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
|
| - step2[11] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[12] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| - step2[16] = WRAPLOW(step1[16] + step1[17]);
|
| - step2[17] = WRAPLOW(step1[16] - step1[17]);
|
| - step2[18] = WRAPLOW(-step1[18] + step1[19]);
|
| - step2[19] = WRAPLOW(step1[18] + step1[19]);
|
| - step2[20] = WRAPLOW(step1[20] + step1[21]);
|
| - step2[21] = WRAPLOW(step1[20] - step1[21]);
|
| - step2[22] = WRAPLOW(-step1[22] + step1[23]);
|
| - step2[23] = WRAPLOW(step1[22] + step1[23]);
|
| - step2[24] = WRAPLOW(step1[24] + step1[25]);
|
| - step2[25] = WRAPLOW(step1[24] - step1[25]);
|
| - step2[26] = WRAPLOW(-step1[26] + step1[27]);
|
| - step2[27] = WRAPLOW(step1[26] + step1[27]);
|
| - step2[28] = WRAPLOW(step1[28] + step1[29]);
|
| - step2[29] = WRAPLOW(step1[28] - step1[29]);
|
| - step2[30] = WRAPLOW(-step1[30] + step1[31]);
|
| - step2[31] = WRAPLOW(step1[30] + step1[31]);
|
| + step2[16] = WRAPLOW(step1[16] + step1[17], bd);
|
| + step2[17] = WRAPLOW(step1[16] - step1[17], bd);
|
| + step2[18] = WRAPLOW(-step1[18] + step1[19], bd);
|
| + step2[19] = WRAPLOW(step1[18] + step1[19], bd);
|
| + step2[20] = WRAPLOW(step1[20] + step1[21], bd);
|
| + step2[21] = WRAPLOW(step1[20] - step1[21], bd);
|
| + step2[22] = WRAPLOW(-step1[22] + step1[23], bd);
|
| + step2[23] = WRAPLOW(step1[22] + step1[23], bd);
|
| + step2[24] = WRAPLOW(step1[24] + step1[25], bd);
|
| + step2[25] = WRAPLOW(step1[24] - step1[25], bd);
|
| + step2[26] = WRAPLOW(-step1[26] + step1[27], bd);
|
| + step2[27] = WRAPLOW(step1[26] + step1[27], bd);
|
| + step2[28] = WRAPLOW(step1[28] + step1[29], bd);
|
| + step2[29] = WRAPLOW(step1[28] - step1[29], bd);
|
| + step2[30] = WRAPLOW(-step1[30] + step1[31], bd);
|
| + step2[31] = WRAPLOW(step1[30] + step1[31], bd);
|
|
|
| // stage 3
|
| step1[0] = step2[0];
|
| @@ -2454,42 +2476,42 @@
|
|
|
| temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
|
| temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
|
| - step1[4] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[7] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
|
| temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
|
| - step1[5] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[6] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
|
|
| - step1[8] = WRAPLOW(step2[8] + step2[9]);
|
| - step1[9] = WRAPLOW(step2[8] - step2[9]);
|
| - step1[10] = WRAPLOW(-step2[10] + step2[11]);
|
| - step1[11] = WRAPLOW(step2[10] + step2[11]);
|
| - step1[12] = WRAPLOW(step2[12] + step2[13]);
|
| - step1[13] = WRAPLOW(step2[12] - step2[13]);
|
| - step1[14] = WRAPLOW(-step2[14] + step2[15]);
|
| - step1[15] = WRAPLOW(step2[14] + step2[15]);
|
| + step1[8] = WRAPLOW(step2[8] + step2[9], bd);
|
| + step1[9] = WRAPLOW(step2[8] - step2[9], bd);
|
| + step1[10] = WRAPLOW(-step2[10] + step2[11], bd);
|
| + step1[11] = WRAPLOW(step2[10] + step2[11], bd);
|
| + step1[12] = WRAPLOW(step2[12] + step2[13], bd);
|
| + step1[13] = WRAPLOW(step2[12] - step2[13], bd);
|
| + step1[14] = WRAPLOW(-step2[14] + step2[15], bd);
|
| + step1[15] = WRAPLOW(step2[14] + step2[15], bd);
|
|
|
| step1[16] = step2[16];
|
| step1[31] = step2[31];
|
| temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
|
| temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
|
| - step1[17] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[30] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[17] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[30] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
|
| temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
|
| - step1[18] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[29] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| step1[19] = step2[19];
|
| step1[20] = step2[20];
|
| temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
|
| temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
|
| - step1[21] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[26] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
|
| temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
|
| - step1[22] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[25] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| step1[23] = step2[23];
|
| step1[24] = step2[24];
|
| step1[27] = step2[27];
|
| @@ -2498,87 +2520,87 @@
|
| // stage 4
|
| temp1 = (step1[0] + step1[1]) * cospi_16_64;
|
| temp2 = (step1[0] - step1[1]) * cospi_16_64;
|
| - step2[0] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[1] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[0] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[1] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
|
| temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
|
| - step2[2] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[3] = WRAPLOW(dct_const_round_shift(temp2));
|
| - step2[4] = WRAPLOW(step1[4] + step1[5]);
|
| - step2[5] = WRAPLOW(step1[4] - step1[5]);
|
| - step2[6] = WRAPLOW(-step1[6] + step1[7]);
|
| - step2[7] = WRAPLOW(step1[6] + step1[7]);
|
| + step2[2] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[3] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| + step2[4] = WRAPLOW(step1[4] + step1[5], bd);
|
| + step2[5] = WRAPLOW(step1[4] - step1[5], bd);
|
| + step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
|
| + step2[7] = WRAPLOW(step1[6] + step1[7], bd);
|
|
|
| step2[8] = step1[8];
|
| step2[15] = step1[15];
|
| temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
|
| temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
|
| - step2[9] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[14] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
|
| temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
|
| - step2[10] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[13] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| step2[11] = step1[11];
|
| step2[12] = step1[12];
|
|
|
| - step2[16] = WRAPLOW(step1[16] + step1[19]);
|
| - step2[17] = WRAPLOW(step1[17] + step1[18]);
|
| - step2[18] = WRAPLOW(step1[17] - step1[18]);
|
| - step2[19] = WRAPLOW(step1[16] - step1[19]);
|
| - step2[20] = WRAPLOW(-step1[20] + step1[23]);
|
| - step2[21] = WRAPLOW(-step1[21] + step1[22]);
|
| - step2[22] = WRAPLOW(step1[21] + step1[22]);
|
| - step2[23] = WRAPLOW(step1[20] + step1[23]);
|
| + step2[16] = WRAPLOW(step1[16] + step1[19], bd);
|
| + step2[17] = WRAPLOW(step1[17] + step1[18], bd);
|
| + step2[18] = WRAPLOW(step1[17] - step1[18], bd);
|
| + step2[19] = WRAPLOW(step1[16] - step1[19], bd);
|
| + step2[20] = WRAPLOW(-step1[20] + step1[23], bd);
|
| + step2[21] = WRAPLOW(-step1[21] + step1[22], bd);
|
| + step2[22] = WRAPLOW(step1[21] + step1[22], bd);
|
| + step2[23] = WRAPLOW(step1[20] + step1[23], bd);
|
|
|
| - step2[24] = WRAPLOW(step1[24] + step1[27]);
|
| - step2[25] = WRAPLOW(step1[25] + step1[26]);
|
| - step2[26] = WRAPLOW(step1[25] - step1[26]);
|
| - step2[27] = WRAPLOW(step1[24] - step1[27]);
|
| - step2[28] = WRAPLOW(-step1[28] + step1[31]);
|
| - step2[29] = WRAPLOW(-step1[29] + step1[30]);
|
| - step2[30] = WRAPLOW(step1[29] + step1[30]);
|
| - step2[31] = WRAPLOW(step1[28] + step1[31]);
|
| + step2[24] = WRAPLOW(step1[24] + step1[27], bd);
|
| + step2[25] = WRAPLOW(step1[25] + step1[26], bd);
|
| + step2[26] = WRAPLOW(step1[25] - step1[26], bd);
|
| + step2[27] = WRAPLOW(step1[24] - step1[27], bd);
|
| + step2[28] = WRAPLOW(-step1[28] + step1[31], bd);
|
| + step2[29] = WRAPLOW(-step1[29] + step1[30], bd);
|
| + step2[30] = WRAPLOW(step1[29] + step1[30], bd);
|
| + step2[31] = WRAPLOW(step1[28] + step1[31], bd);
|
|
|
| // stage 5
|
| - step1[0] = WRAPLOW(step2[0] + step2[3]);
|
| - step1[1] = WRAPLOW(step2[1] + step2[2]);
|
| - step1[2] = WRAPLOW(step2[1] - step2[2]);
|
| - step1[3] = WRAPLOW(step2[0] - step2[3]);
|
| + step1[0] = WRAPLOW(step2[0] + step2[3], bd);
|
| + step1[1] = WRAPLOW(step2[1] + step2[2], bd);
|
| + step1[2] = WRAPLOW(step2[1] - step2[2], bd);
|
| + step1[3] = WRAPLOW(step2[0] - step2[3], bd);
|
| step1[4] = step2[4];
|
| temp1 = (step2[6] - step2[5]) * cospi_16_64;
|
| temp2 = (step2[5] + step2[6]) * cospi_16_64;
|
| - step1[5] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[6] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| step1[7] = step2[7];
|
|
|
| - step1[8] = WRAPLOW(step2[8] + step2[11]);
|
| - step1[9] = WRAPLOW(step2[9] + step2[10]);
|
| - step1[10] = WRAPLOW(step2[9] - step2[10]);
|
| - step1[11] = WRAPLOW(step2[8] - step2[11]);
|
| - step1[12] = WRAPLOW(-step2[12] + step2[15]);
|
| - step1[13] = WRAPLOW(-step2[13] + step2[14]);
|
| - step1[14] = WRAPLOW(step2[13] + step2[14]);
|
| - step1[15] = WRAPLOW(step2[12] + step2[15]);
|
| + step1[8] = WRAPLOW(step2[8] + step2[11], bd);
|
| + step1[9] = WRAPLOW(step2[9] + step2[10], bd);
|
| + step1[10] = WRAPLOW(step2[9] - step2[10], bd);
|
| + step1[11] = WRAPLOW(step2[8] - step2[11], bd);
|
| + step1[12] = WRAPLOW(-step2[12] + step2[15], bd);
|
| + step1[13] = WRAPLOW(-step2[13] + step2[14], bd);
|
| + step1[14] = WRAPLOW(step2[13] + step2[14], bd);
|
| + step1[15] = WRAPLOW(step2[12] + step2[15], bd);
|
|
|
| step1[16] = step2[16];
|
| step1[17] = step2[17];
|
| temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
|
| temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
|
| - step1[18] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[29] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
|
| temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
|
| - step1[19] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[28] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[19] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[28] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
|
| temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
|
| - step1[20] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[27] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
|
| temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
|
| - step1[21] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[26] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| step1[22] = step2[22];
|
| step1[23] = step2[23];
|
| step1[24] = step2[24];
|
| @@ -2587,62 +2609,62 @@
|
| step1[31] = step2[31];
|
|
|
| // stage 6
|
| - step2[0] = WRAPLOW(step1[0] + step1[7]);
|
| - step2[1] = WRAPLOW(step1[1] + step1[6]);
|
| - step2[2] = WRAPLOW(step1[2] + step1[5]);
|
| - step2[3] = WRAPLOW(step1[3] + step1[4]);
|
| - step2[4] = WRAPLOW(step1[3] - step1[4]);
|
| - step2[5] = WRAPLOW(step1[2] - step1[5]);
|
| - step2[6] = WRAPLOW(step1[1] - step1[6]);
|
| - step2[7] = WRAPLOW(step1[0] - step1[7]);
|
| + step2[0] = WRAPLOW(step1[0] + step1[7], bd);
|
| + step2[1] = WRAPLOW(step1[1] + step1[6], bd);
|
| + step2[2] = WRAPLOW(step1[2] + step1[5], bd);
|
| + step2[3] = WRAPLOW(step1[3] + step1[4], bd);
|
| + step2[4] = WRAPLOW(step1[3] - step1[4], bd);
|
| + step2[5] = WRAPLOW(step1[2] - step1[5], bd);
|
| + step2[6] = WRAPLOW(step1[1] - step1[6], bd);
|
| + step2[7] = WRAPLOW(step1[0] - step1[7], bd);
|
| step2[8] = step1[8];
|
| step2[9] = step1[9];
|
| temp1 = (-step1[10] + step1[13]) * cospi_16_64;
|
| temp2 = (step1[10] + step1[13]) * cospi_16_64;
|
| - step2[10] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[13] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = (-step1[11] + step1[12]) * cospi_16_64;
|
| temp2 = (step1[11] + step1[12]) * cospi_16_64;
|
| - step2[11] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step2[12] = WRAPLOW(dct_const_round_shift(temp2));
|
| - step2[14] = WRAPLOW(step1[14]);
|
| - step2[15] = WRAPLOW(step1[15]);
|
| + step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| + step2[14] = step1[14];
|
| + step2[15] = step1[15];
|
|
|
| - step2[16] = WRAPLOW(step1[16] + step1[23]);
|
| - step2[17] = WRAPLOW(step1[17] + step1[22]);
|
| - step2[18] = WRAPLOW(step1[18] + step1[21]);
|
| - step2[19] = WRAPLOW(step1[19] + step1[20]);
|
| - step2[20] = WRAPLOW(step1[19] - step1[20]);
|
| - step2[21] = WRAPLOW(step1[18] - step1[21]);
|
| - step2[22] = WRAPLOW(step1[17] - step1[22]);
|
| - step2[23] = WRAPLOW(step1[16] - step1[23]);
|
| + step2[16] = WRAPLOW(step1[16] + step1[23], bd);
|
| + step2[17] = WRAPLOW(step1[17] + step1[22], bd);
|
| + step2[18] = WRAPLOW(step1[18] + step1[21], bd);
|
| + step2[19] = WRAPLOW(step1[19] + step1[20], bd);
|
| + step2[20] = WRAPLOW(step1[19] - step1[20], bd);
|
| + step2[21] = WRAPLOW(step1[18] - step1[21], bd);
|
| + step2[22] = WRAPLOW(step1[17] - step1[22], bd);
|
| + step2[23] = WRAPLOW(step1[16] - step1[23], bd);
|
|
|
| - step2[24] = WRAPLOW(-step1[24] + step1[31]);
|
| - step2[25] = WRAPLOW(-step1[25] + step1[30]);
|
| - step2[26] = WRAPLOW(-step1[26] + step1[29]);
|
| - step2[27] = WRAPLOW(-step1[27] + step1[28]);
|
| - step2[28] = WRAPLOW(step1[27] + step1[28]);
|
| - step2[29] = WRAPLOW(step1[26] + step1[29]);
|
| - step2[30] = WRAPLOW(step1[25] + step1[30]);
|
| - step2[31] = WRAPLOW(step1[24] + step1[31]);
|
| + step2[24] = WRAPLOW(-step1[24] + step1[31], bd);
|
| + step2[25] = WRAPLOW(-step1[25] + step1[30], bd);
|
| + step2[26] = WRAPLOW(-step1[26] + step1[29], bd);
|
| + step2[27] = WRAPLOW(-step1[27] + step1[28], bd);
|
| + step2[28] = WRAPLOW(step1[27] + step1[28], bd);
|
| + step2[29] = WRAPLOW(step1[26] + step1[29], bd);
|
| + step2[30] = WRAPLOW(step1[25] + step1[30], bd);
|
| + step2[31] = WRAPLOW(step1[24] + step1[31], bd);
|
|
|
| // stage 7
|
| - step1[0] = WRAPLOW(step2[0] + step2[15]);
|
| - step1[1] = WRAPLOW(step2[1] + step2[14]);
|
| - step1[2] = WRAPLOW(step2[2] + step2[13]);
|
| - step1[3] = WRAPLOW(step2[3] + step2[12]);
|
| - step1[4] = WRAPLOW(step2[4] + step2[11]);
|
| - step1[5] = WRAPLOW(step2[5] + step2[10]);
|
| - step1[6] = WRAPLOW(step2[6] + step2[9]);
|
| - step1[7] = WRAPLOW(step2[7] + step2[8]);
|
| - step1[8] = WRAPLOW(step2[7] - step2[8]);
|
| - step1[9] = WRAPLOW(step2[6] - step2[9]);
|
| - step1[10] = WRAPLOW(step2[5] - step2[10]);
|
| - step1[11] = WRAPLOW(step2[4] - step2[11]);
|
| - step1[12] = WRAPLOW(step2[3] - step2[12]);
|
| - step1[13] = WRAPLOW(step2[2] - step2[13]);
|
| - step1[14] = WRAPLOW(step2[1] - step2[14]);
|
| - step1[15] = WRAPLOW(step2[0] - step2[15]);
|
| + step1[0] = WRAPLOW(step2[0] + step2[15], bd);
|
| + step1[1] = WRAPLOW(step2[1] + step2[14], bd);
|
| + step1[2] = WRAPLOW(step2[2] + step2[13], bd);
|
| + step1[3] = WRAPLOW(step2[3] + step2[12], bd);
|
| + step1[4] = WRAPLOW(step2[4] + step2[11], bd);
|
| + step1[5] = WRAPLOW(step2[5] + step2[10], bd);
|
| + step1[6] = WRAPLOW(step2[6] + step2[9], bd);
|
| + step1[7] = WRAPLOW(step2[7] + step2[8], bd);
|
| + step1[8] = WRAPLOW(step2[7] - step2[8], bd);
|
| + step1[9] = WRAPLOW(step2[6] - step2[9], bd);
|
| + step1[10] = WRAPLOW(step2[5] - step2[10], bd);
|
| + step1[11] = WRAPLOW(step2[4] - step2[11], bd);
|
| + step1[12] = WRAPLOW(step2[3] - step2[12], bd);
|
| + step1[13] = WRAPLOW(step2[2] - step2[13], bd);
|
| + step1[14] = WRAPLOW(step2[1] - step2[14], bd);
|
| + step1[15] = WRAPLOW(step2[0] - step2[15], bd);
|
|
|
| step1[16] = step2[16];
|
| step1[17] = step2[17];
|
| @@ -2650,20 +2672,20 @@
|
| step1[19] = step2[19];
|
| temp1 = (-step2[20] + step2[27]) * cospi_16_64;
|
| temp2 = (step2[20] + step2[27]) * cospi_16_64;
|
| - step1[20] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[27] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = (-step2[21] + step2[26]) * cospi_16_64;
|
| temp2 = (step2[21] + step2[26]) * cospi_16_64;
|
| - step1[21] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[26] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = (-step2[22] + step2[25]) * cospi_16_64;
|
| temp2 = (step2[22] + step2[25]) * cospi_16_64;
|
| - step1[22] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[25] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| temp1 = (-step2[23] + step2[24]) * cospi_16_64;
|
| temp2 = (step2[23] + step2[24]) * cospi_16_64;
|
| - step1[23] = WRAPLOW(dct_const_round_shift(temp1));
|
| - step1[24] = WRAPLOW(dct_const_round_shift(temp2));
|
| + step1[23] = WRAPLOW(dct_const_round_shift(temp1), bd);
|
| + step1[24] = WRAPLOW(dct_const_round_shift(temp2), bd);
|
| step1[28] = step2[28];
|
| step1[29] = step2[29];
|
| step1[30] = step2[30];
|
| @@ -2670,42 +2692,42 @@
|
| step1[31] = step2[31];
|
|
|
| // final stage
|
| - output[0] = WRAPLOW(step1[0] + step1[31]);
|
| - output[1] = WRAPLOW(step1[1] + step1[30]);
|
| - output[2] = WRAPLOW(step1[2] + step1[29]);
|
| - output[3] = WRAPLOW(step1[3] + step1[28]);
|
| - output[4] = WRAPLOW(step1[4] + step1[27]);
|
| - output[5] = WRAPLOW(step1[5] + step1[26]);
|
| - output[6] = WRAPLOW(step1[6] + step1[25]);
|
| - output[7] = WRAPLOW(step1[7] + step1[24]);
|
| - output[8] = WRAPLOW(step1[8] + step1[23]);
|
| - output[9] = WRAPLOW(step1[9] + step1[22]);
|
| - output[10] = WRAPLOW(step1[10] + step1[21]);
|
| - output[11] = WRAPLOW(step1[11] + step1[20]);
|
| - output[12] = WRAPLOW(step1[12] + step1[19]);
|
| - output[13] = WRAPLOW(step1[13] + step1[18]);
|
| - output[14] = WRAPLOW(step1[14] + step1[17]);
|
| - output[15] = WRAPLOW(step1[15] + step1[16]);
|
| - output[16] = WRAPLOW(step1[15] - step1[16]);
|
| - output[17] = WRAPLOW(step1[14] - step1[17]);
|
| - output[18] = WRAPLOW(step1[13] - step1[18]);
|
| - output[19] = WRAPLOW(step1[12] - step1[19]);
|
| - output[20] = WRAPLOW(step1[11] - step1[20]);
|
| - output[21] = WRAPLOW(step1[10] - step1[21]);
|
| - output[22] = WRAPLOW(step1[9] - step1[22]);
|
| - output[23] = WRAPLOW(step1[8] - step1[23]);
|
| - output[24] = WRAPLOW(step1[7] - step1[24]);
|
| - output[25] = WRAPLOW(step1[6] - step1[25]);
|
| - output[26] = WRAPLOW(step1[5] - step1[26]);
|
| - output[27] = WRAPLOW(step1[4] - step1[27]);
|
| - output[28] = WRAPLOW(step1[3] - step1[28]);
|
| - output[29] = WRAPLOW(step1[2] - step1[29]);
|
| - output[30] = WRAPLOW(step1[1] - step1[30]);
|
| - output[31] = WRAPLOW(step1[0] - step1[31]);
|
| + output[0] = WRAPLOW(step1[0] + step1[31], bd);
|
| + output[1] = WRAPLOW(step1[1] + step1[30], bd);
|
| + output[2] = WRAPLOW(step1[2] + step1[29], bd);
|
| + output[3] = WRAPLOW(step1[3] + step1[28], bd);
|
| + output[4] = WRAPLOW(step1[4] + step1[27], bd);
|
| + output[5] = WRAPLOW(step1[5] + step1[26], bd);
|
| + output[6] = WRAPLOW(step1[6] + step1[25], bd);
|
| + output[7] = WRAPLOW(step1[7] + step1[24], bd);
|
| + output[8] = WRAPLOW(step1[8] + step1[23], bd);
|
| + output[9] = WRAPLOW(step1[9] + step1[22], bd);
|
| + output[10] = WRAPLOW(step1[10] + step1[21], bd);
|
| + output[11] = WRAPLOW(step1[11] + step1[20], bd);
|
| + output[12] = WRAPLOW(step1[12] + step1[19], bd);
|
| + output[13] = WRAPLOW(step1[13] + step1[18], bd);
|
| + output[14] = WRAPLOW(step1[14] + step1[17], bd);
|
| + output[15] = WRAPLOW(step1[15] + step1[16], bd);
|
| + output[16] = WRAPLOW(step1[15] - step1[16], bd);
|
| + output[17] = WRAPLOW(step1[14] - step1[17], bd);
|
| + output[18] = WRAPLOW(step1[13] - step1[18], bd);
|
| + output[19] = WRAPLOW(step1[12] - step1[19], bd);
|
| + output[20] = WRAPLOW(step1[11] - step1[20], bd);
|
| + output[21] = WRAPLOW(step1[10] - step1[21], bd);
|
| + output[22] = WRAPLOW(step1[9] - step1[22], bd);
|
| + output[23] = WRAPLOW(step1[8] - step1[23], bd);
|
| + output[24] = WRAPLOW(step1[7] - step1[24], bd);
|
| + output[25] = WRAPLOW(step1[6] - step1[25], bd);
|
| + output[26] = WRAPLOW(step1[5] - step1[26], bd);
|
| + output[27] = WRAPLOW(step1[4] - step1[27], bd);
|
| + output[28] = WRAPLOW(step1[3] - step1[28], bd);
|
| + output[29] = WRAPLOW(step1[2] - step1[29], bd);
|
| + output[30] = WRAPLOW(step1[1] - step1[30], bd);
|
| + output[31] = WRAPLOW(step1[0] - step1[31], bd);
|
| }
|
|
|
| -void vp9_high_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int stride, int bd) {
|
| +void vp9_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int stride, int bd) {
|
| tran_low_t out[32 * 32];
|
| tran_low_t *outptr = out;
|
| int i, j;
|
| @@ -2725,7 +2747,7 @@
|
| zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
|
|
|
| if (zero_coeff[0] | zero_coeff[1])
|
| - high_idct32(input, outptr, bd);
|
| + highbd_idct32(input, outptr, bd);
|
| else
|
| vpx_memset(outptr, 0, sizeof(tran_low_t) * 32);
|
| input += 32;
|
| @@ -2736,15 +2758,16 @@
|
| for (i = 0; i < 32; ++i) {
|
| for (j = 0; j < 32; ++j)
|
| temp_in[j] = out[j * 32 + i];
|
| - high_idct32(temp_in, temp_out, bd);
|
| - for (j = 0; j < 32; ++j)
|
| - dest[j * stride + i] = clip_pixel_bd_high(
|
| + highbd_idct32(temp_in, temp_out, bd);
|
| + for (j = 0; j < 32; ++j) {
|
| + dest[j * stride + i] = highbd_clip_pixel_add(
|
| dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
|
| + }
|
| }
|
| }
|
|
|
| -void vp9_high_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int stride, int bd) {
|
| +void vp9_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int stride, int bd) {
|
| tran_low_t out[32 * 32] = {0};
|
| tran_low_t *outptr = out;
|
| int i, j;
|
| @@ -2754,7 +2777,7 @@
|
| // Rows
|
| // Only upper-left 8x8 has non-zero coeff.
|
| for (i = 0; i < 8; ++i) {
|
| - high_idct32(input, outptr, bd);
|
| + highbd_idct32(input, outptr, bd);
|
| input += 32;
|
| outptr += 32;
|
| }
|
| @@ -2762,50 +2785,51 @@
|
| for (i = 0; i < 32; ++i) {
|
| for (j = 0; j < 32; ++j)
|
| temp_in[j] = out[j * 32 + i];
|
| - high_idct32(temp_in, temp_out, bd);
|
| - for (j = 0; j < 32; ++j)
|
| - dest[j * stride + i] = clip_pixel_bd_high(
|
| + highbd_idct32(temp_in, temp_out, bd);
|
| + for (j = 0; j < 32; ++j) {
|
| + dest[j * stride + i] = highbd_clip_pixel_add(
|
| dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
|
| + }
|
| }
|
| }
|
|
|
| -void vp9_high_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8,
|
| - int stride, int bd) {
|
| +void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8,
|
| + int stride, int bd) {
|
| int i, j;
|
| int a1;
|
| uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
|
|
|
| - tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64));
|
| - out = WRAPLOW(dct_const_round_shift(out * cospi_16_64));
|
| + tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd);
|
| + out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd);
|
| a1 = ROUND_POWER_OF_TWO(out, 6);
|
|
|
| for (j = 0; j < 32; ++j) {
|
| for (i = 0; i < 32; ++i)
|
| - dest[i] = clip_pixel_bd_high(dest[i], a1, bd);
|
| + dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
|
| dest += stride;
|
| }
|
| }
|
|
|
| // idct
|
| -void vp9_high_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| - int eob, int bd) {
|
| +void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| + int eob, int bd) {
|
| if (eob > 1)
|
| - vp9_high_idct4x4_16_add(input, dest, stride, bd);
|
| + vp9_highbd_idct4x4_16_add(input, dest, stride, bd);
|
| else
|
| - vp9_high_idct4x4_1_add(input, dest, stride, bd);
|
| + vp9_highbd_idct4x4_1_add(input, dest, stride, bd);
|
| }
|
|
|
|
|
| -void vp9_high_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| - int eob, int bd) {
|
| +void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| + int eob, int bd) {
|
| if (eob > 1)
|
| - vp9_high_iwht4x4_16_add(input, dest, stride, bd);
|
| + vp9_highbd_iwht4x4_16_add(input, dest, stride, bd);
|
| else
|
| - vp9_high_iwht4x4_1_add(input, dest, stride, bd);
|
| + vp9_highbd_iwht4x4_1_add(input, dest, stride, bd);
|
| }
|
|
|
| -void vp9_high_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| - int eob, int bd) {
|
| +void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| + int eob, int bd) {
|
| // If dc is 1, then input[0] is the reconstructed value, do not need
|
| // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
|
|
|
| @@ -2815,64 +2839,64 @@
|
| // Combine that with code here.
|
| // DC only DCT coefficient
|
| if (eob == 1) {
|
| - vp9_high_idct8x8_1_add(input, dest, stride, bd);
|
| + vp9_highbd_idct8x8_1_add(input, dest, stride, bd);
|
| } else if (eob <= 10) {
|
| - vp9_high_idct8x8_10_add(input, dest, stride, bd);
|
| + vp9_highbd_idct8x8_10_add(input, dest, stride, bd);
|
| } else {
|
| - vp9_high_idct8x8_64_add(input, dest, stride, bd);
|
| + vp9_highbd_idct8x8_64_add(input, dest, stride, bd);
|
| }
|
| }
|
|
|
| -void vp9_high_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| - int eob, int bd) {
|
| +void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
|
| + int stride, int eob, int bd) {
|
| // The calculation can be simplified if there are not many non-zero dct
|
| // coefficients. Use eobs to separate different cases.
|
| // DC only DCT coefficient.
|
| if (eob == 1) {
|
| - vp9_high_idct16x16_1_add(input, dest, stride, bd);
|
| + vp9_highbd_idct16x16_1_add(input, dest, stride, bd);
|
| } else if (eob <= 10) {
|
| - vp9_high_idct16x16_10_add(input, dest, stride, bd);
|
| + vp9_highbd_idct16x16_10_add(input, dest, stride, bd);
|
| } else {
|
| - vp9_high_idct16x16_256_add(input, dest, stride, bd);
|
| + vp9_highbd_idct16x16_256_add(input, dest, stride, bd);
|
| }
|
| }
|
|
|
| -void vp9_high_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| - int eob, int bd) {
|
| +void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
|
| + int stride, int eob, int bd) {
|
| // Non-zero coeff only in upper-left 8x8
|
| if (eob == 1) {
|
| - vp9_high_idct32x32_1_add(input, dest, stride, bd);
|
| + vp9_highbd_idct32x32_1_add(input, dest, stride, bd);
|
| } else if (eob <= 34) {
|
| - vp9_high_idct32x32_34_add(input, dest, stride, bd);
|
| + vp9_highbd_idct32x32_34_add(input, dest, stride, bd);
|
| } else {
|
| - vp9_high_idct32x32_1024_add(input, dest, stride, bd);
|
| + vp9_highbd_idct32x32_1024_add(input, dest, stride, bd);
|
| }
|
| }
|
|
|
| // iht
|
| -void vp9_high_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
|
| - uint8_t *dest, int stride, int eob, int bd) {
|
| +void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
|
| + uint8_t *dest, int stride, int eob, int bd) {
|
| if (tx_type == DCT_DCT)
|
| - vp9_high_idct4x4_add(input, dest, stride, eob, bd);
|
| + vp9_highbd_idct4x4_add(input, dest, stride, eob, bd);
|
| else
|
| - vp9_high_iht4x4_16_add(input, dest, stride, tx_type, bd);
|
| + vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
|
| }
|
|
|
| -void vp9_high_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
|
| - uint8_t *dest, int stride, int eob, int bd) {
|
| +void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
|
| + uint8_t *dest, int stride, int eob, int bd) {
|
| if (tx_type == DCT_DCT) {
|
| - vp9_high_idct8x8_add(input, dest, stride, eob, bd);
|
| + vp9_highbd_idct8x8_add(input, dest, stride, eob, bd);
|
| } else {
|
| - vp9_high_iht8x8_64_add(input, dest, stride, tx_type, bd);
|
| + vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
|
| }
|
| }
|
|
|
| -void vp9_high_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
|
| +void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
|
| uint8_t *dest, int stride, int eob, int bd) {
|
| if (tx_type == DCT_DCT) {
|
| - vp9_high_idct16x16_add(input, dest, stride, eob, bd);
|
| + vp9_highbd_idct16x16_add(input, dest, stride, eob, bd);
|
| } else {
|
| - vp9_high_iht16x16_256_add(input, dest, stride, tx_type, bd);
|
| + vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
|
| }
|
| }
|
| #endif // CONFIG_VP9_HIGHBITDEPTH
|
|
|