| Index: source/libvpx/vp9/common/vp9_idct.c
|
| ===================================================================
|
| --- source/libvpx/vp9/common/vp9_idct.c (revision 232232)
|
| +++ source/libvpx/vp9/common/vp9_idct.c (working copy)
|
| @@ -18,20 +18,20 @@
|
| #include "vp9/common/vp9_common.h"
|
| #include "vp9/common/vp9_idct.h"
|
|
|
| -void vp9_short_iwalsh4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
| +void vp9_iwht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
|
| /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
|
| 0.5 shifts per pixel. */
|
| int i;
|
| int16_t output[16];
|
| int a1, b1, c1, d1, e1;
|
| - int16_t *ip = input;
|
| + const int16_t *ip = input;
|
| int16_t *op = output;
|
|
|
| for (i = 0; i < 4; i++) {
|
| - a1 = ip[0] >> WHT_UPSCALE_FACTOR;
|
| - c1 = ip[1] >> WHT_UPSCALE_FACTOR;
|
| - d1 = ip[2] >> WHT_UPSCALE_FACTOR;
|
| - b1 = ip[3] >> WHT_UPSCALE_FACTOR;
|
| + a1 = ip[0] >> UNIT_QUANT_SHIFT;
|
| + c1 = ip[1] >> UNIT_QUANT_SHIFT;
|
| + d1 = ip[2] >> UNIT_QUANT_SHIFT;
|
| + b1 = ip[3] >> UNIT_QUANT_SHIFT;
|
| a1 += c1;
|
| d1 -= b1;
|
| e1 = (a1 - d1) >> 1;
|
| @@ -60,24 +60,24 @@
|
| c1 = e1 - c1;
|
| a1 -= b1;
|
| d1 += c1;
|
| - dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1);
|
| - dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + b1);
|
| - dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + c1);
|
| - dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + d1);
|
| + dest[stride * 0] = clip_pixel(dest[stride * 0] + a1);
|
| + dest[stride * 1] = clip_pixel(dest[stride * 1] + b1);
|
| + dest[stride * 2] = clip_pixel(dest[stride * 2] + c1);
|
| + dest[stride * 3] = clip_pixel(dest[stride * 3] + d1);
|
|
|
| ip++;
|
| dest++;
|
| }
|
| }
|
|
|
| -void vp9_short_iwalsh4x4_1_add_c(int16_t *in, uint8_t *dest, int dest_stride) {
|
| +void vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) {
|
| int i;
|
| int a1, e1;
|
| int16_t tmp[4];
|
| - int16_t *ip = in;
|
| + const int16_t *ip = in;
|
| int16_t *op = tmp;
|
|
|
| - a1 = ip[0] >> WHT_UPSCALE_FACTOR;
|
| + a1 = ip[0] >> UNIT_QUANT_SHIFT;
|
| e1 = a1 >> 1;
|
| a1 -= e1;
|
| op[0] = a1;
|
| @@ -96,7 +96,7 @@
|
| }
|
| }
|
|
|
| -void vp9_idct4_1d_c(int16_t *input, int16_t *output) {
|
| +static void idct4_1d(const int16_t *input, int16_t *output) {
|
| int16_t step[4];
|
| int temp1, temp2;
|
| // stage 1
|
| @@ -116,7 +116,7 @@
|
| output[3] = step[0] - step[3];
|
| }
|
|
|
| -void vp9_short_idct4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
| +void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
|
| int16_t out[4 * 4];
|
| int16_t *outptr = out;
|
| int i, j;
|
| @@ -124,7 +124,7 @@
|
|
|
| // Rows
|
| for (i = 0; i < 4; ++i) {
|
| - vp9_idct4_1d(input, outptr);
|
| + idct4_1d(input, outptr);
|
| input += 4;
|
| outptr += 4;
|
| }
|
| @@ -133,14 +133,14 @@
|
| for (i = 0; i < 4; ++i) {
|
| for (j = 0; j < 4; ++j)
|
| temp_in[j] = out[j * 4 + i];
|
| - vp9_idct4_1d(temp_in, temp_out);
|
| + idct4_1d(temp_in, temp_out);
|
| for (j = 0; j < 4; ++j)
|
| - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
|
| - + dest[j * dest_stride + i]);
|
| + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
|
| + + dest[j * stride + i]);
|
| }
|
| }
|
|
|
| -void vp9_short_idct4x4_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
| +void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride) {
|
| int i;
|
| int a1;
|
| int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
|
| @@ -156,7 +156,7 @@
|
| }
|
| }
|
|
|
| -static void idct8_1d(int16_t *input, int16_t *output) {
|
| +static void idct8_1d(const int16_t *input, int16_t *output) {
|
| int16_t step1[8], step2[8];
|
| int temp1, temp2;
|
| // stage 1
|
| @@ -174,7 +174,7 @@
|
| step1[6] = dct_const_round_shift(temp2);
|
|
|
| // stage 2 & stage 3 - even half
|
| - vp9_idct4_1d(step1, step1);
|
| + idct4_1d(step1, step1);
|
|
|
| // stage 2 - odd half
|
| step2[4] = step1[4] + step1[5];
|
| @@ -201,7 +201,7 @@
|
| output[7] = step1[0] - step1[7];
|
| }
|
|
|
| -void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
| +void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) {
|
| int16_t out[8 * 8];
|
| int16_t *outptr = out;
|
| int i, j;
|
| @@ -220,12 +220,12 @@
|
| temp_in[j] = out[j * 8 + i];
|
| idct8_1d(temp_in, temp_out);
|
| for (j = 0; j < 8; ++j)
|
| - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
|
| - + dest[j * dest_stride + i]);
|
| + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
|
| + + dest[j * stride + i]);
|
| }
|
| }
|
|
|
| -void vp9_short_idct8x8_1_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
| +void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
|
| int i, j;
|
| int a1;
|
| int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
|
| @@ -234,11 +234,11 @@
|
| for (j = 0; j < 8; ++j) {
|
| for (i = 0; i < 8; ++i)
|
| dest[i] = clip_pixel(dest[i] + a1);
|
| - dest += dest_stride;
|
| + dest += stride;
|
| }
|
| }
|
|
|
| -static void iadst4_1d(int16_t *input, int16_t *output) {
|
| +static void iadst4_1d(const int16_t *input, int16_t *output) {
|
| int s0, s1, s2, s3, s4, s5, s6, s7;
|
|
|
| int x0 = input[0];
|
| @@ -280,13 +280,13 @@
|
| output[3] = dct_const_round_shift(s3);
|
| }
|
|
|
| -void vp9_short_iht4x4_add_c(int16_t *input, uint8_t *dest, int dest_stride,
|
| - int tx_type) {
|
| +void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride,
|
| + int tx_type) {
|
| const transform_2d IHT_4[] = {
|
| - { vp9_idct4_1d, vp9_idct4_1d }, // DCT_DCT = 0
|
| - { iadst4_1d, vp9_idct4_1d }, // ADST_DCT = 1
|
| - { vp9_idct4_1d, iadst4_1d }, // DCT_ADST = 2
|
| - { iadst4_1d, iadst4_1d } // ADST_ADST = 3
|
| + { idct4_1d, idct4_1d }, // DCT_DCT = 0
|
| + { iadst4_1d, idct4_1d }, // ADST_DCT = 1
|
| + { idct4_1d, iadst4_1d }, // DCT_ADST = 2
|
| + { iadst4_1d, iadst4_1d } // ADST_ADST = 3
|
| };
|
|
|
| int i, j;
|
| @@ -307,11 +307,11 @@
|
| temp_in[j] = out[j * 4 + i];
|
| IHT_4[tx_type].cols(temp_in, temp_out);
|
| for (j = 0; j < 4; ++j)
|
| - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
|
| - + dest[j * dest_stride + i]);
|
| + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4)
|
| + + dest[j * stride + i]);
|
| }
|
| }
|
| -static void iadst8_1d(int16_t *input, int16_t *output) {
|
| +static void iadst8_1d(const int16_t *input, int16_t *output) {
|
| int s0, s1, s2, s3, s4, s5, s6, s7;
|
|
|
| int x0 = input[7];
|
| @@ -395,8 +395,8 @@
|
| { iadst8_1d, iadst8_1d } // ADST_ADST = 3
|
| };
|
|
|
| -void vp9_short_iht8x8_add_c(int16_t *input, uint8_t *dest, int dest_stride,
|
| - int tx_type) {
|
| +void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride,
|
| + int tx_type) {
|
| int i, j;
|
| int16_t out[8 * 8];
|
| int16_t *outptr = out;
|
| @@ -416,12 +416,12 @@
|
| temp_in[j] = out[j * 8 + i];
|
| ht.cols(temp_in, temp_out);
|
| for (j = 0; j < 8; ++j)
|
| - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
|
| - + dest[j * dest_stride + i]); }
|
| + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
|
| + + dest[j * stride + i]);
|
| + }
|
| }
|
|
|
| -void vp9_short_idct10_8x8_add_c(int16_t *input, uint8_t *dest,
|
| - int dest_stride) {
|
| +void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
|
| int16_t out[8 * 8] = { 0 };
|
| int16_t *outptr = out;
|
| int i, j;
|
| @@ -441,12 +441,12 @@
|
| temp_in[j] = out[j * 8 + i];
|
| idct8_1d(temp_in, temp_out);
|
| for (j = 0; j < 8; ++j)
|
| - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
|
| - + dest[j * dest_stride + i]);
|
| + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5)
|
| + + dest[j * stride + i]);
|
| }
|
| }
|
|
|
| -static void idct16_1d(int16_t *input, int16_t *output) {
|
| +static void idct16_1d(const int16_t *input, int16_t *output) {
|
| int16_t step1[16], step2[16];
|
| int temp1, temp2;
|
|
|
| @@ -611,7 +611,7 @@
|
| output[15] = step2[0] - step2[15];
|
| }
|
|
|
| -void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
| +void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) {
|
| int16_t out[16 * 16];
|
| int16_t *outptr = out;
|
| int i, j;
|
| @@ -630,12 +630,12 @@
|
| temp_in[j] = out[j * 16 + i];
|
| idct16_1d(temp_in, temp_out);
|
| for (j = 0; j < 16; ++j)
|
| - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
|
| - + dest[j * dest_stride + i]);
|
| + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
|
| + + dest[j * stride + i]);
|
| }
|
| }
|
|
|
| -void iadst16_1d(int16_t *input, int16_t *output) {
|
| +static void iadst16_1d(const int16_t *input, int16_t *output) {
|
| int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;
|
|
|
| int x0 = input[15];
|
| @@ -813,8 +813,8 @@
|
| { iadst16_1d, iadst16_1d } // ADST_ADST = 3
|
| };
|
|
|
| -void vp9_short_iht16x16_add_c(int16_t *input, uint8_t *dest, int dest_stride,
|
| - int tx_type) {
|
| +void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride,
|
| + int tx_type) {
|
| int i, j;
|
| int16_t out[16 * 16];
|
| int16_t *outptr = out;
|
| @@ -834,12 +834,11 @@
|
| temp_in[j] = out[j * 16 + i];
|
| ht.cols(temp_in, temp_out);
|
| for (j = 0; j < 16; ++j)
|
| - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
|
| - + dest[j * dest_stride + i]); }
|
| + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
|
| + + dest[j * stride + i]); }
|
| }
|
|
|
| -void vp9_short_idct10_16x16_add_c(int16_t *input, uint8_t *dest,
|
| - int dest_stride) {
|
| +void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
|
| int16_t out[16 * 16] = { 0 };
|
| int16_t *outptr = out;
|
| int i, j;
|
| @@ -859,13 +858,12 @@
|
| temp_in[j] = out[j*16 + i];
|
| idct16_1d(temp_in, temp_out);
|
| for (j = 0; j < 16; ++j)
|
| - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
|
| - + dest[j * dest_stride + i]);
|
| + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
|
| + + dest[j * stride + i]);
|
| }
|
| }
|
|
|
| -void vp9_short_idct16x16_1_add_c(int16_t *input, uint8_t *dest,
|
| - int dest_stride) {
|
| +void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
|
| int i, j;
|
| int a1;
|
| int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
|
| @@ -874,11 +872,11 @@
|
| for (j = 0; j < 16; ++j) {
|
| for (i = 0; i < 16; ++i)
|
| dest[i] = clip_pixel(dest[i] + a1);
|
| - dest += dest_stride;
|
| + dest += stride;
|
| }
|
| }
|
|
|
| -static void idct32_1d(int16_t *input, int16_t *output) {
|
| +static void idct32_1d(const int16_t *input, int16_t *output) {
|
| int16_t step1[32], step2[32];
|
| int temp1, temp2;
|
|
|
| @@ -1245,7 +1243,7 @@
|
| output[31] = step1[0] - step1[31];
|
| }
|
|
|
| -void vp9_short_idct32x32_add_c(int16_t *input, uint8_t *dest, int dest_stride) {
|
| +void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
|
| int16_t out[32 * 32];
|
| int16_t *outptr = out;
|
| int i, j;
|
| @@ -1253,6 +1251,44 @@
|
|
|
| // Rows
|
| for (i = 0; i < 32; ++i) {
|
| + int16_t zero_coeff[16];
|
| + for (j = 0; j < 16; ++j)
|
| + zero_coeff[j] = input[2 * j] | input[2 * j + 1];
|
| + for (j = 0; j < 8; ++j)
|
| + zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
|
| + for (j = 0; j < 4; ++j)
|
| + zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
|
| + for (j = 0; j < 2; ++j)
|
| + zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1];
|
| +
|
| + if (zero_coeff[0] | zero_coeff[1])
|
| + idct32_1d(input, outptr);
|
| + else
|
| + vpx_memset(outptr, 0, sizeof(int16_t) * 32);
|
| + input += 32;
|
| + outptr += 32;
|
| + }
|
| +
|
| + // Columns
|
| + for (i = 0; i < 32; ++i) {
|
| + for (j = 0; j < 32; ++j)
|
| + temp_in[j] = out[j * 32 + i];
|
| + idct32_1d(temp_in, temp_out);
|
| + for (j = 0; j < 32; ++j)
|
| + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
|
| + + dest[j * stride + i]);
|
| + }
|
| +}
|
| +
|
| +void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) {
|
| + int16_t out[32 * 32] = {0};
|
| + int16_t *outptr = out;
|
| + int i, j;
|
| + int16_t temp_in[32], temp_out[32];
|
| +
|
| + // Rows
|
| + // only upper-left 8x8 has non-zero coeff
|
| + for (i = 0; i < 8; ++i) {
|
| idct32_1d(input, outptr);
|
| input += 32;
|
| outptr += 32;
|
| @@ -1264,13 +1300,116 @@
|
| temp_in[j] = out[j * 32 + i];
|
| idct32_1d(temp_in, temp_out);
|
| for (j = 0; j < 32; ++j)
|
| - dest[j * dest_stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
|
| - + dest[j * dest_stride + i]);
|
| + dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6)
|
| + + dest[j * stride + i]);
|
| }
|
| }
|
|
|
| -void vp9_short_idct1_32x32_c(int16_t *input, int16_t *output) {
|
| +void vp9_idct32x32_1_add_c(const int16_t *input, uint8_t *dest, int stride) {
|
| + int i, j;
|
| + int a1;
|
| +
|
| int16_t out = dct_const_round_shift(input[0] * cospi_16_64);
|
| out = dct_const_round_shift(out * cospi_16_64);
|
| - output[0] = ROUND_POWER_OF_TWO(out, 6);
|
| + a1 = ROUND_POWER_OF_TWO(out, 6);
|
| +
|
| + for (j = 0; j < 32; ++j) {
|
| + for (i = 0; i < 32; ++i)
|
| + dest[i] = clip_pixel(dest[i] + a1);
|
| + dest += stride;
|
| + }
|
| }
|
| +
|
| +// idct
|
| +void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) {
|
| + if (eob > 1)
|
| + vp9_idct4x4_16_add(input, dest, stride);
|
| + else
|
| + vp9_idct4x4_1_add(input, dest, stride);
|
| +}
|
| +
|
| +
|
| +void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) {
|
| + if (eob > 1)
|
| + vp9_iwht4x4_16_add(input, dest, stride);
|
| + else
|
| + vp9_iwht4x4_1_add(input, dest, stride);
|
| +}
|
| +
|
| +void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob) {
|
| + // If dc is 1, then input[0] is the reconstructed value and does not need
|
| + // dequantization. Also, when dc is 1, dc is counted in eobs, i.e. eobs >= 1.
|
| +
|
| + // The calculation can be simplified if there are not many non-zero dct
|
| + // coefficients. Use eobs to decide what to do.
|
| + // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
|
| + // Combine that with code here.
|
| + if (eob) {
|
| + if (eob == 1)
|
| + // DC only DCT coefficient
|
| + vp9_idct8x8_1_add(input, dest, stride);
|
| + else if (eob <= 10)
|
| + vp9_idct8x8_10_add(input, dest, stride);
|
| + else
|
| + vp9_idct8x8_64_add(input, dest, stride);
|
| + }
|
| +}
|
| +
|
| +void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride,
|
| + int eob) {
|
| + /* The calculation can be simplified if there are not many non-zero dct
|
| + * coefficients. Use eobs to separate different cases. */
|
| + if (eob) {
|
| + if (eob == 1)
|
| + /* DC only DCT coefficient. */
|
| + vp9_idct16x16_1_add(input, dest, stride);
|
| + else if (eob <= 10)
|
| + vp9_idct16x16_10_add(input, dest, stride);
|
| + else
|
| + vp9_idct16x16_256_add(input, dest, stride);
|
| + }
|
| +}
|
| +
|
| +void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride,
|
| + int eob) {
|
| + if (eob) {
|
| + if (eob == 1)
|
| + vp9_idct32x32_1_add(input, dest, stride);
|
| + else if (eob <= 34)
|
| + // non-zero coeff only in upper-left 8x8
|
| + vp9_idct32x32_34_add(input, dest, stride);
|
| + else
|
| + vp9_idct32x32_1024_add(input, dest, stride);
|
| + }
|
| +}
|
| +
|
| +// iht
|
| +void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
|
| + int stride, int eob) {
|
| + if (tx_type == DCT_DCT)
|
| + vp9_idct4x4_add(input, dest, stride, eob);
|
| + else
|
| + vp9_iht4x4_16_add(input, dest, stride, tx_type);
|
| +}
|
| +
|
| +void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
|
| + int stride, int eob) {
|
| + if (tx_type == DCT_DCT) {
|
| + vp9_idct8x8_add(input, dest, stride, eob);
|
| + } else {
|
| + if (eob > 0) {
|
| + vp9_iht8x8_64_add(input, dest, stride, tx_type);
|
| + }
|
| + }
|
| +}
|
| +
|
| +void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
|
| + int stride, int eob) {
|
| + if (tx_type == DCT_DCT) {
|
| + vp9_idct16x16_add(input, dest, stride, eob);
|
| + } else {
|
| + if (eob > 0) {
|
| + vp9_iht16x16_256_add(input, dest, stride, tx_type);
|
| + }
|
| + }
|
| +}
|
|
|