| Index: source/libvpx/vp9/common/vp9_idct.h
|
| ===================================================================
|
| --- source/libvpx/vp9/common/vp9_idct.h (revision 292072)
|
| +++ source/libvpx/vp9/common/vp9_idct.h (working copy)
|
| @@ -36,52 +36,69 @@
|
| #define dual_set_epi16(a, b) \
|
| _mm_set_epi16(b, b, b, b, a, a, a, a)
|
|
|
| +// Note:
|
| +// tran_low_t is the datatype used for final transform coefficients.
|
| +// tran_high_t is the datatype used for intermediate transform stages.
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| +typedef int64_t tran_high_t;
|
| +typedef int32_t tran_low_t;
|
| +#else
|
| +typedef int32_t tran_high_t;
|
| +typedef int16_t tran_low_t;
|
| +#endif
|
| +
|
| // Constants:
|
| // for (int i = 1; i< 32; ++i)
|
| // printf("static const int cospi_%d_64 = %.0f;\n", i,
|
| // round(16384 * cos(i*M_PI/64)));
|
| // Note: sin(k*Pi/64) = cos((32-k)*Pi/64)
|
| -static const int cospi_1_64 = 16364;
|
| -static const int cospi_2_64 = 16305;
|
| -static const int cospi_3_64 = 16207;
|
| -static const int cospi_4_64 = 16069;
|
| -static const int cospi_5_64 = 15893;
|
| -static const int cospi_6_64 = 15679;
|
| -static const int cospi_7_64 = 15426;
|
| -static const int cospi_8_64 = 15137;
|
| -static const int cospi_9_64 = 14811;
|
| -static const int cospi_10_64 = 14449;
|
| -static const int cospi_11_64 = 14053;
|
| -static const int cospi_12_64 = 13623;
|
| -static const int cospi_13_64 = 13160;
|
| -static const int cospi_14_64 = 12665;
|
| -static const int cospi_15_64 = 12140;
|
| -static const int cospi_16_64 = 11585;
|
| -static const int cospi_17_64 = 11003;
|
| -static const int cospi_18_64 = 10394;
|
| -static const int cospi_19_64 = 9760;
|
| -static const int cospi_20_64 = 9102;
|
| -static const int cospi_21_64 = 8423;
|
| -static const int cospi_22_64 = 7723;
|
| -static const int cospi_23_64 = 7005;
|
| -static const int cospi_24_64 = 6270;
|
| -static const int cospi_25_64 = 5520;
|
| -static const int cospi_26_64 = 4756;
|
| -static const int cospi_27_64 = 3981;
|
| -static const int cospi_28_64 = 3196;
|
| -static const int cospi_29_64 = 2404;
|
| -static const int cospi_30_64 = 1606;
|
| -static const int cospi_31_64 = 804;
|
| +static const tran_high_t cospi_1_64 = 16364;
|
| +static const tran_high_t cospi_2_64 = 16305;
|
| +static const tran_high_t cospi_3_64 = 16207;
|
| +static const tran_high_t cospi_4_64 = 16069;
|
| +static const tran_high_t cospi_5_64 = 15893;
|
| +static const tran_high_t cospi_6_64 = 15679;
|
| +static const tran_high_t cospi_7_64 = 15426;
|
| +static const tran_high_t cospi_8_64 = 15137;
|
| +static const tran_high_t cospi_9_64 = 14811;
|
| +static const tran_high_t cospi_10_64 = 14449;
|
| +static const tran_high_t cospi_11_64 = 14053;
|
| +static const tran_high_t cospi_12_64 = 13623;
|
| +static const tran_high_t cospi_13_64 = 13160;
|
| +static const tran_high_t cospi_14_64 = 12665;
|
| +static const tran_high_t cospi_15_64 = 12140;
|
| +static const tran_high_t cospi_16_64 = 11585;
|
| +static const tran_high_t cospi_17_64 = 11003;
|
| +static const tran_high_t cospi_18_64 = 10394;
|
| +static const tran_high_t cospi_19_64 = 9760;
|
| +static const tran_high_t cospi_20_64 = 9102;
|
| +static const tran_high_t cospi_21_64 = 8423;
|
| +static const tran_high_t cospi_22_64 = 7723;
|
| +static const tran_high_t cospi_23_64 = 7005;
|
| +static const tran_high_t cospi_24_64 = 6270;
|
| +static const tran_high_t cospi_25_64 = 5520;
|
| +static const tran_high_t cospi_26_64 = 4756;
|
| +static const tran_high_t cospi_27_64 = 3981;
|
| +static const tran_high_t cospi_28_64 = 3196;
|
| +static const tran_high_t cospi_29_64 = 2404;
|
| +static const tran_high_t cospi_30_64 = 1606;
|
| +static const tran_high_t cospi_31_64 = 804;
|
|
|
| // 16384 * sqrt(2) * sin(kPi/9) * 2 / 3
|
| -static const int sinpi_1_9 = 5283;
|
| -static const int sinpi_2_9 = 9929;
|
| -static const int sinpi_3_9 = 13377;
|
| -static const int sinpi_4_9 = 15212;
|
| +static const tran_high_t sinpi_1_9 = 5283;
|
| +static const tran_high_t sinpi_2_9 = 9929;
|
| +static const tran_high_t sinpi_3_9 = 13377;
|
| +static const tran_high_t sinpi_4_9 = 15212;
|
|
|
| -static INLINE int dct_const_round_shift(int input) {
|
| - int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
|
| -#if CONFIG_COEFFICIENT_RANGE_CHECKING
|
| +static INLINE tran_low_t dct_const_round_shift(tran_high_t input) {
|
| + tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| + // This branch has no range assert. For valid high-bitdepth VP9 streams,
|
| + // intermediate stage coefficients fit in tran_low_t, staying within:
|
| + // - 8 bit: signed 16 bit integer
|
| + // - 10 bit: signed 18 bit integer
|
| + // - 12 bit: signed 20 bit integer
|
| +#elif CONFIG_COEFFICIENT_RANGE_CHECKING
|
| // For valid VP9 input streams, intermediate stage coefficients should always
|
| // stay within the range of a signed 16 bit integer. Coefficients can go out
|
| // of this range for invalid/corrupt VP9 streams. However, strictly checking
|
| @@ -91,32 +108,59 @@
|
| assert(INT16_MIN <= rv);
|
| assert(rv <= INT16_MAX);
|
| #endif
|
| - return (int16_t)rv;
|
| + return (tran_low_t)rv;
|
| }
|
|
|
| -typedef void (*transform_1d)(const int16_t*, int16_t*);
|
| +typedef void (*transform_1d)(const tran_low_t*, tran_low_t*);
|
|
|
| typedef struct {
|
| transform_1d cols, rows; // vertical and horizontal
|
| } transform_2d;
|
|
|
| -void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob);
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| +typedef void (*high_transform_1d)(const tran_low_t*, tran_low_t*, int bd);
|
|
|
| -void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob);
|
| -void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob);
|
| -void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride, int
|
| +typedef struct {
|
| + high_transform_1d cols, rows; // vertical and horizontal
|
| +} high_transform_2d;
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| +
|
| +void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| + int eob);
|
| +void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| + int eob);
|
| +void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| + int eob);
|
| +void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, int
|
| eob);
|
| -void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride,
|
| +void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| int eob);
|
|
|
| -void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
|
| +void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
| int stride, int eob);
|
| -void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
|
| +void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
| int stride, int eob);
|
| -void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest,
|
| +void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
|
| int stride, int eob);
|
|
|
| -
|
| +#if CONFIG_VP9_HIGHBITDEPTH
|
| +void vp9_high_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| + int eob, int bd);
|
| +void vp9_high_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| + int eob, int bd);
|
| +void vp9_high_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| + int eob, int bd);
|
| +void vp9_high_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| + int eob, int bd);
|
| +void vp9_high_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
|
| + int eob, int bd);
|
| +void vp9_high_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
|
| + uint8_t *dest, int stride, int eob, int bd);
|
| +void vp9_high_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
|
| + uint8_t *dest, int stride, int eob, int bd);
|
| +void vp9_high_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
|
| + uint8_t *dest, int stride, int eob, int bd);
|
| +#endif // CONFIG_VP9_HIGHBITDEPTH
|
| #ifdef __cplusplus
|
| } // extern "C"
|
| #endif
|
|
|