Index: source/libvpx/vp9/common/vp9_idct.h |
=================================================================== |
--- source/libvpx/vp9/common/vp9_idct.h (revision 292072) |
+++ source/libvpx/vp9/common/vp9_idct.h (working copy) |
@@ -36,52 +36,69 @@ |
#define dual_set_epi16(a, b) \ |
_mm_set_epi16(b, b, b, b, a, a, a, a) |
+// Note: |
+// tran_low_t is the datatype used for final transform coefficients. |
+// tran_high_t is the datatype used for intermediate transform stages. |
+#if CONFIG_VP9_HIGHBITDEPTH |
+typedef int64_t tran_high_t; |
+typedef int32_t tran_low_t; |
+#else |
+typedef int32_t tran_high_t; |
+typedef int16_t tran_low_t; |
+#endif |
+ |
// Constants: |
// for (int i = 1; i< 32; ++i) |
// printf("static const int cospi_%d_64 = %.0f;\n", i, |
// round(16384 * cos(i*M_PI/64))); |
// Note: sin(k*Pi/64) = cos((32-k)*Pi/64) |
-static const int cospi_1_64 = 16364; |
-static const int cospi_2_64 = 16305; |
-static const int cospi_3_64 = 16207; |
-static const int cospi_4_64 = 16069; |
-static const int cospi_5_64 = 15893; |
-static const int cospi_6_64 = 15679; |
-static const int cospi_7_64 = 15426; |
-static const int cospi_8_64 = 15137; |
-static const int cospi_9_64 = 14811; |
-static const int cospi_10_64 = 14449; |
-static const int cospi_11_64 = 14053; |
-static const int cospi_12_64 = 13623; |
-static const int cospi_13_64 = 13160; |
-static const int cospi_14_64 = 12665; |
-static const int cospi_15_64 = 12140; |
-static const int cospi_16_64 = 11585; |
-static const int cospi_17_64 = 11003; |
-static const int cospi_18_64 = 10394; |
-static const int cospi_19_64 = 9760; |
-static const int cospi_20_64 = 9102; |
-static const int cospi_21_64 = 8423; |
-static const int cospi_22_64 = 7723; |
-static const int cospi_23_64 = 7005; |
-static const int cospi_24_64 = 6270; |
-static const int cospi_25_64 = 5520; |
-static const int cospi_26_64 = 4756; |
-static const int cospi_27_64 = 3981; |
-static const int cospi_28_64 = 3196; |
-static const int cospi_29_64 = 2404; |
-static const int cospi_30_64 = 1606; |
-static const int cospi_31_64 = 804; |
+static const tran_high_t cospi_1_64 = 16364; |
+static const tran_high_t cospi_2_64 = 16305; |
+static const tran_high_t cospi_3_64 = 16207; |
+static const tran_high_t cospi_4_64 = 16069; |
+static const tran_high_t cospi_5_64 = 15893; |
+static const tran_high_t cospi_6_64 = 15679; |
+static const tran_high_t cospi_7_64 = 15426; |
+static const tran_high_t cospi_8_64 = 15137; |
+static const tran_high_t cospi_9_64 = 14811; |
+static const tran_high_t cospi_10_64 = 14449; |
+static const tran_high_t cospi_11_64 = 14053; |
+static const tran_high_t cospi_12_64 = 13623; |
+static const tran_high_t cospi_13_64 = 13160; |
+static const tran_high_t cospi_14_64 = 12665; |
+static const tran_high_t cospi_15_64 = 12140; |
+static const tran_high_t cospi_16_64 = 11585; |
+static const tran_high_t cospi_17_64 = 11003; |
+static const tran_high_t cospi_18_64 = 10394; |
+static const tran_high_t cospi_19_64 = 9760; |
+static const tran_high_t cospi_20_64 = 9102; |
+static const tran_high_t cospi_21_64 = 8423; |
+static const tran_high_t cospi_22_64 = 7723; |
+static const tran_high_t cospi_23_64 = 7005; |
+static const tran_high_t cospi_24_64 = 6270; |
+static const tran_high_t cospi_25_64 = 5520; |
+static const tran_high_t cospi_26_64 = 4756; |
+static const tran_high_t cospi_27_64 = 3981; |
+static const tran_high_t cospi_28_64 = 3196; |
+static const tran_high_t cospi_29_64 = 2404; |
+static const tran_high_t cospi_30_64 = 1606; |
+static const tran_high_t cospi_31_64 = 804; |
// 16384 * sqrt(2) * sin(kPi/9) * 2 / 3 |
-static const int sinpi_1_9 = 5283; |
-static const int sinpi_2_9 = 9929; |
-static const int sinpi_3_9 = 13377; |
-static const int sinpi_4_9 = 15212; |
+static const tran_high_t sinpi_1_9 = 5283; |
+static const tran_high_t sinpi_2_9 = 9929; |
+static const tran_high_t sinpi_3_9 = 13377; |
+static const tran_high_t sinpi_4_9 = 15212; |
-static INLINE int dct_const_round_shift(int input) { |
- int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); |
-#if CONFIG_COEFFICIENT_RANGE_CHECKING |
+static INLINE tran_low_t dct_const_round_shift(tran_high_t input) { |
+ tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); |
+#if CONFIG_VP9_HIGHBITDEPTH |
+ // For valid highbitdepth VP9 streams, intermediate stage coefficients will |
+ // stay within the ranges: |
+ // - 8 bit: signed 16 bit integer |
+ // - 10 bit: signed 18 bit integer |
+ // - 12 bit: signed 20 bit integer |
+#elif CONFIG_COEFFICIENT_RANGE_CHECKING |
// For valid VP9 input streams, intermediate stage coefficients should always |
// stay within the range of a signed 16 bit integer. Coefficients can go out |
// of this range for invalid/corrupt VP9 streams. However, strictly checking |
@@ -91,32 +108,59 @@ |
assert(INT16_MIN <= rv); |
assert(rv <= INT16_MAX); |
#endif |
- return (int16_t)rv; |
+ return (tran_low_t)rv; |
} |
-typedef void (*transform_1d)(const int16_t*, int16_t*); |
+typedef void (*transform_1d)(const tran_low_t*, tran_low_t*); |
typedef struct { |
transform_1d cols, rows; // vertical and horizontal |
} transform_2d; |
-void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob); |
+#if CONFIG_VP9_HIGHBITDEPTH |
+typedef void (*high_transform_1d)(const tran_low_t*, tran_low_t*, int bd); |
-void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob); |
-void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob); |
-void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride, int |
+typedef struct { |
+ high_transform_1d cols, rows; // vertical and horizontal |
+} high_transform_2d; |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
+ |
+void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
+ int eob); |
+void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
+ int eob); |
+void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, |
+ int eob); |
+void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, int |
eob); |
-void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride, |
+void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, |
int eob); |
-void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, |
+void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, |
int stride, int eob); |
-void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, |
+void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, |
int stride, int eob); |
-void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, |
+void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, |
int stride, int eob); |
- |
+#if CONFIG_VP9_HIGHBITDEPTH |
+void vp9_high_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
+ int eob, int bd); |
+void vp9_high_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
+ int eob, int bd); |
+void vp9_high_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, |
+ int eob, int bd); |
+void vp9_high_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, |
+ int eob, int bd); |
+void vp9_high_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, |
+ int eob, int bd); |
+void vp9_high_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, |
+ uint8_t *dest, int stride, int eob, int bd); |
+void vp9_high_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, |
+ uint8_t *dest, int stride, int eob, int bd); |
+void vp9_high_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, |
+ uint8_t *dest, int stride, int eob, int bd); |
+#endif // CONFIG_VP9_HIGHBITDEPTH |
#ifdef __cplusplus |
} // extern "C" |
#endif |