OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 18 matching lines...) |
29 | 29 |
30 #define UNIT_QUANT_SHIFT 2 | 30 #define UNIT_QUANT_SHIFT 2 |
31 #define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT) | 31 #define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT) |
32 | 32 |
33 #define pair_set_epi16(a, b) \ | 33 #define pair_set_epi16(a, b) \ |
34 _mm_set_epi16(b, a, b, a, b, a, b, a) | 34 _mm_set_epi16(b, a, b, a, b, a, b, a) |
35 | 35 |
36 #define dual_set_epi16(a, b) \ | 36 #define dual_set_epi16(a, b) \ |
37 _mm_set_epi16(b, b, b, b, a, a, a, a) | 37 _mm_set_epi16(b, b, b, b, a, a, a, a) |
38 | 38 |
| 39 // Note: |
| 40 // tran_low_t is the datatype used for final transform coefficients. |
| 41 // tran_high_t is the datatype used for intermediate transform stages. |
| 42 #if CONFIG_VP9_HIGHBITDEPTH |
| 43 typedef int64_t tran_high_t; |
| 44 typedef int32_t tran_low_t; |
| 45 #else |
| 46 typedef int32_t tran_high_t; |
| 47 typedef int16_t tran_low_t; |
| 48 #endif |
| 49 |
39 // Constants: | 50 // Constants: |
40 // for (int i = 1; i< 32; ++i) | 51 // for (int i = 1; i< 32; ++i) |
41 // printf("static const int cospi_%d_64 = %.0f;\n", i, | 52 // printf("static const int cospi_%d_64 = %.0f;\n", i, |
42 // round(16384 * cos(i*M_PI/64))); | 53 // round(16384 * cos(i*M_PI/64))); |
43 // Note: sin(k*Pi/64) = cos((32-k)*Pi/64) | 54 // Note: sin(k*Pi/64) = cos((32-k)*Pi/64) |
44 static const int cospi_1_64 = 16364; | 55 static const tran_high_t cospi_1_64 = 16364; |
45 static const int cospi_2_64 = 16305; | 56 static const tran_high_t cospi_2_64 = 16305; |
46 static const int cospi_3_64 = 16207; | 57 static const tran_high_t cospi_3_64 = 16207; |
47 static const int cospi_4_64 = 16069; | 58 static const tran_high_t cospi_4_64 = 16069; |
48 static const int cospi_5_64 = 15893; | 59 static const tran_high_t cospi_5_64 = 15893; |
49 static const int cospi_6_64 = 15679; | 60 static const tran_high_t cospi_6_64 = 15679; |
50 static const int cospi_7_64 = 15426; | 61 static const tran_high_t cospi_7_64 = 15426; |
51 static const int cospi_8_64 = 15137; | 62 static const tran_high_t cospi_8_64 = 15137; |
52 static const int cospi_9_64 = 14811; | 63 static const tran_high_t cospi_9_64 = 14811; |
53 static const int cospi_10_64 = 14449; | 64 static const tran_high_t cospi_10_64 = 14449; |
54 static const int cospi_11_64 = 14053; | 65 static const tran_high_t cospi_11_64 = 14053; |
55 static const int cospi_12_64 = 13623; | 66 static const tran_high_t cospi_12_64 = 13623; |
56 static const int cospi_13_64 = 13160; | 67 static const tran_high_t cospi_13_64 = 13160; |
57 static const int cospi_14_64 = 12665; | 68 static const tran_high_t cospi_14_64 = 12665; |
58 static const int cospi_15_64 = 12140; | 69 static const tran_high_t cospi_15_64 = 12140; |
59 static const int cospi_16_64 = 11585; | 70 static const tran_high_t cospi_16_64 = 11585; |
60 static const int cospi_17_64 = 11003; | 71 static const tran_high_t cospi_17_64 = 11003; |
61 static const int cospi_18_64 = 10394; | 72 static const tran_high_t cospi_18_64 = 10394; |
62 static const int cospi_19_64 = 9760; | 73 static const tran_high_t cospi_19_64 = 9760; |
63 static const int cospi_20_64 = 9102; | 74 static const tran_high_t cospi_20_64 = 9102; |
64 static const int cospi_21_64 = 8423; | 75 static const tran_high_t cospi_21_64 = 8423; |
65 static const int cospi_22_64 = 7723; | 76 static const tran_high_t cospi_22_64 = 7723; |
66 static const int cospi_23_64 = 7005; | 77 static const tran_high_t cospi_23_64 = 7005; |
67 static const int cospi_24_64 = 6270; | 78 static const tran_high_t cospi_24_64 = 6270; |
68 static const int cospi_25_64 = 5520; | 79 static const tran_high_t cospi_25_64 = 5520; |
69 static const int cospi_26_64 = 4756; | 80 static const tran_high_t cospi_26_64 = 4756; |
70 static const int cospi_27_64 = 3981; | 81 static const tran_high_t cospi_27_64 = 3981; |
71 static const int cospi_28_64 = 3196; | 82 static const tran_high_t cospi_28_64 = 3196; |
72 static const int cospi_29_64 = 2404; | 83 static const tran_high_t cospi_29_64 = 2404; |
73 static const int cospi_30_64 = 1606; | 84 static const tran_high_t cospi_30_64 = 1606; |
74 static const int cospi_31_64 = 804; | 85 static const tran_high_t cospi_31_64 = 804; |
75 | 86 |
76 // 16384 * sqrt(2) * sin(kPi/9) * 2 / 3 | 87 // 16384 * sqrt(2) * sin(kPi/9) * 2 / 3 |
77 static const int sinpi_1_9 = 5283; | 88 static const tran_high_t sinpi_1_9 = 5283; |
78 static const int sinpi_2_9 = 9929; | 89 static const tran_high_t sinpi_2_9 = 9929; |
79 static const int sinpi_3_9 = 13377; | 90 static const tran_high_t sinpi_3_9 = 13377; |
80 static const int sinpi_4_9 = 15212; | 91 static const tran_high_t sinpi_4_9 = 15212; |
81 | 92 |
82 static INLINE int dct_const_round_shift(int input) { | 93 static INLINE tran_low_t dct_const_round_shift(tran_high_t input) { |
83 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); | 94 tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); |
84 #if CONFIG_COEFFICIENT_RANGE_CHECKING | 95 #if CONFIG_VP9_HIGHBITDEPTH |
| 96 // For valid highbitdepth VP9 streams, intermediate stage coefficients will |
| 97 // stay within the ranges: |
| 98 // - 8 bit: signed 16 bit integer |
| 99 // - 10 bit: signed 18 bit integer |
| 100 // - 12 bit: signed 20 bit integer |
| 101 #elif CONFIG_COEFFICIENT_RANGE_CHECKING |
85 // For valid VP9 input streams, intermediate stage coefficients should always | 102 // For valid VP9 input streams, intermediate stage coefficients should always |
86 // stay within the range of a signed 16 bit integer. Coefficients can go out | 103 // stay within the range of a signed 16 bit integer. Coefficients can go out |
87 // of this range for invalid/corrupt VP9 streams. However, strictly checking | 104 // of this range for invalid/corrupt VP9 streams. However, strictly checking |
88 // this range for every intermediate coefficient can be burdensome for a | 105 // this range for every intermediate coefficient can be burdensome for a |
89 // decoder, therefore the following assertion is only enabled when configured | 106 // decoder, therefore the following assertion is only enabled when configured |
90 // with --enable-coefficient-range-checking. | 107 // with --enable-coefficient-range-checking. |
91 assert(INT16_MIN <= rv); | 108 assert(INT16_MIN <= rv); |
92 assert(rv <= INT16_MAX); | 109 assert(rv <= INT16_MAX); |
93 #endif | 110 #endif |
94 return (int16_t)rv; | 111 return (tran_low_t)rv; |
95 } | 112 } |
96 | 113 |
97 typedef void (*transform_1d)(const int16_t*, int16_t*); | 114 typedef void (*transform_1d)(const tran_low_t*, tran_low_t*); |
98 | 115 |
99 typedef struct { | 116 typedef struct { |
100 transform_1d cols, rows; // vertical and horizontal | 117 transform_1d cols, rows; // vertical and horizontal |
101 } transform_2d; | 118 } transform_2d; |
102 | 119 |
103 void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob); | 120 #if CONFIG_VP9_HIGHBITDEPTH |
| 121 typedef void (*high_transform_1d)(const tran_low_t*, tran_low_t*, int bd); |
104 | 122 |
105 void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob); | 123 typedef struct { |
106 void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob); | 124 high_transform_1d cols, rows; // vertical and horizontal |
107 void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride, int | 125 } high_transform_2d; |
| 126 #endif // CONFIG_VP9_HIGHBITDEPTH |
| 127 |
| 128 void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
| 129 int eob); |
| 130 void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
| 131 int eob); |
| 132 void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, |
| 133 int eob); |
| 134 void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, int |
108 eob); | 135 eob); |
109 void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride, | 136 void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, |
110 int eob); | 137 int eob); |
111 | 138 |
112 void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, | 139 void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, |
113 int stride, int eob); | 140 int stride, int eob); |
114 void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, | 141 void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, |
115 int stride, int eob); | 142 int stride, int eob); |
116 void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, | 143 void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, |
117 int stride, int eob); | 144 int stride, int eob); |
118 | 145 |
119 | 146 #if CONFIG_VP9_HIGHBITDEPTH |
| 147 void vp9_high_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
| 148 int eob, int bd); |
| 149 void vp9_high_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
| 150 int eob, int bd); |
| 151 void vp9_high_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, |
| 152 int eob, int bd); |
| 153 void vp9_high_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, |
| 154 int eob, int bd); |
| 155 void vp9_high_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, |
| 156 int eob, int bd); |
| 157 void vp9_high_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, |
| 158 uint8_t *dest, int stride, int eob, int bd); |
| 159 void vp9_high_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, |
| 160 uint8_t *dest, int stride, int eob, int bd); |
| 161 void vp9_high_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, |
| 162 uint8_t *dest, int stride, int eob, int bd); |
| 163 #endif // CONFIG_VP9_HIGHBITDEPTH |
120 #ifdef __cplusplus | 164 #ifdef __cplusplus |
121 } // extern "C" | 165 } // extern "C" |
122 #endif | 166 #endif |
123 | 167 |
124 #endif // VP9_COMMON_VP9_IDCT_H_ | 168 #endif // VP9_COMMON_VP9_IDCT_H_ |
OLD | NEW |