OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <assert.h> | |
12 #include <math.h> | 11 #include <math.h> |
13 | 12 |
14 #include "./vpx_config.h" | |
15 #include "./vp9_rtcd.h" | 13 #include "./vp9_rtcd.h" |
16 #include "vp9/common/vp9_systemdependent.h" | 14 #include "vp9/common/vp9_systemdependent.h" |
17 #include "vp9/common/vp9_blockd.h" | 15 #include "vp9/common/vp9_blockd.h" |
18 #include "vp9/common/vp9_common.h" | |
19 #include "vp9/common/vp9_idct.h" | 16 #include "vp9/common/vp9_idct.h" |
20 | 17 |
21 #if CONFIG_EMULATE_HARDWARE_HIGHBITDEPTH | 18 #if CONFIG_EMULATE_HARDWARE |
22 // When CONFIG_EMULATE_HW_HIGHBITDEPTH is 1 the transform performs strict | 19 // When CONFIG_EMULATE_HARDWARE is 1 the transform performs a |
23 // overflow wrapping to match expected hardware implementations. | 20 // non-normative method to handle overflows. A stream that causes |
| 21 // overflows in the inverse transform is considered invalid in VP9, |
| 22 // and a hardware implementer is free to choose any reasonable |
| 23 // method to handle overflows. However to aid in hardware |
| 24 // verification they can use a specific implementation of the |
| 25 // WRAPLOW() macro below that is identical to their intended |
| 26 // hardware implementation (and also use configure options to trigger |
| 27 // the C-implementation of the transform). |
| 28 // |
| 29 // The particular WRAPLOW implementation below performs strict |
| 30 // overflow wrapping to match common hardware implementations. |
24 // bd of 8 uses trans_low with 16bits, need to remove 16bits | 31 // bd of 8 uses trans_low with 16bits, need to remove 16bits |
25 // bd of 10 uses trans_low with 18bits, need to remove 14bits | 32 // bd of 10 uses trans_low with 18bits, need to remove 14bits |
26 // bd of 12 uses trans_low with 20bits, need to remove 12bits | 33 // bd of 12 uses trans_low with 20bits, need to remove 12bits |
27 // bd of x uses trans_low with 8+x bits, need to remove 24-x bits | 34 // bd of x uses trans_low with 8+x bits, need to remove 24-x bits |
28 #define WRAPLOW(x) ((((int32_t)x) << (24 - bd)) >> (24 - bd)) | 35 #define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd)) |
29 #else | 36 #else |
30 #define WRAPLOW(x) (x) | 37 #define WRAPLOW(x, bd) (x) |
31 #endif // CONFIG_EMULATE_HARDWARE_HIGHBITDEPTH | 38 #endif // CONFIG_EMULATE_HARDWARE |
32 | 39 |
33 #if CONFIG_VP9_HIGHBITDEPTH | 40 #if CONFIG_VP9_HIGHBITDEPTH |
34 static INLINE tran_low_t clamp_high(tran_high_t value, tran_low_t low, | 41 static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, |
35 tran_low_t high) { | 42 int bd) { |
36 return value < low ? low : (value > high ? high : value); | 43 trans = WRAPLOW(trans, bd); |
37 } | 44 return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd); |
38 | |
39 static INLINE tran_low_t clip_pixel_bd_high(tran_high_t dest, | |
40 tran_high_t trans, int bd) { | |
41 trans = WRAPLOW(trans); | |
42 switch (bd) { | |
43 case 8: | |
44 default: | |
45 return clamp_high(WRAPLOW(dest + trans), 0, 255); | |
46 case 10: | |
47 return clamp_high(WRAPLOW(dest + trans), 0, 1023); | |
48 case 12: | |
49 return clamp_high(WRAPLOW(dest + trans), 0, 4095); | |
50 } | |
51 } | 45 } |
52 #endif // CONFIG_VP9_HIGHBITDEPTH | 46 #endif // CONFIG_VP9_HIGHBITDEPTH |
53 | 47 |
| 48 static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) { |
| 49 trans = WRAPLOW(trans, 8); |
| 50 return clip_pixel(WRAPLOW(dest + trans, 8)); |
| 51 } |
| 52 |
54 void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 53 void vp9_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
55 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, | 54 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, |
56 0.5 shifts per pixel. */ | 55 0.5 shifts per pixel. */ |
57 int i; | 56 int i; |
58 tran_low_t output[16]; | 57 tran_low_t output[16]; |
59 tran_high_t a1, b1, c1, d1, e1; | 58 tran_high_t a1, b1, c1, d1, e1; |
60 const tran_low_t *ip = input; | 59 const tran_low_t *ip = input; |
61 tran_low_t *op = output; | 60 tran_low_t *op = output; |
62 | 61 |
63 for (i = 0; i < 4; i++) { | 62 for (i = 0; i < 4; i++) { |
64 a1 = ip[0] >> UNIT_QUANT_SHIFT; | 63 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
65 c1 = ip[1] >> UNIT_QUANT_SHIFT; | 64 c1 = ip[1] >> UNIT_QUANT_SHIFT; |
66 d1 = ip[2] >> UNIT_QUANT_SHIFT; | 65 d1 = ip[2] >> UNIT_QUANT_SHIFT; |
67 b1 = ip[3] >> UNIT_QUANT_SHIFT; | 66 b1 = ip[3] >> UNIT_QUANT_SHIFT; |
68 a1 += c1; | 67 a1 += c1; |
69 d1 -= b1; | 68 d1 -= b1; |
70 e1 = (a1 - d1) >> 1; | 69 e1 = (a1 - d1) >> 1; |
71 b1 = e1 - b1; | 70 b1 = e1 - b1; |
72 c1 = e1 - c1; | 71 c1 = e1 - c1; |
73 a1 -= b1; | 72 a1 -= b1; |
74 d1 += c1; | 73 d1 += c1; |
75 op[0] = a1; | 74 op[0] = WRAPLOW(a1, 8); |
76 op[1] = b1; | 75 op[1] = WRAPLOW(b1, 8); |
77 op[2] = c1; | 76 op[2] = WRAPLOW(c1, 8); |
78 op[3] = d1; | 77 op[3] = WRAPLOW(d1, 8); |
79 ip += 4; | 78 ip += 4; |
80 op += 4; | 79 op += 4; |
81 } | 80 } |
82 | 81 |
83 ip = output; | 82 ip = output; |
84 for (i = 0; i < 4; i++) { | 83 for (i = 0; i < 4; i++) { |
85 a1 = ip[4 * 0]; | 84 a1 = ip[4 * 0]; |
86 c1 = ip[4 * 1]; | 85 c1 = ip[4 * 1]; |
87 d1 = ip[4 * 2]; | 86 d1 = ip[4 * 2]; |
88 b1 = ip[4 * 3]; | 87 b1 = ip[4 * 3]; |
89 a1 += c1; | 88 a1 += c1; |
90 d1 -= b1; | 89 d1 -= b1; |
91 e1 = (a1 - d1) >> 1; | 90 e1 = (a1 - d1) >> 1; |
92 b1 = e1 - b1; | 91 b1 = e1 - b1; |
93 c1 = e1 - c1; | 92 c1 = e1 - c1; |
94 a1 -= b1; | 93 a1 -= b1; |
95 d1 += c1; | 94 d1 += c1; |
96 dest[stride * 0] = clip_pixel(dest[stride * 0] + a1); | 95 dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1); |
97 dest[stride * 1] = clip_pixel(dest[stride * 1] + b1); | 96 dest[stride * 1] = clip_pixel_add(dest[stride * 1], b1); |
98 dest[stride * 2] = clip_pixel(dest[stride * 2] + c1); | 97 dest[stride * 2] = clip_pixel_add(dest[stride * 2], c1); |
99 dest[stride * 3] = clip_pixel(dest[stride * 3] + d1); | 98 dest[stride * 3] = clip_pixel_add(dest[stride * 3], d1); |
100 | 99 |
101 ip++; | 100 ip++; |
102 dest++; | 101 dest++; |
103 } | 102 } |
104 } | 103 } |
105 | 104 |
106 void vp9_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { | 105 void vp9_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest, int dest_stride) { |
107 int i; | 106 int i; |
108 tran_high_t a1, e1; | 107 tran_high_t a1, e1; |
109 tran_low_t tmp[4]; | 108 tran_low_t tmp[4]; |
110 const tran_low_t *ip = in; | 109 const tran_low_t *ip = in; |
111 tran_low_t *op = tmp; | 110 tran_low_t *op = tmp; |
112 | 111 |
113 a1 = ip[0] >> UNIT_QUANT_SHIFT; | 112 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
114 e1 = a1 >> 1; | 113 e1 = a1 >> 1; |
115 a1 -= e1; | 114 a1 -= e1; |
116 op[0] = a1; | 115 op[0] = WRAPLOW(a1, 8); |
117 op[1] = op[2] = op[3] = e1; | 116 op[1] = op[2] = op[3] = WRAPLOW(e1, 8); |
118 | 117 |
119 ip = tmp; | 118 ip = tmp; |
120 for (i = 0; i < 4; i++) { | 119 for (i = 0; i < 4; i++) { |
121 e1 = ip[0] >> 1; | 120 e1 = ip[0] >> 1; |
122 a1 = ip[0] - e1; | 121 a1 = ip[0] - e1; |
123 dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1); | 122 dest[dest_stride * 0] = clip_pixel_add(dest[dest_stride * 0], a1); |
124 dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + e1); | 123 dest[dest_stride * 1] = clip_pixel_add(dest[dest_stride * 1], e1); |
125 dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + e1); | 124 dest[dest_stride * 2] = clip_pixel_add(dest[dest_stride * 2], e1); |
126 dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + e1); | 125 dest[dest_stride * 3] = clip_pixel_add(dest[dest_stride * 3], e1); |
127 ip++; | 126 ip++; |
128 dest++; | 127 dest++; |
129 } | 128 } |
130 } | 129 } |
131 | 130 |
132 static void idct4(const tran_low_t *input, tran_low_t *output) { | 131 static void idct4(const tran_low_t *input, tran_low_t *output) { |
133 tran_low_t step[4]; | 132 tran_low_t step[4]; |
134 tran_high_t temp1, temp2; | 133 tran_high_t temp1, temp2; |
135 // stage 1 | 134 // stage 1 |
136 temp1 = (input[0] + input[2]) * cospi_16_64; | 135 temp1 = (input[0] + input[2]) * cospi_16_64; |
137 temp2 = (input[0] - input[2]) * cospi_16_64; | 136 temp2 = (input[0] - input[2]) * cospi_16_64; |
138 step[0] = dct_const_round_shift(temp1); | 137 step[0] = WRAPLOW(dct_const_round_shift(temp1), 8); |
139 step[1] = dct_const_round_shift(temp2); | 138 step[1] = WRAPLOW(dct_const_round_shift(temp2), 8); |
140 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; | 139 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; |
141 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; | 140 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; |
142 step[2] = dct_const_round_shift(temp1); | 141 step[2] = WRAPLOW(dct_const_round_shift(temp1), 8); |
143 step[3] = dct_const_round_shift(temp2); | 142 step[3] = WRAPLOW(dct_const_round_shift(temp2), 8); |
144 | 143 |
145 // stage 2 | 144 // stage 2 |
146 output[0] = step[0] + step[3]; | 145 output[0] = WRAPLOW(step[0] + step[3], 8); |
147 output[1] = step[1] + step[2]; | 146 output[1] = WRAPLOW(step[1] + step[2], 8); |
148 output[2] = step[1] - step[2]; | 147 output[2] = WRAPLOW(step[1] - step[2], 8); |
149 output[3] = step[0] - step[3]; | 148 output[3] = WRAPLOW(step[0] - step[3], 8); |
150 } | 149 } |
151 | 150 |
152 void vp9_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 151 void vp9_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
153 tran_low_t out[4 * 4]; | 152 tran_low_t out[4 * 4]; |
154 tran_low_t *outptr = out; | 153 tran_low_t *outptr = out; |
155 int i, j; | 154 int i, j; |
156 tran_low_t temp_in[4], temp_out[4]; | 155 tran_low_t temp_in[4], temp_out[4]; |
157 | 156 |
158 // Rows | 157 // Rows |
159 for (i = 0; i < 4; ++i) { | 158 for (i = 0; i < 4; ++i) { |
160 idct4(input, outptr); | 159 idct4(input, outptr); |
161 input += 4; | 160 input += 4; |
162 outptr += 4; | 161 outptr += 4; |
163 } | 162 } |
164 | 163 |
165 // Columns | 164 // Columns |
166 for (i = 0; i < 4; ++i) { | 165 for (i = 0; i < 4; ++i) { |
167 for (j = 0; j < 4; ++j) | 166 for (j = 0; j < 4; ++j) |
168 temp_in[j] = out[j * 4 + i]; | 167 temp_in[j] = out[j * 4 + i]; |
169 idct4(temp_in, temp_out); | 168 idct4(temp_in, temp_out); |
170 for (j = 0; j < 4; ++j) | 169 for (j = 0; j < 4; ++j) { |
171 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) | 170 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
172 + dest[j * stride + i]); | 171 ROUND_POWER_OF_TWO(temp_out[j], 4)); |
| 172 } |
173 } | 173 } |
174 } | 174 } |
175 | 175 |
176 void vp9_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, | 176 void vp9_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, |
177 int dest_stride) { | 177 int dest_stride) { |
178 int i; | 178 int i; |
179 tran_high_t a1; | 179 tran_high_t a1; |
180 tran_low_t out = dct_const_round_shift(input[0] * cospi_16_64); | 180 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
181 out = dct_const_round_shift(out * cospi_16_64); | 181 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
182 a1 = ROUND_POWER_OF_TWO(out, 4); | 182 a1 = ROUND_POWER_OF_TWO(out, 4); |
183 | 183 |
184 for (i = 0; i < 4; i++) { | 184 for (i = 0; i < 4; i++) { |
185 dest[0] = clip_pixel(dest[0] + a1); | 185 dest[0] = clip_pixel_add(dest[0], a1); |
186 dest[1] = clip_pixel(dest[1] + a1); | 186 dest[1] = clip_pixel_add(dest[1], a1); |
187 dest[2] = clip_pixel(dest[2] + a1); | 187 dest[2] = clip_pixel_add(dest[2], a1); |
188 dest[3] = clip_pixel(dest[3] + a1); | 188 dest[3] = clip_pixel_add(dest[3], a1); |
189 dest += dest_stride; | 189 dest += dest_stride; |
190 } | 190 } |
191 } | 191 } |
192 | 192 |
193 static void idct8(const tran_low_t *input, tran_low_t *output) { | 193 static void idct8(const tran_low_t *input, tran_low_t *output) { |
194 tran_low_t step1[8], step2[8]; | 194 tran_low_t step1[8], step2[8]; |
195 tran_high_t temp1, temp2; | 195 tran_high_t temp1, temp2; |
196 // stage 1 | 196 // stage 1 |
197 step1[0] = input[0]; | 197 step1[0] = input[0]; |
198 step1[2] = input[4]; | 198 step1[2] = input[4]; |
199 step1[1] = input[2]; | 199 step1[1] = input[2]; |
200 step1[3] = input[6]; | 200 step1[3] = input[6]; |
201 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; | 201 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; |
202 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; | 202 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; |
203 step1[4] = dct_const_round_shift(temp1); | 203 step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8); |
204 step1[7] = dct_const_round_shift(temp2); | 204 step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8); |
205 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; | 205 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; |
206 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; | 206 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; |
207 step1[5] = dct_const_round_shift(temp1); | 207 step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); |
208 step1[6] = dct_const_round_shift(temp2); | 208 step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); |
209 | 209 |
210 // stage 2 & stage 3 - even half | 210 // stage 2 & stage 3 - even half |
211 idct4(step1, step1); | 211 idct4(step1, step1); |
212 | 212 |
213 // stage 2 - odd half | 213 // stage 2 - odd half |
214 step2[4] = step1[4] + step1[5]; | 214 step2[4] = WRAPLOW(step1[4] + step1[5], 8); |
215 step2[5] = step1[4] - step1[5]; | 215 step2[5] = WRAPLOW(step1[4] - step1[5], 8); |
216 step2[6] = -step1[6] + step1[7]; | 216 step2[6] = WRAPLOW(-step1[6] + step1[7], 8); |
217 step2[7] = step1[6] + step1[7]; | 217 step2[7] = WRAPLOW(step1[6] + step1[7], 8); |
218 | 218 |
219 // stage 3 -odd half | 219 // stage 3 -odd half |
220 step1[4] = step2[4]; | 220 step1[4] = step2[4]; |
221 temp1 = (step2[6] - step2[5]) * cospi_16_64; | 221 temp1 = (step2[6] - step2[5]) * cospi_16_64; |
222 temp2 = (step2[5] + step2[6]) * cospi_16_64; | 222 temp2 = (step2[5] + step2[6]) * cospi_16_64; |
223 step1[5] = dct_const_round_shift(temp1); | 223 step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); |
224 step1[6] = dct_const_round_shift(temp2); | 224 step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); |
225 step1[7] = step2[7]; | 225 step1[7] = step2[7]; |
226 | 226 |
227 // stage 4 | 227 // stage 4 |
228 output[0] = step1[0] + step1[7]; | 228 output[0] = WRAPLOW(step1[0] + step1[7], 8); |
229 output[1] = step1[1] + step1[6]; | 229 output[1] = WRAPLOW(step1[1] + step1[6], 8); |
230 output[2] = step1[2] + step1[5]; | 230 output[2] = WRAPLOW(step1[2] + step1[5], 8); |
231 output[3] = step1[3] + step1[4]; | 231 output[3] = WRAPLOW(step1[3] + step1[4], 8); |
232 output[4] = step1[3] - step1[4]; | 232 output[4] = WRAPLOW(step1[3] - step1[4], 8); |
233 output[5] = step1[2] - step1[5]; | 233 output[5] = WRAPLOW(step1[2] - step1[5], 8); |
234 output[6] = step1[1] - step1[6]; | 234 output[6] = WRAPLOW(step1[1] - step1[6], 8); |
235 output[7] = step1[0] - step1[7]; | 235 output[7] = WRAPLOW(step1[0] - step1[7], 8); |
236 } | 236 } |
237 | 237 |
238 void vp9_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 238 void vp9_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
239 tran_low_t out[8 * 8]; | 239 tran_low_t out[8 * 8]; |
240 tran_low_t *outptr = out; | 240 tran_low_t *outptr = out; |
241 int i, j; | 241 int i, j; |
242 tran_low_t temp_in[8], temp_out[8]; | 242 tran_low_t temp_in[8], temp_out[8]; |
243 | 243 |
244 // First transform rows | 244 // First transform rows |
245 for (i = 0; i < 8; ++i) { | 245 for (i = 0; i < 8; ++i) { |
246 idct8(input, outptr); | 246 idct8(input, outptr); |
247 input += 8; | 247 input += 8; |
248 outptr += 8; | 248 outptr += 8; |
249 } | 249 } |
250 | 250 |
251 // Then transform columns | 251 // Then transform columns |
252 for (i = 0; i < 8; ++i) { | 252 for (i = 0; i < 8; ++i) { |
253 for (j = 0; j < 8; ++j) | 253 for (j = 0; j < 8; ++j) |
254 temp_in[j] = out[j * 8 + i]; | 254 temp_in[j] = out[j * 8 + i]; |
255 idct8(temp_in, temp_out); | 255 idct8(temp_in, temp_out); |
256 for (j = 0; j < 8; ++j) | 256 for (j = 0; j < 8; ++j) { |
257 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) | 257 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
258 + dest[j * stride + i]); | 258 ROUND_POWER_OF_TWO(temp_out[j], 5)); |
| 259 } |
259 } | 260 } |
260 } | 261 } |
261 | 262 |
262 void vp9_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 263 void vp9_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
263 int i, j; | 264 int i, j; |
264 tran_high_t a1; | 265 tran_high_t a1; |
265 tran_low_t out = dct_const_round_shift(input[0] * cospi_16_64); | 266 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
266 out = dct_const_round_shift(out * cospi_16_64); | 267 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
267 a1 = ROUND_POWER_OF_TWO(out, 5); | 268 a1 = ROUND_POWER_OF_TWO(out, 5); |
268 for (j = 0; j < 8; ++j) { | 269 for (j = 0; j < 8; ++j) { |
269 for (i = 0; i < 8; ++i) | 270 for (i = 0; i < 8; ++i) |
270 dest[i] = clip_pixel(dest[i] + a1); | 271 dest[i] = clip_pixel_add(dest[i], a1); |
271 dest += stride; | 272 dest += stride; |
272 } | 273 } |
273 } | 274 } |
274 | 275 |
275 static void iadst4(const tran_low_t *input, tran_low_t *output) { | 276 static void iadst4(const tran_low_t *input, tran_low_t *output) { |
276 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 277 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
277 | 278 |
278 tran_high_t x0 = input[0]; | 279 tran_high_t x0 = input[0]; |
279 tran_high_t x1 = input[1]; | 280 tran_high_t x1 = input[1]; |
280 tran_high_t x2 = input[2]; | 281 tran_high_t x2 = input[2]; |
(...skipping 20 matching lines...) |
301 | 302 |
302 s0 = x0 + x3; | 303 s0 = x0 + x3; |
303 s1 = x1 + x3; | 304 s1 = x1 + x3; |
304 s2 = x2; | 305 s2 = x2; |
305 s3 = x0 + x1 - x3; | 306 s3 = x0 + x1 - x3; |
306 | 307 |
307 // 1-D transform scaling factor is sqrt(2). | 308 // 1-D transform scaling factor is sqrt(2). |
308 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) | 309 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
309 // + 1b (addition) = 29b. | 310 // + 1b (addition) = 29b. |
310 // Hence the output bit depth is 15b. | 311 // Hence the output bit depth is 15b. |
311 output[0] = dct_const_round_shift(s0); | 312 output[0] = WRAPLOW(dct_const_round_shift(s0), 8); |
312 output[1] = dct_const_round_shift(s1); | 313 output[1] = WRAPLOW(dct_const_round_shift(s1), 8); |
313 output[2] = dct_const_round_shift(s2); | 314 output[2] = WRAPLOW(dct_const_round_shift(s2), 8); |
314 output[3] = dct_const_round_shift(s3); | 315 output[3] = WRAPLOW(dct_const_round_shift(s3), 8); |
315 } | 316 } |
316 | 317 |
317 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, | 318 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
318 int tx_type) { | 319 int tx_type) { |
319 const transform_2d IHT_4[] = { | 320 const transform_2d IHT_4[] = { |
320 { idct4, idct4 }, // DCT_DCT = 0 | 321 { idct4, idct4 }, // DCT_DCT = 0 |
321 { iadst4, idct4 }, // ADST_DCT = 1 | 322 { iadst4, idct4 }, // ADST_DCT = 1 |
322 { idct4, iadst4 }, // DCT_ADST = 2 | 323 { idct4, iadst4 }, // DCT_ADST = 2 |
323 { iadst4, iadst4 } // ADST_ADST = 3 | 324 { iadst4, iadst4 } // ADST_ADST = 3 |
324 }; | 325 }; |
325 | 326 |
326 int i, j; | 327 int i, j; |
327 tran_low_t out[4 * 4]; | 328 tran_low_t out[4 * 4]; |
328 tran_low_t *outptr = out; | 329 tran_low_t *outptr = out; |
329 tran_low_t temp_in[4], temp_out[4]; | 330 tran_low_t temp_in[4], temp_out[4]; |
330 | 331 |
331 // inverse transform row vectors | 332 // inverse transform row vectors |
332 for (i = 0; i < 4; ++i) { | 333 for (i = 0; i < 4; ++i) { |
333 IHT_4[tx_type].rows(input, outptr); | 334 IHT_4[tx_type].rows(input, outptr); |
334 input += 4; | 335 input += 4; |
335 outptr += 4; | 336 outptr += 4; |
336 } | 337 } |
337 | 338 |
338 // inverse transform column vectors | 339 // inverse transform column vectors |
339 for (i = 0; i < 4; ++i) { | 340 for (i = 0; i < 4; ++i) { |
340 for (j = 0; j < 4; ++j) | 341 for (j = 0; j < 4; ++j) |
341 temp_in[j] = out[j * 4 + i]; | 342 temp_in[j] = out[j * 4 + i]; |
342 IHT_4[tx_type].cols(temp_in, temp_out); | 343 IHT_4[tx_type].cols(temp_in, temp_out); |
343 for (j = 0; j < 4; ++j) | 344 for (j = 0; j < 4; ++j) { |
344 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) | 345 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
345 + dest[j * stride + i]); | 346 ROUND_POWER_OF_TWO(temp_out[j], 4)); |
| 347 } |
346 } | 348 } |
347 } | 349 } |
| 350 |
348 static void iadst8(const tran_low_t *input, tran_low_t *output) { | 351 static void iadst8(const tran_low_t *input, tran_low_t *output) { |
349 int s0, s1, s2, s3, s4, s5, s6, s7; | 352 int s0, s1, s2, s3, s4, s5, s6, s7; |
350 | 353 |
351 tran_high_t x0 = input[7]; | 354 tran_high_t x0 = input[7]; |
352 tran_high_t x1 = input[0]; | 355 tran_high_t x1 = input[0]; |
353 tran_high_t x2 = input[5]; | 356 tran_high_t x2 = input[5]; |
354 tran_high_t x3 = input[2]; | 357 tran_high_t x3 = input[2]; |
355 tran_high_t x4 = input[3]; | 358 tran_high_t x4 = input[3]; |
356 tran_high_t x5 = input[4]; | 359 tran_high_t x5 = input[4]; |
357 tran_high_t x6 = input[1]; | 360 tran_high_t x6 = input[1]; |
358 tran_high_t x7 = input[6]; | 361 tran_high_t x7 = input[6]; |
359 | 362 |
360 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { | 363 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { |
361 output[0] = output[1] = output[2] = output[3] = output[4] | 364 output[0] = output[1] = output[2] = output[3] = output[4] |
362 = output[5] = output[6] = output[7] = 0; | 365 = output[5] = output[6] = output[7] = 0; |
363 return; | 366 return; |
364 } | 367 } |
365 | 368 |
366 // stage 1 | 369 // stage 1 |
367 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; | 370 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; |
368 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; | 371 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; |
369 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; | 372 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; |
370 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; | 373 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; |
371 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; | 374 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; |
372 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; | 375 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; |
373 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; | 376 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; |
374 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; | 377 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; |
375 | 378 |
376 x0 = dct_const_round_shift(s0 + s4); | 379 x0 = WRAPLOW(dct_const_round_shift(s0 + s4), 8); |
377 x1 = dct_const_round_shift(s1 + s5); | 380 x1 = WRAPLOW(dct_const_round_shift(s1 + s5), 8); |
378 x2 = dct_const_round_shift(s2 + s6); | 381 x2 = WRAPLOW(dct_const_round_shift(s2 + s6), 8); |
379 x3 = dct_const_round_shift(s3 + s7); | 382 x3 = WRAPLOW(dct_const_round_shift(s3 + s7), 8); |
380 x4 = dct_const_round_shift(s0 - s4); | 383 x4 = WRAPLOW(dct_const_round_shift(s0 - s4), 8); |
381 x5 = dct_const_round_shift(s1 - s5); | 384 x5 = WRAPLOW(dct_const_round_shift(s1 - s5), 8); |
382 x6 = dct_const_round_shift(s2 - s6); | 385 x6 = WRAPLOW(dct_const_round_shift(s2 - s6), 8); |
383 x7 = dct_const_round_shift(s3 - s7); | 386 x7 = WRAPLOW(dct_const_round_shift(s3 - s7), 8); |
384 | 387 |
385 // stage 2 | 388 // stage 2 |
386 s0 = x0; | 389 s0 = x0; |
387 s1 = x1; | 390 s1 = x1; |
388 s2 = x2; | 391 s2 = x2; |
389 s3 = x3; | 392 s3 = x3; |
390 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; | 393 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; |
391 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; | 394 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; |
392 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; | 395 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; |
393 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; | 396 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; |
394 | 397 |
395 x0 = s0 + s2; | 398 x0 = WRAPLOW(s0 + s2, 8); |
396 x1 = s1 + s3; | 399 x1 = WRAPLOW(s1 + s3, 8); |
397 x2 = s0 - s2; | 400 x2 = WRAPLOW(s0 - s2, 8); |
398 x3 = s1 - s3; | 401 x3 = WRAPLOW(s1 - s3, 8); |
399 x4 = dct_const_round_shift(s4 + s6); | 402 x4 = WRAPLOW(dct_const_round_shift(s4 + s6), 8); |
400 x5 = dct_const_round_shift(s5 + s7); | 403 x5 = WRAPLOW(dct_const_round_shift(s5 + s7), 8); |
401 x6 = dct_const_round_shift(s4 - s6); | 404 x6 = WRAPLOW(dct_const_round_shift(s4 - s6), 8); |
402 x7 = dct_const_round_shift(s5 - s7); | 405 x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8); |
403 | 406 |
404 // stage 3 | 407 // stage 3 |
405 s2 = cospi_16_64 * (x2 + x3); | 408 s2 = cospi_16_64 * (x2 + x3); |
406 s3 = cospi_16_64 * (x2 - x3); | 409 s3 = cospi_16_64 * (x2 - x3); |
407 s6 = cospi_16_64 * (x6 + x7); | 410 s6 = cospi_16_64 * (x6 + x7); |
408 s7 = cospi_16_64 * (x6 - x7); | 411 s7 = cospi_16_64 * (x6 - x7); |
409 | 412 |
410 x2 = dct_const_round_shift(s2); | 413 x2 = WRAPLOW(dct_const_round_shift(s2), 8); |
411 x3 = dct_const_round_shift(s3); | 414 x3 = WRAPLOW(dct_const_round_shift(s3), 8); |
412 x6 = dct_const_round_shift(s6); | 415 x6 = WRAPLOW(dct_const_round_shift(s6), 8); |
413 x7 = dct_const_round_shift(s7); | 416 x7 = WRAPLOW(dct_const_round_shift(s7), 8); |
414 | 417 |
415 output[0] = x0; | 418 output[0] = WRAPLOW(x0, 8); |
416 output[1] = -x4; | 419 output[1] = WRAPLOW(-x4, 8); |
417 output[2] = x6; | 420 output[2] = WRAPLOW(x6, 8); |
418 output[3] = -x2; | 421 output[3] = WRAPLOW(-x2, 8); |
419 output[4] = x3; | 422 output[4] = WRAPLOW(x3, 8); |
420 output[5] = -x7; | 423 output[5] = WRAPLOW(-x7, 8); |
421 output[6] = x5; | 424 output[6] = WRAPLOW(x5, 8); |
422 output[7] = -x1; | 425 output[7] = WRAPLOW(-x1, 8); |
423 } | 426 } |
424 | 427 |
425 static const transform_2d IHT_8[] = { | 428 static const transform_2d IHT_8[] = { |
426 { idct8, idct8 }, // DCT_DCT = 0 | 429 { idct8, idct8 }, // DCT_DCT = 0 |
427 { iadst8, idct8 }, // ADST_DCT = 1 | 430 { iadst8, idct8 }, // ADST_DCT = 1 |
428 { idct8, iadst8 }, // DCT_ADST = 2 | 431 { idct8, iadst8 }, // DCT_ADST = 2 |
429 { iadst8, iadst8 } // ADST_ADST = 3 | 432 { iadst8, iadst8 } // ADST_ADST = 3 |
430 }; | 433 }; |
431 | 434 |
432 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, | 435 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
433 int tx_type) { | 436 int tx_type) { |
434 int i, j; | 437 int i, j; |
435 tran_low_t out[8 * 8]; | 438 tran_low_t out[8 * 8]; |
436 tran_low_t *outptr = out; | 439 tran_low_t *outptr = out; |
437 tran_low_t temp_in[8], temp_out[8]; | 440 tran_low_t temp_in[8], temp_out[8]; |
438 const transform_2d ht = IHT_8[tx_type]; | 441 const transform_2d ht = IHT_8[tx_type]; |
439 | 442 |
440 // inverse transform row vectors | 443 // inverse transform row vectors |
441 for (i = 0; i < 8; ++i) { | 444 for (i = 0; i < 8; ++i) { |
442 ht.rows(input, outptr); | 445 ht.rows(input, outptr); |
443 input += 8; | 446 input += 8; |
444 outptr += 8; | 447 outptr += 8; |
445 } | 448 } |
446 | 449 |
447 // inverse transform column vectors | 450 // inverse transform column vectors |
448 for (i = 0; i < 8; ++i) { | 451 for (i = 0; i < 8; ++i) { |
449 for (j = 0; j < 8; ++j) | 452 for (j = 0; j < 8; ++j) |
450 temp_in[j] = out[j * 8 + i]; | 453 temp_in[j] = out[j * 8 + i]; |
451 ht.cols(temp_in, temp_out); | 454 ht.cols(temp_in, temp_out); |
452 for (j = 0; j < 8; ++j) | 455 for (j = 0; j < 8; ++j) { |
453 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) | 456 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
454 + dest[j * stride + i]); | 457 ROUND_POWER_OF_TWO(temp_out[j], 5)); |
| 458 } |
455 } | 459 } |
456 } | 460 } |
457 | 461 |
458 void vp9_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 462 void vp9_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
459 tran_low_t out[8 * 8] = { 0 }; | 463 tran_low_t out[8 * 8] = { 0 }; |
460 tran_low_t *outptr = out; | 464 tran_low_t *outptr = out; |
461 int i, j; | 465 int i, j; |
462 tran_low_t temp_in[8], temp_out[8]; | 466 tran_low_t temp_in[8], temp_out[8]; |
463 | 467 |
464 // First transform rows | 468 // First transform rows |
465 // only first 4 row has non-zero coefs | 469 // only first 4 row has non-zero coefs |
466 for (i = 0; i < 4; ++i) { | 470 for (i = 0; i < 4; ++i) { |
467 idct8(input, outptr); | 471 idct8(input, outptr); |
468 input += 8; | 472 input += 8; |
469 outptr += 8; | 473 outptr += 8; |
470 } | 474 } |
471 | 475 |
472 // Then transform columns | 476 // Then transform columns |
473 for (i = 0; i < 8; ++i) { | 477 for (i = 0; i < 8; ++i) { |
474 for (j = 0; j < 8; ++j) | 478 for (j = 0; j < 8; ++j) |
475 temp_in[j] = out[j * 8 + i]; | 479 temp_in[j] = out[j * 8 + i]; |
476 idct8(temp_in, temp_out); | 480 idct8(temp_in, temp_out); |
477 for (j = 0; j < 8; ++j) | 481 for (j = 0; j < 8; ++j) { |
478 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) | 482 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
479 + dest[j * stride + i]); | 483 ROUND_POWER_OF_TWO(temp_out[j], 5)); |
| 484 } |
480 } | 485 } |
481 } | 486 } |
482 | 487 |
483 static void idct16(const tran_low_t *input, tran_low_t *output) { | 488 static void idct16(const tran_low_t *input, tran_low_t *output) { |
484 tran_low_t step1[16], step2[16]; | 489 tran_low_t step1[16], step2[16]; |
485 tran_high_t temp1, temp2; | 490 tran_high_t temp1, temp2; |
486 | 491 |
487 // stage 1 | 492 // stage 1 |
488 step1[0] = input[0/2]; | 493 step1[0] = input[0/2]; |
489 step1[1] = input[16/2]; | 494 step1[1] = input[16/2]; |
(...skipping 17 matching lines...) |
507 step2[1] = step1[1]; | 512 step2[1] = step1[1]; |
508 step2[2] = step1[2]; | 513 step2[2] = step1[2]; |
509 step2[3] = step1[3]; | 514 step2[3] = step1[3]; |
510 step2[4] = step1[4]; | 515 step2[4] = step1[4]; |
511 step2[5] = step1[5]; | 516 step2[5] = step1[5]; |
512 step2[6] = step1[6]; | 517 step2[6] = step1[6]; |
513 step2[7] = step1[7]; | 518 step2[7] = step1[7]; |
514 | 519 |
515 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; | 520 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; |
516 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; | 521 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; |
517 step2[8] = dct_const_round_shift(temp1); | 522 step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8); |
518 step2[15] = dct_const_round_shift(temp2); | 523 step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8); |
519 | 524 |
520 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; | 525 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; |
521 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; | 526 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; |
522 step2[9] = dct_const_round_shift(temp1); | 527 step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); |
523 step2[14] = dct_const_round_shift(temp2); | 528 step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); |
524 | 529 |
525 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; | 530 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; |
526 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; | 531 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; |
527 step2[10] = dct_const_round_shift(temp1); | 532 step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); |
528 step2[13] = dct_const_round_shift(temp2); | 533 step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); |
529 | 534 |
530 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; | 535 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; |
531 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; | 536 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; |
532 step2[11] = dct_const_round_shift(temp1); | 537 step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); |
533 step2[12] = dct_const_round_shift(temp2); | 538 step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); |
534 | 539 |
535 // stage 3 | 540 // stage 3 |
536 step1[0] = step2[0]; | 541 step1[0] = step2[0]; |
537 step1[1] = step2[1]; | 542 step1[1] = step2[1]; |
538 step1[2] = step2[2]; | 543 step1[2] = step2[2]; |
539 step1[3] = step2[3]; | 544 step1[3] = step2[3]; |
540 | 545 |
541 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; | 546 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; |
542 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; | 547 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; |
543 step1[4] = dct_const_round_shift(temp1); | 548 step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8); |
544 step1[7] = dct_const_round_shift(temp2); | 549 step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8); |
545 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; | 550 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; |
546 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; | 551 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; |
547 step1[5] = dct_const_round_shift(temp1); | 552 step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); |
548 step1[6] = dct_const_round_shift(temp2); | 553 step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); |
549 | 554 |
550 step1[8] = step2[8] + step2[9]; | 555 step1[8] = WRAPLOW(step2[8] + step2[9], 8); |
551 step1[9] = step2[8] - step2[9]; | 556 step1[9] = WRAPLOW(step2[8] - step2[9], 8); |
552 step1[10] = -step2[10] + step2[11]; | 557 step1[10] = WRAPLOW(-step2[10] + step2[11], 8); |
553 step1[11] = step2[10] + step2[11]; | 558 step1[11] = WRAPLOW(step2[10] + step2[11], 8); |
554 step1[12] = step2[12] + step2[13]; | 559 step1[12] = WRAPLOW(step2[12] + step2[13], 8); |
555 step1[13] = step2[12] - step2[13]; | 560 step1[13] = WRAPLOW(step2[12] - step2[13], 8); |
556 step1[14] = -step2[14] + step2[15]; | 561 step1[14] = WRAPLOW(-step2[14] + step2[15], 8); |
557 step1[15] = step2[14] + step2[15]; | 562 step1[15] = WRAPLOW(step2[14] + step2[15], 8); |
558 | 563 |
559 // stage 4 | 564 // stage 4 |
560 temp1 = (step1[0] + step1[1]) * cospi_16_64; | 565 temp1 = (step1[0] + step1[1]) * cospi_16_64; |
561 temp2 = (step1[0] - step1[1]) * cospi_16_64; | 566 temp2 = (step1[0] - step1[1]) * cospi_16_64; |
562 step2[0] = dct_const_round_shift(temp1); | 567 step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8); |
563 step2[1] = dct_const_round_shift(temp2); | 568 step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8); |
564 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; | 569 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; |
565 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; | 570 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; |
566 step2[2] = dct_const_round_shift(temp1); | 571 step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8); |
567 step2[3] = dct_const_round_shift(temp2); | 572 step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8); |
568 step2[4] = step1[4] + step1[5]; | 573 step2[4] = WRAPLOW(step1[4] + step1[5], 8); |
569 step2[5] = step1[4] - step1[5]; | 574 step2[5] = WRAPLOW(step1[4] - step1[5], 8); |
570 step2[6] = -step1[6] + step1[7]; | 575 step2[6] = WRAPLOW(-step1[6] + step1[7], 8); |
571 step2[7] = step1[6] + step1[7]; | 576 step2[7] = WRAPLOW(step1[6] + step1[7], 8); |
572 | 577 |
573 step2[8] = step1[8]; | 578 step2[8] = step1[8]; |
574 step2[15] = step1[15]; | 579 step2[15] = step1[15]; |
575 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; | 580 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; |
576 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; | 581 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; |
577 step2[9] = dct_const_round_shift(temp1); | 582 step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); |
578 step2[14] = dct_const_round_shift(temp2); | 583 step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); |
579 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; | 584 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; |
580 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; | 585 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; |
581 step2[10] = dct_const_round_shift(temp1); | 586 step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); |
582 step2[13] = dct_const_round_shift(temp2); | 587 step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); |
583 step2[11] = step1[11]; | 588 step2[11] = step1[11]; |
584 step2[12] = step1[12]; | 589 step2[12] = step1[12]; |
585 | 590 |
586 // stage 5 | 591 // stage 5 |
587 step1[0] = step2[0] + step2[3]; | 592 step1[0] = WRAPLOW(step2[0] + step2[3], 8); |
588 step1[1] = step2[1] + step2[2]; | 593 step1[1] = WRAPLOW(step2[1] + step2[2], 8); |
589 step1[2] = step2[1] - step2[2]; | 594 step1[2] = WRAPLOW(step2[1] - step2[2], 8); |
590 step1[3] = step2[0] - step2[3]; | 595 step1[3] = WRAPLOW(step2[0] - step2[3], 8); |
591 step1[4] = step2[4]; | 596 step1[4] = step2[4]; |
592 temp1 = (step2[6] - step2[5]) * cospi_16_64; | 597 temp1 = (step2[6] - step2[5]) * cospi_16_64; |
593 temp2 = (step2[5] + step2[6]) * cospi_16_64; | 598 temp2 = (step2[5] + step2[6]) * cospi_16_64; |
594 step1[5] = dct_const_round_shift(temp1); | 599 step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); |
595 step1[6] = dct_const_round_shift(temp2); | 600 step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); |
596 step1[7] = step2[7]; | 601 step1[7] = step2[7]; |
597 | 602 |
598 step1[8] = step2[8] + step2[11]; | 603 step1[8] = WRAPLOW(step2[8] + step2[11], 8); |
599 step1[9] = step2[9] + step2[10]; | 604 step1[9] = WRAPLOW(step2[9] + step2[10], 8); |
600 step1[10] = step2[9] - step2[10]; | 605 step1[10] = WRAPLOW(step2[9] - step2[10], 8); |
601 step1[11] = step2[8] - step2[11]; | 606 step1[11] = WRAPLOW(step2[8] - step2[11], 8); |
602 step1[12] = -step2[12] + step2[15]; | 607 step1[12] = WRAPLOW(-step2[12] + step2[15], 8); |
603 step1[13] = -step2[13] + step2[14]; | 608 step1[13] = WRAPLOW(-step2[13] + step2[14], 8); |
604 step1[14] = step2[13] + step2[14]; | 609 step1[14] = WRAPLOW(step2[13] + step2[14], 8); |
605 step1[15] = step2[12] + step2[15]; | 610 step1[15] = WRAPLOW(step2[12] + step2[15], 8); |
606 | 611 |
607 // stage 6 | 612 // stage 6 |
608 step2[0] = step1[0] + step1[7]; | 613 step2[0] = WRAPLOW(step1[0] + step1[7], 8); |
609 step2[1] = step1[1] + step1[6]; | 614 step2[1] = WRAPLOW(step1[1] + step1[6], 8); |
610 step2[2] = step1[2] + step1[5]; | 615 step2[2] = WRAPLOW(step1[2] + step1[5], 8); |
611 step2[3] = step1[3] + step1[4]; | 616 step2[3] = WRAPLOW(step1[3] + step1[4], 8); |
612 step2[4] = step1[3] - step1[4]; | 617 step2[4] = WRAPLOW(step1[3] - step1[4], 8); |
613 step2[5] = step1[2] - step1[5]; | 618 step2[5] = WRAPLOW(step1[2] - step1[5], 8); |
614 step2[6] = step1[1] - step1[6]; | 619 step2[6] = WRAPLOW(step1[1] - step1[6], 8); |
615 step2[7] = step1[0] - step1[7]; | 620 step2[7] = WRAPLOW(step1[0] - step1[7], 8); |
616 step2[8] = step1[8]; | 621 step2[8] = step1[8]; |
617 step2[9] = step1[9]; | 622 step2[9] = step1[9]; |
618 temp1 = (-step1[10] + step1[13]) * cospi_16_64; | 623 temp1 = (-step1[10] + step1[13]) * cospi_16_64; |
619 temp2 = (step1[10] + step1[13]) * cospi_16_64; | 624 temp2 = (step1[10] + step1[13]) * cospi_16_64; |
620 step2[10] = dct_const_round_shift(temp1); | 625 step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); |
621 step2[13] = dct_const_round_shift(temp2); | 626 step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); |
622 temp1 = (-step1[11] + step1[12]) * cospi_16_64; | 627 temp1 = (-step1[11] + step1[12]) * cospi_16_64; |
623 temp2 = (step1[11] + step1[12]) * cospi_16_64; | 628 temp2 = (step1[11] + step1[12]) * cospi_16_64; |
624 step2[11] = dct_const_round_shift(temp1); | 629 step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); |
625 step2[12] = dct_const_round_shift(temp2); | 630 step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); |
626 step2[14] = step1[14]; | 631 step2[14] = step1[14]; |
627 step2[15] = step1[15]; | 632 step2[15] = step1[15]; |
628 | 633 |
629 // stage 7 | 634 // stage 7 |
630 output[0] = step2[0] + step2[15]; | 635 output[0] = WRAPLOW(step2[0] + step2[15], 8); |
631 output[1] = step2[1] + step2[14]; | 636 output[1] = WRAPLOW(step2[1] + step2[14], 8); |
632 output[2] = step2[2] + step2[13]; | 637 output[2] = WRAPLOW(step2[2] + step2[13], 8); |
633 output[3] = step2[3] + step2[12]; | 638 output[3] = WRAPLOW(step2[3] + step2[12], 8); |
634 output[4] = step2[4] + step2[11]; | 639 output[4] = WRAPLOW(step2[4] + step2[11], 8); |
635 output[5] = step2[5] + step2[10]; | 640 output[5] = WRAPLOW(step2[5] + step2[10], 8); |
636 output[6] = step2[6] + step2[9]; | 641 output[6] = WRAPLOW(step2[6] + step2[9], 8); |
637 output[7] = step2[7] + step2[8]; | 642 output[7] = WRAPLOW(step2[7] + step2[8], 8); |
638 output[8] = step2[7] - step2[8]; | 643 output[8] = WRAPLOW(step2[7] - step2[8], 8); |
639 output[9] = step2[6] - step2[9]; | 644 output[9] = WRAPLOW(step2[6] - step2[9], 8); |
640 output[10] = step2[5] - step2[10]; | 645 output[10] = WRAPLOW(step2[5] - step2[10], 8); |
641 output[11] = step2[4] - step2[11]; | 646 output[11] = WRAPLOW(step2[4] - step2[11], 8); |
642 output[12] = step2[3] - step2[12]; | 647 output[12] = WRAPLOW(step2[3] - step2[12], 8); |
643 output[13] = step2[2] - step2[13]; | 648 output[13] = WRAPLOW(step2[2] - step2[13], 8); |
644 output[14] = step2[1] - step2[14]; | 649 output[14] = WRAPLOW(step2[1] - step2[14], 8); |
645 output[15] = step2[0] - step2[15]; | 650 output[15] = WRAPLOW(step2[0] - step2[15], 8); |
646 } | 651 } |
647 | 652 |
// vp9_idct16x16_256_add_c: 16x16 inverse DCT whose result is added into an
// 8-bit destination block.
//   input  - coefficients, read 16 at a time for each of 16 rows
//   dest   - pixels updated in place
//   stride - distance between consecutive dest rows, in elements
// Pass 1 runs idct16 over every row into out[]. Pass 2 gathers each column of
// out[] into temp_in, runs idct16 again, rounds with
// ROUND_POWER_OF_TWO(.., 6) and accumulates the result into dest.
// Layout note: rows are a side-by-side review diff (OLD | NEW). OLD adds the
// rounded value to dest and calls clip_pixel(); NEW calls
// clip_pixel_add(dest, rounded value) and braces the inner loop.
648 void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, | 653 void vp9_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, |
649 int stride) { | 654 int stride) { |
650 tran_low_t out[16 * 16]; | 655 tran_low_t out[16 * 16]; |
651 tran_low_t *outptr = out; | 656 tran_low_t *outptr = out; |
652 int i, j; | 657 int i, j; |
653 tran_low_t temp_in[16], temp_out[16]; | 658 tran_low_t temp_in[16], temp_out[16]; |
654 | 659 |
655 // First transform rows | 660 // First transform rows |
656 for (i = 0; i < 16; ++i) { | 661 for (i = 0; i < 16; ++i) { |
657 idct16(input, outptr); | 662 idct16(input, outptr); |
658 input += 16; | 663 input += 16; |
659 outptr += 16; | 664 outptr += 16; |
660 } | 665 } |
661 | 666 |
662 // Then transform columns | 667 // Then transform columns |
663 for (i = 0; i < 16; ++i) { | 668 for (i = 0; i < 16; ++i) { |
// out[] is row-major, so column i is read with a step of 16.
664 for (j = 0; j < 16; ++j) | 669 for (j = 0; j < 16; ++j) |
665 temp_in[j] = out[j * 16 + i]; | 670 temp_in[j] = out[j * 16 + i]; |
666 idct16(temp_in, temp_out); | 671 idct16(temp_in, temp_out); |
667 for (j = 0; j < 16; ++j) | 672 for (j = 0; j < 16; ++j) { |
668 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) | 673 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
669 + dest[j * stride + i]); | 674 ROUND_POWER_OF_TWO(temp_out[j], 6)); |
| 675 } |
670 } | 676 } |
671 } | 677 } |
672 | 678 |
// iadst16: one-dimensional 16-point kernel paired with idct16 in the ADST
// entries of IHT_16.
//   input  - coefficients, read only (const); loaded into x0, x1, ... in a
//            permuted order (input[15], input[0], input[13], ...)
//   output - receives the 16 results output[0..15]
// All intermediates are tran_high_t. Each stage forms s* terms, either copies
// or products with cospi_*_64 constants, and then recombines them into x* as
// sums and differences; product-based terms are reduced with
// dct_const_round_shift().
// Layout note: each row is a side-by-side review diff (OLD | NEW). In the
// visible rows the NEW column wraps every x* assignment and every output in
// WRAPLOW(..., 8), and routes the plain stage-3 sums through check_range().
// Most of the input loads and the first s0..s6 products are elided by the
// diff viewer ("skipping 31 matching lines").
673 static void iadst16(const tran_low_t *input, tran_low_t *output) { | 679 static void iadst16(const tran_low_t *input, tran_low_t *output) { |
674 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; | 680 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; |
675 tran_high_t s9, s10, s11, s12, s13, s14, s15; | 681 tran_high_t s9, s10, s11, s12, s13, s14, s15; |
676 | 682 |
677 tran_high_t x0 = input[15]; | 683 tran_high_t x0 = input[15]; |
678 tran_high_t x1 = input[0]; | 684 tran_high_t x1 = input[0]; |
679 tran_high_t x2 = input[13]; | 685 tran_high_t x2 = input[13]; |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
711 s7 = x6 * cospi_19_64 - x7 * cospi_13_64; | 717 s7 = x6 * cospi_19_64 - x7 * cospi_13_64; |
712 s8 = x8 * cospi_17_64 + x9 * cospi_15_64; | 718 s8 = x8 * cospi_17_64 + x9 * cospi_15_64; |
713 s9 = x8 * cospi_15_64 - x9 * cospi_17_64; | 719 s9 = x8 * cospi_15_64 - x9 * cospi_17_64; |
714 s10 = x10 * cospi_21_64 + x11 * cospi_11_64; | 720 s10 = x10 * cospi_21_64 + x11 * cospi_11_64; |
715 s11 = x10 * cospi_11_64 - x11 * cospi_21_64; | 721 s11 = x10 * cospi_11_64 - x11 * cospi_21_64; |
716 s12 = x12 * cospi_25_64 + x13 * cospi_7_64; | 722 s12 = x12 * cospi_25_64 + x13 * cospi_7_64; |
717 s13 = x12 * cospi_7_64 - x13 * cospi_25_64; | 723 s13 = x12 * cospi_7_64 - x13 * cospi_25_64; |
718 s14 = x14 * cospi_29_64 + x15 * cospi_3_64; | 724 s14 = x14 * cospi_29_64 + x15 * cospi_3_64; |
719 s15 = x14 * cospi_3_64 - x15 * cospi_29_64; | 725 s15 = x14 * cospi_3_64 - x15 * cospi_29_64; |
720 | 726 |
// Combine s[k] with s[k+8]: sums go to x0..x7, differences to x8..x15.
721 x0 = dct_const_round_shift(s0 + s8); | 727 x0 = WRAPLOW(dct_const_round_shift(s0 + s8), 8); |
722 x1 = dct_const_round_shift(s1 + s9); | 728 x1 = WRAPLOW(dct_const_round_shift(s1 + s9), 8); |
723 x2 = dct_const_round_shift(s2 + s10); | 729 x2 = WRAPLOW(dct_const_round_shift(s2 + s10), 8); |
724 x3 = dct_const_round_shift(s3 + s11); | 730 x3 = WRAPLOW(dct_const_round_shift(s3 + s11), 8); |
725 x4 = dct_const_round_shift(s4 + s12); | 731 x4 = WRAPLOW(dct_const_round_shift(s4 + s12), 8); |
726 x5 = dct_const_round_shift(s5 + s13); | 732 x5 = WRAPLOW(dct_const_round_shift(s5 + s13), 8); |
727 x6 = dct_const_round_shift(s6 + s14); | 733 x6 = WRAPLOW(dct_const_round_shift(s6 + s14), 8); |
728 x7 = dct_const_round_shift(s7 + s15); | 734 x7 = WRAPLOW(dct_const_round_shift(s7 + s15), 8); |
729 x8 = dct_const_round_shift(s0 - s8); | 735 x8 = WRAPLOW(dct_const_round_shift(s0 - s8), 8); |
730 x9 = dct_const_round_shift(s1 - s9); | 736 x9 = WRAPLOW(dct_const_round_shift(s1 - s9), 8); |
731 x10 = dct_const_round_shift(s2 - s10); | 737 x10 = WRAPLOW(dct_const_round_shift(s2 - s10), 8); |
732 x11 = dct_const_round_shift(s3 - s11); | 738 x11 = WRAPLOW(dct_const_round_shift(s3 - s11), 8); |
733 x12 = dct_const_round_shift(s4 - s12); | 739 x12 = WRAPLOW(dct_const_round_shift(s4 - s12), 8); |
734 x13 = dct_const_round_shift(s5 - s13); | 740 x13 = WRAPLOW(dct_const_round_shift(s5 - s13), 8); |
735 x14 = dct_const_round_shift(s6 - s14); | 741 x14 = WRAPLOW(dct_const_round_shift(s6 - s14), 8); |
736 x15 = dct_const_round_shift(s7 - s15); | 742 x15 = WRAPLOW(dct_const_round_shift(s7 - s15), 8); |
737 | 743 |
738 // stage 2 | 744 // stage 2 |
// s0..s7 are plain copies, so x0..x7 below need no round shift; s8..s15 are
// products and are round-shifted when recombined.
739 s0 = x0; | 745 s0 = x0; |
740 s1 = x1; | 746 s1 = x1; |
741 s2 = x2; | 747 s2 = x2; |
742 s3 = x3; | 748 s3 = x3; |
743 s4 = x4; | 749 s4 = x4; |
744 s5 = x5; | 750 s5 = x5; |
745 s6 = x6; | 751 s6 = x6; |
746 s7 = x7; | 752 s7 = x7; |
747 s8 = x8 * cospi_4_64 + x9 * cospi_28_64; | 753 s8 = x8 * cospi_4_64 + x9 * cospi_28_64; |
748 s9 = x8 * cospi_28_64 - x9 * cospi_4_64; | 754 s9 = x8 * cospi_28_64 - x9 * cospi_4_64; |
749 s10 = x10 * cospi_20_64 + x11 * cospi_12_64; | 755 s10 = x10 * cospi_20_64 + x11 * cospi_12_64; |
750 s11 = x10 * cospi_12_64 - x11 * cospi_20_64; | 756 s11 = x10 * cospi_12_64 - x11 * cospi_20_64; |
751 s12 = - x12 * cospi_28_64 + x13 * cospi_4_64; | 757 s12 = - x12 * cospi_28_64 + x13 * cospi_4_64; |
752 s13 = x12 * cospi_4_64 + x13 * cospi_28_64; | 758 s13 = x12 * cospi_4_64 + x13 * cospi_28_64; |
753 s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; | 759 s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; |
754 s15 = x14 * cospi_20_64 + x15 * cospi_12_64; | 760 s15 = x14 * cospi_20_64 + x15 * cospi_12_64; |
755 | 761 |
756 x0 = s0 + s4; | 762 x0 = WRAPLOW(s0 + s4, 8); |
757 x1 = s1 + s5; | 763 x1 = WRAPLOW(s1 + s5, 8); |
758 x2 = s2 + s6; | 764 x2 = WRAPLOW(s2 + s6, 8); |
759 x3 = s3 + s7; | 765 x3 = WRAPLOW(s3 + s7, 8); |
760 x4 = s0 - s4; | 766 x4 = WRAPLOW(s0 - s4, 8); |
761 x5 = s1 - s5; | 767 x5 = WRAPLOW(s1 - s5, 8); |
762 x6 = s2 - s6; | 768 x6 = WRAPLOW(s2 - s6, 8); |
763 x7 = s3 - s7; | 769 x7 = WRAPLOW(s3 - s7, 8); |
764 x8 = dct_const_round_shift(s8 + s12); | 770 x8 = WRAPLOW(dct_const_round_shift(s8 + s12), 8); |
765 x9 = dct_const_round_shift(s9 + s13); | 771 x9 = WRAPLOW(dct_const_round_shift(s9 + s13), 8); |
766 x10 = dct_const_round_shift(s10 + s14); | 772 x10 = WRAPLOW(dct_const_round_shift(s10 + s14), 8); |
767 x11 = dct_const_round_shift(s11 + s15); | 773 x11 = WRAPLOW(dct_const_round_shift(s11 + s15), 8); |
768 x12 = dct_const_round_shift(s8 - s12); | 774 x12 = WRAPLOW(dct_const_round_shift(s8 - s12), 8); |
769 x13 = dct_const_round_shift(s9 - s13); | 775 x13 = WRAPLOW(dct_const_round_shift(s9 - s13), 8); |
770 x14 = dct_const_round_shift(s10 - s14); | 776 x14 = WRAPLOW(dct_const_round_shift(s10 - s14), 8); |
771 x15 = dct_const_round_shift(s11 - s15); | 777 x15 = WRAPLOW(dct_const_round_shift(s11 - s15), 8); |
772 | 778 |
773 // stage 3 | 779 // stage 3 |
774 s0 = x0; | 780 s0 = x0; |
775 s1 = x1; | 781 s1 = x1; |
776 s2 = x2; | 782 s2 = x2; |
777 s3 = x3; | 783 s3 = x3; |
778 s4 = x4 * cospi_8_64 + x5 * cospi_24_64; | 784 s4 = x4 * cospi_8_64 + x5 * cospi_24_64; |
779 s5 = x4 * cospi_24_64 - x5 * cospi_8_64; | 785 s5 = x4 * cospi_24_64 - x5 * cospi_8_64; |
780 s6 = - x6 * cospi_24_64 + x7 * cospi_8_64; | 786 s6 = - x6 * cospi_24_64 + x7 * cospi_8_64; |
781 s7 = x6 * cospi_8_64 + x7 * cospi_24_64; | 787 s7 = x6 * cospi_8_64 + x7 * cospi_24_64; |
782 s8 = x8; | 788 s8 = x8; |
783 s9 = x9; | 789 s9 = x9; |
784 s10 = x10; | 790 s10 = x10; |
785 s11 = x11; | 791 s11 = x11; |
786 s12 = x12 * cospi_8_64 + x13 * cospi_24_64; | 792 s12 = x12 * cospi_8_64 + x13 * cospi_24_64; |
787 s13 = x12 * cospi_24_64 - x13 * cospi_8_64; | 793 s13 = x12 * cospi_24_64 - x13 * cospi_8_64; |
788 s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; | 794 s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; |
789 s15 = x14 * cospi_8_64 + x15 * cospi_24_64; | 795 s15 = x14 * cospi_8_64 + x15 * cospi_24_64; |
790 | 796 |
// NOTE(review): check_range() is defined outside this chunk; it appears only
// in the NEW column and only on the sums that are not round-shifted.
791 x0 = s0 + s2; | 797 x0 = WRAPLOW(check_range(s0 + s2), 8); |
792 x1 = s1 + s3; | 798 x1 = WRAPLOW(check_range(s1 + s3), 8); |
793 x2 = s0 - s2; | 799 x2 = WRAPLOW(check_range(s0 - s2), 8); |
794 x3 = s1 - s3; | 800 x3 = WRAPLOW(check_range(s1 - s3), 8); |
795 x4 = dct_const_round_shift(s4 + s6); | 801 x4 = WRAPLOW(dct_const_round_shift(s4 + s6), 8); |
796 x5 = dct_const_round_shift(s5 + s7); | 802 x5 = WRAPLOW(dct_const_round_shift(s5 + s7), 8); |
797 x6 = dct_const_round_shift(s4 - s6); | 803 x6 = WRAPLOW(dct_const_round_shift(s4 - s6), 8); |
798 x7 = dct_const_round_shift(s5 - s7); | 804 x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8); |
799 x8 = s8 + s10; | 805 x8 = WRAPLOW(check_range(s8 + s10), 8); |
800 x9 = s9 + s11; | 806 x9 = WRAPLOW(check_range(s9 + s11), 8); |
801 x10 = s8 - s10; | 807 x10 = WRAPLOW(check_range(s8 - s10), 8); |
802 x11 = s9 - s11; | 808 x11 = WRAPLOW(check_range(s9 - s11), 8); |
803 x12 = dct_const_round_shift(s12 + s14); | 809 x12 = WRAPLOW(dct_const_round_shift(s12 + s14), 8); |
804 x13 = dct_const_round_shift(s13 + s15); | 810 x13 = WRAPLOW(dct_const_round_shift(s13 + s15), 8); |
805 x14 = dct_const_round_shift(s12 - s14); | 811 x14 = WRAPLOW(dct_const_round_shift(s12 - s14), 8); |
806 x15 = dct_const_round_shift(s13 - s15); | 812 x15 = WRAPLOW(dct_const_round_shift(s13 - s15), 8); |
807 | 813 |
808 // stage 4 | 814 // stage 4 |
// Only x2, x3, x6, x7, x10, x11, x14 and x15 are rescaled by +/-cospi_16_64;
// the other eight x values pass to the output unchanged.
809 s2 = (- cospi_16_64) * (x2 + x3); | 815 s2 = (- cospi_16_64) * (x2 + x3); |
810 s3 = cospi_16_64 * (x2 - x3); | 816 s3 = cospi_16_64 * (x2 - x3); |
811 s6 = cospi_16_64 * (x6 + x7); | 817 s6 = cospi_16_64 * (x6 + x7); |
812 s7 = cospi_16_64 * (- x6 + x7); | 818 s7 = cospi_16_64 * (- x6 + x7); |
813 s10 = cospi_16_64 * (x10 + x11); | 819 s10 = cospi_16_64 * (x10 + x11); |
814 s11 = cospi_16_64 * (- x10 + x11); | 820 s11 = cospi_16_64 * (- x10 + x11); |
815 s14 = (- cospi_16_64) * (x14 + x15); | 821 s14 = (- cospi_16_64) * (x14 + x15); |
816 s15 = cospi_16_64 * (x14 - x15); | 822 s15 = cospi_16_64 * (x14 - x15); |
817 | 823 |
818 x2 = dct_const_round_shift(s2); | 824 x2 = WRAPLOW(dct_const_round_shift(s2), 8); |
819 x3 = dct_const_round_shift(s3); | 825 x3 = WRAPLOW(dct_const_round_shift(s3), 8); |
820 x6 = dct_const_round_shift(s6); | 826 x6 = WRAPLOW(dct_const_round_shift(s6), 8); |
821 x7 = dct_const_round_shift(s7); | 827 x7 = WRAPLOW(dct_const_round_shift(s7), 8); |
822 x10 = dct_const_round_shift(s10); | 828 x10 = WRAPLOW(dct_const_round_shift(s10), 8); |
823 x11 = dct_const_round_shift(s11); | 829 x11 = WRAPLOW(dct_const_round_shift(s11), 8); |
824 x14 = dct_const_round_shift(s14); | 830 x14 = WRAPLOW(dct_const_round_shift(s14), 8); |
825 x15 = dct_const_round_shift(s15); | 831 x15 = WRAPLOW(dct_const_round_shift(s15), 8); |
826 | 832 |
// Results are written in permuted order; x8, x4, x13 and x1 are negated.
827 output[0] = x0; | 833 output[0] = WRAPLOW(x0, 8); |
828 output[1] = -x8; | 834 output[1] = WRAPLOW(-x8, 8); |
829 output[2] = x12; | 835 output[2] = WRAPLOW(x12, 8); |
830 output[3] = -x4; | 836 output[3] = WRAPLOW(-x4, 8); |
831 output[4] = x6; | 837 output[4] = WRAPLOW(x6, 8); |
832 output[5] = x14; | 838 output[5] = WRAPLOW(x14, 8); |
833 output[6] = x10; | 839 output[6] = WRAPLOW(x10, 8); |
834 output[7] = x2; | 840 output[7] = WRAPLOW(x2, 8); |
835 output[8] = x3; | 841 output[8] = WRAPLOW(x3, 8); |
836 output[9] = x11; | 842 output[9] = WRAPLOW(x11, 8); |
837 output[10] = x15; | 843 output[10] = WRAPLOW(x15, 8); |
838 output[11] = x7; | 844 output[11] = WRAPLOW(x7, 8); |
839 output[12] = x5; | 845 output[12] = WRAPLOW(x5, 8); |
840 output[13] = -x13; | 846 output[13] = WRAPLOW(-x13, 8); |
841 output[14] = x9; | 847 output[14] = WRAPLOW(x9, 8); |
842 output[15] = -x1; | 848 output[15] = WRAPLOW(-x1, 8); |
843 } | 849 } |
844 | 850 |
// IHT_16: table of 1-D kernel pairs for the 16x16 hybrid inverse transform,
// indexed by tx_type (see vp9_iht16x16_256_add_c). The trailing comment on
// each entry names the index value it corresponds to.
// NOTE(review): transform_2d is declared outside this chunk, so which
// initializer feeds .rows and which feeds .cols cannot be confirmed here.
// Layout note: rows are a side-by-side review diff (OLD | NEW); both columns
// are identical for this table.
845 static const transform_2d IHT_16[] = { | 851 static const transform_2d IHT_16[] = { |
846 { idct16, idct16 }, // DCT_DCT = 0 | 852 { idct16, idct16 }, // DCT_DCT = 0 |
847 { iadst16, idct16 }, // ADST_DCT = 1 | 853 { iadst16, idct16 }, // ADST_DCT = 1 |
848 { idct16, iadst16 }, // DCT_ADST = 2 | 854 { idct16, iadst16 }, // DCT_ADST = 2 |
849 { iadst16, iadst16 } // ADST_ADST = 3 | 855 { iadst16, iadst16 } // ADST_ADST = 3 |
850 }; | 856 }; |
851 | 857 |
// vp9_iht16x16_256_add_c: 16x16 hybrid inverse transform whose result is
// added into an 8-bit destination block.
//   input   - coefficients, read 16 at a time for each of 16 rows
//   dest    - pixels updated in place
//   stride  - distance between consecutive dest rows, in elements
//   tx_type - selects the kernel pair; used directly as an index into IHT_16
//             (4 entries) with no range check in this function
// ht.rows is applied to each row into out[]; ht.cols is then applied to each
// column, and the result is rounded with ROUND_POWER_OF_TWO(.., 6) and
// accumulated into dest.
// Layout note: rows are a side-by-side review diff (OLD | NEW). OLD adds the
// rounded value to dest and calls clip_pixel(); NEW calls
// clip_pixel_add(dest, rounded value) and braces the inner loop.
852 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, | 858 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
853 int tx_type) { | 859 int tx_type) { |
854 int i, j; | 860 int i, j; |
855 tran_low_t out[16 * 16]; | 861 tran_low_t out[16 * 16]; |
856 tran_low_t *outptr = out; | 862 tran_low_t *outptr = out; |
857 tran_low_t temp_in[16], temp_out[16]; | 863 tran_low_t temp_in[16], temp_out[16]; |
858 const transform_2d ht = IHT_16[tx_type]; | 864 const transform_2d ht = IHT_16[tx_type]; |
859 | 865 |
860 // Rows | 866 // Rows |
861 for (i = 0; i < 16; ++i) { | 867 for (i = 0; i < 16; ++i) { |
862 ht.rows(input, outptr); | 868 ht.rows(input, outptr); |
863 input += 16; | 869 input += 16; |
864 outptr += 16; | 870 outptr += 16; |
865 } | 871 } |
866 | 872 |
867 // Columns | 873 // Columns |
868 for (i = 0; i < 16; ++i) { | 874 for (i = 0; i < 16; ++i) { |
// out[] is row-major, so column i is read with a step of 16.
869 for (j = 0; j < 16; ++j) | 875 for (j = 0; j < 16; ++j) |
870 temp_in[j] = out[j * 16 + i]; | 876 temp_in[j] = out[j * 16 + i]; |
871 ht.cols(temp_in, temp_out); | 877 ht.cols(temp_in, temp_out); |
872 for (j = 0; j < 16; ++j) | 878 for (j = 0; j < 16; ++j) { |
873 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) | 879 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
874 + dest[j * stride + i]); | 880 ROUND_POWER_OF_TWO(temp_out[j], 6)); |
| 881 } |
875 } | 882 } |
876 } | 883 } |
877 | 884 |
// vp9_idct16x16_10_add_c: 16x16 inverse DCT + add for blocks whose non-zero
// coefficients are confined to the upper-left 4x4 area (per the comment on
// the row pass below).
//   input  - coefficients; only the first 4 rows of 16 are read
//   dest   - pixels updated in place
//   stride - distance between consecutive dest rows, in elements
// out[] starts zero-filled, so the 12 rows skipped by the row pass stay zero.
// The column pass still covers all 16 columns; each result is rounded with
// ROUND_POWER_OF_TWO(.., 6) and accumulated into dest.
// Layout note: rows are a side-by-side review diff (OLD | NEW). OLD adds the
// rounded value to dest and calls clip_pixel(); NEW calls
// clip_pixel_add(dest, rounded value) and braces the inner loop.
878 void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, | 885 void vp9_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, |
879 int stride) { | 886 int stride) { |
880 tran_low_t out[16 * 16] = { 0 }; | 887 tran_low_t out[16 * 16] = { 0 }; |
881 tran_low_t *outptr = out; | 888 tran_low_t *outptr = out; |
882 int i, j; | 889 int i, j; |
883 tran_low_t temp_in[16], temp_out[16]; | 890 tran_low_t temp_in[16], temp_out[16]; |
884 | 891 |
885 // First transform rows. Since all non-zero dct coefficients are in | 892 // First transform rows. Since all non-zero dct coefficients are in |
886 // upper-left 4x4 area, we only need to calculate first 4 rows here. | 893 // upper-left 4x4 area, we only need to calculate first 4 rows here. |
887 for (i = 0; i < 4; ++i) { | 894 for (i = 0; i < 4; ++i) { |
888 idct16(input, outptr); | 895 idct16(input, outptr); |
889 input += 16; | 896 input += 16; |
890 outptr += 16; | 897 outptr += 16; |
891 } | 898 } |
892 | 899 |
893 // Then transform columns | 900 // Then transform columns |
894 for (i = 0; i < 16; ++i) { | 901 for (i = 0; i < 16; ++i) { |
895 for (j = 0; j < 16; ++j) | 902 for (j = 0; j < 16; ++j) |
896 temp_in[j] = out[j*16 + i]; | 903 temp_in[j] = out[j*16 + i]; |
897 idct16(temp_in, temp_out); | 904 idct16(temp_in, temp_out); |
898 for (j = 0; j < 16; ++j) | 905 for (j = 0; j < 16; ++j) { |
899 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) | 906 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
900 + dest[j * stride + i]); | 907 ROUND_POWER_OF_TWO(temp_out[j], 6)); |
| 908 } |
901 } | 909 } |
902 } | 910 } |
903 | 911 |
// vp9_idct16x16_1_add_c: 16x16 inverse-transform shortcut that reads only
// input[0].
//   input  - coefficients; only input[0] is used
//   dest   - pixels updated in place
//   stride - distance between consecutive dest rows, in elements
// input[0] is scaled by cospi_16_64 and reduced with dct_const_round_shift()
// twice, then rounded with ROUND_POWER_OF_TWO(out, 6) into a1. The same a1 is
// added to every pixel of the 16x16 block.
// Layout note: rows are a side-by-side review diff (OLD | NEW). NEW wraps
// both scaling steps in WRAPLOW(..., 8) and replaces
// clip_pixel(dest[i] + a1) with clip_pixel_add(dest[i], a1).
904 void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 912 void vp9_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
905 int i, j; | 913 int i, j; |
906 tran_high_t a1; | 914 tran_high_t a1; |
907 tran_low_t out = dct_const_round_shift(input[0] * cospi_16_64); | 915 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
908 out = dct_const_round_shift(out * cospi_16_64); | 916 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
909 a1 = ROUND_POWER_OF_TWO(out, 6); | 917 a1 = ROUND_POWER_OF_TWO(out, 6); |
// dest is advanced by stride once per row, so dest[i] addresses the current row.
910 for (j = 0; j < 16; ++j) { | 918 for (j = 0; j < 16; ++j) { |
911 for (i = 0; i < 16; ++i) | 919 for (i = 0; i < 16; ++i) |
912 dest[i] = clip_pixel(dest[i] + a1); | 920 dest[i] = clip_pixel_add(dest[i], a1); |
913 dest += stride; | 921 dest += stride; |
914 } | 922 } |
915 } | 923 } |
916 | 924 |
917 static void idct32(const tran_low_t *input, tran_low_t *output) { | 925 static void idct32(const tran_low_t *input, tran_low_t *output) { |
918 tran_low_t step1[32], step2[32]; | 926 tran_low_t step1[32], step2[32]; |
919 tran_high_t temp1, temp2; | 927 tran_high_t temp1, temp2; |
920 | 928 |
921 // stage 1 | 929 // stage 1 |
922 step1[0] = input[0]; | 930 step1[0] = input[0]; |
923 step1[1] = input[16]; | 931 step1[1] = input[16]; |
924 step1[2] = input[8]; | 932 step1[2] = input[8]; |
925 step1[3] = input[24]; | 933 step1[3] = input[24]; |
926 step1[4] = input[4]; | 934 step1[4] = input[4]; |
927 step1[5] = input[20]; | 935 step1[5] = input[20]; |
928 step1[6] = input[12]; | 936 step1[6] = input[12]; |
929 step1[7] = input[28]; | 937 step1[7] = input[28]; |
930 step1[8] = input[2]; | 938 step1[8] = input[2]; |
931 step1[9] = input[18]; | 939 step1[9] = input[18]; |
932 step1[10] = input[10]; | 940 step1[10] = input[10]; |
933 step1[11] = input[26]; | 941 step1[11] = input[26]; |
934 step1[12] = input[6]; | 942 step1[12] = input[6]; |
935 step1[13] = input[22]; | 943 step1[13] = input[22]; |
936 step1[14] = input[14]; | 944 step1[14] = input[14]; |
937 step1[15] = input[30]; | 945 step1[15] = input[30]; |
938 | 946 |
939 temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; | 947 temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; |
940 temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; | 948 temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; |
941 step1[16] = dct_const_round_shift(temp1); | 949 step1[16] = WRAPLOW(dct_const_round_shift(temp1), 8); |
942 step1[31] = dct_const_round_shift(temp2); | 950 step1[31] = WRAPLOW(dct_const_round_shift(temp2), 8); |
943 | 951 |
944 temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; | 952 temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; |
945 temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; | 953 temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; |
946 step1[17] = dct_const_round_shift(temp1); | 954 step1[17] = WRAPLOW(dct_const_round_shift(temp1), 8); |
947 step1[30] = dct_const_round_shift(temp2); | 955 step1[30] = WRAPLOW(dct_const_round_shift(temp2), 8); |
948 | 956 |
949 temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; | 957 temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; |
950 temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; | 958 temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; |
951 step1[18] = dct_const_round_shift(temp1); | 959 step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8); |
952 step1[29] = dct_const_round_shift(temp2); | 960 step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8); |
953 | 961 |
954 temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; | 962 temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; |
955 temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; | 963 temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; |
956 step1[19] = dct_const_round_shift(temp1); | 964 step1[19] = WRAPLOW(dct_const_round_shift(temp1), 8); |
957 step1[28] = dct_const_round_shift(temp2); | 965 step1[28] = WRAPLOW(dct_const_round_shift(temp2), 8); |
958 | 966 |
959 temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; | 967 temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; |
960 temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; | 968 temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; |
961 step1[20] = dct_const_round_shift(temp1); | 969 step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8); |
962 step1[27] = dct_const_round_shift(temp2); | 970 step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8); |
963 | 971 |
964 temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; | 972 temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; |
965 temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; | 973 temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; |
966 step1[21] = dct_const_round_shift(temp1); | 974 step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8); |
967 step1[26] = dct_const_round_shift(temp2); | 975 step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8); |
968 | 976 |
969 temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; | 977 temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; |
970 temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; | 978 temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; |
971 step1[22] = dct_const_round_shift(temp1); | 979 step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8); |
972 step1[25] = dct_const_round_shift(temp2); | 980 step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8); |
973 | 981 |
974 temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; | 982 temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; |
975 temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; | 983 temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; |
976 step1[23] = dct_const_round_shift(temp1); | 984 step1[23] = WRAPLOW(dct_const_round_shift(temp1), 8); |
977 step1[24] = dct_const_round_shift(temp2); | 985 step1[24] = WRAPLOW(dct_const_round_shift(temp2), 8); |
978 | 986 |
979 // stage 2 | 987 // stage 2 |
980 step2[0] = step1[0]; | 988 step2[0] = step1[0]; |
981 step2[1] = step1[1]; | 989 step2[1] = step1[1]; |
982 step2[2] = step1[2]; | 990 step2[2] = step1[2]; |
983 step2[3] = step1[3]; | 991 step2[3] = step1[3]; |
984 step2[4] = step1[4]; | 992 step2[4] = step1[4]; |
985 step2[5] = step1[5]; | 993 step2[5] = step1[5]; |
986 step2[6] = step1[6]; | 994 step2[6] = step1[6]; |
987 step2[7] = step1[7]; | 995 step2[7] = step1[7]; |
988 | 996 |
989 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; | 997 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; |
990 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; | 998 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; |
991 step2[8] = dct_const_round_shift(temp1); | 999 step2[8] = WRAPLOW(dct_const_round_shift(temp1), 8); |
992 step2[15] = dct_const_round_shift(temp2); | 1000 step2[15] = WRAPLOW(dct_const_round_shift(temp2), 8); |
993 | 1001 |
994 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; | 1002 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; |
995 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; | 1003 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; |
996 step2[9] = dct_const_round_shift(temp1); | 1004 step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); |
997 step2[14] = dct_const_round_shift(temp2); | 1005 step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); |
998 | 1006 |
999 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; | 1007 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; |
1000 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; | 1008 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; |
1001 step2[10] = dct_const_round_shift(temp1); | 1009 step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1002 step2[13] = dct_const_round_shift(temp2); | 1010 step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1003 | 1011 |
1004 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; | 1012 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; |
1005 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; | 1013 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; |
1006 step2[11] = dct_const_round_shift(temp1); | 1014 step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1007 step2[12] = dct_const_round_shift(temp2); | 1015 step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1008 | 1016 |
1009 step2[16] = step1[16] + step1[17]; | 1017 step2[16] = WRAPLOW(step1[16] + step1[17], 8); |
1010 step2[17] = step1[16] - step1[17]; | 1018 step2[17] = WRAPLOW(step1[16] - step1[17], 8); |
1011 step2[18] = -step1[18] + step1[19]; | 1019 step2[18] = WRAPLOW(-step1[18] + step1[19], 8); |
1012 step2[19] = step1[18] + step1[19]; | 1020 step2[19] = WRAPLOW(step1[18] + step1[19], 8); |
1013 step2[20] = step1[20] + step1[21]; | 1021 step2[20] = WRAPLOW(step1[20] + step1[21], 8); |
1014 step2[21] = step1[20] - step1[21]; | 1022 step2[21] = WRAPLOW(step1[20] - step1[21], 8); |
1015 step2[22] = -step1[22] + step1[23]; | 1023 step2[22] = WRAPLOW(-step1[22] + step1[23], 8); |
1016 step2[23] = step1[22] + step1[23]; | 1024 step2[23] = WRAPLOW(step1[22] + step1[23], 8); |
1017 step2[24] = step1[24] + step1[25]; | 1025 step2[24] = WRAPLOW(step1[24] + step1[25], 8); |
1018 step2[25] = step1[24] - step1[25]; | 1026 step2[25] = WRAPLOW(step1[24] - step1[25], 8); |
1019 step2[26] = -step1[26] + step1[27]; | 1027 step2[26] = WRAPLOW(-step1[26] + step1[27], 8); |
1020 step2[27] = step1[26] + step1[27]; | 1028 step2[27] = WRAPLOW(step1[26] + step1[27], 8); |
1021 step2[28] = step1[28] + step1[29]; | 1029 step2[28] = WRAPLOW(step1[28] + step1[29], 8); |
1022 step2[29] = step1[28] - step1[29]; | 1030 step2[29] = WRAPLOW(step1[28] - step1[29], 8); |
1023 step2[30] = -step1[30] + step1[31]; | 1031 step2[30] = WRAPLOW(-step1[30] + step1[31], 8); |
1024 step2[31] = step1[30] + step1[31]; | 1032 step2[31] = WRAPLOW(step1[30] + step1[31], 8); |
1025 | 1033 |
1026 // stage 3 | 1034 // stage 3 |
1027 step1[0] = step2[0]; | 1035 step1[0] = step2[0]; |
1028 step1[1] = step2[1]; | 1036 step1[1] = step2[1]; |
1029 step1[2] = step2[2]; | 1037 step1[2] = step2[2]; |
1030 step1[3] = step2[3]; | 1038 step1[3] = step2[3]; |
1031 | 1039 |
1032 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; | 1040 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; |
1033 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; | 1041 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; |
1034 step1[4] = dct_const_round_shift(temp1); | 1042 step1[4] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1035 step1[7] = dct_const_round_shift(temp2); | 1043 step1[7] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1036 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; | 1044 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; |
1037 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; | 1045 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; |
1038 step1[5] = dct_const_round_shift(temp1); | 1046 step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1039 step1[6] = dct_const_round_shift(temp2); | 1047 step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1040 | 1048 |
1041 step1[8] = step2[8] + step2[9]; | 1049 step1[8] = WRAPLOW(step2[8] + step2[9], 8); |
1042 step1[9] = step2[8] - step2[9]; | 1050 step1[9] = WRAPLOW(step2[8] - step2[9], 8); |
1043 step1[10] = -step2[10] + step2[11]; | 1051 step1[10] = WRAPLOW(-step2[10] + step2[11], 8); |
1044 step1[11] = step2[10] + step2[11]; | 1052 step1[11] = WRAPLOW(step2[10] + step2[11], 8); |
1045 step1[12] = step2[12] + step2[13]; | 1053 step1[12] = WRAPLOW(step2[12] + step2[13], 8); |
1046 step1[13] = step2[12] - step2[13]; | 1054 step1[13] = WRAPLOW(step2[12] - step2[13], 8); |
1047 step1[14] = -step2[14] + step2[15]; | 1055 step1[14] = WRAPLOW(-step2[14] + step2[15], 8); |
1048 step1[15] = step2[14] + step2[15]; | 1056 step1[15] = WRAPLOW(step2[14] + step2[15], 8); |
1049 | 1057 |
1050 step1[16] = step2[16]; | 1058 step1[16] = step2[16]; |
1051 step1[31] = step2[31]; | 1059 step1[31] = step2[31]; |
1052 temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; | 1060 temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; |
1053 temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; | 1061 temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; |
1054 step1[17] = dct_const_round_shift(temp1); | 1062 step1[17] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1055 step1[30] = dct_const_round_shift(temp2); | 1063 step1[30] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1056 temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; | 1064 temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; |
1057 temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; | 1065 temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; |
1058 step1[18] = dct_const_round_shift(temp1); | 1066 step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1059 step1[29] = dct_const_round_shift(temp2); | 1067 step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1060 step1[19] = step2[19]; | 1068 step1[19] = step2[19]; |
1061 step1[20] = step2[20]; | 1069 step1[20] = step2[20]; |
1062 temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; | 1070 temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; |
1063 temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; | 1071 temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; |
1064 step1[21] = dct_const_round_shift(temp1); | 1072 step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1065 step1[26] = dct_const_round_shift(temp2); | 1073 step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1066 temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; | 1074 temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; |
1067 temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; | 1075 temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; |
1068 step1[22] = dct_const_round_shift(temp1); | 1076 step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1069 step1[25] = dct_const_round_shift(temp2); | 1077 step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1070 step1[23] = step2[23]; | 1078 step1[23] = step2[23]; |
1071 step1[24] = step2[24]; | 1079 step1[24] = step2[24]; |
1072 step1[27] = step2[27]; | 1080 step1[27] = step2[27]; |
1073 step1[28] = step2[28]; | 1081 step1[28] = step2[28]; |
1074 | 1082 |
1075 // stage 4 | 1083 // stage 4 |
1076 temp1 = (step1[0] + step1[1]) * cospi_16_64; | 1084 temp1 = (step1[0] + step1[1]) * cospi_16_64; |
1077 temp2 = (step1[0] - step1[1]) * cospi_16_64; | 1085 temp2 = (step1[0] - step1[1]) * cospi_16_64; |
1078 step2[0] = dct_const_round_shift(temp1); | 1086 step2[0] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1079 step2[1] = dct_const_round_shift(temp2); | 1087 step2[1] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1080 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; | 1088 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; |
1081 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; | 1089 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; |
1082 step2[2] = dct_const_round_shift(temp1); | 1090 step2[2] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1083 step2[3] = dct_const_round_shift(temp2); | 1091 step2[3] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1084 step2[4] = step1[4] + step1[5]; | 1092 step2[4] = WRAPLOW(step1[4] + step1[5], 8); |
1085 step2[5] = step1[4] - step1[5]; | 1093 step2[5] = WRAPLOW(step1[4] - step1[5], 8); |
1086 step2[6] = -step1[6] + step1[7]; | 1094 step2[6] = WRAPLOW(-step1[6] + step1[7], 8); |
1087 step2[7] = step1[6] + step1[7]; | 1095 step2[7] = WRAPLOW(step1[6] + step1[7], 8); |
1088 | 1096 |
1089 step2[8] = step1[8]; | 1097 step2[8] = step1[8]; |
1090 step2[15] = step1[15]; | 1098 step2[15] = step1[15]; |
1091 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; | 1099 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; |
1092 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; | 1100 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; |
1093 step2[9] = dct_const_round_shift(temp1); | 1101 step2[9] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1094 step2[14] = dct_const_round_shift(temp2); | 1102 step2[14] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1095 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; | 1103 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; |
1096 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; | 1104 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; |
1097 step2[10] = dct_const_round_shift(temp1); | 1105 step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1098 step2[13] = dct_const_round_shift(temp2); | 1106 step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1099 step2[11] = step1[11]; | 1107 step2[11] = step1[11]; |
1100 step2[12] = step1[12]; | 1108 step2[12] = step1[12]; |
1101 | 1109 |
1102 step2[16] = step1[16] + step1[19]; | 1110 step2[16] = WRAPLOW(step1[16] + step1[19], 8); |
1103 step2[17] = step1[17] + step1[18]; | 1111 step2[17] = WRAPLOW(step1[17] + step1[18], 8); |
1104 step2[18] = step1[17] - step1[18]; | 1112 step2[18] = WRAPLOW(step1[17] - step1[18], 8); |
1105 step2[19] = step1[16] - step1[19]; | 1113 step2[19] = WRAPLOW(step1[16] - step1[19], 8); |
1106 step2[20] = -step1[20] + step1[23]; | 1114 step2[20] = WRAPLOW(-step1[20] + step1[23], 8); |
1107 step2[21] = -step1[21] + step1[22]; | 1115 step2[21] = WRAPLOW(-step1[21] + step1[22], 8); |
1108 step2[22] = step1[21] + step1[22]; | 1116 step2[22] = WRAPLOW(step1[21] + step1[22], 8); |
1109 step2[23] = step1[20] + step1[23]; | 1117 step2[23] = WRAPLOW(step1[20] + step1[23], 8); |
1110 | 1118 |
1111 step2[24] = step1[24] + step1[27]; | 1119 step2[24] = WRAPLOW(step1[24] + step1[27], 8); |
1112 step2[25] = step1[25] + step1[26]; | 1120 step2[25] = WRAPLOW(step1[25] + step1[26], 8); |
1113 step2[26] = step1[25] - step1[26]; | 1121 step2[26] = WRAPLOW(step1[25] - step1[26], 8); |
1114 step2[27] = step1[24] - step1[27]; | 1122 step2[27] = WRAPLOW(step1[24] - step1[27], 8); |
1115 step2[28] = -step1[28] + step1[31]; | 1123 step2[28] = WRAPLOW(-step1[28] + step1[31], 8); |
1116 step2[29] = -step1[29] + step1[30]; | 1124 step2[29] = WRAPLOW(-step1[29] + step1[30], 8); |
1117 step2[30] = step1[29] + step1[30]; | 1125 step2[30] = WRAPLOW(step1[29] + step1[30], 8); |
1118 step2[31] = step1[28] + step1[31]; | 1126 step2[31] = WRAPLOW(step1[28] + step1[31], 8); |
1119 | 1127 |
1120 // stage 5 | 1128 // stage 5 |
1121 step1[0] = step2[0] + step2[3]; | 1129 step1[0] = WRAPLOW(step2[0] + step2[3], 8); |
1122 step1[1] = step2[1] + step2[2]; | 1130 step1[1] = WRAPLOW(step2[1] + step2[2], 8); |
1123 step1[2] = step2[1] - step2[2]; | 1131 step1[2] = WRAPLOW(step2[1] - step2[2], 8); |
1124 step1[3] = step2[0] - step2[3]; | 1132 step1[3] = WRAPLOW(step2[0] - step2[3], 8); |
1125 step1[4] = step2[4]; | 1133 step1[4] = step2[4]; |
1126 temp1 = (step2[6] - step2[5]) * cospi_16_64; | 1134 temp1 = (step2[6] - step2[5]) * cospi_16_64; |
1127 temp2 = (step2[5] + step2[6]) * cospi_16_64; | 1135 temp2 = (step2[5] + step2[6]) * cospi_16_64; |
1128 step1[5] = dct_const_round_shift(temp1); | 1136 step1[5] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1129 step1[6] = dct_const_round_shift(temp2); | 1137 step1[6] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1130 step1[7] = step2[7]; | 1138 step1[7] = step2[7]; |
1131 | 1139 |
1132 step1[8] = step2[8] + step2[11]; | 1140 step1[8] = WRAPLOW(step2[8] + step2[11], 8); |
1133 step1[9] = step2[9] + step2[10]; | 1141 step1[9] = WRAPLOW(step2[9] + step2[10], 8); |
1134 step1[10] = step2[9] - step2[10]; | 1142 step1[10] = WRAPLOW(step2[9] - step2[10], 8); |
1135 step1[11] = step2[8] - step2[11]; | 1143 step1[11] = WRAPLOW(step2[8] - step2[11], 8); |
1136 step1[12] = -step2[12] + step2[15]; | 1144 step1[12] = WRAPLOW(-step2[12] + step2[15], 8); |
1137 step1[13] = -step2[13] + step2[14]; | 1145 step1[13] = WRAPLOW(-step2[13] + step2[14], 8); |
1138 step1[14] = step2[13] + step2[14]; | 1146 step1[14] = WRAPLOW(step2[13] + step2[14], 8); |
1139 step1[15] = step2[12] + step2[15]; | 1147 step1[15] = WRAPLOW(step2[12] + step2[15], 8); |
1140 | 1148 |
1141 step1[16] = step2[16]; | 1149 step1[16] = step2[16]; |
1142 step1[17] = step2[17]; | 1150 step1[17] = step2[17]; |
1143 temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; | 1151 temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; |
1144 temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; | 1152 temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; |
1145 step1[18] = dct_const_round_shift(temp1); | 1153 step1[18] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1146 step1[29] = dct_const_round_shift(temp2); | 1154 step1[29] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1147 temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; | 1155 temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; |
1148 temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; | 1156 temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; |
1149 step1[19] = dct_const_round_shift(temp1); | 1157 step1[19] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1150 step1[28] = dct_const_round_shift(temp2); | 1158 step1[28] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1151 temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; | 1159 temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; |
1152 temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; | 1160 temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; |
1153 step1[20] = dct_const_round_shift(temp1); | 1161 step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1154 step1[27] = dct_const_round_shift(temp2); | 1162 step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1155 temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; | 1163 temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; |
1156 temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; | 1164 temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; |
1157 step1[21] = dct_const_round_shift(temp1); | 1165 step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1158 step1[26] = dct_const_round_shift(temp2); | 1166 step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1159 step1[22] = step2[22]; | 1167 step1[22] = step2[22]; |
1160 step1[23] = step2[23]; | 1168 step1[23] = step2[23]; |
1161 step1[24] = step2[24]; | 1169 step1[24] = step2[24]; |
1162 step1[25] = step2[25]; | 1170 step1[25] = step2[25]; |
1163 step1[30] = step2[30]; | 1171 step1[30] = step2[30]; |
1164 step1[31] = step2[31]; | 1172 step1[31] = step2[31]; |
1165 | 1173 |
1166 // stage 6 | 1174 // stage 6 |
1167 step2[0] = step1[0] + step1[7]; | 1175 step2[0] = WRAPLOW(step1[0] + step1[7], 8); |
1168 step2[1] = step1[1] + step1[6]; | 1176 step2[1] = WRAPLOW(step1[1] + step1[6], 8); |
1169 step2[2] = step1[2] + step1[5]; | 1177 step2[2] = WRAPLOW(step1[2] + step1[5], 8); |
1170 step2[3] = step1[3] + step1[4]; | 1178 step2[3] = WRAPLOW(step1[3] + step1[4], 8); |
1171 step2[4] = step1[3] - step1[4]; | 1179 step2[4] = WRAPLOW(step1[3] - step1[4], 8); |
1172 step2[5] = step1[2] - step1[5]; | 1180 step2[5] = WRAPLOW(step1[2] - step1[5], 8); |
1173 step2[6] = step1[1] - step1[6]; | 1181 step2[6] = WRAPLOW(step1[1] - step1[6], 8); |
1174 step2[7] = step1[0] - step1[7]; | 1182 step2[7] = WRAPLOW(step1[0] - step1[7], 8); |
1175 step2[8] = step1[8]; | 1183 step2[8] = step1[8]; |
1176 step2[9] = step1[9]; | 1184 step2[9] = step1[9]; |
1177 temp1 = (-step1[10] + step1[13]) * cospi_16_64; | 1185 temp1 = (-step1[10] + step1[13]) * cospi_16_64; |
1178 temp2 = (step1[10] + step1[13]) * cospi_16_64; | 1186 temp2 = (step1[10] + step1[13]) * cospi_16_64; |
1179 step2[10] = dct_const_round_shift(temp1); | 1187 step2[10] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1180 step2[13] = dct_const_round_shift(temp2); | 1188 step2[13] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1181 temp1 = (-step1[11] + step1[12]) * cospi_16_64; | 1189 temp1 = (-step1[11] + step1[12]) * cospi_16_64; |
1182 temp2 = (step1[11] + step1[12]) * cospi_16_64; | 1190 temp2 = (step1[11] + step1[12]) * cospi_16_64; |
1183 step2[11] = dct_const_round_shift(temp1); | 1191 step2[11] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1184 step2[12] = dct_const_round_shift(temp2); | 1192 step2[12] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1185 step2[14] = step1[14]; | 1193 step2[14] = step1[14]; |
1186 step2[15] = step1[15]; | 1194 step2[15] = step1[15]; |
1187 | 1195 |
1188 step2[16] = step1[16] + step1[23]; | 1196 step2[16] = WRAPLOW(step1[16] + step1[23], 8); |
1189 step2[17] = step1[17] + step1[22]; | 1197 step2[17] = WRAPLOW(step1[17] + step1[22], 8); |
1190 step2[18] = step1[18] + step1[21]; | 1198 step2[18] = WRAPLOW(step1[18] + step1[21], 8); |
1191 step2[19] = step1[19] + step1[20]; | 1199 step2[19] = WRAPLOW(step1[19] + step1[20], 8); |
1192 step2[20] = step1[19] - step1[20]; | 1200 step2[20] = WRAPLOW(step1[19] - step1[20], 8); |
1193 step2[21] = step1[18] - step1[21]; | 1201 step2[21] = WRAPLOW(step1[18] - step1[21], 8); |
1194 step2[22] = step1[17] - step1[22]; | 1202 step2[22] = WRAPLOW(step1[17] - step1[22], 8); |
1195 step2[23] = step1[16] - step1[23]; | 1203 step2[23] = WRAPLOW(step1[16] - step1[23], 8); |
1196 | 1204 |
1197 step2[24] = -step1[24] + step1[31]; | 1205 step2[24] = WRAPLOW(-step1[24] + step1[31], 8); |
1198 step2[25] = -step1[25] + step1[30]; | 1206 step2[25] = WRAPLOW(-step1[25] + step1[30], 8); |
1199 step2[26] = -step1[26] + step1[29]; | 1207 step2[26] = WRAPLOW(-step1[26] + step1[29], 8); |
1200 step2[27] = -step1[27] + step1[28]; | 1208 step2[27] = WRAPLOW(-step1[27] + step1[28], 8); |
1201 step2[28] = step1[27] + step1[28]; | 1209 step2[28] = WRAPLOW(step1[27] + step1[28], 8); |
1202 step2[29] = step1[26] + step1[29]; | 1210 step2[29] = WRAPLOW(step1[26] + step1[29], 8); |
1203 step2[30] = step1[25] + step1[30]; | 1211 step2[30] = WRAPLOW(step1[25] + step1[30], 8); |
1204 step2[31] = step1[24] + step1[31]; | 1212 step2[31] = WRAPLOW(step1[24] + step1[31], 8); |
1205 | 1213 |
1206 // stage 7 | 1214 // stage 7 |
1207 step1[0] = step2[0] + step2[15]; | 1215 step1[0] = WRAPLOW(step2[0] + step2[15], 8); |
1208 step1[1] = step2[1] + step2[14]; | 1216 step1[1] = WRAPLOW(step2[1] + step2[14], 8); |
1209 step1[2] = step2[2] + step2[13]; | 1217 step1[2] = WRAPLOW(step2[2] + step2[13], 8); |
1210 step1[3] = step2[3] + step2[12]; | 1218 step1[3] = WRAPLOW(step2[3] + step2[12], 8); |
1211 step1[4] = step2[4] + step2[11]; | 1219 step1[4] = WRAPLOW(step2[4] + step2[11], 8); |
1212 step1[5] = step2[5] + step2[10]; | 1220 step1[5] = WRAPLOW(step2[5] + step2[10], 8); |
1213 step1[6] = step2[6] + step2[9]; | 1221 step1[6] = WRAPLOW(step2[6] + step2[9], 8); |
1214 step1[7] = step2[7] + step2[8]; | 1222 step1[7] = WRAPLOW(step2[7] + step2[8], 8); |
1215 step1[8] = step2[7] - step2[8]; | 1223 step1[8] = WRAPLOW(step2[7] - step2[8], 8); |
1216 step1[9] = step2[6] - step2[9]; | 1224 step1[9] = WRAPLOW(step2[6] - step2[9], 8); |
1217 step1[10] = step2[5] - step2[10]; | 1225 step1[10] = WRAPLOW(step2[5] - step2[10], 8); |
1218 step1[11] = step2[4] - step2[11]; | 1226 step1[11] = WRAPLOW(step2[4] - step2[11], 8); |
1219 step1[12] = step2[3] - step2[12]; | 1227 step1[12] = WRAPLOW(step2[3] - step2[12], 8); |
1220 step1[13] = step2[2] - step2[13]; | 1228 step1[13] = WRAPLOW(step2[2] - step2[13], 8); |
1221 step1[14] = step2[1] - step2[14]; | 1229 step1[14] = WRAPLOW(step2[1] - step2[14], 8); |
1222 step1[15] = step2[0] - step2[15]; | 1230 step1[15] = WRAPLOW(step2[0] - step2[15], 8); |
1223 | 1231 |
1224 step1[16] = step2[16]; | 1232 step1[16] = step2[16]; |
1225 step1[17] = step2[17]; | 1233 step1[17] = step2[17]; |
1226 step1[18] = step2[18]; | 1234 step1[18] = step2[18]; |
1227 step1[19] = step2[19]; | 1235 step1[19] = step2[19]; |
1228 temp1 = (-step2[20] + step2[27]) * cospi_16_64; | 1236 temp1 = (-step2[20] + step2[27]) * cospi_16_64; |
1229 temp2 = (step2[20] + step2[27]) * cospi_16_64; | 1237 temp2 = (step2[20] + step2[27]) * cospi_16_64; |
1230 step1[20] = dct_const_round_shift(temp1); | 1238 step1[20] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1231 step1[27] = dct_const_round_shift(temp2); | 1239 step1[27] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1232 temp1 = (-step2[21] + step2[26]) * cospi_16_64; | 1240 temp1 = (-step2[21] + step2[26]) * cospi_16_64; |
1233 temp2 = (step2[21] + step2[26]) * cospi_16_64; | 1241 temp2 = (step2[21] + step2[26]) * cospi_16_64; |
1234 step1[21] = dct_const_round_shift(temp1); | 1242 step1[21] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1235 step1[26] = dct_const_round_shift(temp2); | 1243 step1[26] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1236 temp1 = (-step2[22] + step2[25]) * cospi_16_64; | 1244 temp1 = (-step2[22] + step2[25]) * cospi_16_64; |
1237 temp2 = (step2[22] + step2[25]) * cospi_16_64; | 1245 temp2 = (step2[22] + step2[25]) * cospi_16_64; |
1238 step1[22] = dct_const_round_shift(temp1); | 1246 step1[22] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1239 step1[25] = dct_const_round_shift(temp2); | 1247 step1[25] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1240 temp1 = (-step2[23] + step2[24]) * cospi_16_64; | 1248 temp1 = (-step2[23] + step2[24]) * cospi_16_64; |
1241 temp2 = (step2[23] + step2[24]) * cospi_16_64; | 1249 temp2 = (step2[23] + step2[24]) * cospi_16_64; |
1242 step1[23] = dct_const_round_shift(temp1); | 1250 step1[23] = WRAPLOW(dct_const_round_shift(temp1), 8); |
1243 step1[24] = dct_const_round_shift(temp2); | 1251 step1[24] = WRAPLOW(dct_const_round_shift(temp2), 8); |
1244 step1[28] = step2[28]; | 1252 step1[28] = step2[28]; |
1245 step1[29] = step2[29]; | 1253 step1[29] = step2[29]; |
1246 step1[30] = step2[30]; | 1254 step1[30] = step2[30]; |
1247 step1[31] = step2[31]; | 1255 step1[31] = step2[31]; |
1248 | 1256 |
1249 // final stage | 1257 // final stage |
1250 output[0] = step1[0] + step1[31]; | 1258 output[0] = WRAPLOW(step1[0] + step1[31], 8); |
1251 output[1] = step1[1] + step1[30]; | 1259 output[1] = WRAPLOW(step1[1] + step1[30], 8); |
1252 output[2] = step1[2] + step1[29]; | 1260 output[2] = WRAPLOW(step1[2] + step1[29], 8); |
1253 output[3] = step1[3] + step1[28]; | 1261 output[3] = WRAPLOW(step1[3] + step1[28], 8); |
1254 output[4] = step1[4] + step1[27]; | 1262 output[4] = WRAPLOW(step1[4] + step1[27], 8); |
1255 output[5] = step1[5] + step1[26]; | 1263 output[5] = WRAPLOW(step1[5] + step1[26], 8); |
1256 output[6] = step1[6] + step1[25]; | 1264 output[6] = WRAPLOW(step1[6] + step1[25], 8); |
1257 output[7] = step1[7] + step1[24]; | 1265 output[7] = WRAPLOW(step1[7] + step1[24], 8); |
1258 output[8] = step1[8] + step1[23]; | 1266 output[8] = WRAPLOW(step1[8] + step1[23], 8); |
1259 output[9] = step1[9] + step1[22]; | 1267 output[9] = WRAPLOW(step1[9] + step1[22], 8); |
1260 output[10] = step1[10] + step1[21]; | 1268 output[10] = WRAPLOW(step1[10] + step1[21], 8); |
1261 output[11] = step1[11] + step1[20]; | 1269 output[11] = WRAPLOW(step1[11] + step1[20], 8); |
1262 output[12] = step1[12] + step1[19]; | 1270 output[12] = WRAPLOW(step1[12] + step1[19], 8); |
1263 output[13] = step1[13] + step1[18]; | 1271 output[13] = WRAPLOW(step1[13] + step1[18], 8); |
1264 output[14] = step1[14] + step1[17]; | 1272 output[14] = WRAPLOW(step1[14] + step1[17], 8); |
1265 output[15] = step1[15] + step1[16]; | 1273 output[15] = WRAPLOW(step1[15] + step1[16], 8); |
1266 output[16] = step1[15] - step1[16]; | 1274 output[16] = WRAPLOW(step1[15] - step1[16], 8); |
1267 output[17] = step1[14] - step1[17]; | 1275 output[17] = WRAPLOW(step1[14] - step1[17], 8); |
1268 output[18] = step1[13] - step1[18]; | 1276 output[18] = WRAPLOW(step1[13] - step1[18], 8); |
1269 output[19] = step1[12] - step1[19]; | 1277 output[19] = WRAPLOW(step1[12] - step1[19], 8); |
1270 output[20] = step1[11] - step1[20]; | 1278 output[20] = WRAPLOW(step1[11] - step1[20], 8); |
1271 output[21] = step1[10] - step1[21]; | 1279 output[21] = WRAPLOW(step1[10] - step1[21], 8); |
1272 output[22] = step1[9] - step1[22]; | 1280 output[22] = WRAPLOW(step1[9] - step1[22], 8); |
1273 output[23] = step1[8] - step1[23]; | 1281 output[23] = WRAPLOW(step1[8] - step1[23], 8); |
1274 output[24] = step1[7] - step1[24]; | 1282 output[24] = WRAPLOW(step1[7] - step1[24], 8); |
1275 output[25] = step1[6] - step1[25]; | 1283 output[25] = WRAPLOW(step1[6] - step1[25], 8); |
1276 output[26] = step1[5] - step1[26]; | 1284 output[26] = WRAPLOW(step1[5] - step1[26], 8); |
1277 output[27] = step1[4] - step1[27]; | 1285 output[27] = WRAPLOW(step1[4] - step1[27], 8); |
1278 output[28] = step1[3] - step1[28]; | 1286 output[28] = WRAPLOW(step1[3] - step1[28], 8); |
1279 output[29] = step1[2] - step1[29]; | 1287 output[29] = WRAPLOW(step1[2] - step1[29], 8); |
1280 output[30] = step1[1] - step1[30]; | 1288 output[30] = WRAPLOW(step1[1] - step1[30], 8); |
1281 output[31] = step1[0] - step1[31]; | 1289 output[31] = WRAPLOW(step1[0] - step1[31], 8); |
1282 } | 1290 } |
1283 | 1291 |
1284 void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, | 1292 void vp9_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, |
1285 int stride) { | 1293 int stride) { |
1286 tran_low_t out[32 * 32]; | 1294 tran_low_t out[32 * 32]; |
1287 tran_low_t *outptr = out; | 1295 tran_low_t *outptr = out; |
1288 int i, j; | 1296 int i, j; |
1289 tran_low_t temp_in[32], temp_out[32]; | 1297 tran_low_t temp_in[32], temp_out[32]; |
1290 | 1298 |
1291 // Rows | 1299 // Rows |
(...skipping 14 matching lines...) Expand all Loading... |
1306 vpx_memset(outptr, 0, sizeof(tran_low_t) * 32); | 1314 vpx_memset(outptr, 0, sizeof(tran_low_t) * 32); |
1307 input += 32; | 1315 input += 32; |
1308 outptr += 32; | 1316 outptr += 32; |
1309 } | 1317 } |
1310 | 1318 |
1311 // Columns | 1319 // Columns |
1312 for (i = 0; i < 32; ++i) { | 1320 for (i = 0; i < 32; ++i) { |
1313 for (j = 0; j < 32; ++j) | 1321 for (j = 0; j < 32; ++j) |
1314 temp_in[j] = out[j * 32 + i]; | 1322 temp_in[j] = out[j * 32 + i]; |
1315 idct32(temp_in, temp_out); | 1323 idct32(temp_in, temp_out); |
1316 for (j = 0; j < 32; ++j) | 1324 for (j = 0; j < 32; ++j) { |
1317 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) | 1325 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
1318 + dest[j * stride + i]); | 1326 ROUND_POWER_OF_TWO(temp_out[j], 6)); |
| 1327 } |
1319 } | 1328 } |
1320 } | 1329 } |
1321 | 1330 |
1322 void vp9_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, | 1331 void vp9_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, |
1323 int stride) { | 1332 int stride) { |
1324 tran_low_t out[32 * 32] = {0}; | 1333 tran_low_t out[32 * 32] = {0}; |
1325 tran_low_t *outptr = out; | 1334 tran_low_t *outptr = out; |
1326 int i, j; | 1335 int i, j; |
1327 tran_low_t temp_in[32], temp_out[32]; | 1336 tran_low_t temp_in[32], temp_out[32]; |
1328 | 1337 |
1329 // Rows | 1338 // Rows |
1330 // only upper-left 8x8 has non-zero coeff | 1339 // only upper-left 8x8 has non-zero coeff |
1331 for (i = 0; i < 8; ++i) { | 1340 for (i = 0; i < 8; ++i) { |
1332 idct32(input, outptr); | 1341 idct32(input, outptr); |
1333 input += 32; | 1342 input += 32; |
1334 outptr += 32; | 1343 outptr += 32; |
1335 } | 1344 } |
1336 | 1345 |
1337 // Columns | 1346 // Columns |
1338 for (i = 0; i < 32; ++i) { | 1347 for (i = 0; i < 32; ++i) { |
1339 for (j = 0; j < 32; ++j) | 1348 for (j = 0; j < 32; ++j) |
1340 temp_in[j] = out[j * 32 + i]; | 1349 temp_in[j] = out[j * 32 + i]; |
1341 idct32(temp_in, temp_out); | 1350 idct32(temp_in, temp_out); |
1342 for (j = 0; j < 32; ++j) | 1351 for (j = 0; j < 32; ++j) { |
1343 dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) | 1352 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], |
1344 + dest[j * stride + i]); | 1353 ROUND_POWER_OF_TWO(temp_out[j], 6)); |
| 1354 } |
1345 } | 1355 } |
1346 } | 1356 } |
1347 | 1357 |
1348 void vp9_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { | 1358 void vp9_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { |
1349 int i, j; | 1359 int i, j; |
1350 tran_high_t a1; | 1360 tran_high_t a1; |
1351 | 1361 |
1352 tran_low_t out = dct_const_round_shift(input[0] * cospi_16_64); | 1362 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), 8); |
1353 out = dct_const_round_shift(out * cospi_16_64); | 1363 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), 8); |
1354 a1 = ROUND_POWER_OF_TWO(out, 6); | 1364 a1 = ROUND_POWER_OF_TWO(out, 6); |
1355 | 1365 |
1356 for (j = 0; j < 32; ++j) { | 1366 for (j = 0; j < 32; ++j) { |
1357 for (i = 0; i < 32; ++i) | 1367 for (i = 0; i < 32; ++i) |
1358 dest[i] = clip_pixel(dest[i] + a1); | 1368 dest[i] = clip_pixel_add(dest[i], a1); |
1359 dest += stride; | 1369 dest += stride; |
1360 } | 1370 } |
1361 } | 1371 } |
1362 | 1372 |
1363 // idct | 1373 // idct |
1364 void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, | 1374 void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
1365 int eob) { | 1375 int eob) { |
1366 if (eob > 1) | 1376 if (eob > 1) |
1367 vp9_idct4x4_16_add(input, dest, stride); | 1377 vp9_idct4x4_16_add(input, dest, stride); |
1368 else | 1378 else |
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1441 void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, | 1451 void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, |
1442 int stride, int eob) { | 1452 int stride, int eob) { |
1443 if (tx_type == DCT_DCT) { | 1453 if (tx_type == DCT_DCT) { |
1444 vp9_idct16x16_add(input, dest, stride, eob); | 1454 vp9_idct16x16_add(input, dest, stride, eob); |
1445 } else { | 1455 } else { |
1446 vp9_iht16x16_256_add(input, dest, stride, tx_type); | 1456 vp9_iht16x16_256_add(input, dest, stride, tx_type); |
1447 } | 1457 } |
1448 } | 1458 } |
1449 | 1459 |
1450 #if CONFIG_VP9_HIGHBITDEPTH | 1460 #if CONFIG_VP9_HIGHBITDEPTH |
1451 void vp9_high_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1461 void vp9_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
1452 int stride, int bd) { | 1462 int stride, int bd) { |
1453 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, | 1463 /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, |
1454 0.5 shifts per pixel. */ | 1464 0.5 shifts per pixel. */ |
1455 int i; | 1465 int i; |
1456 tran_low_t output[16]; | 1466 tran_low_t output[16]; |
1457 tran_high_t a1, b1, c1, d1, e1; | 1467 tran_high_t a1, b1, c1, d1, e1; |
1458 const tran_low_t *ip = input; | 1468 const tran_low_t *ip = input; |
1459 tran_low_t *op = output; | 1469 tran_low_t *op = output; |
1460 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1470 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1461 | 1471 |
1462 for (i = 0; i < 4; i++) { | 1472 for (i = 0; i < 4; i++) { |
1463 a1 = ip[0] >> UNIT_QUANT_SHIFT; | 1473 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
1464 c1 = ip[1] >> UNIT_QUANT_SHIFT; | 1474 c1 = ip[1] >> UNIT_QUANT_SHIFT; |
1465 d1 = ip[2] >> UNIT_QUANT_SHIFT; | 1475 d1 = ip[2] >> UNIT_QUANT_SHIFT; |
1466 b1 = ip[3] >> UNIT_QUANT_SHIFT; | 1476 b1 = ip[3] >> UNIT_QUANT_SHIFT; |
1467 a1 += c1; | 1477 a1 += c1; |
1468 d1 -= b1; | 1478 d1 -= b1; |
1469 e1 = (a1 - d1) >> 1; | 1479 e1 = (a1 - d1) >> 1; |
1470 b1 = e1 - b1; | 1480 b1 = e1 - b1; |
1471 c1 = e1 - c1; | 1481 c1 = e1 - c1; |
1472 a1 -= b1; | 1482 a1 -= b1; |
1473 d1 += c1; | 1483 d1 += c1; |
1474 op[0] = WRAPLOW(a1); | 1484 op[0] = WRAPLOW(a1, bd); |
1475 op[1] = WRAPLOW(b1); | 1485 op[1] = WRAPLOW(b1, bd); |
1476 op[2] = WRAPLOW(c1); | 1486 op[2] = WRAPLOW(c1, bd); |
1477 op[3] = WRAPLOW(d1); | 1487 op[3] = WRAPLOW(d1, bd); |
1478 ip += 4; | 1488 ip += 4; |
1479 op += 4; | 1489 op += 4; |
1480 } | 1490 } |
1481 | 1491 |
1482 ip = output; | 1492 ip = output; |
1483 for (i = 0; i < 4; i++) { | 1493 for (i = 0; i < 4; i++) { |
1484 a1 = ip[4 * 0]; | 1494 a1 = ip[4 * 0]; |
1485 c1 = ip[4 * 1]; | 1495 c1 = ip[4 * 1]; |
1486 d1 = ip[4 * 2]; | 1496 d1 = ip[4 * 2]; |
1487 b1 = ip[4 * 3]; | 1497 b1 = ip[4 * 3]; |
1488 a1 += c1; | 1498 a1 += c1; |
1489 d1 -= b1; | 1499 d1 -= b1; |
1490 e1 = (a1 - d1) >> 1; | 1500 e1 = (a1 - d1) >> 1; |
1491 b1 = e1 - b1; | 1501 b1 = e1 - b1; |
1492 c1 = e1 - c1; | 1502 c1 = e1 - c1; |
1493 a1 -= b1; | 1503 a1 -= b1; |
1494 d1 += c1; | 1504 d1 += c1; |
1495 dest[stride * 0] = clip_pixel_bd_high(dest[stride * 0], a1, bd); | 1505 dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd); |
1496 dest[stride * 1] = clip_pixel_bd_high(dest[stride * 1], b1, bd); | 1506 dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], b1, bd); |
1497 dest[stride * 2] = clip_pixel_bd_high(dest[stride * 2], c1, bd); | 1507 dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], c1, bd); |
1498 dest[stride * 3] = clip_pixel_bd_high(dest[stride * 3], d1, bd); | 1508 dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], d1, bd); |
1499 | 1509 |
1500 ip++; | 1510 ip++; |
1501 dest++; | 1511 dest++; |
1502 } | 1512 } |
1503 } | 1513 } |
1504 | 1514 |
1505 static void high_idct4(const tran_low_t *input, tran_low_t *output, int bd) { | 1515 void vp9_highbd_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, |
1506 tran_low_t step[4]; | 1516 int dest_stride, int bd) { |
1507 tran_high_t temp1, temp2; | |
1508 (void) bd; | |
1509 // stage 1 | |
1510 temp1 = (input[0] + input[2]) * cospi_16_64; | |
1511 temp2 = (input[0] - input[2]) * cospi_16_64; | |
1512 step[0] = WRAPLOW(dct_const_round_shift(temp1)); | |
1513 step[1] = WRAPLOW(dct_const_round_shift(temp2)); | |
1514 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; | |
1515 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; | |
1516 step[2] = WRAPLOW(dct_const_round_shift(temp1)); | |
1517 step[3] = WRAPLOW(dct_const_round_shift(temp2)); | |
1518 | |
1519 // stage 2 | |
1520 output[0] = WRAPLOW(step[0] + step[3]); | |
1521 output[1] = WRAPLOW(step[1] + step[2]); | |
1522 output[2] = WRAPLOW(step[1] - step[2]); | |
1523 output[3] = WRAPLOW(step[0] - step[3]); | |
1524 } | |
1525 | |
1526 void vp9_high_iwht4x4_1_add_c(const tran_low_t *in, uint8_t *dest8, | |
1527 int dest_stride, int bd) { | |
1528 int i; | 1517 int i; |
1529 tran_high_t a1, e1; | 1518 tran_high_t a1, e1; |
1530 tran_low_t tmp[4]; | 1519 tran_low_t tmp[4]; |
1531 const tran_low_t *ip = in; | 1520 const tran_low_t *ip = in; |
1532 tran_low_t *op = tmp; | 1521 tran_low_t *op = tmp; |
1533 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1522 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1534 (void) bd; | 1523 (void) bd; |
1535 | 1524 |
1536 a1 = ip[0] >> UNIT_QUANT_SHIFT; | 1525 a1 = ip[0] >> UNIT_QUANT_SHIFT; |
1537 e1 = a1 >> 1; | 1526 e1 = a1 >> 1; |
1538 a1 -= e1; | 1527 a1 -= e1; |
1539 op[0] = WRAPLOW(a1); | 1528 op[0] = WRAPLOW(a1, bd); |
1540 op[1] = op[2] = op[3] = WRAPLOW(e1); | 1529 op[1] = op[2] = op[3] = WRAPLOW(e1, bd); |
1541 | 1530 |
1542 ip = tmp; | 1531 ip = tmp; |
1543 for (i = 0; i < 4; i++) { | 1532 for (i = 0; i < 4; i++) { |
1544 e1 = ip[0] >> 1; | 1533 e1 = ip[0] >> 1; |
1545 a1 = ip[0] - e1; | 1534 a1 = ip[0] - e1; |
1546 dest[dest_stride * 0] = clip_pixel_bd_high(dest[dest_stride * 0], a1, bd); | 1535 dest[dest_stride * 0] = highbd_clip_pixel_add( |
1547 dest[dest_stride * 1] = clip_pixel_bd_high(dest[dest_stride * 1], e1, bd); | 1536 dest[dest_stride * 0], a1, bd); |
1548 dest[dest_stride * 2] = clip_pixel_bd_high(dest[dest_stride * 2], e1, bd); | 1537 dest[dest_stride * 1] = highbd_clip_pixel_add( |
1549 dest[dest_stride * 3] = clip_pixel_bd_high(dest[dest_stride * 3], e1, bd); | 1538 dest[dest_stride * 1], e1, bd); |
| 1539 dest[dest_stride * 2] = highbd_clip_pixel_add( |
| 1540 dest[dest_stride * 2], e1, bd); |
| 1541 dest[dest_stride * 3] = highbd_clip_pixel_add( |
| 1542 dest[dest_stride * 3], e1, bd); |
1550 ip++; | 1543 ip++; |
1551 dest++; | 1544 dest++; |
1552 } | 1545 } |
1553 } | 1546 } |
1554 | 1547 |
1555 void vp9_high_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1548 static void highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd) { |
1556 int stride, int bd) { | 1549 tran_low_t step[4]; |
| 1550 tran_high_t temp1, temp2; |
| 1551 (void) bd; |
| 1552 // stage 1 |
| 1553 temp1 = (input[0] + input[2]) * cospi_16_64; |
| 1554 temp2 = (input[0] - input[2]) * cospi_16_64; |
| 1555 step[0] = WRAPLOW(dct_const_round_shift(temp1), bd); |
| 1556 step[1] = WRAPLOW(dct_const_round_shift(temp2), bd); |
| 1557 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; |
| 1558 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; |
| 1559 step[2] = WRAPLOW(dct_const_round_shift(temp1), bd); |
| 1560 step[3] = WRAPLOW(dct_const_round_shift(temp2), bd); |
| 1561 |
| 1562 // stage 2 |
| 1563 output[0] = WRAPLOW(step[0] + step[3], bd); |
| 1564 output[1] = WRAPLOW(step[1] + step[2], bd); |
| 1565 output[2] = WRAPLOW(step[1] - step[2], bd); |
| 1566 output[3] = WRAPLOW(step[0] - step[3], bd); |
| 1567 } |
| 1568 |
| 1569 void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1570 int stride, int bd) { |
1557 tran_low_t out[4 * 4]; | 1571 tran_low_t out[4 * 4]; |
1558 tran_low_t *outptr = out; | 1572 tran_low_t *outptr = out; |
1559 int i, j; | 1573 int i, j; |
1560 tran_low_t temp_in[4], temp_out[4]; | 1574 tran_low_t temp_in[4], temp_out[4]; |
1561 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1575 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1562 | 1576 |
1563 // Rows | 1577 // Rows |
1564 for (i = 0; i < 4; ++i) { | 1578 for (i = 0; i < 4; ++i) { |
1565 high_idct4(input, outptr, bd); | 1579 highbd_idct4(input, outptr, bd); |
1566 input += 4; | 1580 input += 4; |
1567 outptr += 4; | 1581 outptr += 4; |
1568 } | 1582 } |
1569 | 1583 |
1570 // Columns | 1584 // Columns |
1571 for (i = 0; i < 4; ++i) { | 1585 for (i = 0; i < 4; ++i) { |
1572 for (j = 0; j < 4; ++j) | 1586 for (j = 0; j < 4; ++j) |
1573 temp_in[j] = out[j * 4 + i]; | 1587 temp_in[j] = out[j * 4 + i]; |
1574 high_idct4(temp_in, temp_out, bd); | 1588 highbd_idct4(temp_in, temp_out, bd); |
1575 for (j = 0; j < 4; ++j) | 1589 for (j = 0; j < 4; ++j) { |
1576 dest[j * stride + i] = clip_pixel_bd_high( | 1590 dest[j * stride + i] = highbd_clip_pixel_add( |
1577 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); | 1591 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); |
| 1592 } |
1578 } | 1593 } |
1579 } | 1594 } |
1580 | 1595 |
1581 void vp9_high_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, | 1596 void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, |
1582 int dest_stride, int bd) { | 1597 int dest_stride, int bd) { |
1583 int i; | 1598 int i; |
1584 tran_high_t a1; | 1599 tran_high_t a1; |
1585 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); | 1600 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); |
1586 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1601 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1587 | 1602 |
1588 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); | 1603 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); |
1589 a1 = ROUND_POWER_OF_TWO(out, 4); | 1604 a1 = ROUND_POWER_OF_TWO(out, 4); |
1590 | 1605 |
1591 for (i = 0; i < 4; i++) { | 1606 for (i = 0; i < 4; i++) { |
1592 dest[0] = clip_pixel_bd_high(dest[0], a1, bd); | 1607 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); |
1593 dest[1] = clip_pixel_bd_high(dest[1], a1, bd); | 1608 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); |
1594 dest[2] = clip_pixel_bd_high(dest[2], a1, bd); | 1609 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); |
1595 dest[3] = clip_pixel_bd_high(dest[3], a1, bd); | 1610 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); |
1596 dest += dest_stride; | 1611 dest += dest_stride; |
1597 } | 1612 } |
1598 } | 1613 } |
1599 | 1614 |
1600 static void high_idct8(const tran_low_t *input, tran_low_t *output, int bd) { | 1615 static void highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) { |
1601 tran_low_t step1[8], step2[8]; | 1616 tran_low_t step1[8], step2[8]; |
1602 tran_high_t temp1, temp2; | 1617 tran_high_t temp1, temp2; |
1603 // stage 1 | 1618 // stage 1 |
1604 step1[0] = input[0]; | 1619 step1[0] = input[0]; |
1605 step1[2] = input[4]; | 1620 step1[2] = input[4]; |
1606 step1[1] = input[2]; | 1621 step1[1] = input[2]; |
1607 step1[3] = input[6]; | 1622 step1[3] = input[6]; |
1608 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; | 1623 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; |
1609 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; | 1624 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; |
1610 step1[4] = WRAPLOW(dct_const_round_shift(temp1)); | 1625 step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd); |
1611 step1[7] = WRAPLOW(dct_const_round_shift(temp2)); | 1626 step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd); |
1612 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; | 1627 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; |
1613 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; | 1628 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; |
1614 step1[5] = WRAPLOW(dct_const_round_shift(temp1)); | 1629 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); |
1615 step1[6] = WRAPLOW(dct_const_round_shift(temp2)); | 1630 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); |
1616 | 1631 |
1617 // stage 2 & stage 3 - even half | 1632 // stage 2 & stage 3 - even half |
1618 high_idct4(step1, step1, bd); | 1633 highbd_idct4(step1, step1, bd); |
1619 | 1634 |
1620 // stage 2 - odd half | 1635 // stage 2 - odd half |
1621 step2[4] = WRAPLOW(step1[4] + step1[5]); | 1636 step2[4] = WRAPLOW(step1[4] + step1[5], bd); |
1622 step2[5] = WRAPLOW(step1[4] - step1[5]); | 1637 step2[5] = WRAPLOW(step1[4] - step1[5], bd); |
1623 step2[6] = WRAPLOW(-step1[6] + step1[7]); | 1638 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); |
1624 step2[7] = WRAPLOW(step1[6] + step1[7]); | 1639 step2[7] = WRAPLOW(step1[6] + step1[7], bd); |
1625 | 1640 |
1626 // stage 3 - odd half | 1641 // stage 3 - odd half |
1627 step1[4] = step2[4]; | 1642 step1[4] = step2[4]; |
1628 temp1 = (step2[6] - step2[5]) * cospi_16_64; | 1643 temp1 = (step2[6] - step2[5]) * cospi_16_64; |
1629 temp2 = (step2[5] + step2[6]) * cospi_16_64; | 1644 temp2 = (step2[5] + step2[6]) * cospi_16_64; |
1630 step1[5] = WRAPLOW(dct_const_round_shift(temp1)); | 1645 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); |
1631 step1[6] = WRAPLOW(dct_const_round_shift(temp2)); | 1646 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); |
1632 step1[7] = step2[7]; | 1647 step1[7] = step2[7]; |
1633 | 1648 |
1634 // stage 4 | 1649 // stage 4 |
1635 output[0] = WRAPLOW(step1[0] + step1[7]); | 1650 output[0] = WRAPLOW(step1[0] + step1[7], bd); |
1636 output[1] = WRAPLOW(step1[1] + step1[6]); | 1651 output[1] = WRAPLOW(step1[1] + step1[6], bd); |
1637 output[2] = WRAPLOW(step1[2] + step1[5]); | 1652 output[2] = WRAPLOW(step1[2] + step1[5], bd); |
1638 output[3] = WRAPLOW(step1[3] + step1[4]); | 1653 output[3] = WRAPLOW(step1[3] + step1[4], bd); |
1639 output[4] = WRAPLOW(step1[3] - step1[4]); | 1654 output[4] = WRAPLOW(step1[3] - step1[4], bd); |
1640 output[5] = WRAPLOW(step1[2] - step1[5]); | 1655 output[5] = WRAPLOW(step1[2] - step1[5], bd); |
1641 output[6] = WRAPLOW(step1[1] - step1[6]); | 1656 output[6] = WRAPLOW(step1[1] - step1[6], bd); |
1642 output[7] = WRAPLOW(step1[0] - step1[7]); | 1657 output[7] = WRAPLOW(step1[0] - step1[7], bd); |
1643 } | 1658 } |
1644 | 1659 |
1645 void vp9_high_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, | 1660 void vp9_highbd_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, |
1646 int stride, int bd) { | 1661 int stride, int bd) { |
1647 tran_low_t out[8 * 8]; | 1662 tran_low_t out[8 * 8]; |
1648 tran_low_t *outptr = out; | 1663 tran_low_t *outptr = out; |
1649 int i, j; | 1664 int i, j; |
1650 tran_low_t temp_in[8], temp_out[8]; | 1665 tran_low_t temp_in[8], temp_out[8]; |
1651 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1666 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1652 | 1667 |
1653 // First transform rows. | 1668 // First transform rows. |
1654 for (i = 0; i < 8; ++i) { | 1669 for (i = 0; i < 8; ++i) { |
1655 high_idct8(input, outptr, bd); | 1670 highbd_idct8(input, outptr, bd); |
1656 input += 8; | 1671 input += 8; |
1657 outptr += 8; | 1672 outptr += 8; |
1658 } | 1673 } |
1659 | 1674 |
1660 // Then transform columns. | 1675 // Then transform columns. |
1661 for (i = 0; i < 8; ++i) { | 1676 for (i = 0; i < 8; ++i) { |
1662 for (j = 0; j < 8; ++j) | 1677 for (j = 0; j < 8; ++j) |
1663 temp_in[j] = out[j * 8 + i]; | 1678 temp_in[j] = out[j * 8 + i]; |
1664 high_idct8(temp_in, temp_out, bd); | 1679 highbd_idct8(temp_in, temp_out, bd); |
1665 for (j = 0; j < 8; ++j) | 1680 for (j = 0; j < 8; ++j) { |
1666 dest[j * stride + i] = clip_pixel_bd_high(dest[j * stride + i], | 1681 dest[j * stride + i] = highbd_clip_pixel_add( |
1667 ROUND_POWER_OF_TWO(temp_out[j], 5), | 1682 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); |
1668 bd); | 1683 } |
1669 } | 1684 } |
1670 } | 1685 } |
1671 | 1686 |
1672 void vp9_high_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, | 1687 void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, |
1673 int stride, int bd) { | 1688 int stride, int bd) { |
1674 int i, j; | 1689 int i, j; |
1675 tran_high_t a1; | 1690 tran_high_t a1; |
1676 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); | 1691 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); |
1677 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1692 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1678 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); | 1693 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); |
1679 a1 = ROUND_POWER_OF_TWO(out, 5); | 1694 a1 = ROUND_POWER_OF_TWO(out, 5); |
1680 for (j = 0; j < 8; ++j) { | 1695 for (j = 0; j < 8; ++j) { |
1681 for (i = 0; i < 8; ++i) | 1696 for (i = 0; i < 8; ++i) |
1682 dest[i] = clip_pixel_bd_high(dest[i], a1, bd); | 1697 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
1683 dest += stride; | 1698 dest += stride; |
1684 } | 1699 } |
1685 } | 1700 } |
1686 | 1701 |
1687 static void high_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { | 1702 static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { |
1688 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 1703 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
1689 | 1704 |
1690 tran_high_t x0 = input[0]; | 1705 tran_high_t x0 = input[0]; |
1691 tran_high_t x1 = input[1]; | 1706 tran_high_t x1 = input[1]; |
1692 tran_high_t x2 = input[2]; | 1707 tran_high_t x2 = input[2]; |
1693 tran_high_t x3 = input[3]; | 1708 tran_high_t x3 = input[3]; |
1694 (void) bd; | 1709 (void) bd; |
1695 | 1710 |
1696 if (!(x0 | x1 | x2 | x3)) { | 1711 if (!(x0 | x1 | x2 | x3)) { |
1697 vpx_memset(output, 0, 4 * sizeof(*output)); | 1712 vpx_memset(output, 0, 4 * sizeof(*output)); |
(...skipping 16 matching lines...) Expand all Loading... |
1714 | 1729 |
1715 s0 = x0 + x3; | 1730 s0 = x0 + x3; |
1716 s1 = x1 + x3; | 1731 s1 = x1 + x3; |
1717 s2 = x2; | 1732 s2 = x2; |
1718 s3 = x0 + x1 - x3; | 1733 s3 = x0 + x1 - x3; |
1719 | 1734 |
1720 // 1-D transform scaling factor is sqrt(2). | 1735 // 1-D transform scaling factor is sqrt(2). |
1721 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) | 1736 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
1722 // + 1b (addition) = 29b. | 1737 // + 1b (addition) = 29b. |
1723 // Hence the output bit depth is 15b. | 1738 // Hence the output bit depth is 15b. |
1724 output[0] = WRAPLOW(dct_const_round_shift(s0)); | 1739 output[0] = WRAPLOW(dct_const_round_shift(s0), bd); |
1725 output[1] = WRAPLOW(dct_const_round_shift(s1)); | 1740 output[1] = WRAPLOW(dct_const_round_shift(s1), bd); |
1726 output[2] = WRAPLOW(dct_const_round_shift(s2)); | 1741 output[2] = WRAPLOW(dct_const_round_shift(s2), bd); |
1727 output[3] = WRAPLOW(dct_const_round_shift(s3)); | 1742 output[3] = WRAPLOW(dct_const_round_shift(s3), bd); |
1728 } | 1743 } |
1729 | 1744 |
1730 void vp9_high_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1745 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
1731 int stride, int tx_type, int bd) { | 1746 int stride, int tx_type, int bd) { |
1732 const high_transform_2d IHT_4[] = { | 1747 const highbd_transform_2d IHT_4[] = { |
1733 { high_idct4, high_idct4 }, // DCT_DCT = 0 | 1748 { highbd_idct4, highbd_idct4 }, // DCT_DCT = 0 |
1734 { high_iadst4, high_idct4 }, // ADST_DCT = 1 | 1749 { highbd_iadst4, highbd_idct4 }, // ADST_DCT = 1 |
1735 { high_idct4, high_iadst4 }, // DCT_ADST = 2 | 1750 { highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2 |
1736 { high_iadst4, high_iadst4 } // ADST_ADST = 3 | 1751 { highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3 |
1737 }; | 1752 }; |
1738 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1753 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1739 | 1754 |
1740 int i, j; | 1755 int i, j; |
1741 tran_low_t out[4 * 4]; | 1756 tran_low_t out[4 * 4]; |
1742 tran_low_t *outptr = out; | 1757 tran_low_t *outptr = out; |
1743 tran_low_t temp_in[4], temp_out[4]; | 1758 tran_low_t temp_in[4], temp_out[4]; |
1744 | 1759 |
1745 // Inverse transform row vectors. | 1760 // Inverse transform row vectors. |
1746 for (i = 0; i < 4; ++i) { | 1761 for (i = 0; i < 4; ++i) { |
1747 IHT_4[tx_type].rows(input, outptr, bd); | 1762 IHT_4[tx_type].rows(input, outptr, bd); |
1748 input += 4; | 1763 input += 4; |
1749 outptr += 4; | 1764 outptr += 4; |
1750 } | 1765 } |
1751 | 1766 |
1752 // Inverse transform column vectors. | 1767 // Inverse transform column vectors. |
1753 for (i = 0; i < 4; ++i) { | 1768 for (i = 0; i < 4; ++i) { |
1754 for (j = 0; j < 4; ++j) | 1769 for (j = 0; j < 4; ++j) |
1755 temp_in[j] = out[j * 4 + i]; | 1770 temp_in[j] = out[j * 4 + i]; |
1756 IHT_4[tx_type].cols(temp_in, temp_out, bd); | 1771 IHT_4[tx_type].cols(temp_in, temp_out, bd); |
1757 for (j = 0; j < 4; ++j) | 1772 for (j = 0; j < 4; ++j) { |
1758 dest[j * stride + i] = clip_pixel_bd_high( | 1773 dest[j * stride + i] = highbd_clip_pixel_add( |
1759 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); | 1774 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); |
| 1775 } |
1760 } | 1776 } |
1761 } | 1777 } |
1762 | 1778 |
1763 static void high_iadst8(const tran_low_t *input, tran_low_t *output, int bd) { | 1779 static void highbd_iadst8(const tran_low_t *input, tran_low_t *output, int bd) { |
1764 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 1780 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
1765 | 1781 |
1766 tran_high_t x0 = input[7]; | 1782 tran_high_t x0 = input[7]; |
1767 tran_high_t x1 = input[0]; | 1783 tran_high_t x1 = input[0]; |
1768 tran_high_t x2 = input[5]; | 1784 tran_high_t x2 = input[5]; |
1769 tran_high_t x3 = input[2]; | 1785 tran_high_t x3 = input[2]; |
1770 tran_high_t x4 = input[3]; | 1786 tran_high_t x4 = input[3]; |
1771 tran_high_t x5 = input[4]; | 1787 tran_high_t x5 = input[4]; |
1772 tran_high_t x6 = input[1]; | 1788 tran_high_t x6 = input[1]; |
1773 tran_high_t x7 = input[6]; | 1789 tran_high_t x7 = input[6]; |
1774 (void) bd; | 1790 (void) bd; |
1775 | 1791 |
1776 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { | 1792 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { |
1777 vpx_memset(output, 0, 8 * sizeof(*output)); | 1793 vpx_memset(output, 0, 8 * sizeof(*output)); |
1778 return; | 1794 return; |
1779 } | 1795 } |
1780 | 1796 |
1781 // stage 1 | 1797 // stage 1 |
1782 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; | 1798 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; |
1783 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; | 1799 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; |
1784 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; | 1800 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; |
1785 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; | 1801 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; |
1786 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; | 1802 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; |
1787 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; | 1803 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; |
1788 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; | 1804 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; |
1789 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; | 1805 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; |
1790 | 1806 |
1791 x0 = WRAPLOW(dct_const_round_shift(s0 + s4)); | 1807 x0 = WRAPLOW(dct_const_round_shift(s0 + s4), bd); |
1792 x1 = WRAPLOW(dct_const_round_shift(s1 + s5)); | 1808 x1 = WRAPLOW(dct_const_round_shift(s1 + s5), bd); |
1793 x2 = WRAPLOW(dct_const_round_shift(s2 + s6)); | 1809 x2 = WRAPLOW(dct_const_round_shift(s2 + s6), bd); |
1794 x3 = WRAPLOW(dct_const_round_shift(s3 + s7)); | 1810 x3 = WRAPLOW(dct_const_round_shift(s3 + s7), bd); |
1795 x4 = WRAPLOW(dct_const_round_shift(s0 - s4)); | 1811 x4 = WRAPLOW(dct_const_round_shift(s0 - s4), bd); |
1796 x5 = WRAPLOW(dct_const_round_shift(s1 - s5)); | 1812 x5 = WRAPLOW(dct_const_round_shift(s1 - s5), bd); |
1797 x6 = WRAPLOW(dct_const_round_shift(s2 - s6)); | 1813 x6 = WRAPLOW(dct_const_round_shift(s2 - s6), bd); |
1798 x7 = WRAPLOW(dct_const_round_shift(s3 - s7)); | 1814 x7 = WRAPLOW(dct_const_round_shift(s3 - s7), bd); |
1799 | 1815 |
1800 // stage 2 | 1816 // stage 2 |
1801 s0 = x0; | 1817 s0 = x0; |
1802 s1 = x1; | 1818 s1 = x1; |
1803 s2 = x2; | 1819 s2 = x2; |
1804 s3 = x3; | 1820 s3 = x3; |
1805 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; | 1821 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; |
1806 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; | 1822 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; |
1807 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; | 1823 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; |
1808 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; | 1824 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; |
1809 | 1825 |
1810 x0 = s0 + s2; | 1826 x0 = WRAPLOW(s0 + s2, bd); |
1811 x1 = s1 + s3; | 1827 x1 = WRAPLOW(s1 + s3, bd); |
1812 x2 = s0 - s2; | 1828 x2 = WRAPLOW(s0 - s2, bd); |
1813 x3 = s1 - s3; | 1829 x3 = WRAPLOW(s1 - s3, bd); |
1814 x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); | 1830 x4 = WRAPLOW(dct_const_round_shift(s4 + s6), bd); |
1815 x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); | 1831 x5 = WRAPLOW(dct_const_round_shift(s5 + s7), bd); |
1816 x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); | 1832 x6 = WRAPLOW(dct_const_round_shift(s4 - s6), bd); |
1817 x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); | 1833 x7 = WRAPLOW(dct_const_round_shift(s5 - s7), bd); |
1818 | 1834 |
1819 // stage 3 | 1835 // stage 3 |
1820 s2 = cospi_16_64 * (x2 + x3); | 1836 s2 = cospi_16_64 * (x2 + x3); |
1821 s3 = cospi_16_64 * (x2 - x3); | 1837 s3 = cospi_16_64 * (x2 - x3); |
1822 s6 = cospi_16_64 * (x6 + x7); | 1838 s6 = cospi_16_64 * (x6 + x7); |
1823 s7 = cospi_16_64 * (x6 - x7); | 1839 s7 = cospi_16_64 * (x6 - x7); |
1824 | 1840 |
1825 x2 = WRAPLOW(dct_const_round_shift(s2)); | 1841 x2 = WRAPLOW(dct_const_round_shift(s2), bd); |
1826 x3 = WRAPLOW(dct_const_round_shift(s3)); | 1842 x3 = WRAPLOW(dct_const_round_shift(s3), bd); |
1827 x6 = WRAPLOW(dct_const_round_shift(s6)); | 1843 x6 = WRAPLOW(dct_const_round_shift(s6), bd); |
1828 x7 = WRAPLOW(dct_const_round_shift(s7)); | 1844 x7 = WRAPLOW(dct_const_round_shift(s7), bd); |
1829 | 1845 |
1830 output[0] = WRAPLOW(x0); | 1846 output[0] = WRAPLOW(x0, bd); |
1831 output[1] = WRAPLOW(-x4); | 1847 output[1] = WRAPLOW(-x4, bd); |
1832 output[2] = WRAPLOW(x6); | 1848 output[2] = WRAPLOW(x6, bd); |
1833 output[3] = WRAPLOW(-x2); | 1849 output[3] = WRAPLOW(-x2, bd); |
1834 output[4] = WRAPLOW(x3); | 1850 output[4] = WRAPLOW(x3, bd); |
1835 output[5] = WRAPLOW(-x7); | 1851 output[5] = WRAPLOW(-x7, bd); |
1836 output[6] = WRAPLOW(x5); | 1852 output[6] = WRAPLOW(x5, bd); |
1837 output[7] = WRAPLOW(-x1); | 1853 output[7] = WRAPLOW(-x1, bd); |
1838 } | 1854 } |
1839 | 1855 |
1840 static const high_transform_2d HIGH_IHT_8[] = { | 1856 static const highbd_transform_2d HIGH_IHT_8[] = { |
1841 { high_idct8, high_idct8 }, // DCT_DCT = 0 | 1857 { highbd_idct8, highbd_idct8 }, // DCT_DCT = 0 |
1842 { high_iadst8, high_idct8 }, // ADST_DCT = 1 | 1858 { highbd_iadst8, highbd_idct8 }, // ADST_DCT = 1 |
1843 { high_idct8, high_iadst8 }, // DCT_ADST = 2 | 1859 { highbd_idct8, highbd_iadst8 }, // DCT_ADST = 2 |
1844 { high_iadst8, high_iadst8 } // ADST_ADST = 3 | 1860 { highbd_iadst8, highbd_iadst8 } // ADST_ADST = 3 |
1845 }; | 1861 }; |
1846 | 1862 |
1847 void vp9_high_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, | 1863 void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, |
1848 int stride, int tx_type, int bd) { | 1864 int stride, int tx_type, int bd) { |
1849 int i, j; | 1865 int i, j; |
1850 tran_low_t out[8 * 8]; | 1866 tran_low_t out[8 * 8]; |
1851 tran_low_t *outptr = out; | 1867 tran_low_t *outptr = out; |
1852 tran_low_t temp_in[8], temp_out[8]; | 1868 tran_low_t temp_in[8], temp_out[8]; |
1853 const high_transform_2d ht = HIGH_IHT_8[tx_type]; | 1869 const highbd_transform_2d ht = HIGH_IHT_8[tx_type]; |
1854 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1870 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1855 | 1871 |
1856 // Inverse transform row vectors. | 1872 // Inverse transform row vectors. |
1857 for (i = 0; i < 8; ++i) { | 1873 for (i = 0; i < 8; ++i) { |
1858 ht.rows(input, outptr, bd); | 1874 ht.rows(input, outptr, bd); |
1859 input += 8; | 1875 input += 8; |
1860 outptr += 8; | 1876 outptr += 8; |
1861 } | 1877 } |
1862 | 1878 |
1863 // Inverse transform column vectors. | 1879 // Inverse transform column vectors. |
1864 for (i = 0; i < 8; ++i) { | 1880 for (i = 0; i < 8; ++i) { |
1865 for (j = 0; j < 8; ++j) | 1881 for (j = 0; j < 8; ++j) |
1866 temp_in[j] = out[j * 8 + i]; | 1882 temp_in[j] = out[j * 8 + i]; |
1867 ht.cols(temp_in, temp_out, bd); | 1883 ht.cols(temp_in, temp_out, bd); |
1868 for (j = 0; j < 8; ++j) | 1884 for (j = 0; j < 8; ++j) { |
1869 dest[j * stride + i] = clip_pixel_bd_high( | 1885 dest[j * stride + i] = highbd_clip_pixel_add( |
1870 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); | 1886 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); |
| 1887 } |
1871 } | 1888 } |
1872 } | 1889 } |
1873 | 1890 |
1874 void vp9_high_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, | 1891 void vp9_highbd_idct8x8_10_add_c(const tran_low_t *input, uint8_t *dest8, |
1875 int stride, int bd) { | 1892 int stride, int bd) { |
1876 tran_low_t out[8 * 8] = { 0 }; | 1893 tran_low_t out[8 * 8] = { 0 }; |
1877 tran_low_t *outptr = out; | 1894 tran_low_t *outptr = out; |
1878 int i, j; | 1895 int i, j; |
1879 tran_low_t temp_in[8], temp_out[8]; | 1896 tran_low_t temp_in[8], temp_out[8]; |
1880 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1897 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1881 | 1898 |
1882 // First transform rows. | 1899 // First transform rows. |
1883 // Only first 4 row has non-zero coefs. | 1900 // Only first 4 row has non-zero coefs. |
1884 for (i = 0; i < 4; ++i) { | 1901 for (i = 0; i < 4; ++i) { |
1885 high_idct8(input, outptr, bd); | 1902 highbd_idct8(input, outptr, bd); |
1886 input += 8; | 1903 input += 8; |
1887 outptr += 8; | 1904 outptr += 8; |
1888 } | 1905 } |
1889 // Then transform columns. | 1906 // Then transform columns. |
1890 for (i = 0; i < 8; ++i) { | 1907 for (i = 0; i < 8; ++i) { |
1891 for (j = 0; j < 8; ++j) | 1908 for (j = 0; j < 8; ++j) |
1892 temp_in[j] = out[j * 8 + i]; | 1909 temp_in[j] = out[j * 8 + i]; |
1893 high_idct8(temp_in, temp_out, bd); | 1910 highbd_idct8(temp_in, temp_out, bd); |
1894 for (j = 0; j < 8; ++j) | 1911 for (j = 0; j < 8; ++j) { |
1895 dest[j * stride + i] = clip_pixel_bd_high( | 1912 dest[j * stride + i] = highbd_clip_pixel_add( |
1896 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); | 1913 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); |
| 1914 } |
1897 } | 1915 } |
1898 } | 1916 } |
1899 | 1917 |
1900 static void high_idct16(const tran_low_t *input, tran_low_t *output, int bd) { | 1918 static void highbd_idct16(const tran_low_t *input, tran_low_t *output, int bd) { |
1901 tran_low_t step1[16], step2[16]; | 1919 tran_low_t step1[16], step2[16]; |
1902 tran_high_t temp1, temp2; | 1920 tran_high_t temp1, temp2; |
1903 (void) bd; | 1921 (void) bd; |
1904 | 1922 |
1905 // stage 1 | 1923 // stage 1 |
1906 step1[0] = input[0/2]; | 1924 step1[0] = input[0/2]; |
1907 step1[1] = input[16/2]; | 1925 step1[1] = input[16/2]; |
1908 step1[2] = input[8/2]; | 1926 step1[2] = input[8/2]; |
1909 step1[3] = input[24/2]; | 1927 step1[3] = input[24/2]; |
1910 step1[4] = input[4/2]; | 1928 step1[4] = input[4/2]; |
(...skipping 14 matching lines...) Expand all Loading... |
1925 step2[1] = step1[1]; | 1943 step2[1] = step1[1]; |
1926 step2[2] = step1[2]; | 1944 step2[2] = step1[2]; |
1927 step2[3] = step1[3]; | 1945 step2[3] = step1[3]; |
1928 step2[4] = step1[4]; | 1946 step2[4] = step1[4]; |
1929 step2[5] = step1[5]; | 1947 step2[5] = step1[5]; |
1930 step2[6] = step1[6]; | 1948 step2[6] = step1[6]; |
1931 step2[7] = step1[7]; | 1949 step2[7] = step1[7]; |
1932 | 1950 |
1933 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; | 1951 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; |
1934 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; | 1952 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; |
1935 step2[8] = WRAPLOW(dct_const_round_shift(temp1)); | 1953 step2[8] = WRAPLOW(dct_const_round_shift(temp1), bd); |
1936 step2[15] = WRAPLOW(dct_const_round_shift(temp2)); | 1954 step2[15] = WRAPLOW(dct_const_round_shift(temp2), bd); |
1937 | 1955 |
1938 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; | 1956 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; |
1939 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; | 1957 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; |
1940 step2[9] = WRAPLOW(dct_const_round_shift(temp1)); | 1958 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); |
1941 step2[14] = WRAPLOW(dct_const_round_shift(temp2)); | 1959 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); |
1942 | 1960 |
1943 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; | 1961 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; |
1944 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; | 1962 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; |
1945 step2[10] = WRAPLOW(dct_const_round_shift(temp1)); | 1963 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); |
1946 step2[13] = WRAPLOW(dct_const_round_shift(temp2)); | 1964 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); |
1947 | 1965 |
1948 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; | 1966 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; |
1949 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; | 1967 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; |
1950 step2[11] = WRAPLOW(dct_const_round_shift(temp1)); | 1968 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); |
1951 step2[12] = WRAPLOW(dct_const_round_shift(temp2)); | 1969 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); |
1952 | 1970 |
1953 // stage 3 | 1971 // stage 3 |
1954 step1[0] = step2[0]; | 1972 step1[0] = step2[0]; |
1955 step1[1] = step2[1]; | 1973 step1[1] = step2[1]; |
1956 step1[2] = step2[2]; | 1974 step1[2] = step2[2]; |
1957 step1[3] = step2[3]; | 1975 step1[3] = step2[3]; |
1958 | 1976 |
1959 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; | 1977 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; |
1960 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; | 1978 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; |
1961 step1[4] = WRAPLOW(dct_const_round_shift(temp1)); | 1979 step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd); |
1962 step1[7] = WRAPLOW(dct_const_round_shift(temp2)); | 1980 step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd); |
1963 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; | 1981 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; |
1964 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; | 1982 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; |
1965 step1[5] = WRAPLOW(dct_const_round_shift(temp1)); | 1983 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); |
1966 step1[6] = WRAPLOW(dct_const_round_shift(temp2)); | 1984 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); |
1967 | 1985 |
1968 step1[8] = WRAPLOW(step2[8] + step2[9]); | 1986 step1[8] = WRAPLOW(step2[8] + step2[9], bd); |
1969 step1[9] = WRAPLOW(step2[8] - step2[9]); | 1987 step1[9] = WRAPLOW(step2[8] - step2[9], bd); |
1970 step1[10] = WRAPLOW(-step2[10] + step2[11]); | 1988 step1[10] = WRAPLOW(-step2[10] + step2[11], bd); |
1971 step1[11] = WRAPLOW(step2[10] + step2[11]); | 1989 step1[11] = WRAPLOW(step2[10] + step2[11], bd); |
1972 step1[12] = WRAPLOW(step2[12] + step2[13]); | 1990 step1[12] = WRAPLOW(step2[12] + step2[13], bd); |
1973 step1[13] = WRAPLOW(step2[12] - step2[13]); | 1991 step1[13] = WRAPLOW(step2[12] - step2[13], bd); |
1974 step1[14] = WRAPLOW(-step2[14] + step2[15]); | 1992 step1[14] = WRAPLOW(-step2[14] + step2[15], bd); |
1975 step1[15] = WRAPLOW(step2[14] + step2[15]); | 1993 step1[15] = WRAPLOW(step2[14] + step2[15], bd); |
1976 | 1994 |
1977 // stage 4 | 1995 // stage 4 |
1978 temp1 = (step1[0] + step1[1]) * cospi_16_64; | 1996 temp1 = (step1[0] + step1[1]) * cospi_16_64; |
1979 temp2 = (step1[0] - step1[1]) * cospi_16_64; | 1997 temp2 = (step1[0] - step1[1]) * cospi_16_64; |
1980 step2[0] = WRAPLOW(dct_const_round_shift(temp1)); | 1998 step2[0] = WRAPLOW(dct_const_round_shift(temp1), bd); |
1981 step2[1] = WRAPLOW(dct_const_round_shift(temp2)); | 1999 step2[1] = WRAPLOW(dct_const_round_shift(temp2), bd); |
1982 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; | 2000 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; |
1983 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; | 2001 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; |
1984 step2[2] = WRAPLOW(dct_const_round_shift(temp1)); | 2002 step2[2] = WRAPLOW(dct_const_round_shift(temp1), bd); |
1985 step2[3] = WRAPLOW(dct_const_round_shift(temp2)); | 2003 step2[3] = WRAPLOW(dct_const_round_shift(temp2), bd); |
1986 step2[4] = WRAPLOW(step1[4] + step1[5]); | 2004 step2[4] = WRAPLOW(step1[4] + step1[5], bd); |
1987 step2[5] = WRAPLOW(step1[4] - step1[5]); | 2005 step2[5] = WRAPLOW(step1[4] - step1[5], bd); |
1988 step2[6] = WRAPLOW(-step1[6] + step1[7]); | 2006 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); |
1989 step2[7] = WRAPLOW(step1[6] + step1[7]); | 2007 step2[7] = WRAPLOW(step1[6] + step1[7], bd); |
1990 | 2008 |
1991 step2[8] = step1[8]; | 2009 step2[8] = step1[8]; |
1992 step2[15] = step1[15]; | 2010 step2[15] = step1[15]; |
1993 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; | 2011 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; |
1994 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; | 2012 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; |
1995 step2[9] = WRAPLOW(dct_const_round_shift(temp1)); | 2013 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); |
1996 step2[14] = WRAPLOW(dct_const_round_shift(temp2)); | 2014 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); |
1997 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; | 2015 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; |
1998 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; | 2016 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; |
1999 step2[10] = WRAPLOW(dct_const_round_shift(temp1)); | 2017 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2000 step2[13] = WRAPLOW(dct_const_round_shift(temp2)); | 2018 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2001 step2[11] = step1[11]; | 2019 step2[11] = step1[11]; |
2002 step2[12] = step1[12]; | 2020 step2[12] = step1[12]; |
2003 | 2021 |
2004 // stage 5 | 2022 // stage 5 |
2005 step1[0] = WRAPLOW(step2[0] + step2[3]); | 2023 step1[0] = WRAPLOW(step2[0] + step2[3], bd); |
2006 step1[1] = WRAPLOW(step2[1] + step2[2]); | 2024 step1[1] = WRAPLOW(step2[1] + step2[2], bd); |
2007 step1[2] = WRAPLOW(step2[1] - step2[2]); | 2025 step1[2] = WRAPLOW(step2[1] - step2[2], bd); |
2008 step1[3] = WRAPLOW(step2[0] - step2[3]); | 2026 step1[3] = WRAPLOW(step2[0] - step2[3], bd); |
2009 step1[4] = step2[4]; | 2027 step1[4] = step2[4]; |
2010 temp1 = (step2[6] - step2[5]) * cospi_16_64; | 2028 temp1 = (step2[6] - step2[5]) * cospi_16_64; |
2011 temp2 = (step2[5] + step2[6]) * cospi_16_64; | 2029 temp2 = (step2[5] + step2[6]) * cospi_16_64; |
2012 step1[5] = WRAPLOW(dct_const_round_shift(temp1)); | 2030 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2013 step1[6] = WRAPLOW(dct_const_round_shift(temp2)); | 2031 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2014 step1[7] = step2[7]; | 2032 step1[7] = step2[7]; |
2015 | 2033 |
2016 step1[8] = WRAPLOW(step2[8] + step2[11]); | 2034 step1[8] = WRAPLOW(step2[8] + step2[11], bd); |
2017 step1[9] = WRAPLOW(step2[9] + step2[10]); | 2035 step1[9] = WRAPLOW(step2[9] + step2[10], bd); |
2018 step1[10] = WRAPLOW(step2[9] - step2[10]); | 2036 step1[10] = WRAPLOW(step2[9] - step2[10], bd); |
2019 step1[11] = WRAPLOW(step2[8] - step2[11]); | 2037 step1[11] = WRAPLOW(step2[8] - step2[11], bd); |
2020 step1[12] = WRAPLOW(-step2[12] + step2[15]); | 2038 step1[12] = WRAPLOW(-step2[12] + step2[15], bd); |
2021 step1[13] = WRAPLOW(-step2[13] + step2[14]); | 2039 step1[13] = WRAPLOW(-step2[13] + step2[14], bd); |
2022 step1[14] = WRAPLOW(step2[13] + step2[14]); | 2040 step1[14] = WRAPLOW(step2[13] + step2[14], bd); |
2023 step1[15] = WRAPLOW(step2[12] + step2[15]); | 2041 step1[15] = WRAPLOW(step2[12] + step2[15], bd); |
2024 | 2042 |
2025 // stage 6 | 2043 // stage 6 |
2026 step2[0] = WRAPLOW(step1[0] + step1[7]); | 2044 step2[0] = WRAPLOW(step1[0] + step1[7], bd); |
2027 step2[1] = WRAPLOW(step1[1] + step1[6]); | 2045 step2[1] = WRAPLOW(step1[1] + step1[6], bd); |
2028 step2[2] = WRAPLOW(step1[2] + step1[5]); | 2046 step2[2] = WRAPLOW(step1[2] + step1[5], bd); |
2029 step2[3] = WRAPLOW(step1[3] + step1[4]); | 2047 step2[3] = WRAPLOW(step1[3] + step1[4], bd); |
2030 step2[4] = WRAPLOW(step1[3] - step1[4]); | 2048 step2[4] = WRAPLOW(step1[3] - step1[4], bd); |
2031 step2[5] = WRAPLOW(step1[2] - step1[5]); | 2049 step2[5] = WRAPLOW(step1[2] - step1[5], bd); |
2032 step2[6] = WRAPLOW(step1[1] - step1[6]); | 2050 step2[6] = WRAPLOW(step1[1] - step1[6], bd); |
2033 step2[7] = WRAPLOW(step1[0] - step1[7]); | 2051 step2[7] = WRAPLOW(step1[0] - step1[7], bd); |
2034 step2[8] = step1[8]; | 2052 step2[8] = step1[8]; |
2035 step2[9] = step1[9]; | 2053 step2[9] = step1[9]; |
2036 temp1 = (-step1[10] + step1[13]) * cospi_16_64; | 2054 temp1 = (-step1[10] + step1[13]) * cospi_16_64; |
2037 temp2 = (step1[10] + step1[13]) * cospi_16_64; | 2055 temp2 = (step1[10] + step1[13]) * cospi_16_64; |
2038 step2[10] = WRAPLOW(dct_const_round_shift(temp1)); | 2056 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2039 step2[13] = WRAPLOW(dct_const_round_shift(temp2)); | 2057 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2040 temp1 = (-step1[11] + step1[12]) * cospi_16_64; | 2058 temp1 = (-step1[11] + step1[12]) * cospi_16_64; |
2041 temp2 = (step1[11] + step1[12]) * cospi_16_64; | 2059 temp2 = (step1[11] + step1[12]) * cospi_16_64; |
2042 step2[11] = WRAPLOW(dct_const_round_shift(temp1)); | 2060 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2043 step2[12] = WRAPLOW(dct_const_round_shift(temp2)); | 2061 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2044 step2[14] = step1[14]; | 2062 step2[14] = step1[14]; |
2045 step2[15] = step1[15]; | 2063 step2[15] = step1[15]; |
2046 | 2064 |
2047 // stage 7 | 2065 // stage 7 |
2048 output[0] = WRAPLOW(step2[0] + step2[15]); | 2066 output[0] = WRAPLOW(step2[0] + step2[15], bd); |
2049 output[1] = WRAPLOW(step2[1] + step2[14]); | 2067 output[1] = WRAPLOW(step2[1] + step2[14], bd); |
2050 output[2] = WRAPLOW(step2[2] + step2[13]); | 2068 output[2] = WRAPLOW(step2[2] + step2[13], bd); |
2051 output[3] = WRAPLOW(step2[3] + step2[12]); | 2069 output[3] = WRAPLOW(step2[3] + step2[12], bd); |
2052 output[4] = WRAPLOW(step2[4] + step2[11]); | 2070 output[4] = WRAPLOW(step2[4] + step2[11], bd); |
2053 output[5] = WRAPLOW(step2[5] + step2[10]); | 2071 output[5] = WRAPLOW(step2[5] + step2[10], bd); |
2054 output[6] = WRAPLOW(step2[6] + step2[9]); | 2072 output[6] = WRAPLOW(step2[6] + step2[9], bd); |
2055 output[7] = WRAPLOW(step2[7] + step2[8]); | 2073 output[7] = WRAPLOW(step2[7] + step2[8], bd); |
2056 output[8] = WRAPLOW(step2[7] - step2[8]); | 2074 output[8] = WRAPLOW(step2[7] - step2[8], bd); |
2057 output[9] = WRAPLOW(step2[6] - step2[9]); | 2075 output[9] = WRAPLOW(step2[6] - step2[9], bd); |
2058 output[10] = WRAPLOW(step2[5] - step2[10]); | 2076 output[10] = WRAPLOW(step2[5] - step2[10], bd); |
2059 output[11] = WRAPLOW(step2[4] - step2[11]); | 2077 output[11] = WRAPLOW(step2[4] - step2[11], bd); |
2060 output[12] = WRAPLOW(step2[3] - step2[12]); | 2078 output[12] = WRAPLOW(step2[3] - step2[12], bd); |
2061 output[13] = WRAPLOW(step2[2] - step2[13]); | 2079 output[13] = WRAPLOW(step2[2] - step2[13], bd); |
2062 output[14] = WRAPLOW(step2[1] - step2[14]); | 2080 output[14] = WRAPLOW(step2[1] - step2[14], bd); |
2063 output[15] = WRAPLOW(step2[0] - step2[15]); | 2081 output[15] = WRAPLOW(step2[0] - step2[15], bd); |
2064 } | 2082 } |
2065 | 2083 |
// 16x16 inverse DCT over all 256 coefficients, added into the destination.
// Runs the 1-D 16-point transform on each of the 16 rows of `input`, then on
// each of the 16 columns of the intermediate buffer, and folds every result
// into `dest`.
//   input  - coefficients, consumed 16 per row for 16 rows.
//   dest8  - destination; reinterpreted as uint16_t* via CONVERT_TO_SHORTPTR.
//   stride - distance between destination rows, in uint16_t elements.
//   bd     - forwarded unchanged to the 1-D transform and to the clip helper
//            (presumably the bit depth -- TODO confirm against the callers).
// In the right-hand (new) column the helpers are renamed high_* -> highbd_*
// and the final store loop gains braces; the arithmetic is the same.
2066 void vp9_high_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, | 2084 void vp9_highbd_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, |
2067 int stride, int bd) { | 2085 int stride, int bd) { |
// `out` is left uninitialized on purpose: the row pass below writes all 256
// entries before the column pass reads any of them.
2068 tran_low_t out[16 * 16]; | 2086 tran_low_t out[16 * 16]; |
2069 tran_low_t *outptr = out; | 2087 tran_low_t *outptr = out; |
2070 int i, j; | 2088 int i, j; |
2071 tran_low_t temp_in[16], temp_out[16]; | 2089 tran_low_t temp_in[16], temp_out[16]; |
2072 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2090 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
2073 | 2091 |
2074 // First transform rows. | 2092 // First transform rows. |
2075 for (i = 0; i < 16; ++i) { | 2093 for (i = 0; i < 16; ++i) { |
2076 high_idct16(input, outptr, bd); | 2094 highbd_idct16(input, outptr, bd); |
2077 input += 16; | 2095 input += 16; |
2078 outptr += 16; | 2096 outptr += 16; |
2079 } | 2097 } |
2080 | 2098 |
2081 // Then transform columns. | 2099 // Then transform columns. |
2082 for (i = 0; i < 16; ++i) { | 2100 for (i = 0; i < 16; ++i) {
// Gather column i of the row-pass result into a contiguous vector.
2083 for (j = 0; j < 16; ++j) | 2101 for (j = 0; j < 16; ++j) |
2084 temp_in[j] = out[j * 16 + i]; | 2102 temp_in[j] = out[j * 16 + i]; |
2085 high_idct16(temp_in, temp_out, bd); | 2103 highbd_idct16(temp_in, temp_out, bd); |
// Combine each column result with the pixel already stored in dest.
// NOTE(review): ROUND_POWER_OF_TWO(x, 6) presumably is a rounding shift by 6
// and the clip helper presumably adds then clamps for `bd` -- confirm in the
// headers that define them.
2086 for (j = 0; j < 16; ++j) | 2104 for (j = 0; j < 16; ++j) { |
2087 dest[j * stride + i] = clip_pixel_bd_high( | 2105 dest[j * stride + i] = highbd_clip_pixel_add( |
2088 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2106 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 2107 } |
2089 } | 2108 } |
2090 } | 2109 } |
2091 | 2110 |
// 1-D 16-point transform used for the ADST entries of HIGH_IHT_16.
// Reads 16 values from `input` and writes 16 values to `output`.
//   bd - passed as the second argument to every WRAPLOW call in the
//        right-hand (new) column; the left-hand (old) column calls WRAPLOW
//        with a single argument.
// The visible stages alternate between building products in the wide
// tran_high_t temporaries s0..s15 and folding them back into x0..x15.
2092 static void high_iadst16(const tran_low_t *input, tran_low_t *output, int bd) { | 2111 static void highbd_iadst16(const tran_low_t *input, tran_low_t *output, |
| 2112 int bd) { |
2093 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; | 2113 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; |
2094 tran_high_t s9, s10, s11, s12, s13, s14, s15; | 2114 tran_high_t s9, s10, s11, s12, s13, s14, s15; |
2095 | 2115 |
// The inputs are loaded in a permuted order (15, 0, 13, 2, 11, 4, 9, ...).
2096 tran_high_t x0 = input[15]; | 2116 tran_high_t x0 = input[15]; |
2097 tran_high_t x1 = input[0]; | 2117 tran_high_t x1 = input[0]; |
2098 tran_high_t x2 = input[13]; | 2118 tran_high_t x2 = input[13]; |
2099 tran_high_t x3 = input[2]; | 2119 tran_high_t x3 = input[2]; |
2100 tran_high_t x4 = input[11]; | 2120 tran_high_t x4 = input[11]; |
2101 tran_high_t x5 = input[4]; | 2121 tran_high_t x5 = input[4]; |
2102 tran_high_t x6 = input[9]; | 2122 tran_high_t x6 = input[9]; |
// NOTE(review): the diff viewer elided 25 unchanged rows here. They presumably
// hold the loads of x7..x15 and the products s0..s6, since those names are
// used below but not assigned in any visible row -- confirm against the file.
(...skipping 25 matching lines...) Expand all Loading... |
2128 s7 = x6 * cospi_19_64 - x7 * cospi_13_64; | 2148 s7 = x6 * cospi_19_64 - x7 * cospi_13_64; |
2129 s8 = x8 * cospi_17_64 + x9 * cospi_15_64; | 2149 s8 = x8 * cospi_17_64 + x9 * cospi_15_64; |
2130 s9 = x8 * cospi_15_64 - x9 * cospi_17_64; | 2150 s9 = x8 * cospi_15_64 - x9 * cospi_17_64; |
2131 s10 = x10 * cospi_21_64 + x11 * cospi_11_64; | 2151 s10 = x10 * cospi_21_64 + x11 * cospi_11_64; |
2132 s11 = x10 * cospi_11_64 - x11 * cospi_21_64; | 2152 s11 = x10 * cospi_11_64 - x11 * cospi_21_64; |
2133 s12 = x12 * cospi_25_64 + x13 * cospi_7_64; | 2153 s12 = x12 * cospi_25_64 + x13 * cospi_7_64; |
2134 s13 = x12 * cospi_7_64 - x13 * cospi_25_64; | 2154 s13 = x12 * cospi_7_64 - x13 * cospi_25_64; |
2135 s14 = x14 * cospi_29_64 + x15 * cospi_3_64; | 2155 s14 = x14 * cospi_29_64 + x15 * cospi_3_64; |
2136 s15 = x14 * cospi_3_64 - x15 * cospi_29_64; | 2156 s15 = x14 * cospi_3_64 - x15 * cospi_29_64; |
2137 | 2157 |
// Pair sK with s(K+8): the sums become x0..x7 and the differences x8..x15.
// Every term here is a cosine-scaled product, so each one goes through
// dct_const_round_shift before WRAPLOW.
2138 x0 = WRAPLOW(dct_const_round_shift(s0 + s8)); | 2158 x0 = WRAPLOW(dct_const_round_shift(s0 + s8), bd); |
2139 x1 = WRAPLOW(dct_const_round_shift(s1 + s9)); | 2159 x1 = WRAPLOW(dct_const_round_shift(s1 + s9), bd); |
2140 x2 = WRAPLOW(dct_const_round_shift(s2 + s10)); | 2160 x2 = WRAPLOW(dct_const_round_shift(s2 + s10), bd); |
2141 x3 = WRAPLOW(dct_const_round_shift(s3 + s11)); | 2161 x3 = WRAPLOW(dct_const_round_shift(s3 + s11), bd); |
2142 x4 = WRAPLOW(dct_const_round_shift(s4 + s12)); | 2162 x4 = WRAPLOW(dct_const_round_shift(s4 + s12), bd); |
2143 x5 = WRAPLOW(dct_const_round_shift(s5 + s13)); | 2163 x5 = WRAPLOW(dct_const_round_shift(s5 + s13), bd); |
2144 x6 = WRAPLOW(dct_const_round_shift(s6 + s14)); | 2164 x6 = WRAPLOW(dct_const_round_shift(s6 + s14), bd); |
2145 x7 = WRAPLOW(dct_const_round_shift(s7 + s15)); | 2165 x7 = WRAPLOW(dct_const_round_shift(s7 + s15), bd); |
2146 x8 = WRAPLOW(dct_const_round_shift(s0 - s8)); | 2166 x8 = WRAPLOW(dct_const_round_shift(s0 - s8), bd); |
2147 x9 = WRAPLOW(dct_const_round_shift(s1 - s9)); | 2167 x9 = WRAPLOW(dct_const_round_shift(s1 - s9), bd); |
2148 x10 = WRAPLOW(dct_const_round_shift(s2 - s10)); | 2168 x10 = WRAPLOW(dct_const_round_shift(s2 - s10), bd); |
2149 x11 = WRAPLOW(dct_const_round_shift(s3 - s11)); | 2169 x11 = WRAPLOW(dct_const_round_shift(s3 - s11), bd); |
2150 x12 = WRAPLOW(dct_const_round_shift(s4 - s12)); | 2170 x12 = WRAPLOW(dct_const_round_shift(s4 - s12), bd); |
2151 x13 = WRAPLOW(dct_const_round_shift(s5 - s13)); | 2171 x13 = WRAPLOW(dct_const_round_shift(s5 - s13), bd); |
2152 x14 = WRAPLOW(dct_const_round_shift(s6 - s14)); | 2172 x14 = WRAPLOW(dct_const_round_shift(s6 - s14), bd); |
2153 x15 = WRAPLOW(dct_const_round_shift(s7 - s15)); | 2173 x15 = WRAPLOW(dct_const_round_shift(s7 - s15), bd); |
2154 | 2174 |
2155 // stage 2 | 2175 // stage 2 |
// s0..s7 are plain copies of x0..x7; only s8..s15 are multiplied by cosine
// constants in this stage.
2156 s0 = x0; | 2176 s0 = x0; |
2157 s1 = x1; | 2177 s1 = x1; |
2158 s2 = x2; | 2178 s2 = x2; |
2159 s3 = x3; | 2179 s3 = x3; |
2160 s4 = x4; | 2180 s4 = x4; |
2161 s5 = x5; | 2181 s5 = x5; |
2162 s6 = x6; | 2182 s6 = x6; |
2163 s7 = x7; | 2183 s7 = x7; |
2164 s8 = x8 * cospi_4_64 + x9 * cospi_28_64; | 2184 s8 = x8 * cospi_4_64 + x9 * cospi_28_64; |
2165 s9 = x8 * cospi_28_64 - x9 * cospi_4_64; | 2185 s9 = x8 * cospi_28_64 - x9 * cospi_4_64; |
2166 s10 = x10 * cospi_20_64 + x11 * cospi_12_64; | 2186 s10 = x10 * cospi_20_64 + x11 * cospi_12_64; |
2167 s11 = x10 * cospi_12_64 - x11 * cospi_20_64; | 2187 s11 = x10 * cospi_12_64 - x11 * cospi_20_64; |
2168 s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; | 2188 s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; |
2169 s13 = x12 * cospi_4_64 + x13 * cospi_28_64; | 2189 s13 = x12 * cospi_4_64 + x13 * cospi_28_64; |
2170 s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; | 2190 s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; |
2171 s15 = x14 * cospi_20_64 + x15 * cospi_12_64; | 2191 s15 = x14 * cospi_20_64 + x15 * cospi_12_64; |
2172 | 2192 |
// The unscaled lower half (x0..x7) skips dct_const_round_shift; the scaled
// upper half (x8..x15) needs it.
2173 x0 = WRAPLOW(s0 + s4); | 2193 x0 = WRAPLOW(s0 + s4, bd); |
2174 x1 = WRAPLOW(s1 + s5); | 2194 x1 = WRAPLOW(s1 + s5, bd); |
2175 x2 = WRAPLOW(s2 + s6); | 2195 x2 = WRAPLOW(s2 + s6, bd); |
2176 x3 = WRAPLOW(s3 + s7); | 2196 x3 = WRAPLOW(s3 + s7, bd); |
2177 x4 = WRAPLOW(s0 - s4); | 2197 x4 = WRAPLOW(s0 - s4, bd); |
2178 x5 = WRAPLOW(s1 - s5); | 2198 x5 = WRAPLOW(s1 - s5, bd); |
2179 x6 = WRAPLOW(s2 - s6); | 2199 x6 = WRAPLOW(s2 - s6, bd); |
2180 x7 = WRAPLOW(s3 - s7); | 2200 x7 = WRAPLOW(s3 - s7, bd); |
2181 x8 = WRAPLOW(dct_const_round_shift(s8 + s12)); | 2201 x8 = WRAPLOW(dct_const_round_shift(s8 + s12), bd); |
2182 x9 = WRAPLOW(dct_const_round_shift(s9 + s13)); | 2202 x9 = WRAPLOW(dct_const_round_shift(s9 + s13), bd); |
2183 x10 = WRAPLOW(dct_const_round_shift(s10 + s14)); | 2203 x10 = WRAPLOW(dct_const_round_shift(s10 + s14), bd); |
2184 x11 = WRAPLOW(dct_const_round_shift(s11 + s15)); | 2204 x11 = WRAPLOW(dct_const_round_shift(s11 + s15), bd); |
2185 x12 = WRAPLOW(dct_const_round_shift(s8 - s12)); | 2205 x12 = WRAPLOW(dct_const_round_shift(s8 - s12), bd); |
2186 x13 = WRAPLOW(dct_const_round_shift(s9 - s13)); | 2206 x13 = WRAPLOW(dct_const_round_shift(s9 - s13), bd); |
2187 x14 = WRAPLOW(dct_const_round_shift(s10 - s14)); | 2207 x14 = WRAPLOW(dct_const_round_shift(s10 - s14), bd); |
2188 x15 = WRAPLOW(dct_const_round_shift(s11 - s15)); | 2208 x15 = WRAPLOW(dct_const_round_shift(s11 - s15), bd); |
2189 | 2209 |
2190 // stage 3 | 2210 // stage 3 |
// Same pattern on groups of four: s0..s3 and s8..s11 are copies, while
// s4..s7 and s12..s15 are cosine-scaled.
2191 s0 = x0; | 2211 s0 = x0; |
2192 s1 = x1; | 2212 s1 = x1; |
2193 s2 = x2; | 2213 s2 = x2; |
2194 s3 = x3; | 2214 s3 = x3; |
2195 s4 = x4 * cospi_8_64 + x5 * cospi_24_64; | 2215 s4 = x4 * cospi_8_64 + x5 * cospi_24_64; |
2196 s5 = x4 * cospi_24_64 - x5 * cospi_8_64; | 2216 s5 = x4 * cospi_24_64 - x5 * cospi_8_64; |
2197 s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; | 2217 s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; |
2198 s7 = x6 * cospi_8_64 + x7 * cospi_24_64; | 2218 s7 = x6 * cospi_8_64 + x7 * cospi_24_64; |
2199 s8 = x8; | 2219 s8 = x8; |
2200 s9 = x9; | 2220 s9 = x9; |
2201 s10 = x10; | 2221 s10 = x10; |
2202 s11 = x11; | 2222 s11 = x11; |
2203 s12 = x12 * cospi_8_64 + x13 * cospi_24_64; | 2223 s12 = x12 * cospi_8_64 + x13 * cospi_24_64; |
2204 s13 = x12 * cospi_24_64 - x13 * cospi_8_64; | 2224 s13 = x12 * cospi_24_64 - x13 * cospi_8_64; |
2205 s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; | 2225 s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; |
2206 s15 = x14 * cospi_8_64 + x15 * cospi_24_64; | 2226 s15 = x14 * cospi_8_64 + x15 * cospi_24_64; |
2207 | 2227 |
2208 x0 = WRAPLOW(s0 + s2); | 2228 x0 = WRAPLOW(s0 + s2, bd); |
2209 x1 = WRAPLOW(s1 + s3); | 2229 x1 = WRAPLOW(s1 + s3, bd); |
2210 x2 = WRAPLOW(s0 - s2); | 2230 x2 = WRAPLOW(s0 - s2, bd); |
2211 x3 = WRAPLOW(s1 - s3); | 2231 x3 = WRAPLOW(s1 - s3, bd); |
2212 x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); | 2232 x4 = WRAPLOW(dct_const_round_shift(s4 + s6), bd); |
2213 x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); | 2233 x5 = WRAPLOW(dct_const_round_shift(s5 + s7), bd); |
2214 x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); | 2234 x6 = WRAPLOW(dct_const_round_shift(s4 - s6), bd); |
2215 x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); | 2235 x7 = WRAPLOW(dct_const_round_shift(s5 - s7), bd); |
2216 x8 = WRAPLOW(s8 + s10); | 2236 x8 = WRAPLOW(s8 + s10, bd); |
2217 x9 = WRAPLOW(s9 + s11); | 2237 x9 = WRAPLOW(s9 + s11, bd); |
2218 x10 = WRAPLOW(s8 - s10); | 2238 x10 = WRAPLOW(s8 - s10, bd); |
2219 x11 = WRAPLOW(s9 - s11); | 2239 x11 = WRAPLOW(s9 - s11, bd); |
2220 x12 = WRAPLOW(dct_const_round_shift(s12 + s14)); | 2240 x12 = WRAPLOW(dct_const_round_shift(s12 + s14), bd); |
2221 x13 = WRAPLOW(dct_const_round_shift(s13 + s15)); | 2241 x13 = WRAPLOW(dct_const_round_shift(s13 + s15), bd); |
2222 x14 = WRAPLOW(dct_const_round_shift(s12 - s14)); | 2242 x14 = WRAPLOW(dct_const_round_shift(s12 - s14), bd); |
2223 x15 = WRAPLOW(dct_const_round_shift(s13 - s15)); | 2243 x15 = WRAPLOW(dct_const_round_shift(s13 - s15), bd); |
2224 | 2244 |
2225 // stage 4 | 2245 // stage 4 |
// Only the pairs (2,3), (6,7), (10,11) and (14,15) are scaled by cospi_16_64.
// x0, x1, x4, x5, x8, x9, x12 and x13 keep their stage-3 values.
2226 s2 = (- cospi_16_64) * (x2 + x3); | 2246 s2 = (- cospi_16_64) * (x2 + x3); |
2227 s3 = cospi_16_64 * (x2 - x3); | 2247 s3 = cospi_16_64 * (x2 - x3); |
2228 s6 = cospi_16_64 * (x6 + x7); | 2248 s6 = cospi_16_64 * (x6 + x7); |
2229 s7 = cospi_16_64 * (-x6 + x7); | 2249 s7 = cospi_16_64 * (-x6 + x7); |
2230 s10 = cospi_16_64 * (x10 + x11); | 2250 s10 = cospi_16_64 * (x10 + x11); |
2231 s11 = cospi_16_64 * (-x10 + x11); | 2251 s11 = cospi_16_64 * (-x10 + x11); |
2232 s14 = (- cospi_16_64) * (x14 + x15); | 2252 s14 = (- cospi_16_64) * (x14 + x15); |
2233 s15 = cospi_16_64 * (x14 - x15); | 2253 s15 = cospi_16_64 * (x14 - x15); |
2234 | 2254 |
2235 x2 = WRAPLOW(dct_const_round_shift(s2)); | 2255 x2 = WRAPLOW(dct_const_round_shift(s2), bd); |
2236 x3 = WRAPLOW(dct_const_round_shift(s3)); | 2256 x3 = WRAPLOW(dct_const_round_shift(s3), bd); |
2237 x6 = WRAPLOW(dct_const_round_shift(s6)); | 2257 x6 = WRAPLOW(dct_const_round_shift(s6), bd); |
2238 x7 = WRAPLOW(dct_const_round_shift(s7)); | 2258 x7 = WRAPLOW(dct_const_round_shift(s7), bd); |
2239 x10 = WRAPLOW(dct_const_round_shift(s10)); | 2259 x10 = WRAPLOW(dct_const_round_shift(s10), bd); |
2240 x11 = WRAPLOW(dct_const_round_shift(s11)); | 2260 x11 = WRAPLOW(dct_const_round_shift(s11), bd); |
2241 x14 = WRAPLOW(dct_const_round_shift(s14)); | 2261 x14 = WRAPLOW(dct_const_round_shift(s14), bd); |
2242 x15 = WRAPLOW(dct_const_round_shift(s15)); | 2262 x15 = WRAPLOW(dct_const_round_shift(s15), bd); |
2243 | 2263 |
// Final permutation into `output`; entries 1, 3, 13 and 15 are negated.
2244 output[0] = WRAPLOW(x0); | 2264 output[0] = WRAPLOW(x0, bd); |
2245 output[1] = WRAPLOW(-x8); | 2265 output[1] = WRAPLOW(-x8, bd); |
2246 output[2] = WRAPLOW(x12); | 2266 output[2] = WRAPLOW(x12, bd); |
2247 output[3] = WRAPLOW(-x4); | 2267 output[3] = WRAPLOW(-x4, bd); |
2248 output[4] = WRAPLOW(x6); | 2268 output[4] = WRAPLOW(x6, bd); |
2249 output[5] = WRAPLOW(x14); | 2269 output[5] = WRAPLOW(x14, bd); |
2250 output[6] = WRAPLOW(x10); | 2270 output[6] = WRAPLOW(x10, bd); |
2251 output[7] = WRAPLOW(x2); | 2271 output[7] = WRAPLOW(x2, bd); |
2252 output[8] = WRAPLOW(x3); | 2272 output[8] = WRAPLOW(x3, bd); |
2253 output[9] = WRAPLOW(x11); | 2273 output[9] = WRAPLOW(x11, bd); |
2254 output[10] = WRAPLOW(x15); | 2274 output[10] = WRAPLOW(x15, bd); |
2255 output[11] = WRAPLOW(x7); | 2275 output[11] = WRAPLOW(x7, bd); |
2256 output[12] = WRAPLOW(x5); | 2276 output[12] = WRAPLOW(x5, bd); |
2257 output[13] = WRAPLOW(-x13); | 2277 output[13] = WRAPLOW(-x13, bd); |
2258 output[14] = WRAPLOW(x9); | 2278 output[14] = WRAPLOW(x9, bd); |
2259 output[15] = WRAPLOW(-x1); | 2279 output[15] = WRAPLOW(-x1, bd); |
2260 } | 2280 } |
2261 | 2281 |
// Table of 1-D transform pairs for the 16x16 hybrid inverse transform,
// indexed directly by tx_type (0..3, as labelled on each row).
// NOTE(review): the transform_2d struct is declared elsewhere, so which
// initializer slot feeds `.rows` and which feeds `.cols` is not visible
// here -- confirm against that declaration.
2262 static const high_transform_2d HIGH_IHT_16[] = { | 2282 static const highbd_transform_2d HIGH_IHT_16[] = { |
2263 { high_idct16, high_idct16 }, // DCT_DCT = 0 | 2283 { highbd_idct16, highbd_idct16 }, // DCT_DCT = 0 |
2264 { high_iadst16, high_idct16 }, // ADST_DCT = 1 | 2284 { highbd_iadst16, highbd_idct16 }, // ADST_DCT = 1 |
2265 { high_idct16, high_iadst16 }, // DCT_ADST = 2 | 2285 { highbd_idct16, highbd_iadst16 }, // DCT_ADST = 2 |
2266 { high_iadst16, high_iadst16 } // ADST_ADST = 3 | 2286 { highbd_iadst16, highbd_iadst16 } // ADST_ADST = 3 |
2267 }; | 2287 }; |
2268 | 2288 |
// 16x16 hybrid inverse transform, added into the destination.
// Picks a pair of 1-D transforms from HIGH_IHT_16 by `tx_type`, applies
// `ht.rows` to each of the 16 input rows and `ht.cols` to each of the 16
// columns of the intermediate buffer, then folds the results into `dest`.
//   input   - coefficients, consumed 16 per row for 16 rows.
//   dest8   - destination; reinterpreted as uint16_t* via CONVERT_TO_SHORTPTR.
//   stride  - distance between destination rows, in uint16_t elements.
//   tx_type - index into HIGH_IHT_16. It is not range-checked here, and the
//             table has four entries, so callers must pass 0..3.
//   bd      - forwarded unchanged to both 1-D transforms and the clip helper
//             (presumably the bit depth -- TODO confirm against the callers).
2269 void vp9_high_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, | 2289 void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, |
2270 int stride, int tx_type, int bd) { | 2290 int stride, int tx_type, int bd) { |
2271 int i, j; | 2291 int i, j; |
// `out` is fully written by the row pass before the column pass reads it.
2272 tran_low_t out[16 * 16]; | 2292 tran_low_t out[16 * 16]; |
2273 tran_low_t *outptr = out; | 2293 tran_low_t *outptr = out; |
2274 tran_low_t temp_in[16], temp_out[16]; | 2294 tran_low_t temp_in[16], temp_out[16]; |
2275 const high_transform_2d ht = HIGH_IHT_16[tx_type]; | 2295 const highbd_transform_2d ht = HIGH_IHT_16[tx_type]; |
2276 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2296 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
2277 | 2297 |
2278 // Rows | 2298 // Rows |
2279 for (i = 0; i < 16; ++i) { | 2299 for (i = 0; i < 16; ++i) { |
2280 ht.rows(input, outptr, bd); | 2300 ht.rows(input, outptr, bd); |
2281 input += 16; | 2301 input += 16; |
2282 outptr += 16; | 2302 outptr += 16; |
2283 } | 2303 } |
2284 | 2304 |
2285 // Columns | 2305 // Columns |
2286 for (i = 0; i < 16; ++i) { | 2306 for (i = 0; i < 16; ++i) {
// Gather column i of the row-pass result into a contiguous vector.
2287 for (j = 0; j < 16; ++j) | 2307 for (j = 0; j < 16; ++j) |
2288 temp_in[j] = out[j * 16 + i]; | 2308 temp_in[j] = out[j * 16 + i]; |
2289 ht.cols(temp_in, temp_out, bd); | 2309 ht.cols(temp_in, temp_out, bd); |
// Combine each column result with the pixel already stored in dest; the new
// column only renames the clip helper and adds braces around this loop.
2290 for (j = 0; j < 16; ++j) | 2310 for (j = 0; j < 16; ++j) { |
2291 dest[j * stride + i] = clip_pixel_bd_high( | 2311 dest[j * stride + i] = highbd_clip_pixel_add( |
2292 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2312 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 2313 } |
2293 } | 2314 } |
2294 } | 2315 } |
2295 | 2316 |
// Reduced 16x16 inverse DCT for blocks whose non-zero coefficients all lie
// in the upper-left 4x4 area (see the comment on the row pass below).
// Only the first 4 rows go through the row transform; all 16 columns are
// then transformed and folded into `dest`.
//   input  - coefficients; only the first 4 rows of 16 are read.
//   dest8  - destination; reinterpreted as uint16_t* via CONVERT_TO_SHORTPTR.
//   stride - distance between destination rows, in uint16_t elements.
//   bd     - forwarded unchanged to the 1-D transform and the clip helper
//            (presumably the bit depth -- TODO confirm against the callers).
2296 void vp9_high_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, | 2317 void vp9_highbd_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest8, |
2297 int stride, int bd) { | 2318 int stride, int bd) { |
// Zero-initialized so that rows 4..15, which the row pass skips, read as 0
// in the column pass.
2298 tran_low_t out[16 * 16] = { 0 }; | 2319 tran_low_t out[16 * 16] = { 0 }; |
2299 tran_low_t *outptr = out; | 2320 tran_low_t *outptr = out; |
2300 int i, j; | 2321 int i, j; |
2301 tran_low_t temp_in[16], temp_out[16]; | 2322 tran_low_t temp_in[16], temp_out[16]; |
2302 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2323 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
2303 | 2324 |
2304 // First transform rows. Since all non-zero dct coefficients are in | 2325 // First transform rows. Since all non-zero dct coefficients are in |
2305 // upper-left 4x4 area, we only need to calculate first 4 rows here. | 2326 // upper-left 4x4 area, we only need to calculate first 4 rows here. |
2306 for (i = 0; i < 4; ++i) { | 2327 for (i = 0; i < 4; ++i) { |
2307 high_idct16(input, outptr, bd); | 2328 highbd_idct16(input, outptr, bd); |
2308 input += 16; | 2329 input += 16; |
2309 outptr += 16; | 2330 outptr += 16; |
2310 } | 2331 } |
2311 | 2332 |
2312 // Then transform columns. | 2333 // Then transform columns. |
2313 for (i = 0; i < 16; ++i) { | 2334 for (i = 0; i < 16; ++i) { |
2314 for (j = 0; j < 16; ++j) | 2335 for (j = 0; j < 16; ++j) |
2315 temp_in[j] = out[j*16 + i]; | 2336 temp_in[j] = out[j*16 + i]; |
2316 high_idct16(temp_in, temp_out, bd); | 2337 highbd_idct16(temp_in, temp_out, bd); |
// Combine each column result with the pixel already stored in dest.
2317 for (j = 0; j < 16; ++j) | 2338 for (j = 0; j < 16; ++j) { |
2318 dest[j * stride + i] = clip_pixel_bd_high( | 2339 dest[j * stride + i] = highbd_clip_pixel_add( |
2319 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2340 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 2341 } |
2320 } | 2342 } |
2321 } | 2343 } |
2322 | 2344 |
// Single-coefficient 16x16 inverse transform: only input[0] is read.
// One offset is derived from it and applied to all 256 destination pixels.
//   input  - coefficients; only element 0 is used.
//   dest8  - destination; reinterpreted as uint16_t* via CONVERT_TO_SHORTPTR.
//   stride - distance between destination rows, in uint16_t elements.
//   bd     - passed to WRAPLOW (new column only) and to the clip helper
//            (presumably the bit depth -- TODO confirm against the callers).
2323 void vp9_high_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, | 2345 void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, |
2324 int stride, int bd) { | 2346 int stride, int bd) { |
2325 int i, j; | 2347 int i, j; |
2326 tran_high_t a1; | 2348 tran_high_t a1;
// First of two cospi_16_64 scalings; the second follows below.
2327 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); | 2349 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); |
2328 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2350 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
2329 | 2351 |
2330 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); | 2352 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); |
// Same ROUND_POWER_OF_TWO(.., 6) step the full 16x16 paths apply per pixel;
// here it is computed once because every pixel gets the same offset.
2331 a1 = ROUND_POWER_OF_TWO(out, 6); | 2353 a1 = ROUND_POWER_OF_TWO(out, 6); |
// Walk 16 rows of 16 pixels; `dest` advances by `stride` after each row.
2332 for (j = 0; j < 16; ++j) { | 2354 for (j = 0; j < 16; ++j) { |
2333 for (i = 0; i < 16; ++i) | 2355 for (i = 0; i < 16; ++i) |
2334 dest[i] = clip_pixel_bd_high(dest[i], a1, bd); | 2356 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
2335 dest += stride; | 2357 dest += stride; |
2336 } | 2358 } |
2337 } | 2359 } |
2338 | 2360 |
2339 static void high_idct32(const tran_low_t *input, tran_low_t *output, int bd) { | 2361 static void highbd_idct32(const tran_low_t *input, tran_low_t *output, int bd) { |
2340 tran_low_t step1[32], step2[32]; | 2362 tran_low_t step1[32], step2[32]; |
2341 tran_high_t temp1, temp2; | 2363 tran_high_t temp1, temp2; |
2342 (void) bd; | 2364 (void) bd; |
2343 | 2365 |
2344 // stage 1 | 2366 // stage 1 |
2345 step1[0] = input[0]; | 2367 step1[0] = input[0]; |
2346 step1[1] = input[16]; | 2368 step1[1] = input[16]; |
2347 step1[2] = input[8]; | 2369 step1[2] = input[8]; |
2348 step1[3] = input[24]; | 2370 step1[3] = input[24]; |
2349 step1[4] = input[4]; | 2371 step1[4] = input[4]; |
2350 step1[5] = input[20]; | 2372 step1[5] = input[20]; |
2351 step1[6] = input[12]; | 2373 step1[6] = input[12]; |
2352 step1[7] = input[28]; | 2374 step1[7] = input[28]; |
2353 step1[8] = input[2]; | 2375 step1[8] = input[2]; |
2354 step1[9] = input[18]; | 2376 step1[9] = input[18]; |
2355 step1[10] = input[10]; | 2377 step1[10] = input[10]; |
2356 step1[11] = input[26]; | 2378 step1[11] = input[26]; |
2357 step1[12] = input[6]; | 2379 step1[12] = input[6]; |
2358 step1[13] = input[22]; | 2380 step1[13] = input[22]; |
2359 step1[14] = input[14]; | 2381 step1[14] = input[14]; |
2360 step1[15] = input[30]; | 2382 step1[15] = input[30]; |
2361 | 2383 |
2362 temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; | 2384 temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; |
2363 temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; | 2385 temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; |
2364 step1[16] = WRAPLOW(dct_const_round_shift(temp1)); | 2386 step1[16] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2365 step1[31] = WRAPLOW(dct_const_round_shift(temp2)); | 2387 step1[31] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2366 | 2388 |
2367 temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; | 2389 temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; |
2368 temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; | 2390 temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; |
2369 step1[17] = WRAPLOW(dct_const_round_shift(temp1)); | 2391 step1[17] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2370 step1[30] = WRAPLOW(dct_const_round_shift(temp2)); | 2392 step1[30] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2371 | 2393 |
2372 temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; | 2394 temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; |
2373 temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; | 2395 temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; |
2374 step1[18] = WRAPLOW(dct_const_round_shift(temp1)); | 2396 step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2375 step1[29] = WRAPLOW(dct_const_round_shift(temp2)); | 2397 step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2376 | 2398 |
2377 temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; | 2399 temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; |
2378 temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; | 2400 temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; |
2379 step1[19] = WRAPLOW(dct_const_round_shift(temp1)); | 2401 step1[19] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2380 step1[28] = WRAPLOW(dct_const_round_shift(temp2)); | 2402 step1[28] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2381 | 2403 |
2382 temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; | 2404 temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; |
2383 temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; | 2405 temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; |
2384 step1[20] = WRAPLOW(dct_const_round_shift(temp1)); | 2406 step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2385 step1[27] = WRAPLOW(dct_const_round_shift(temp2)); | 2407 step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2386 | 2408 |
2387 temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; | 2409 temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; |
2388 temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; | 2410 temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; |
2389 step1[21] = WRAPLOW(dct_const_round_shift(temp1)); | 2411 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2390 step1[26] = WRAPLOW(dct_const_round_shift(temp2)); | 2412 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2391 | 2413 |
2392 temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; | 2414 temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; |
2393 temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; | 2415 temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; |
2394 step1[22] = WRAPLOW(dct_const_round_shift(temp1)); | 2416 step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2395 step1[25] = WRAPLOW(dct_const_round_shift(temp2)); | 2417 step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2396 | 2418 |
2397 temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; | 2419 temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; |
2398 temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; | 2420 temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; |
2399 step1[23] = WRAPLOW(dct_const_round_shift(temp1)); | 2421 step1[23] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2400 step1[24] = WRAPLOW(dct_const_round_shift(temp2)); | 2422 step1[24] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2401 | 2423 |
2402 // stage 2 | 2424 // stage 2 |
2403 step2[0] = step1[0]; | 2425 step2[0] = step1[0]; |
2404 step2[1] = step1[1]; | 2426 step2[1] = step1[1]; |
2405 step2[2] = step1[2]; | 2427 step2[2] = step1[2]; |
2406 step2[3] = step1[3]; | 2428 step2[3] = step1[3]; |
2407 step2[4] = step1[4]; | 2429 step2[4] = step1[4]; |
2408 step2[5] = step1[5]; | 2430 step2[5] = step1[5]; |
2409 step2[6] = step1[6]; | 2431 step2[6] = step1[6]; |
2410 step2[7] = step1[7]; | 2432 step2[7] = step1[7]; |
2411 | 2433 |
2412 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; | 2434 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; |
2413 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; | 2435 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; |
2414 step2[8] = WRAPLOW(dct_const_round_shift(temp1)); | 2436 step2[8] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2415 step2[15] = WRAPLOW(dct_const_round_shift(temp2)); | 2437 step2[15] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2416 | 2438 |
2417 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; | 2439 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; |
2418 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; | 2440 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; |
2419 step2[9] = WRAPLOW(dct_const_round_shift(temp1)); | 2441 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2420 step2[14] = WRAPLOW(dct_const_round_shift(temp2)); | 2442 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2421 | 2443 |
2422 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; | 2444 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; |
2423 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; | 2445 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; |
2424 step2[10] = WRAPLOW(dct_const_round_shift(temp1)); | 2446 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2425 step2[13] = WRAPLOW(dct_const_round_shift(temp2)); | 2447 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2426 | 2448 |
2427 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; | 2449 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; |
2428 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; | 2450 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; |
2429 step2[11] = WRAPLOW(dct_const_round_shift(temp1)); | 2451 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2430 step2[12] = WRAPLOW(dct_const_round_shift(temp2)); | 2452 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2431 | 2453 |
2432 step2[16] = WRAPLOW(step1[16] + step1[17]); | 2454 step2[16] = WRAPLOW(step1[16] + step1[17], bd); |
2433 step2[17] = WRAPLOW(step1[16] - step1[17]); | 2455 step2[17] = WRAPLOW(step1[16] - step1[17], bd); |
2434 step2[18] = WRAPLOW(-step1[18] + step1[19]); | 2456 step2[18] = WRAPLOW(-step1[18] + step1[19], bd); |
2435 step2[19] = WRAPLOW(step1[18] + step1[19]); | 2457 step2[19] = WRAPLOW(step1[18] + step1[19], bd); |
2436 step2[20] = WRAPLOW(step1[20] + step1[21]); | 2458 step2[20] = WRAPLOW(step1[20] + step1[21], bd); |
2437 step2[21] = WRAPLOW(step1[20] - step1[21]); | 2459 step2[21] = WRAPLOW(step1[20] - step1[21], bd); |
2438 step2[22] = WRAPLOW(-step1[22] + step1[23]); | 2460 step2[22] = WRAPLOW(-step1[22] + step1[23], bd); |
2439 step2[23] = WRAPLOW(step1[22] + step1[23]); | 2461 step2[23] = WRAPLOW(step1[22] + step1[23], bd); |
2440 step2[24] = WRAPLOW(step1[24] + step1[25]); | 2462 step2[24] = WRAPLOW(step1[24] + step1[25], bd); |
2441 step2[25] = WRAPLOW(step1[24] - step1[25]); | 2463 step2[25] = WRAPLOW(step1[24] - step1[25], bd); |
2442 step2[26] = WRAPLOW(-step1[26] + step1[27]); | 2464 step2[26] = WRAPLOW(-step1[26] + step1[27], bd); |
2443 step2[27] = WRAPLOW(step1[26] + step1[27]); | 2465 step2[27] = WRAPLOW(step1[26] + step1[27], bd); |
2444 step2[28] = WRAPLOW(step1[28] + step1[29]); | 2466 step2[28] = WRAPLOW(step1[28] + step1[29], bd); |
2445 step2[29] = WRAPLOW(step1[28] - step1[29]); | 2467 step2[29] = WRAPLOW(step1[28] - step1[29], bd); |
2446 step2[30] = WRAPLOW(-step1[30] + step1[31]); | 2468 step2[30] = WRAPLOW(-step1[30] + step1[31], bd); |
2447 step2[31] = WRAPLOW(step1[30] + step1[31]); | 2469 step2[31] = WRAPLOW(step1[30] + step1[31], bd); |
2448 | 2470 |
2449 // stage 3 | 2471 // stage 3 |
2450 step1[0] = step2[0]; | 2472 step1[0] = step2[0]; |
2451 step1[1] = step2[1]; | 2473 step1[1] = step2[1]; |
2452 step1[2] = step2[2]; | 2474 step1[2] = step2[2]; |
2453 step1[3] = step2[3]; | 2475 step1[3] = step2[3]; |
2454 | 2476 |
2455 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; | 2477 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; |
2456 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; | 2478 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; |
2457 step1[4] = WRAPLOW(dct_const_round_shift(temp1)); | 2479 step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2458 step1[7] = WRAPLOW(dct_const_round_shift(temp2)); | 2480 step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2459 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; | 2481 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; |
2460 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; | 2482 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; |
2461 step1[5] = WRAPLOW(dct_const_round_shift(temp1)); | 2483 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2462 step1[6] = WRAPLOW(dct_const_round_shift(temp2)); | 2484 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2463 | 2485 |
2464 step1[8] = WRAPLOW(step2[8] + step2[9]); | 2486 step1[8] = WRAPLOW(step2[8] + step2[9], bd); |
2465 step1[9] = WRAPLOW(step2[8] - step2[9]); | 2487 step1[9] = WRAPLOW(step2[8] - step2[9], bd); |
2466 step1[10] = WRAPLOW(-step2[10] + step2[11]); | 2488 step1[10] = WRAPLOW(-step2[10] + step2[11], bd); |
2467 step1[11] = WRAPLOW(step2[10] + step2[11]); | 2489 step1[11] = WRAPLOW(step2[10] + step2[11], bd); |
2468 step1[12] = WRAPLOW(step2[12] + step2[13]); | 2490 step1[12] = WRAPLOW(step2[12] + step2[13], bd); |
2469 step1[13] = WRAPLOW(step2[12] - step2[13]); | 2491 step1[13] = WRAPLOW(step2[12] - step2[13], bd); |
2470 step1[14] = WRAPLOW(-step2[14] + step2[15]); | 2492 step1[14] = WRAPLOW(-step2[14] + step2[15], bd); |
2471 step1[15] = WRAPLOW(step2[14] + step2[15]); | 2493 step1[15] = WRAPLOW(step2[14] + step2[15], bd); |
2472 | 2494 |
2473 step1[16] = step2[16]; | 2495 step1[16] = step2[16]; |
2474 step1[31] = step2[31]; | 2496 step1[31] = step2[31]; |
2475 temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; | 2497 temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; |
2476 temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; | 2498 temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; |
2477 step1[17] = WRAPLOW(dct_const_round_shift(temp1)); | 2499 step1[17] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2478 step1[30] = WRAPLOW(dct_const_round_shift(temp2)); | 2500 step1[30] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2479 temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; | 2501 temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; |
2480 temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; | 2502 temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; |
2481 step1[18] = WRAPLOW(dct_const_round_shift(temp1)); | 2503 step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2482 step1[29] = WRAPLOW(dct_const_round_shift(temp2)); | 2504 step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2483 step1[19] = step2[19]; | 2505 step1[19] = step2[19]; |
2484 step1[20] = step2[20]; | 2506 step1[20] = step2[20]; |
2485 temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; | 2507 temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; |
2486 temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; | 2508 temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; |
2487 step1[21] = WRAPLOW(dct_const_round_shift(temp1)); | 2509 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2488 step1[26] = WRAPLOW(dct_const_round_shift(temp2)); | 2510 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2489 temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; | 2511 temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; |
2490 temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; | 2512 temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; |
2491 step1[22] = WRAPLOW(dct_const_round_shift(temp1)); | 2513 step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2492 step1[25] = WRAPLOW(dct_const_round_shift(temp2)); | 2514 step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2493 step1[23] = step2[23]; | 2515 step1[23] = step2[23]; |
2494 step1[24] = step2[24]; | 2516 step1[24] = step2[24]; |
2495 step1[27] = step2[27]; | 2517 step1[27] = step2[27]; |
2496 step1[28] = step2[28]; | 2518 step1[28] = step2[28]; |
2497 | 2519 |
2498 // stage 4 | 2520 // stage 4 |
2499 temp1 = (step1[0] + step1[1]) * cospi_16_64; | 2521 temp1 = (step1[0] + step1[1]) * cospi_16_64; |
2500 temp2 = (step1[0] - step1[1]) * cospi_16_64; | 2522 temp2 = (step1[0] - step1[1]) * cospi_16_64; |
2501 step2[0] = WRAPLOW(dct_const_round_shift(temp1)); | 2523 step2[0] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2502 step2[1] = WRAPLOW(dct_const_round_shift(temp2)); | 2524 step2[1] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2503 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; | 2525 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; |
2504 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; | 2526 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; |
2505 step2[2] = WRAPLOW(dct_const_round_shift(temp1)); | 2527 step2[2] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2506 step2[3] = WRAPLOW(dct_const_round_shift(temp2)); | 2528 step2[3] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2507 step2[4] = WRAPLOW(step1[4] + step1[5]); | 2529 step2[4] = WRAPLOW(step1[4] + step1[5], bd); |
2508 step2[5] = WRAPLOW(step1[4] - step1[5]); | 2530 step2[5] = WRAPLOW(step1[4] - step1[5], bd); |
2509 step2[6] = WRAPLOW(-step1[6] + step1[7]); | 2531 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); |
2510 step2[7] = WRAPLOW(step1[6] + step1[7]); | 2532 step2[7] = WRAPLOW(step1[6] + step1[7], bd); |
2511 | 2533 |
2512 step2[8] = step1[8]; | 2534 step2[8] = step1[8]; |
2513 step2[15] = step1[15]; | 2535 step2[15] = step1[15]; |
2514 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; | 2536 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; |
2515 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; | 2537 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; |
2516 step2[9] = WRAPLOW(dct_const_round_shift(temp1)); | 2538 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2517 step2[14] = WRAPLOW(dct_const_round_shift(temp2)); | 2539 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2518 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; | 2540 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; |
2519 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; | 2541 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; |
2520 step2[10] = WRAPLOW(dct_const_round_shift(temp1)); | 2542 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2521 step2[13] = WRAPLOW(dct_const_round_shift(temp2)); | 2543 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2522 step2[11] = step1[11]; | 2544 step2[11] = step1[11]; |
2523 step2[12] = step1[12]; | 2545 step2[12] = step1[12]; |
2524 | 2546 |
2525 step2[16] = WRAPLOW(step1[16] + step1[19]); | 2547 step2[16] = WRAPLOW(step1[16] + step1[19], bd); |
2526 step2[17] = WRAPLOW(step1[17] + step1[18]); | 2548 step2[17] = WRAPLOW(step1[17] + step1[18], bd); |
2527 step2[18] = WRAPLOW(step1[17] - step1[18]); | 2549 step2[18] = WRAPLOW(step1[17] - step1[18], bd); |
2528 step2[19] = WRAPLOW(step1[16] - step1[19]); | 2550 step2[19] = WRAPLOW(step1[16] - step1[19], bd); |
2529 step2[20] = WRAPLOW(-step1[20] + step1[23]); | 2551 step2[20] = WRAPLOW(-step1[20] + step1[23], bd); |
2530 step2[21] = WRAPLOW(-step1[21] + step1[22]); | 2552 step2[21] = WRAPLOW(-step1[21] + step1[22], bd); |
2531 step2[22] = WRAPLOW(step1[21] + step1[22]); | 2553 step2[22] = WRAPLOW(step1[21] + step1[22], bd); |
2532 step2[23] = WRAPLOW(step1[20] + step1[23]); | 2554 step2[23] = WRAPLOW(step1[20] + step1[23], bd); |
2533 | 2555 |
2534 step2[24] = WRAPLOW(step1[24] + step1[27]); | 2556 step2[24] = WRAPLOW(step1[24] + step1[27], bd); |
2535 step2[25] = WRAPLOW(step1[25] + step1[26]); | 2557 step2[25] = WRAPLOW(step1[25] + step1[26], bd); |
2536 step2[26] = WRAPLOW(step1[25] - step1[26]); | 2558 step2[26] = WRAPLOW(step1[25] - step1[26], bd); |
2537 step2[27] = WRAPLOW(step1[24] - step1[27]); | 2559 step2[27] = WRAPLOW(step1[24] - step1[27], bd); |
2538 step2[28] = WRAPLOW(-step1[28] + step1[31]); | 2560 step2[28] = WRAPLOW(-step1[28] + step1[31], bd); |
2539 step2[29] = WRAPLOW(-step1[29] + step1[30]); | 2561 step2[29] = WRAPLOW(-step1[29] + step1[30], bd); |
2540 step2[30] = WRAPLOW(step1[29] + step1[30]); | 2562 step2[30] = WRAPLOW(step1[29] + step1[30], bd); |
2541 step2[31] = WRAPLOW(step1[28] + step1[31]); | 2563 step2[31] = WRAPLOW(step1[28] + step1[31], bd); |
2542 | 2564 |
2543 // stage 5 | 2565 // stage 5 |
2544 step1[0] = WRAPLOW(step2[0] + step2[3]); | 2566 step1[0] = WRAPLOW(step2[0] + step2[3], bd); |
2545 step1[1] = WRAPLOW(step2[1] + step2[2]); | 2567 step1[1] = WRAPLOW(step2[1] + step2[2], bd); |
2546 step1[2] = WRAPLOW(step2[1] - step2[2]); | 2568 step1[2] = WRAPLOW(step2[1] - step2[2], bd); |
2547 step1[3] = WRAPLOW(step2[0] - step2[3]); | 2569 step1[3] = WRAPLOW(step2[0] - step2[3], bd); |
2548 step1[4] = step2[4]; | 2570 step1[4] = step2[4]; |
2549 temp1 = (step2[6] - step2[5]) * cospi_16_64; | 2571 temp1 = (step2[6] - step2[5]) * cospi_16_64; |
2550 temp2 = (step2[5] + step2[6]) * cospi_16_64; | 2572 temp2 = (step2[5] + step2[6]) * cospi_16_64; |
2551 step1[5] = WRAPLOW(dct_const_round_shift(temp1)); | 2573 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2552 step1[6] = WRAPLOW(dct_const_round_shift(temp2)); | 2574 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2553 step1[7] = step2[7]; | 2575 step1[7] = step2[7]; |
2554 | 2576 |
2555 step1[8] = WRAPLOW(step2[8] + step2[11]); | 2577 step1[8] = WRAPLOW(step2[8] + step2[11], bd); |
2556 step1[9] = WRAPLOW(step2[9] + step2[10]); | 2578 step1[9] = WRAPLOW(step2[9] + step2[10], bd); |
2557 step1[10] = WRAPLOW(step2[9] - step2[10]); | 2579 step1[10] = WRAPLOW(step2[9] - step2[10], bd); |
2558 step1[11] = WRAPLOW(step2[8] - step2[11]); | 2580 step1[11] = WRAPLOW(step2[8] - step2[11], bd); |
2559 step1[12] = WRAPLOW(-step2[12] + step2[15]); | 2581 step1[12] = WRAPLOW(-step2[12] + step2[15], bd); |
2560 step1[13] = WRAPLOW(-step2[13] + step2[14]); | 2582 step1[13] = WRAPLOW(-step2[13] + step2[14], bd); |
2561 step1[14] = WRAPLOW(step2[13] + step2[14]); | 2583 step1[14] = WRAPLOW(step2[13] + step2[14], bd); |
2562 step1[15] = WRAPLOW(step2[12] + step2[15]); | 2584 step1[15] = WRAPLOW(step2[12] + step2[15], bd); |
2563 | 2585 |
2564 step1[16] = step2[16]; | 2586 step1[16] = step2[16]; |
2565 step1[17] = step2[17]; | 2587 step1[17] = step2[17]; |
2566 temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; | 2588 temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; |
2567 temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; | 2589 temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; |
2568 step1[18] = WRAPLOW(dct_const_round_shift(temp1)); | 2590 step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2569 step1[29] = WRAPLOW(dct_const_round_shift(temp2)); | 2591 step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2570 temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; | 2592 temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; |
2571 temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; | 2593 temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; |
2572 step1[19] = WRAPLOW(dct_const_round_shift(temp1)); | 2594 step1[19] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2573 step1[28] = WRAPLOW(dct_const_round_shift(temp2)); | 2595 step1[28] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2574 temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; | 2596 temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; |
2575 temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; | 2597 temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; |
2576 step1[20] = WRAPLOW(dct_const_round_shift(temp1)); | 2598 step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2577 step1[27] = WRAPLOW(dct_const_round_shift(temp2)); | 2599 step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2578 temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; | 2600 temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; |
2579 temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; | 2601 temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; |
2580 step1[21] = WRAPLOW(dct_const_round_shift(temp1)); | 2602 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2581 step1[26] = WRAPLOW(dct_const_round_shift(temp2)); | 2603 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2582 step1[22] = step2[22]; | 2604 step1[22] = step2[22]; |
2583 step1[23] = step2[23]; | 2605 step1[23] = step2[23]; |
2584 step1[24] = step2[24]; | 2606 step1[24] = step2[24]; |
2585 step1[25] = step2[25]; | 2607 step1[25] = step2[25]; |
2586 step1[30] = step2[30]; | 2608 step1[30] = step2[30]; |
2587 step1[31] = step2[31]; | 2609 step1[31] = step2[31]; |
2588 | 2610 |
2589 // stage 6 | 2611 // stage 6 |
2590 step2[0] = WRAPLOW(step1[0] + step1[7]); | 2612 step2[0] = WRAPLOW(step1[0] + step1[7], bd); |
2591 step2[1] = WRAPLOW(step1[1] + step1[6]); | 2613 step2[1] = WRAPLOW(step1[1] + step1[6], bd); |
2592 step2[2] = WRAPLOW(step1[2] + step1[5]); | 2614 step2[2] = WRAPLOW(step1[2] + step1[5], bd); |
2593 step2[3] = WRAPLOW(step1[3] + step1[4]); | 2615 step2[3] = WRAPLOW(step1[3] + step1[4], bd); |
2594 step2[4] = WRAPLOW(step1[3] - step1[4]); | 2616 step2[4] = WRAPLOW(step1[3] - step1[4], bd); |
2595 step2[5] = WRAPLOW(step1[2] - step1[5]); | 2617 step2[5] = WRAPLOW(step1[2] - step1[5], bd); |
2596 step2[6] = WRAPLOW(step1[1] - step1[6]); | 2618 step2[6] = WRAPLOW(step1[1] - step1[6], bd); |
2597 step2[7] = WRAPLOW(step1[0] - step1[7]); | 2619 step2[7] = WRAPLOW(step1[0] - step1[7], bd); |
2598 step2[8] = step1[8]; | 2620 step2[8] = step1[8]; |
2599 step2[9] = step1[9]; | 2621 step2[9] = step1[9]; |
2600 temp1 = (-step1[10] + step1[13]) * cospi_16_64; | 2622 temp1 = (-step1[10] + step1[13]) * cospi_16_64; |
2601 temp2 = (step1[10] + step1[13]) * cospi_16_64; | 2623 temp2 = (step1[10] + step1[13]) * cospi_16_64; |
2602 step2[10] = WRAPLOW(dct_const_round_shift(temp1)); | 2624 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2603 step2[13] = WRAPLOW(dct_const_round_shift(temp2)); | 2625 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2604 temp1 = (-step1[11] + step1[12]) * cospi_16_64; | 2626 temp1 = (-step1[11] + step1[12]) * cospi_16_64; |
2605 temp2 = (step1[11] + step1[12]) * cospi_16_64; | 2627 temp2 = (step1[11] + step1[12]) * cospi_16_64; |
2606 step2[11] = WRAPLOW(dct_const_round_shift(temp1)); | 2628 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2607 step2[12] = WRAPLOW(dct_const_round_shift(temp2)); | 2629 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2608 step2[14] = WRAPLOW(step1[14]); | 2630 step2[14] = step1[14]; |
2609 step2[15] = WRAPLOW(step1[15]); | 2631 step2[15] = step1[15]; |
2610 | 2632 |
2611 step2[16] = WRAPLOW(step1[16] + step1[23]); | 2633 step2[16] = WRAPLOW(step1[16] + step1[23], bd); |
2612 step2[17] = WRAPLOW(step1[17] + step1[22]); | 2634 step2[17] = WRAPLOW(step1[17] + step1[22], bd); |
2613 step2[18] = WRAPLOW(step1[18] + step1[21]); | 2635 step2[18] = WRAPLOW(step1[18] + step1[21], bd); |
2614 step2[19] = WRAPLOW(step1[19] + step1[20]); | 2636 step2[19] = WRAPLOW(step1[19] + step1[20], bd); |
2615 step2[20] = WRAPLOW(step1[19] - step1[20]); | 2637 step2[20] = WRAPLOW(step1[19] - step1[20], bd); |
2616 step2[21] = WRAPLOW(step1[18] - step1[21]); | 2638 step2[21] = WRAPLOW(step1[18] - step1[21], bd); |
2617 step2[22] = WRAPLOW(step1[17] - step1[22]); | 2639 step2[22] = WRAPLOW(step1[17] - step1[22], bd); |
2618 step2[23] = WRAPLOW(step1[16] - step1[23]); | 2640 step2[23] = WRAPLOW(step1[16] - step1[23], bd); |
2619 | 2641 |
2620 step2[24] = WRAPLOW(-step1[24] + step1[31]); | 2642 step2[24] = WRAPLOW(-step1[24] + step1[31], bd); |
2621 step2[25] = WRAPLOW(-step1[25] + step1[30]); | 2643 step2[25] = WRAPLOW(-step1[25] + step1[30], bd); |
2622 step2[26] = WRAPLOW(-step1[26] + step1[29]); | 2644 step2[26] = WRAPLOW(-step1[26] + step1[29], bd); |
2623 step2[27] = WRAPLOW(-step1[27] + step1[28]); | 2645 step2[27] = WRAPLOW(-step1[27] + step1[28], bd); |
2624 step2[28] = WRAPLOW(step1[27] + step1[28]); | 2646 step2[28] = WRAPLOW(step1[27] + step1[28], bd); |
2625 step2[29] = WRAPLOW(step1[26] + step1[29]); | 2647 step2[29] = WRAPLOW(step1[26] + step1[29], bd); |
2626 step2[30] = WRAPLOW(step1[25] + step1[30]); | 2648 step2[30] = WRAPLOW(step1[25] + step1[30], bd); |
2627 step2[31] = WRAPLOW(step1[24] + step1[31]); | 2649 step2[31] = WRAPLOW(step1[24] + step1[31], bd); |
2628 | 2650 |
2629 // stage 7 | 2651 // stage 7 |
2630 step1[0] = WRAPLOW(step2[0] + step2[15]); | 2652 step1[0] = WRAPLOW(step2[0] + step2[15], bd); |
2631 step1[1] = WRAPLOW(step2[1] + step2[14]); | 2653 step1[1] = WRAPLOW(step2[1] + step2[14], bd); |
2632 step1[2] = WRAPLOW(step2[2] + step2[13]); | 2654 step1[2] = WRAPLOW(step2[2] + step2[13], bd); |
2633 step1[3] = WRAPLOW(step2[3] + step2[12]); | 2655 step1[3] = WRAPLOW(step2[3] + step2[12], bd); |
2634 step1[4] = WRAPLOW(step2[4] + step2[11]); | 2656 step1[4] = WRAPLOW(step2[4] + step2[11], bd); |
2635 step1[5] = WRAPLOW(step2[5] + step2[10]); | 2657 step1[5] = WRAPLOW(step2[5] + step2[10], bd); |
2636 step1[6] = WRAPLOW(step2[6] + step2[9]); | 2658 step1[6] = WRAPLOW(step2[6] + step2[9], bd); |
2637 step1[7] = WRAPLOW(step2[7] + step2[8]); | 2659 step1[7] = WRAPLOW(step2[7] + step2[8], bd); |
2638 step1[8] = WRAPLOW(step2[7] - step2[8]); | 2660 step1[8] = WRAPLOW(step2[7] - step2[8], bd); |
2639 step1[9] = WRAPLOW(step2[6] - step2[9]); | 2661 step1[9] = WRAPLOW(step2[6] - step2[9], bd); |
2640 step1[10] = WRAPLOW(step2[5] - step2[10]); | 2662 step1[10] = WRAPLOW(step2[5] - step2[10], bd); |
2641 step1[11] = WRAPLOW(step2[4] - step2[11]); | 2663 step1[11] = WRAPLOW(step2[4] - step2[11], bd); |
2642 step1[12] = WRAPLOW(step2[3] - step2[12]); | 2664 step1[12] = WRAPLOW(step2[3] - step2[12], bd); |
2643 step1[13] = WRAPLOW(step2[2] - step2[13]); | 2665 step1[13] = WRAPLOW(step2[2] - step2[13], bd); |
2644 step1[14] = WRAPLOW(step2[1] - step2[14]); | 2666 step1[14] = WRAPLOW(step2[1] - step2[14], bd); |
2645 step1[15] = WRAPLOW(step2[0] - step2[15]); | 2667 step1[15] = WRAPLOW(step2[0] - step2[15], bd); |
2646 | 2668 |
2647 step1[16] = step2[16]; | 2669 step1[16] = step2[16]; |
2648 step1[17] = step2[17]; | 2670 step1[17] = step2[17]; |
2649 step1[18] = step2[18]; | 2671 step1[18] = step2[18]; |
2650 step1[19] = step2[19]; | 2672 step1[19] = step2[19]; |
2651 temp1 = (-step2[20] + step2[27]) * cospi_16_64; | 2673 temp1 = (-step2[20] + step2[27]) * cospi_16_64; |
2652 temp2 = (step2[20] + step2[27]) * cospi_16_64; | 2674 temp2 = (step2[20] + step2[27]) * cospi_16_64; |
2653 step1[20] = WRAPLOW(dct_const_round_shift(temp1)); | 2675 step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2654 step1[27] = WRAPLOW(dct_const_round_shift(temp2)); | 2676 step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2655 temp1 = (-step2[21] + step2[26]) * cospi_16_64; | 2677 temp1 = (-step2[21] + step2[26]) * cospi_16_64; |
2656 temp2 = (step2[21] + step2[26]) * cospi_16_64; | 2678 temp2 = (step2[21] + step2[26]) * cospi_16_64; |
2657 step1[21] = WRAPLOW(dct_const_round_shift(temp1)); | 2679 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2658 step1[26] = WRAPLOW(dct_const_round_shift(temp2)); | 2680 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2659 temp1 = (-step2[22] + step2[25]) * cospi_16_64; | 2681 temp1 = (-step2[22] + step2[25]) * cospi_16_64; |
2660 temp2 = (step2[22] + step2[25]) * cospi_16_64; | 2682 temp2 = (step2[22] + step2[25]) * cospi_16_64; |
2661 step1[22] = WRAPLOW(dct_const_round_shift(temp1)); | 2683 step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2662 step1[25] = WRAPLOW(dct_const_round_shift(temp2)); | 2684 step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2663 temp1 = (-step2[23] + step2[24]) * cospi_16_64; | 2685 temp1 = (-step2[23] + step2[24]) * cospi_16_64; |
2664 temp2 = (step2[23] + step2[24]) * cospi_16_64; | 2686 temp2 = (step2[23] + step2[24]) * cospi_16_64; |
2665 step1[23] = WRAPLOW(dct_const_round_shift(temp1)); | 2687 step1[23] = WRAPLOW(dct_const_round_shift(temp1), bd); |
2666 step1[24] = WRAPLOW(dct_const_round_shift(temp2)); | 2688 step1[24] = WRAPLOW(dct_const_round_shift(temp2), bd); |
2667 step1[28] = step2[28]; | 2689 step1[28] = step2[28]; |
2668 step1[29] = step2[29]; | 2690 step1[29] = step2[29]; |
2669 step1[30] = step2[30]; | 2691 step1[30] = step2[30]; |
2670 step1[31] = step2[31]; | 2692 step1[31] = step2[31]; |
2671 | 2693 |
2672 // final stage | 2694 // final stage |
2673 output[0] = WRAPLOW(step1[0] + step1[31]); | 2695 output[0] = WRAPLOW(step1[0] + step1[31], bd); |
2674 output[1] = WRAPLOW(step1[1] + step1[30]); | 2696 output[1] = WRAPLOW(step1[1] + step1[30], bd); |
2675 output[2] = WRAPLOW(step1[2] + step1[29]); | 2697 output[2] = WRAPLOW(step1[2] + step1[29], bd); |
2676 output[3] = WRAPLOW(step1[3] + step1[28]); | 2698 output[3] = WRAPLOW(step1[3] + step1[28], bd); |
2677 output[4] = WRAPLOW(step1[4] + step1[27]); | 2699 output[4] = WRAPLOW(step1[4] + step1[27], bd); |
2678 output[5] = WRAPLOW(step1[5] + step1[26]); | 2700 output[5] = WRAPLOW(step1[5] + step1[26], bd); |
2679 output[6] = WRAPLOW(step1[6] + step1[25]); | 2701 output[6] = WRAPLOW(step1[6] + step1[25], bd); |
2680 output[7] = WRAPLOW(step1[7] + step1[24]); | 2702 output[7] = WRAPLOW(step1[7] + step1[24], bd); |
2681 output[8] = WRAPLOW(step1[8] + step1[23]); | 2703 output[8] = WRAPLOW(step1[8] + step1[23], bd); |
2682 output[9] = WRAPLOW(step1[9] + step1[22]); | 2704 output[9] = WRAPLOW(step1[9] + step1[22], bd); |
2683 output[10] = WRAPLOW(step1[10] + step1[21]); | 2705 output[10] = WRAPLOW(step1[10] + step1[21], bd); |
2684 output[11] = WRAPLOW(step1[11] + step1[20]); | 2706 output[11] = WRAPLOW(step1[11] + step1[20], bd); |
2685 output[12] = WRAPLOW(step1[12] + step1[19]); | 2707 output[12] = WRAPLOW(step1[12] + step1[19], bd); |
2686 output[13] = WRAPLOW(step1[13] + step1[18]); | 2708 output[13] = WRAPLOW(step1[13] + step1[18], bd); |
2687 output[14] = WRAPLOW(step1[14] + step1[17]); | 2709 output[14] = WRAPLOW(step1[14] + step1[17], bd); |
2688 output[15] = WRAPLOW(step1[15] + step1[16]); | 2710 output[15] = WRAPLOW(step1[15] + step1[16], bd); |
2689 output[16] = WRAPLOW(step1[15] - step1[16]); | 2711 output[16] = WRAPLOW(step1[15] - step1[16], bd); |
2690 output[17] = WRAPLOW(step1[14] - step1[17]); | 2712 output[17] = WRAPLOW(step1[14] - step1[17], bd); |
2691 output[18] = WRAPLOW(step1[13] - step1[18]); | 2713 output[18] = WRAPLOW(step1[13] - step1[18], bd); |
2692 output[19] = WRAPLOW(step1[12] - step1[19]); | 2714 output[19] = WRAPLOW(step1[12] - step1[19], bd); |
2693 output[20] = WRAPLOW(step1[11] - step1[20]); | 2715 output[20] = WRAPLOW(step1[11] - step1[20], bd); |
2694 output[21] = WRAPLOW(step1[10] - step1[21]); | 2716 output[21] = WRAPLOW(step1[10] - step1[21], bd); |
2695 output[22] = WRAPLOW(step1[9] - step1[22]); | 2717 output[22] = WRAPLOW(step1[9] - step1[22], bd); |
2696 output[23] = WRAPLOW(step1[8] - step1[23]); | 2718 output[23] = WRAPLOW(step1[8] - step1[23], bd); |
2697 output[24] = WRAPLOW(step1[7] - step1[24]); | 2719 output[24] = WRAPLOW(step1[7] - step1[24], bd); |
2698 output[25] = WRAPLOW(step1[6] - step1[25]); | 2720 output[25] = WRAPLOW(step1[6] - step1[25], bd); |
2699 output[26] = WRAPLOW(step1[5] - step1[26]); | 2721 output[26] = WRAPLOW(step1[5] - step1[26], bd); |
2700 output[27] = WRAPLOW(step1[4] - step1[27]); | 2722 output[27] = WRAPLOW(step1[4] - step1[27], bd); |
2701 output[28] = WRAPLOW(step1[3] - step1[28]); | 2723 output[28] = WRAPLOW(step1[3] - step1[28], bd); |
2702 output[29] = WRAPLOW(step1[2] - step1[29]); | 2724 output[29] = WRAPLOW(step1[2] - step1[29], bd); |
2703 output[30] = WRAPLOW(step1[1] - step1[30]); | 2725 output[30] = WRAPLOW(step1[1] - step1[30], bd); |
2704 output[31] = WRAPLOW(step1[0] - step1[31]); | 2726 output[31] = WRAPLOW(step1[0] - step1[31], bd); |
2705 } | 2727 } |
2706 | 2728 |
2707 void vp9_high_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, | 2729 void vp9_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest8, |
2708 int stride, int bd) { | 2730 int stride, int bd) { |
2709 tran_low_t out[32 * 32]; | 2731 tran_low_t out[32 * 32]; |
2710 tran_low_t *outptr = out; | 2732 tran_low_t *outptr = out; |
2711 int i, j; | 2733 int i, j; |
2712 tran_low_t temp_in[32], temp_out[32]; | 2734 tran_low_t temp_in[32], temp_out[32]; |
2713 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2735 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
2714 | 2736 |
2715 // Rows | 2737 // Rows |
2716 for (i = 0; i < 32; ++i) { | 2738 for (i = 0; i < 32; ++i) { |
2717 tran_low_t zero_coeff[16]; | 2739 tran_low_t zero_coeff[16]; |
2718 for (j = 0; j < 16; ++j) | 2740 for (j = 0; j < 16; ++j) |
2719 zero_coeff[j] = input[2 * j] | input[2 * j + 1]; | 2741 zero_coeff[j] = input[2 * j] | input[2 * j + 1]; |
2720 for (j = 0; j < 8; ++j) | 2742 for (j = 0; j < 8; ++j) |
2721 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; | 2743 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; |
2722 for (j = 0; j < 4; ++j) | 2744 for (j = 0; j < 4; ++j) |
2723 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; | 2745 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; |
2724 for (j = 0; j < 2; ++j) | 2746 for (j = 0; j < 2; ++j) |
2725 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; | 2747 zero_coeff[j] = zero_coeff[2 * j] | zero_coeff[2 * j + 1]; |
2726 | 2748 |
2727 if (zero_coeff[0] | zero_coeff[1]) | 2749 if (zero_coeff[0] | zero_coeff[1]) |
2728 high_idct32(input, outptr, bd); | 2750 highbd_idct32(input, outptr, bd); |
2729 else | 2751 else |
2730 vpx_memset(outptr, 0, sizeof(tran_low_t) * 32); | 2752 vpx_memset(outptr, 0, sizeof(tran_low_t) * 32); |
2731 input += 32; | 2753 input += 32; |
2732 outptr += 32; | 2754 outptr += 32; |
2733 } | 2755 } |
2734 | 2756 |
2735 // Columns | 2757 // Columns |
2736 for (i = 0; i < 32; ++i) { | 2758 for (i = 0; i < 32; ++i) { |
2737 for (j = 0; j < 32; ++j) | 2759 for (j = 0; j < 32; ++j) |
2738 temp_in[j] = out[j * 32 + i]; | 2760 temp_in[j] = out[j * 32 + i]; |
2739 high_idct32(temp_in, temp_out, bd); | 2761 highbd_idct32(temp_in, temp_out, bd); |
2740 for (j = 0; j < 32; ++j) | 2762 for (j = 0; j < 32; ++j) { |
2741 dest[j * stride + i] = clip_pixel_bd_high( | 2763 dest[j * stride + i] = highbd_clip_pixel_add( |
2742 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2764 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 2765 } |
2743 } | 2766 } |
2744 } | 2767 } |
2745 | 2768 |
2746 void vp9_high_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, | 2769 void vp9_highbd_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest8, |
2747 int stride, int bd) { | 2770 int stride, int bd) { |
2748 tran_low_t out[32 * 32] = {0}; | 2771 tran_low_t out[32 * 32] = {0}; |
2749 tran_low_t *outptr = out; | 2772 tran_low_t *outptr = out; |
2750 int i, j; | 2773 int i, j; |
2751 tran_low_t temp_in[32], temp_out[32]; | 2774 tran_low_t temp_in[32], temp_out[32]; |
2752 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2775 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
2753 | 2776 |
2754 // Rows | 2777 // Rows |
2755 // Only upper-left 8x8 has non-zero coeff. | 2778 // Only upper-left 8x8 has non-zero coeff. |
2756 for (i = 0; i < 8; ++i) { | 2779 for (i = 0; i < 8; ++i) { |
2757 high_idct32(input, outptr, bd); | 2780 highbd_idct32(input, outptr, bd); |
2758 input += 32; | 2781 input += 32; |
2759 outptr += 32; | 2782 outptr += 32; |
2760 } | 2783 } |
2761 // Columns | 2784 // Columns |
2762 for (i = 0; i < 32; ++i) { | 2785 for (i = 0; i < 32; ++i) { |
2763 for (j = 0; j < 32; ++j) | 2786 for (j = 0; j < 32; ++j) |
2764 temp_in[j] = out[j * 32 + i]; | 2787 temp_in[j] = out[j * 32 + i]; |
2765 high_idct32(temp_in, temp_out, bd); | 2788 highbd_idct32(temp_in, temp_out, bd); |
2766 for (j = 0; j < 32; ++j) | 2789 for (j = 0; j < 32; ++j) { |
2767 dest[j * stride + i] = clip_pixel_bd_high( | 2790 dest[j * stride + i] = highbd_clip_pixel_add( |
2768 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2791 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 2792 } |
2769 } | 2793 } |
2770 } | 2794 } |
2771 | 2795 |
2772 void vp9_high_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, | 2796 void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, |
2773 int stride, int bd) { | 2797 int stride, int bd) { |
2774 int i, j; | 2798 int i, j; |
2775 int a1; | 2799 int a1; |
2776 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2800 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
2777 | 2801 |
2778 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64)); | 2802 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); |
2779 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); | 2803 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); |
2780 a1 = ROUND_POWER_OF_TWO(out, 6); | 2804 a1 = ROUND_POWER_OF_TWO(out, 6); |
2781 | 2805 |
2782 for (j = 0; j < 32; ++j) { | 2806 for (j = 0; j < 32; ++j) { |
2783 for (i = 0; i < 32; ++i) | 2807 for (i = 0; i < 32; ++i) |
2784 dest[i] = clip_pixel_bd_high(dest[i], a1, bd); | 2808 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
2785 dest += stride; | 2809 dest += stride; |
2786 } | 2810 } |
2787 } | 2811 } |
2788 | 2812 |
2789 // idct | 2813 // idct |
2790 void vp9_high_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, | 2814 void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
2791 int eob, int bd) { | 2815 int eob, int bd) { |
2792 if (eob > 1) | 2816 if (eob > 1) |
2793 vp9_high_idct4x4_16_add(input, dest, stride, bd); | 2817 vp9_highbd_idct4x4_16_add(input, dest, stride, bd); |
2794 else | 2818 else |
2795 vp9_high_idct4x4_1_add(input, dest, stride, bd); | 2819 vp9_highbd_idct4x4_1_add(input, dest, stride, bd); |
2796 } | 2820 } |
2797 | 2821 |
2798 | 2822 |
2799 void vp9_high_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, | 2823 void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, |
2800 int eob, int bd) { | 2824 int eob, int bd) { |
2801 if (eob > 1) | 2825 if (eob > 1) |
2802 vp9_high_iwht4x4_16_add(input, dest, stride, bd); | 2826 vp9_highbd_iwht4x4_16_add(input, dest, stride, bd); |
2803 else | 2827 else |
2804 vp9_high_iwht4x4_1_add(input, dest, stride, bd); | 2828 vp9_highbd_iwht4x4_1_add(input, dest, stride, bd); |
2805 } | 2829 } |
2806 | 2830 |
2807 void vp9_high_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, | 2831 void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, |
2808 int eob, int bd) { | 2832 int eob, int bd) { |
2809 // If dc is 1, then input[0] is the reconstructed value, do not need | 2833 // If dc is 1, then input[0] is the reconstructed value, do not need |
2810 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. | 2834 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. |
2811 | 2835 |
2812 // The calculation can be simplified if there are not many non-zero dct | 2836 // The calculation can be simplified if there are not many non-zero dct |
2813 // coefficients. Use eobs to decide what to do. | 2837 // coefficients. Use eobs to decide what to do. |
2814 // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. | 2838 // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. |
2815 // Combine that with code here. | 2839 // Combine that with code here. |
2816 // DC only DCT coefficient | 2840 // DC only DCT coefficient |
2817 if (eob == 1) { | 2841 if (eob == 1) { |
2818 vp9_high_idct8x8_1_add(input, dest, stride, bd); | 2842 vp9_highbd_idct8x8_1_add(input, dest, stride, bd); |
2819 } else if (eob <= 10) { | 2843 } else if (eob <= 10) { |
2820 vp9_high_idct8x8_10_add(input, dest, stride, bd); | 2844 vp9_highbd_idct8x8_10_add(input, dest, stride, bd); |
2821 } else { | 2845 } else { |
2822 vp9_high_idct8x8_64_add(input, dest, stride, bd); | 2846 vp9_highbd_idct8x8_64_add(input, dest, stride, bd); |
2823 } | 2847 } |
2824 } | 2848 } |
2825 | 2849 |
2826 void vp9_high_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, | 2850 void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest, |
2827 int eob, int bd) { | 2851 int stride, int eob, int bd) { |
2828 // The calculation can be simplified if there are not many non-zero dct | 2852 // The calculation can be simplified if there are not many non-zero dct |
2829 // coefficients. Use eobs to separate different cases. | 2853 // coefficients. Use eobs to separate different cases. |
2830 // DC only DCT coefficient. | 2854 // DC only DCT coefficient. |
2831 if (eob == 1) { | 2855 if (eob == 1) { |
2832 vp9_high_idct16x16_1_add(input, dest, stride, bd); | 2856 vp9_highbd_idct16x16_1_add(input, dest, stride, bd); |
2833 } else if (eob <= 10) { | 2857 } else if (eob <= 10) { |
2834 vp9_high_idct16x16_10_add(input, dest, stride, bd); | 2858 vp9_highbd_idct16x16_10_add(input, dest, stride, bd); |
2835 } else { | 2859 } else { |
2836 vp9_high_idct16x16_256_add(input, dest, stride, bd); | 2860 vp9_highbd_idct16x16_256_add(input, dest, stride, bd); |
2837 } | 2861 } |
2838 } | 2862 } |
2839 | 2863 |
2840 void vp9_high_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, | 2864 void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, |
2841 int eob, int bd) { | 2865 int stride, int eob, int bd) { |
2842 // Non-zero coeff only in upper-left 8x8 | 2866 // Non-zero coeff only in upper-left 8x8 |
2843 if (eob == 1) { | 2867 if (eob == 1) { |
2844 vp9_high_idct32x32_1_add(input, dest, stride, bd); | 2868 vp9_highbd_idct32x32_1_add(input, dest, stride, bd); |
2845 } else if (eob <= 34) { | 2869 } else if (eob <= 34) { |
2846 vp9_high_idct32x32_34_add(input, dest, stride, bd); | 2870 vp9_highbd_idct32x32_34_add(input, dest, stride, bd); |
2847 } else { | 2871 } else { |
2848 vp9_high_idct32x32_1024_add(input, dest, stride, bd); | 2872 vp9_highbd_idct32x32_1024_add(input, dest, stride, bd); |
2849 } | 2873 } |
2850 } | 2874 } |
2851 | 2875 |
2852 // iht | 2876 // iht |
2853 void vp9_high_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, | 2877 void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, |
2854 uint8_t *dest, int stride, int eob, int bd) { | 2878 uint8_t *dest, int stride, int eob, int bd) { |
2855 if (tx_type == DCT_DCT) | 2879 if (tx_type == DCT_DCT) |
2856 vp9_high_idct4x4_add(input, dest, stride, eob, bd); | 2880 vp9_highbd_idct4x4_add(input, dest, stride, eob, bd); |
2857 else | 2881 else |
2858 vp9_high_iht4x4_16_add(input, dest, stride, tx_type, bd); | 2882 vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); |
2859 } | 2883 } |
2860 | 2884 |
2861 void vp9_high_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, | 2885 void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, |
2862 uint8_t *dest, int stride, int eob, int bd) { | 2886 uint8_t *dest, int stride, int eob, int bd) { |
2863 if (tx_type == DCT_DCT) { | 2887 if (tx_type == DCT_DCT) { |
2864 vp9_high_idct8x8_add(input, dest, stride, eob, bd); | 2888 vp9_highbd_idct8x8_add(input, dest, stride, eob, bd); |
2865 } else { | 2889 } else { |
2866 vp9_high_iht8x8_64_add(input, dest, stride, tx_type, bd); | 2890 vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd); |
2867 } | 2891 } |
2868 } | 2892 } |
2869 | 2893 |
2870 void vp9_high_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, | 2894 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, |
2871 uint8_t *dest, int stride, int eob, int bd) { | 2895 uint8_t *dest, int stride, int eob, int bd) { |
2872 if (tx_type == DCT_DCT) { | 2896 if (tx_type == DCT_DCT) { |
2873 vp9_high_idct16x16_add(input, dest, stride, eob, bd); | 2897 vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); |
2874 } else { | 2898 } else { |
2875 vp9_high_iht16x16_256_add(input, dest, stride, tx_type, bd); | 2899 vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); |
2876 } | 2900 } |
2877 } | 2901 } |
2878 #endif // CONFIG_VP9_HIGHBITDEPTH | 2902 #endif // CONFIG_VP9_HIGHBITDEPTH |
OLD | NEW |