OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <assert.h> | 11 #include <assert.h> |
12 #include <math.h> | 12 #include <math.h> |
13 | 13 |
14 #include "./vpx_config.h" | 14 #include "./vpx_config.h" |
15 #include "./vp9_rtcd.h" | 15 #include "./vp9_rtcd.h" |
16 | 16 |
17 #include "vp9/common/vp9_blockd.h" | 17 #include "vp9/common/vp9_blockd.h" |
18 #include "vp9/common/vp9_idct.h" | 18 #include "vp9/common/vp9_idct.h" |
19 #include "vp9/common/vp9_systemdependent.h" | 19 #include "vp9/common/vp9_systemdependent.h" |
20 | 20 |
21 #include "vp9/encoder/vp9_dct.h" | 21 #include "vp9/encoder/vp9_dct.h" |
22 | 22 |
| 23 static INLINE int fdct_round_shift(int input) { |
| 24 int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); |
| 25 assert(INT16_MIN <= rv && rv <= INT16_MAX); |
| 26 return rv; |
| 27 } |
| 28 |
23 static void fdct4(const int16_t *input, int16_t *output) { | 29 static void fdct4(const int16_t *input, int16_t *output) { |
24 int16_t step[4]; | 30 int16_t step[4]; |
25 int temp1, temp2; | 31 int temp1, temp2; |
26 | 32 |
27 step[0] = input[0] + input[3]; | 33 step[0] = input[0] + input[3]; |
28 step[1] = input[1] + input[2]; | 34 step[1] = input[1] + input[2]; |
29 step[2] = input[1] - input[2]; | 35 step[2] = input[1] - input[2]; |
30 step[3] = input[0] - input[3]; | 36 step[3] = input[0] - input[3]; |
31 | 37 |
32 temp1 = (step[0] + step[1]) * cospi_16_64; | 38 temp1 = (step[0] + step[1]) * cospi_16_64; |
33 temp2 = (step[0] - step[1]) * cospi_16_64; | 39 temp2 = (step[0] - step[1]) * cospi_16_64; |
34 output[0] = dct_const_round_shift(temp1); | 40 output[0] = fdct_round_shift(temp1); |
35 output[2] = dct_const_round_shift(temp2); | 41 output[2] = fdct_round_shift(temp2); |
36 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; | 42 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; |
37 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; | 43 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; |
38 output[1] = dct_const_round_shift(temp1); | 44 output[1] = fdct_round_shift(temp1); |
39 output[3] = dct_const_round_shift(temp2); | 45 output[3] = fdct_round_shift(temp2); |
40 } | 46 } |
41 | 47 |
42 void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) { | 48 void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) { |
43 // The 2D transform is done with two passes which are actually pretty | 49 // The 2D transform is done with two passes which are actually pretty |
44 // similar. In the first one, we transform the columns and transpose | 50 // similar. In the first one, we transform the columns and transpose |
45 // the results. In the second one, we transform the rows. To achieve that, | 51 // the results. In the second one, we transform the rows. To achieve that, |
46 // as the first pass results are transposed, we transpose the columns (that | 52 // as the first pass results are transposed, we transpose the columns (that |
47 // is the transposed rows) and transpose the results (so that it goes back | 53 // is the transposed rows) and transpose the results (so that it goes back |
48 // in normal/row positions). | 54 // in normal/row positions). |
49 int pass; | 55 int pass; |
(...skipping 23 matching lines...) Expand all Loading... |
73 input[2] = in[2 * 4]; | 79 input[2] = in[2 * 4]; |
74 input[3] = in[3 * 4]; | 80 input[3] = in[3 * 4]; |
75 } | 81 } |
76 // Transform. | 82 // Transform. |
77 step[0] = input[0] + input[3]; | 83 step[0] = input[0] + input[3]; |
78 step[1] = input[1] + input[2]; | 84 step[1] = input[1] + input[2]; |
79 step[2] = input[1] - input[2]; | 85 step[2] = input[1] - input[2]; |
80 step[3] = input[0] - input[3]; | 86 step[3] = input[0] - input[3]; |
81 temp1 = (step[0] + step[1]) * cospi_16_64; | 87 temp1 = (step[0] + step[1]) * cospi_16_64; |
82 temp2 = (step[0] - step[1]) * cospi_16_64; | 88 temp2 = (step[0] - step[1]) * cospi_16_64; |
83 out[0] = dct_const_round_shift(temp1); | 89 out[0] = fdct_round_shift(temp1); |
84 out[2] = dct_const_round_shift(temp2); | 90 out[2] = fdct_round_shift(temp2); |
85 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; | 91 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; |
86 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; | 92 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; |
87 out[1] = dct_const_round_shift(temp1); | 93 out[1] = fdct_round_shift(temp1); |
88 out[3] = dct_const_round_shift(temp2); | 94 out[3] = fdct_round_shift(temp2); |
89 // Do next column (which is a transposed row in second/horizontal pass) | 95 // Do next column (which is a transposed row in second/horizontal pass) |
90 in++; | 96 in++; |
91 out += 4; | 97 out += 4; |
92 } | 98 } |
93 // Setup in/out for next pass. | 99 // Setup in/out for next pass. |
94 in = intermediate; | 100 in = intermediate; |
95 out = output; | 101 out = output; |
96 } | 102 } |
97 | 103 |
98 { | 104 { |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
131 x1 = sinpi_3_9 * s7; | 137 x1 = sinpi_3_9 * s7; |
132 x2 = s1 - s3 + s6; | 138 x2 = s1 - s3 + s6; |
133 x3 = s4; | 139 x3 = s4; |
134 | 140 |
135 s0 = x0 + x3; | 141 s0 = x0 + x3; |
136 s1 = x1; | 142 s1 = x1; |
137 s2 = x2 - x3; | 143 s2 = x2 - x3; |
138 s3 = x2 - x0 + x3; | 144 s3 = x2 - x0 + x3; |
139 | 145 |
140 // 1-D transform scaling factor is sqrt(2). | 146 // 1-D transform scaling factor is sqrt(2). |
141 output[0] = dct_const_round_shift(s0); | 147 output[0] = fdct_round_shift(s0); |
142 output[1] = dct_const_round_shift(s1); | 148 output[1] = fdct_round_shift(s1); |
143 output[2] = dct_const_round_shift(s2); | 149 output[2] = fdct_round_shift(s2); |
144 output[3] = dct_const_round_shift(s3); | 150 output[3] = fdct_round_shift(s3); |
145 } | 151 } |
146 | 152 |
147 static const transform_2d FHT_4[] = { | 153 static const transform_2d FHT_4[] = { |
148 { fdct4, fdct4 }, // DCT_DCT = 0 | 154 { fdct4, fdct4 }, // DCT_DCT = 0 |
149 { fadst4, fdct4 }, // ADST_DCT = 1 | 155 { fadst4, fdct4 }, // ADST_DCT = 1 |
150 { fdct4, fadst4 }, // DCT_ADST = 2 | 156 { fdct4, fadst4 }, // DCT_ADST = 2 |
151 { fadst4, fadst4 } // ADST_ADST = 3 | 157 { fadst4, fadst4 } // ADST_ADST = 3 |
152 }; | 158 }; |
153 | 159 |
154 void vp9_short_fht4x4_c(const int16_t *input, int16_t *output, | 160 void vp9_short_fht4x4_c(const int16_t *input, int16_t *output, |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
197 | 203 |
198 // fdct4(step, step); | 204 // fdct4(step, step); |
199 x0 = s0 + s3; | 205 x0 = s0 + s3; |
200 x1 = s1 + s2; | 206 x1 = s1 + s2; |
201 x2 = s1 - s2; | 207 x2 = s1 - s2; |
202 x3 = s0 - s3; | 208 x3 = s0 - s3; |
203 t0 = (x0 + x1) * cospi_16_64; | 209 t0 = (x0 + x1) * cospi_16_64; |
204 t1 = (x0 - x1) * cospi_16_64; | 210 t1 = (x0 - x1) * cospi_16_64; |
205 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; | 211 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; |
206 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; | 212 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; |
207 output[0] = dct_const_round_shift(t0); | 213 output[0] = fdct_round_shift(t0); |
208 output[2] = dct_const_round_shift(t2); | 214 output[2] = fdct_round_shift(t2); |
209 output[4] = dct_const_round_shift(t1); | 215 output[4] = fdct_round_shift(t1); |
210 output[6] = dct_const_round_shift(t3); | 216 output[6] = fdct_round_shift(t3); |
211 | 217 |
212 // Stage 2 | 218 // Stage 2 |
213 t0 = (s6 - s5) * cospi_16_64; | 219 t0 = (s6 - s5) * cospi_16_64; |
214 t1 = (s6 + s5) * cospi_16_64; | 220 t1 = (s6 + s5) * cospi_16_64; |
215 t2 = dct_const_round_shift(t0); | 221 t2 = fdct_round_shift(t0); |
216 t3 = dct_const_round_shift(t1); | 222 t3 = fdct_round_shift(t1); |
217 | 223 |
218 // Stage 3 | 224 // Stage 3 |
219 x0 = s4 + t2; | 225 x0 = s4 + t2; |
220 x1 = s4 - t2; | 226 x1 = s4 - t2; |
221 x2 = s7 - t3; | 227 x2 = s7 - t3; |
222 x3 = s7 + t3; | 228 x3 = s7 + t3; |
223 | 229 |
224 // Stage 4 | 230 // Stage 4 |
225 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 231 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
226 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 232 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
227 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 233 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
228 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 234 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
229 output[1] = dct_const_round_shift(t0); | 235 output[1] = fdct_round_shift(t0); |
230 output[3] = dct_const_round_shift(t2); | 236 output[3] = fdct_round_shift(t2); |
231 output[5] = dct_const_round_shift(t1); | 237 output[5] = fdct_round_shift(t1); |
232 output[7] = dct_const_round_shift(t3); | 238 output[7] = fdct_round_shift(t3); |
233 } | 239 } |
234 | 240 |
235 void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) { | 241 void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) { |
236 int i, j; | 242 int i, j; |
237 int16_t intermediate[64]; | 243 int16_t intermediate[64]; |
238 | 244 |
239 // Transform columns | 245 // Transform columns |
240 { | 246 { |
241 int16_t *output = intermediate; | 247 int16_t *output = intermediate; |
242 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; | 248 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; |
(...skipping 14 matching lines...) Expand all Loading... |
257 | 263 |
258 // fdct4(step, step); | 264 // fdct4(step, step); |
259 x0 = s0 + s3; | 265 x0 = s0 + s3; |
260 x1 = s1 + s2; | 266 x1 = s1 + s2; |
261 x2 = s1 - s2; | 267 x2 = s1 - s2; |
262 x3 = s0 - s3; | 268 x3 = s0 - s3; |
263 t0 = (x0 + x1) * cospi_16_64; | 269 t0 = (x0 + x1) * cospi_16_64; |
264 t1 = (x0 - x1) * cospi_16_64; | 270 t1 = (x0 - x1) * cospi_16_64; |
265 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; | 271 t2 = x2 * cospi_24_64 + x3 * cospi_8_64; |
266 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; | 272 t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; |
267 output[0 * 8] = dct_const_round_shift(t0); | 273 output[0 * 8] = fdct_round_shift(t0); |
268 output[2 * 8] = dct_const_round_shift(t2); | 274 output[2 * 8] = fdct_round_shift(t2); |
269 output[4 * 8] = dct_const_round_shift(t1); | 275 output[4 * 8] = fdct_round_shift(t1); |
270 output[6 * 8] = dct_const_round_shift(t3); | 276 output[6 * 8] = fdct_round_shift(t3); |
271 | 277 |
272 // Stage 2 | 278 // Stage 2 |
273 t0 = (s6 - s5) * cospi_16_64; | 279 t0 = (s6 - s5) * cospi_16_64; |
274 t1 = (s6 + s5) * cospi_16_64; | 280 t1 = (s6 + s5) * cospi_16_64; |
275 t2 = dct_const_round_shift(t0); | 281 t2 = fdct_round_shift(t0); |
276 t3 = dct_const_round_shift(t1); | 282 t3 = fdct_round_shift(t1); |
277 | 283 |
278 // Stage 3 | 284 // Stage 3 |
279 x0 = s4 + t2; | 285 x0 = s4 + t2; |
280 x1 = s4 - t2; | 286 x1 = s4 - t2; |
281 x2 = s7 - t3; | 287 x2 = s7 - t3; |
282 x3 = s7 + t3; | 288 x3 = s7 + t3; |
283 | 289 |
284 // Stage 4 | 290 // Stage 4 |
285 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 291 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
286 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 292 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
287 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 293 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
288 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 294 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
289 output[1 * 8] = dct_const_round_shift(t0); | 295 output[1 * 8] = fdct_round_shift(t0); |
290 output[3 * 8] = dct_const_round_shift(t2); | 296 output[3 * 8] = fdct_round_shift(t2); |
291 output[5 * 8] = dct_const_round_shift(t1); | 297 output[5 * 8] = fdct_round_shift(t1); |
292 output[7 * 8] = dct_const_round_shift(t3); | 298 output[7 * 8] = fdct_round_shift(t3); |
293 input++; | 299 input++; |
294 output++; | 300 output++; |
295 } | 301 } |
296 } | 302 } |
297 | 303 |
298 // Rows | 304 // Rows |
299 for (i = 0; i < 8; ++i) { | 305 for (i = 0; i < 8; ++i) { |
300 fdct8(&intermediate[i * 8], &final_output[i * 8]); | 306 fdct8(&intermediate[i * 8], &final_output[i * 8]); |
301 for (j = 0; j < 8; ++j) | 307 for (j = 0; j < 8; ++j) |
302 final_output[j + i * 8] /= 2; | 308 final_output[j + i * 8] /= 2; |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
381 | 387 |
382 // fdct4(step, step); | 388 // fdct4(step, step); |
383 x0 = s0 + s3; | 389 x0 = s0 + s3; |
384 x1 = s1 + s2; | 390 x1 = s1 + s2; |
385 x2 = s1 - s2; | 391 x2 = s1 - s2; |
386 x3 = s0 - s3; | 392 x3 = s0 - s3; |
387 t0 = (x0 + x1) * cospi_16_64; | 393 t0 = (x0 + x1) * cospi_16_64; |
388 t1 = (x0 - x1) * cospi_16_64; | 394 t1 = (x0 - x1) * cospi_16_64; |
389 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; | 395 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; |
390 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; | 396 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; |
391 out[0] = dct_const_round_shift(t0); | 397 out[0] = fdct_round_shift(t0); |
392 out[4] = dct_const_round_shift(t2); | 398 out[4] = fdct_round_shift(t2); |
393 out[8] = dct_const_round_shift(t1); | 399 out[8] = fdct_round_shift(t1); |
394 out[12] = dct_const_round_shift(t3); | 400 out[12] = fdct_round_shift(t3); |
395 | 401 |
396 // Stage 2 | 402 // Stage 2 |
397 t0 = (s6 - s5) * cospi_16_64; | 403 t0 = (s6 - s5) * cospi_16_64; |
398 t1 = (s6 + s5) * cospi_16_64; | 404 t1 = (s6 + s5) * cospi_16_64; |
399 t2 = dct_const_round_shift(t0); | 405 t2 = fdct_round_shift(t0); |
400 t3 = dct_const_round_shift(t1); | 406 t3 = fdct_round_shift(t1); |
401 | 407 |
402 // Stage 3 | 408 // Stage 3 |
403 x0 = s4 + t2; | 409 x0 = s4 + t2; |
404 x1 = s4 - t2; | 410 x1 = s4 - t2; |
405 x2 = s7 - t3; | 411 x2 = s7 - t3; |
406 x3 = s7 + t3; | 412 x3 = s7 + t3; |
407 | 413 |
408 // Stage 4 | 414 // Stage 4 |
409 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 415 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
410 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 416 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
411 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 417 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
412 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 418 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
413 out[2] = dct_const_round_shift(t0); | 419 out[2] = fdct_round_shift(t0); |
414 out[6] = dct_const_round_shift(t2); | 420 out[6] = fdct_round_shift(t2); |
415 out[10] = dct_const_round_shift(t1); | 421 out[10] = fdct_round_shift(t1); |
416 out[14] = dct_const_round_shift(t3); | 422 out[14] = fdct_round_shift(t3); |
417 } | 423 } |
418 // Work on the next eight values; step1 -> odd_results | 424 // Work on the next eight values; step1 -> odd_results |
419 { | 425 { |
420 // step 2 | 426 // step 2 |
421 temp1 = (step1[5] - step1[2]) * cospi_16_64; | 427 temp1 = (step1[5] - step1[2]) * cospi_16_64; |
422 temp2 = (step1[4] - step1[3]) * cospi_16_64; | 428 temp2 = (step1[4] - step1[3]) * cospi_16_64; |
423 step2[2] = dct_const_round_shift(temp1); | 429 step2[2] = fdct_round_shift(temp1); |
424 step2[3] = dct_const_round_shift(temp2); | 430 step2[3] = fdct_round_shift(temp2); |
425 temp1 = (step1[4] + step1[3]) * cospi_16_64; | 431 temp1 = (step1[4] + step1[3]) * cospi_16_64; |
426 temp2 = (step1[5] + step1[2]) * cospi_16_64; | 432 temp2 = (step1[5] + step1[2]) * cospi_16_64; |
427 step2[4] = dct_const_round_shift(temp1); | 433 step2[4] = fdct_round_shift(temp1); |
428 step2[5] = dct_const_round_shift(temp2); | 434 step2[5] = fdct_round_shift(temp2); |
429 // step 3 | 435 // step 3 |
430 step3[0] = step1[0] + step2[3]; | 436 step3[0] = step1[0] + step2[3]; |
431 step3[1] = step1[1] + step2[2]; | 437 step3[1] = step1[1] + step2[2]; |
432 step3[2] = step1[1] - step2[2]; | 438 step3[2] = step1[1] - step2[2]; |
433 step3[3] = step1[0] - step2[3]; | 439 step3[3] = step1[0] - step2[3]; |
434 step3[4] = step1[7] - step2[4]; | 440 step3[4] = step1[7] - step2[4]; |
435 step3[5] = step1[6] - step2[5]; | 441 step3[5] = step1[6] - step2[5]; |
436 step3[6] = step1[6] + step2[5]; | 442 step3[6] = step1[6] + step2[5]; |
437 step3[7] = step1[7] + step2[4]; | 443 step3[7] = step1[7] + step2[4]; |
438 // step 4 | 444 // step 4 |
439 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; | 445 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; |
440 temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64; | 446 temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64; |
441 step2[1] = dct_const_round_shift(temp1); | 447 step2[1] = fdct_round_shift(temp1); |
442 step2[2] = dct_const_round_shift(temp2); | 448 step2[2] = fdct_round_shift(temp2); |
443 temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64; | 449 temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64; |
444 temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; | 450 temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; |
445 step2[5] = dct_const_round_shift(temp1); | 451 step2[5] = fdct_round_shift(temp1); |
446 step2[6] = dct_const_round_shift(temp2); | 452 step2[6] = fdct_round_shift(temp2); |
447 // step 5 | 453 // step 5 |
448 step1[0] = step3[0] + step2[1]; | 454 step1[0] = step3[0] + step2[1]; |
449 step1[1] = step3[0] - step2[1]; | 455 step1[1] = step3[0] - step2[1]; |
450 step1[2] = step3[3] - step2[2]; | 456 step1[2] = step3[3] - step2[2]; |
451 step1[3] = step3[3] + step2[2]; | 457 step1[3] = step3[3] + step2[2]; |
452 step1[4] = step3[4] + step2[5]; | 458 step1[4] = step3[4] + step2[5]; |
453 step1[5] = step3[4] - step2[5]; | 459 step1[5] = step3[4] - step2[5]; |
454 step1[6] = step3[7] - step2[6]; | 460 step1[6] = step3[7] - step2[6]; |
455 step1[7] = step3[7] + step2[6]; | 461 step1[7] = step3[7] + step2[6]; |
456 // step 6 | 462 // step 6 |
457 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; | 463 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; |
458 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; | 464 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; |
459 out[1] = dct_const_round_shift(temp1); | 465 out[1] = fdct_round_shift(temp1); |
460 out[9] = dct_const_round_shift(temp2); | 466 out[9] = fdct_round_shift(temp2); |
461 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; | 467 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; |
462 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; | 468 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; |
463 out[5] = dct_const_round_shift(temp1); | 469 out[5] = fdct_round_shift(temp1); |
464 out[13] = dct_const_round_shift(temp2); | 470 out[13] = fdct_round_shift(temp2); |
465 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; | 471 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; |
466 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; | 472 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; |
467 out[3] = dct_const_round_shift(temp1); | 473 out[3] = fdct_round_shift(temp1); |
468 out[11] = dct_const_round_shift(temp2); | 474 out[11] = fdct_round_shift(temp2); |
469 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; | 475 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; |
470 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; | 476 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; |
471 out[7] = dct_const_round_shift(temp1); | 477 out[7] = fdct_round_shift(temp1); |
472 out[15] = dct_const_round_shift(temp2); | 478 out[15] = fdct_round_shift(temp2); |
473 } | 479 } |
474 // Do next column (which is a transposed row in second/horizontal pass) | 480 // Do next column (which is a transposed row in second/horizontal pass) |
475 in++; | 481 in++; |
476 out += 16; | 482 out += 16; |
477 } | 483 } |
478 // Setup in/out for next pass. | 484 // Setup in/out for next pass. |
479 in = intermediate; | 485 in = intermediate; |
480 out = output; | 486 out = output; |
481 } | 487 } |
482 } | 488 } |
(...skipping 13 matching lines...) Expand all Loading... |
496 // stage 1 | 502 // stage 1 |
497 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; | 503 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; |
498 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; | 504 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; |
499 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; | 505 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; |
500 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; | 506 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; |
501 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; | 507 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; |
502 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; | 508 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; |
503 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; | 509 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; |
504 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; | 510 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; |
505 | 511 |
506 x0 = dct_const_round_shift(s0 + s4); | 512 x0 = fdct_round_shift(s0 + s4); |
507 x1 = dct_const_round_shift(s1 + s5); | 513 x1 = fdct_round_shift(s1 + s5); |
508 x2 = dct_const_round_shift(s2 + s6); | 514 x2 = fdct_round_shift(s2 + s6); |
509 x3 = dct_const_round_shift(s3 + s7); | 515 x3 = fdct_round_shift(s3 + s7); |
510 x4 = dct_const_round_shift(s0 - s4); | 516 x4 = fdct_round_shift(s0 - s4); |
511 x5 = dct_const_round_shift(s1 - s5); | 517 x5 = fdct_round_shift(s1 - s5); |
512 x6 = dct_const_round_shift(s2 - s6); | 518 x6 = fdct_round_shift(s2 - s6); |
513 x7 = dct_const_round_shift(s3 - s7); | 519 x7 = fdct_round_shift(s3 - s7); |
514 | 520 |
515 // stage 2 | 521 // stage 2 |
516 s0 = x0; | 522 s0 = x0; |
517 s1 = x1; | 523 s1 = x1; |
518 s2 = x2; | 524 s2 = x2; |
519 s3 = x3; | 525 s3 = x3; |
520 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; | 526 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; |
521 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; | 527 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; |
522 s6 = - cospi_24_64 * x6 + cospi_8_64 * x7; | 528 s6 = - cospi_24_64 * x6 + cospi_8_64 * x7; |
523 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; | 529 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; |
524 | 530 |
525 x0 = s0 + s2; | 531 x0 = s0 + s2; |
526 x1 = s1 + s3; | 532 x1 = s1 + s3; |
527 x2 = s0 - s2; | 533 x2 = s0 - s2; |
528 x3 = s1 - s3; | 534 x3 = s1 - s3; |
529 x4 = dct_const_round_shift(s4 + s6); | 535 x4 = fdct_round_shift(s4 + s6); |
530 x5 = dct_const_round_shift(s5 + s7); | 536 x5 = fdct_round_shift(s5 + s7); |
531 x6 = dct_const_round_shift(s4 - s6); | 537 x6 = fdct_round_shift(s4 - s6); |
532 x7 = dct_const_round_shift(s5 - s7); | 538 x7 = fdct_round_shift(s5 - s7); |
533 | 539 |
534 // stage 3 | 540 // stage 3 |
535 s2 = cospi_16_64 * (x2 + x3); | 541 s2 = cospi_16_64 * (x2 + x3); |
536 s3 = cospi_16_64 * (x2 - x3); | 542 s3 = cospi_16_64 * (x2 - x3); |
537 s6 = cospi_16_64 * (x6 + x7); | 543 s6 = cospi_16_64 * (x6 + x7); |
538 s7 = cospi_16_64 * (x6 - x7); | 544 s7 = cospi_16_64 * (x6 - x7); |
539 | 545 |
540 x2 = dct_const_round_shift(s2); | 546 x2 = fdct_round_shift(s2); |
541 x3 = dct_const_round_shift(s3); | 547 x3 = fdct_round_shift(s3); |
542 x6 = dct_const_round_shift(s6); | 548 x6 = fdct_round_shift(s6); |
543 x7 = dct_const_round_shift(s7); | 549 x7 = fdct_round_shift(s7); |
544 | 550 |
545 output[0] = x0; | 551 output[0] = x0; |
546 output[1] = - x4; | 552 output[1] = - x4; |
547 output[2] = x6; | 553 output[2] = x6; |
548 output[3] = - x2; | 554 output[3] = - x2; |
549 output[4] = x3; | 555 output[4] = x3; |
550 output[5] = - x7; | 556 output[5] = - x7; |
551 output[6] = x5; | 557 output[6] = x5; |
552 output[7] = - x1; | 558 output[7] = - x1; |
553 } | 559 } |
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
686 | 692 |
687 // fdct4(step, step); | 693 // fdct4(step, step); |
688 x0 = s0 + s3; | 694 x0 = s0 + s3; |
689 x1 = s1 + s2; | 695 x1 = s1 + s2; |
690 x2 = s1 - s2; | 696 x2 = s1 - s2; |
691 x3 = s0 - s3; | 697 x3 = s0 - s3; |
692 t0 = (x0 + x1) * cospi_16_64; | 698 t0 = (x0 + x1) * cospi_16_64; |
693 t1 = (x0 - x1) * cospi_16_64; | 699 t1 = (x0 - x1) * cospi_16_64; |
694 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; | 700 t2 = x3 * cospi_8_64 + x2 * cospi_24_64; |
695 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; | 701 t3 = x3 * cospi_24_64 - x2 * cospi_8_64; |
696 out[0] = dct_const_round_shift(t0); | 702 out[0] = fdct_round_shift(t0); |
697 out[4] = dct_const_round_shift(t2); | 703 out[4] = fdct_round_shift(t2); |
698 out[8] = dct_const_round_shift(t1); | 704 out[8] = fdct_round_shift(t1); |
699 out[12] = dct_const_round_shift(t3); | 705 out[12] = fdct_round_shift(t3); |
700 | 706 |
701 // Stage 2 | 707 // Stage 2 |
702 t0 = (s6 - s5) * cospi_16_64; | 708 t0 = (s6 - s5) * cospi_16_64; |
703 t1 = (s6 + s5) * cospi_16_64; | 709 t1 = (s6 + s5) * cospi_16_64; |
704 t2 = dct_const_round_shift(t0); | 710 t2 = fdct_round_shift(t0); |
705 t3 = dct_const_round_shift(t1); | 711 t3 = fdct_round_shift(t1); |
706 | 712 |
707 // Stage 3 | 713 // Stage 3 |
708 x0 = s4 + t2; | 714 x0 = s4 + t2; |
709 x1 = s4 - t2; | 715 x1 = s4 - t2; |
710 x2 = s7 - t3; | 716 x2 = s7 - t3; |
711 x3 = s7 + t3; | 717 x3 = s7 + t3; |
712 | 718 |
713 // Stage 4 | 719 // Stage 4 |
714 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 720 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
715 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 721 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
716 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 722 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
717 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 723 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
718 out[2] = dct_const_round_shift(t0); | 724 out[2] = fdct_round_shift(t0); |
719 out[6] = dct_const_round_shift(t2); | 725 out[6] = fdct_round_shift(t2); |
720 out[10] = dct_const_round_shift(t1); | 726 out[10] = fdct_round_shift(t1); |
721 out[14] = dct_const_round_shift(t3); | 727 out[14] = fdct_round_shift(t3); |
722 } | 728 } |
723 | 729 |
724 // step 2 | 730 // step 2 |
725 temp1 = (step1[5] - step1[2]) * cospi_16_64; | 731 temp1 = (step1[5] - step1[2]) * cospi_16_64; |
726 temp2 = (step1[4] - step1[3]) * cospi_16_64; | 732 temp2 = (step1[4] - step1[3]) * cospi_16_64; |
727 step2[2] = dct_const_round_shift(temp1); | 733 step2[2] = fdct_round_shift(temp1); |
728 step2[3] = dct_const_round_shift(temp2); | 734 step2[3] = fdct_round_shift(temp2); |
729 temp1 = (step1[4] + step1[3]) * cospi_16_64; | 735 temp1 = (step1[4] + step1[3]) * cospi_16_64; |
730 temp2 = (step1[5] + step1[2]) * cospi_16_64; | 736 temp2 = (step1[5] + step1[2]) * cospi_16_64; |
731 step2[4] = dct_const_round_shift(temp1); | 737 step2[4] = fdct_round_shift(temp1); |
732 step2[5] = dct_const_round_shift(temp2); | 738 step2[5] = fdct_round_shift(temp2); |
733 | 739 |
734 // step 3 | 740 // step 3 |
735 step3[0] = step1[0] + step2[3]; | 741 step3[0] = step1[0] + step2[3]; |
736 step3[1] = step1[1] + step2[2]; | 742 step3[1] = step1[1] + step2[2]; |
737 step3[2] = step1[1] - step2[2]; | 743 step3[2] = step1[1] - step2[2]; |
738 step3[3] = step1[0] - step2[3]; | 744 step3[3] = step1[0] - step2[3]; |
739 step3[4] = step1[7] - step2[4]; | 745 step3[4] = step1[7] - step2[4]; |
740 step3[5] = step1[6] - step2[5]; | 746 step3[5] = step1[6] - step2[5]; |
741 step3[6] = step1[6] + step2[5]; | 747 step3[6] = step1[6] + step2[5]; |
742 step3[7] = step1[7] + step2[4]; | 748 step3[7] = step1[7] + step2[4]; |
743 | 749 |
744 // step 4 | 750 // step 4 |
745 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; | 751 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; |
746 temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64; | 752 temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64; |
747 step2[1] = dct_const_round_shift(temp1); | 753 step2[1] = fdct_round_shift(temp1); |
748 step2[2] = dct_const_round_shift(temp2); | 754 step2[2] = fdct_round_shift(temp2); |
749 temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64; | 755 temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64; |
750 temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; | 756 temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; |
751 step2[5] = dct_const_round_shift(temp1); | 757 step2[5] = fdct_round_shift(temp1); |
752 step2[6] = dct_const_round_shift(temp2); | 758 step2[6] = fdct_round_shift(temp2); |
753 | 759 |
754 // step 5 | 760 // step 5 |
755 step1[0] = step3[0] + step2[1]; | 761 step1[0] = step3[0] + step2[1]; |
756 step1[1] = step3[0] - step2[1]; | 762 step1[1] = step3[0] - step2[1]; |
757 step1[2] = step3[3] - step2[2]; | 763 step1[2] = step3[3] - step2[2]; |
758 step1[3] = step3[3] + step2[2]; | 764 step1[3] = step3[3] + step2[2]; |
759 step1[4] = step3[4] + step2[5]; | 765 step1[4] = step3[4] + step2[5]; |
760 step1[5] = step3[4] - step2[5]; | 766 step1[5] = step3[4] - step2[5]; |
761 step1[6] = step3[7] - step2[6]; | 767 step1[6] = step3[7] - step2[6]; |
762 step1[7] = step3[7] + step2[6]; | 768 step1[7] = step3[7] + step2[6]; |
763 | 769 |
764 // step 6 | 770 // step 6 |
765 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; | 771 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; |
766 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; | 772 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; |
767 out[1] = dct_const_round_shift(temp1); | 773 out[1] = fdct_round_shift(temp1); |
768 out[9] = dct_const_round_shift(temp2); | 774 out[9] = fdct_round_shift(temp2); |
769 | 775 |
770 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; | 776 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; |
771 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; | 777 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; |
772 out[5] = dct_const_round_shift(temp1); | 778 out[5] = fdct_round_shift(temp1); |
773 out[13] = dct_const_round_shift(temp2); | 779 out[13] = fdct_round_shift(temp2); |
774 | 780 |
775 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; | 781 temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; |
776 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; | 782 temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; |
777 out[3] = dct_const_round_shift(temp1); | 783 out[3] = fdct_round_shift(temp1); |
778 out[11] = dct_const_round_shift(temp2); | 784 out[11] = fdct_round_shift(temp2); |
779 | 785 |
780 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; | 786 temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; |
781 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; | 787 temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; |
782 out[7] = dct_const_round_shift(temp1); | 788 out[7] = fdct_round_shift(temp1); |
783 out[15] = dct_const_round_shift(temp2); | 789 out[15] = fdct_round_shift(temp2); |
784 } | 790 } |
785 | 791 |
786 static void fadst16(const int16_t *input, int16_t *output) { | 792 static void fadst16(const int16_t *input, int16_t *output) { |
787 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; | 793 int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; |
788 | 794 |
789 int x0 = input[15]; | 795 int x0 = input[15]; |
790 int x1 = input[0]; | 796 int x1 = input[0]; |
791 int x2 = input[13]; | 797 int x2 = input[13]; |
792 int x3 = input[2]; | 798 int x3 = input[2]; |
793 int x4 = input[11]; | 799 int x4 = input[11]; |
(...skipping 20 matching lines...) Expand all Loading... |
814 s7 = x6 * cospi_19_64 - x7 * cospi_13_64; | 820 s7 = x6 * cospi_19_64 - x7 * cospi_13_64; |
815 s8 = x8 * cospi_17_64 + x9 * cospi_15_64; | 821 s8 = x8 * cospi_17_64 + x9 * cospi_15_64; |
816 s9 = x8 * cospi_15_64 - x9 * cospi_17_64; | 822 s9 = x8 * cospi_15_64 - x9 * cospi_17_64; |
817 s10 = x10 * cospi_21_64 + x11 * cospi_11_64; | 823 s10 = x10 * cospi_21_64 + x11 * cospi_11_64; |
818 s11 = x10 * cospi_11_64 - x11 * cospi_21_64; | 824 s11 = x10 * cospi_11_64 - x11 * cospi_21_64; |
819 s12 = x12 * cospi_25_64 + x13 * cospi_7_64; | 825 s12 = x12 * cospi_25_64 + x13 * cospi_7_64; |
820 s13 = x12 * cospi_7_64 - x13 * cospi_25_64; | 826 s13 = x12 * cospi_7_64 - x13 * cospi_25_64; |
821 s14 = x14 * cospi_29_64 + x15 * cospi_3_64; | 827 s14 = x14 * cospi_29_64 + x15 * cospi_3_64; |
822 s15 = x14 * cospi_3_64 - x15 * cospi_29_64; | 828 s15 = x14 * cospi_3_64 - x15 * cospi_29_64; |
823 | 829 |
824 x0 = dct_const_round_shift(s0 + s8); | 830 x0 = fdct_round_shift(s0 + s8); |
825 x1 = dct_const_round_shift(s1 + s9); | 831 x1 = fdct_round_shift(s1 + s9); |
826 x2 = dct_const_round_shift(s2 + s10); | 832 x2 = fdct_round_shift(s2 + s10); |
827 x3 = dct_const_round_shift(s3 + s11); | 833 x3 = fdct_round_shift(s3 + s11); |
828 x4 = dct_const_round_shift(s4 + s12); | 834 x4 = fdct_round_shift(s4 + s12); |
829 x5 = dct_const_round_shift(s5 + s13); | 835 x5 = fdct_round_shift(s5 + s13); |
830 x6 = dct_const_round_shift(s6 + s14); | 836 x6 = fdct_round_shift(s6 + s14); |
831 x7 = dct_const_round_shift(s7 + s15); | 837 x7 = fdct_round_shift(s7 + s15); |
832 x8 = dct_const_round_shift(s0 - s8); | 838 x8 = fdct_round_shift(s0 - s8); |
833 x9 = dct_const_round_shift(s1 - s9); | 839 x9 = fdct_round_shift(s1 - s9); |
834 x10 = dct_const_round_shift(s2 - s10); | 840 x10 = fdct_round_shift(s2 - s10); |
835 x11 = dct_const_round_shift(s3 - s11); | 841 x11 = fdct_round_shift(s3 - s11); |
836 x12 = dct_const_round_shift(s4 - s12); | 842 x12 = fdct_round_shift(s4 - s12); |
837 x13 = dct_const_round_shift(s5 - s13); | 843 x13 = fdct_round_shift(s5 - s13); |
838 x14 = dct_const_round_shift(s6 - s14); | 844 x14 = fdct_round_shift(s6 - s14); |
839 x15 = dct_const_round_shift(s7 - s15); | 845 x15 = fdct_round_shift(s7 - s15); |
840 | 846 |
841 // stage 2 | 847 // stage 2 |
842 s0 = x0; | 848 s0 = x0; |
843 s1 = x1; | 849 s1 = x1; |
844 s2 = x2; | 850 s2 = x2; |
845 s3 = x3; | 851 s3 = x3; |
846 s4 = x4; | 852 s4 = x4; |
847 s5 = x5; | 853 s5 = x5; |
848 s6 = x6; | 854 s6 = x6; |
849 s7 = x7; | 855 s7 = x7; |
850 s8 = x8 * cospi_4_64 + x9 * cospi_28_64; | 856 s8 = x8 * cospi_4_64 + x9 * cospi_28_64; |
851 s9 = x8 * cospi_28_64 - x9 * cospi_4_64; | 857 s9 = x8 * cospi_28_64 - x9 * cospi_4_64; |
852 s10 = x10 * cospi_20_64 + x11 * cospi_12_64; | 858 s10 = x10 * cospi_20_64 + x11 * cospi_12_64; |
853 s11 = x10 * cospi_12_64 - x11 * cospi_20_64; | 859 s11 = x10 * cospi_12_64 - x11 * cospi_20_64; |
854 s12 = - x12 * cospi_28_64 + x13 * cospi_4_64; | 860 s12 = - x12 * cospi_28_64 + x13 * cospi_4_64; |
855 s13 = x12 * cospi_4_64 + x13 * cospi_28_64; | 861 s13 = x12 * cospi_4_64 + x13 * cospi_28_64; |
856 s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; | 862 s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; |
857 s15 = x14 * cospi_20_64 + x15 * cospi_12_64; | 863 s15 = x14 * cospi_20_64 + x15 * cospi_12_64; |
858 | 864 |
859 x0 = s0 + s4; | 865 x0 = s0 + s4; |
860 x1 = s1 + s5; | 866 x1 = s1 + s5; |
861 x2 = s2 + s6; | 867 x2 = s2 + s6; |
862 x3 = s3 + s7; | 868 x3 = s3 + s7; |
863 x4 = s0 - s4; | 869 x4 = s0 - s4; |
864 x5 = s1 - s5; | 870 x5 = s1 - s5; |
865 x6 = s2 - s6; | 871 x6 = s2 - s6; |
866 x7 = s3 - s7; | 872 x7 = s3 - s7; |
867 x8 = dct_const_round_shift(s8 + s12); | 873 x8 = fdct_round_shift(s8 + s12); |
868 x9 = dct_const_round_shift(s9 + s13); | 874 x9 = fdct_round_shift(s9 + s13); |
869 x10 = dct_const_round_shift(s10 + s14); | 875 x10 = fdct_round_shift(s10 + s14); |
870 x11 = dct_const_round_shift(s11 + s15); | 876 x11 = fdct_round_shift(s11 + s15); |
871 x12 = dct_const_round_shift(s8 - s12); | 877 x12 = fdct_round_shift(s8 - s12); |
872 x13 = dct_const_round_shift(s9 - s13); | 878 x13 = fdct_round_shift(s9 - s13); |
873 x14 = dct_const_round_shift(s10 - s14); | 879 x14 = fdct_round_shift(s10 - s14); |
874 x15 = dct_const_round_shift(s11 - s15); | 880 x15 = fdct_round_shift(s11 - s15); |
875 | 881 |
876 // stage 3 | 882 // stage 3 |
877 s0 = x0; | 883 s0 = x0; |
878 s1 = x1; | 884 s1 = x1; |
879 s2 = x2; | 885 s2 = x2; |
880 s3 = x3; | 886 s3 = x3; |
881 s4 = x4 * cospi_8_64 + x5 * cospi_24_64; | 887 s4 = x4 * cospi_8_64 + x5 * cospi_24_64; |
882 s5 = x4 * cospi_24_64 - x5 * cospi_8_64; | 888 s5 = x4 * cospi_24_64 - x5 * cospi_8_64; |
883 s6 = - x6 * cospi_24_64 + x7 * cospi_8_64; | 889 s6 = - x6 * cospi_24_64 + x7 * cospi_8_64; |
884 s7 = x6 * cospi_8_64 + x7 * cospi_24_64; | 890 s7 = x6 * cospi_8_64 + x7 * cospi_24_64; |
885 s8 = x8; | 891 s8 = x8; |
886 s9 = x9; | 892 s9 = x9; |
887 s10 = x10; | 893 s10 = x10; |
888 s11 = x11; | 894 s11 = x11; |
889 s12 = x12 * cospi_8_64 + x13 * cospi_24_64; | 895 s12 = x12 * cospi_8_64 + x13 * cospi_24_64; |
890 s13 = x12 * cospi_24_64 - x13 * cospi_8_64; | 896 s13 = x12 * cospi_24_64 - x13 * cospi_8_64; |
891 s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; | 897 s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; |
892 s15 = x14 * cospi_8_64 + x15 * cospi_24_64; | 898 s15 = x14 * cospi_8_64 + x15 * cospi_24_64; |
893 | 899 |
894 x0 = s0 + s2; | 900 x0 = s0 + s2; |
895 x1 = s1 + s3; | 901 x1 = s1 + s3; |
896 x2 = s0 - s2; | 902 x2 = s0 - s2; |
897 x3 = s1 - s3; | 903 x3 = s1 - s3; |
898 x4 = dct_const_round_shift(s4 + s6); | 904 x4 = fdct_round_shift(s4 + s6); |
899 x5 = dct_const_round_shift(s5 + s7); | 905 x5 = fdct_round_shift(s5 + s7); |
900 x6 = dct_const_round_shift(s4 - s6); | 906 x6 = fdct_round_shift(s4 - s6); |
901 x7 = dct_const_round_shift(s5 - s7); | 907 x7 = fdct_round_shift(s5 - s7); |
902 x8 = s8 + s10; | 908 x8 = s8 + s10; |
903 x9 = s9 + s11; | 909 x9 = s9 + s11; |
904 x10 = s8 - s10; | 910 x10 = s8 - s10; |
905 x11 = s9 - s11; | 911 x11 = s9 - s11; |
906 x12 = dct_const_round_shift(s12 + s14); | 912 x12 = fdct_round_shift(s12 + s14); |
907 x13 = dct_const_round_shift(s13 + s15); | 913 x13 = fdct_round_shift(s13 + s15); |
908 x14 = dct_const_round_shift(s12 - s14); | 914 x14 = fdct_round_shift(s12 - s14); |
909 x15 = dct_const_round_shift(s13 - s15); | 915 x15 = fdct_round_shift(s13 - s15); |
910 | 916 |
911 // stage 4 | 917 // stage 4 |
912 s2 = (- cospi_16_64) * (x2 + x3); | 918 s2 = (- cospi_16_64) * (x2 + x3); |
913 s3 = cospi_16_64 * (x2 - x3); | 919 s3 = cospi_16_64 * (x2 - x3); |
914 s6 = cospi_16_64 * (x6 + x7); | 920 s6 = cospi_16_64 * (x6 + x7); |
915 s7 = cospi_16_64 * (- x6 + x7); | 921 s7 = cospi_16_64 * (- x6 + x7); |
916 s10 = cospi_16_64 * (x10 + x11); | 922 s10 = cospi_16_64 * (x10 + x11); |
917 s11 = cospi_16_64 * (- x10 + x11); | 923 s11 = cospi_16_64 * (- x10 + x11); |
918 s14 = (- cospi_16_64) * (x14 + x15); | 924 s14 = (- cospi_16_64) * (x14 + x15); |
919 s15 = cospi_16_64 * (x14 - x15); | 925 s15 = cospi_16_64 * (x14 - x15); |
920 | 926 |
921 x2 = dct_const_round_shift(s2); | 927 x2 = fdct_round_shift(s2); |
922 x3 = dct_const_round_shift(s3); | 928 x3 = fdct_round_shift(s3); |
923 x6 = dct_const_round_shift(s6); | 929 x6 = fdct_round_shift(s6); |
924 x7 = dct_const_round_shift(s7); | 930 x7 = fdct_round_shift(s7); |
925 x10 = dct_const_round_shift(s10); | 931 x10 = fdct_round_shift(s10); |
926 x11 = dct_const_round_shift(s11); | 932 x11 = fdct_round_shift(s11); |
927 x14 = dct_const_round_shift(s14); | 933 x14 = fdct_round_shift(s14); |
928 x15 = dct_const_round_shift(s15); | 934 x15 = fdct_round_shift(s15); |
929 | 935 |
930 output[0] = x0; | 936 output[0] = x0; |
931 output[1] = - x8; | 937 output[1] = - x8; |
932 output[2] = x12; | 938 output[2] = x12; |
933 output[3] = - x4; | 939 output[3] = - x4; |
934 output[4] = x6; | 940 output[4] = x6; |
935 output[5] = x14; | 941 output[5] = x14; |
936 output[6] = x10; | 942 output[6] = x10; |
937 output[7] = x2; | 943 output[7] = x2; |
938 output[8] = x3; | 944 output[8] = x3; |
(...skipping 447 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1386 vp9_short_fht8x8(input, output, stride, tx_type); | 1392 vp9_short_fht8x8(input, output, stride, tx_type); |
1387 } | 1393 } |
1388 | 1394 |
1389 void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output, | 1395 void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output, |
1390 int stride) { | 1396 int stride) { |
1391 if (tx_type == DCT_DCT) | 1397 if (tx_type == DCT_DCT) |
1392 vp9_fdct16x16(input, output, stride); | 1398 vp9_fdct16x16(input, output, stride); |
1393 else | 1399 else |
1394 vp9_short_fht16x16(input, output, stride, tx_type); | 1400 vp9_short_fht16x16(input, output, stride, tx_type); |
1395 } | 1401 } |
OLD | NEW |