Chromium Code Reviews

Diff: source/libvpx/vp9/encoder/vp9_dct.c

Issue 111463005: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 7 years ago
/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <math.h>

#include "./vpx_config.h"
#include "./vp9_rtcd.h"

#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_systemdependent.h"

#include "vp9/encoder/vp9_dct.h"

+ static INLINE int fdct_round_shift(int input) {
+   int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
+   assert(INT16_MIN <= rv && rv <= INT16_MAX);
+   return rv;
+ }
+
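The visible change in this file is the switch from the shared dct_const_round_shift() to this new encoder-local helper, which keeps the range assert in the encoder. As a quick sanity check of the rounding it performs, here is a standalone sketch; the ROUND_POWER_OF_TWO macro, DCT_CONST_BITS = 14 and the cospi_16_64 value are assumptions mirroring the common vp9 headers, which are not part of this diff.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed to mirror vp9/common/vp9_common.h: right shift with round-half-up. */
#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n) - 1))) >> (n))
/* Assumed to mirror vp9/common/vp9_idct.h: the cospi_*_64 tables carry
 * 14 fractional bits. */
#define DCT_CONST_BITS 14

/* Same shape as the new encoder helper above: round, shift, and assert the
 * result still fits in a 16-bit coefficient. */
static int fdct_round_shift(int input) {
  int rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  assert(INT16_MIN <= rv && rv <= INT16_MAX);
  return rv;
}

int main(void) {
  const int cospi_16_64 = 11585;   /* round(cos(pi/4) * 2^14), assumed value */
  int sum = 100 + 23;              /* a butterfly sum, as in fdct4 below */
  int prod = sum * cospi_16_64;    /* wide intermediate: 1424955 here */
  /* prints "1424955 -> 87", i.e. roughly 123 / sqrt(2) */
  printf("%d -> %d\n", prod, fdct_round_shift(prod));
  return 0;
}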
static void fdct4(const int16_t *input, int16_t *output) {
int16_t step[4];
int temp1, temp2;

step[0] = input[0] + input[3];
step[1] = input[1] + input[2];
step[2] = input[1] - input[2];
step[3] = input[0] - input[3];

temp1 = (step[0] + step[1]) * cospi_16_64;
temp2 = (step[0] - step[1]) * cospi_16_64;
- output[0] = dct_const_round_shift(temp1);
- output[2] = dct_const_round_shift(temp2);
+ output[0] = fdct_round_shift(temp1);
+ output[2] = fdct_round_shift(temp2);
temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
- output[1] = dct_const_round_shift(temp1);
- output[3] = dct_const_round_shift(temp2);
+ output[1] = fdct_round_shift(temp1);
+ output[3] = fdct_round_shift(temp2);
}
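In equation form, fdct4 computes the following (reading the cospi_k_64 names as fixed-point cosines scaled by 2^14; their actual definitions live in vp9_idct.h and are not part of this diff), with each product reduced back to coefficient range by fdct_round_shift:

\[
\begin{aligned}
\mathrm{output}[0] &= (\mathrm{step}_0 + \mathrm{step}_1)\cos\tfrac{\pi}{4}, &
\mathrm{output}[2] &= (\mathrm{step}_0 - \mathrm{step}_1)\cos\tfrac{\pi}{4},\\
\mathrm{output}[1] &= \mathrm{step}_2\cos\tfrac{3\pi}{8} + \mathrm{step}_3\cos\tfrac{\pi}{8}, &
\mathrm{output}[3] &= -\mathrm{step}_2\cos\tfrac{\pi}{8} + \mathrm{step}_3\cos\tfrac{3\pi}{8},
\end{aligned}
\qquad \mathrm{cospi\_k\_64} \approx 2^{14}\cos\tfrac{k\pi}{64}.
\]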

void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) {
// The 2D transform is done with two passes which are actually pretty
// similar. In the first one, we transform the columns and transpose
// the results. In the second one, we transform the rows. To achieve that,
// as the first pass results are transposed, we tranpose the columns (that
// is the transposed rows) and transpose the results (so that it goes back
// in normal/row positions).
int pass;
(...skipping 23 matching lines...)
input[2] = in[2 * 4];
input[3] = in[3 * 4];
}
// Transform.
step[0] = input[0] + input[3];
step[1] = input[1] + input[2];
step[2] = input[1] - input[2];
step[3] = input[0] - input[3];
temp1 = (step[0] + step[1]) * cospi_16_64;
temp2 = (step[0] - step[1]) * cospi_16_64;
- out[0] = dct_const_round_shift(temp1);
- out[2] = dct_const_round_shift(temp2);
+ out[0] = fdct_round_shift(temp1);
+ out[2] = fdct_round_shift(temp2);
temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64;
temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64;
- out[1] = dct_const_round_shift(temp1);
- out[3] = dct_const_round_shift(temp2);
+ out[1] = fdct_round_shift(temp1);
+ out[3] = fdct_round_shift(temp2);
// Do next column (which is a transposed row in second/horizontal pass)
in++;
out += 4;
}
// Setup in/out for next pass.
in = intermediate;
out = output;
}

{
(...skipping 32 matching lines...)
x1 = sinpi_3_9 * s7;
x2 = s1 - s3 + s6;
x3 = s4;

s0 = x0 + x3;
s1 = x1;
s2 = x2 - x3;
s3 = x2 - x0 + x3;

// 1-D transform scaling factor is sqrt(2).
- output[0] = dct_const_round_shift(s0);
- output[1] = dct_const_round_shift(s1);
- output[2] = dct_const_round_shift(s2);
- output[3] = dct_const_round_shift(s3);
+ output[0] = fdct_round_shift(s0);
+ output[1] = fdct_round_shift(s1);
+ output[2] = fdct_round_shift(s2);
+ output[3] = fdct_round_shift(s3);
}

static const transform_2d FHT_4[] = {
{ fdct4, fdct4 },   // DCT_DCT = 0
{ fadst4, fdct4 },  // ADST_DCT = 1
{ fdct4, fadst4 },  // DCT_ADST = 2
{ fadst4, fadst4 }  // ADST_ADST = 3
};
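Each FHT_4 entry pairs a column transform with a row transform. The body of vp9_short_fht4x4_c is collapsed in this diff, so the following is only a schematic sketch of how such a pair is typically driven (column pass, then row pass over the intermediate block, matching the scheme described in the comment inside vp9_fdct4x4_c above). The transform_2d layout here is an assumption, and the input/output rescaling done by the real function is deliberately omitted.

#include <stdint.h>

typedef void (*transform_1d)(const int16_t *input, int16_t *output);

/* Assumed layout of the transform_2d pairs stored in FHT_4 above. */
typedef struct {
  transform_1d cols, rows;  /* vertical, then horizontal 1-D transform */
} transform_2d;

/* Hypothetical 4x4 driver: transform each column into an intermediate
 * block, then transform each row of that block. The real
 * vp9_short_fht4x4_c also rescales input and output, which the diff above
 * skips and this sketch therefore leaves out. */
void fht4x4_sketch(const transform_2d *ht, const int16_t *input,
                   int16_t *output, int stride) {
  int16_t temp_in[4], temp_out[4], intermediate[16];
  int i, j;

  for (i = 0; i < 4; ++i) {                     /* column pass */
    for (j = 0; j < 4; ++j)
      temp_in[j] = input[j * stride + i];       /* gather column i */
    ht->cols(temp_in, temp_out);
    for (j = 0; j < 4; ++j)
      intermediate[j * 4 + i] = temp_out[j];    /* write back as column i */
  }
  for (i = 0; i < 4; ++i)                       /* row pass */
    ht->rows(&intermediate[i * 4], &output[i * 4]);
}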

void vp9_short_fht4x4_c(const int16_t *input, int16_t *output,
(...skipping 42 matching lines...)

// fdct4(step, step);
x0 = s0 + s3;
x1 = s1 + s2;
x2 = s1 - s2;
x3 = s0 - s3;
t0 = (x0 + x1) * cospi_16_64;
t1 = (x0 - x1) * cospi_16_64;
t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
- output[0] = dct_const_round_shift(t0);
- output[2] = dct_const_round_shift(t2);
- output[4] = dct_const_round_shift(t1);
- output[6] = dct_const_round_shift(t3);
+ output[0] = fdct_round_shift(t0);
+ output[2] = fdct_round_shift(t2);
+ output[4] = fdct_round_shift(t1);
+ output[6] = fdct_round_shift(t3);

// Stage 2
t0 = (s6 - s5) * cospi_16_64;
t1 = (s6 + s5) * cospi_16_64;
- t2 = dct_const_round_shift(t0);
- t3 = dct_const_round_shift(t1);
+ t2 = fdct_round_shift(t0);
+ t3 = fdct_round_shift(t1);

// Stage 3
x0 = s4 + t2;
x1 = s4 - t2;
x2 = s7 - t3;
x3 = s7 + t3;

// Stage 4
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
- output[1] = dct_const_round_shift(t0);
- output[3] = dct_const_round_shift(t2);
- output[5] = dct_const_round_shift(t1);
- output[7] = dct_const_round_shift(t3);
+ output[1] = fdct_round_shift(t0);
+ output[3] = fdct_round_shift(t2);
+ output[5] = fdct_round_shift(t1);
+ output[7] = fdct_round_shift(t3);
}

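For orientation, the hunk above is the tail of fdct8: the even-indexed outputs come from the embedded 4-point DCT (the block labelled "fdct4(step, step)"), and stages 2-4 turn s4..s7 into the odd-indexed outputs. The initial butterflies that form s0..s7 are collapsed in this diff, so the identification below follows the standard even/odd fast-DCT split rather than the hidden code:

\[
X_{2k} = \mathrm{fdct4}\big(x_0\!+\!x_7,\; x_1\!+\!x_6,\; x_2\!+\!x_5,\; x_3\!+\!x_4\big)_k,
\qquad
X_{2k+1} = \text{rotations of } (x_3\!-\!x_4,\ x_2\!-\!x_5,\ x_1\!-\!x_6,\ x_0\!-\!x_7),
\]

where the odd-output rotations use the cospi_{4,12,20,28}_64 pairs of Stage 4, and every cospi product is again reduced by fdct_round_shift.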
void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) {
int i, j;
int16_t intermediate[64];

// Transform columns
{
int16_t *output = intermediate;
/*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7;
(...skipping 14 matching lines...)

// fdct4(step, step);
x0 = s0 + s3;
x1 = s1 + s2;
x2 = s1 - s2;
x3 = s0 - s3;
t0 = (x0 + x1) * cospi_16_64;
t1 = (x0 - x1) * cospi_16_64;
t2 = x2 * cospi_24_64 + x3 * cospi_8_64;
t3 = -x2 * cospi_8_64 + x3 * cospi_24_64;
- output[0 * 8] = dct_const_round_shift(t0);
- output[2 * 8] = dct_const_round_shift(t2);
- output[4 * 8] = dct_const_round_shift(t1);
- output[6 * 8] = dct_const_round_shift(t3);
+ output[0 * 8] = fdct_round_shift(t0);
+ output[2 * 8] = fdct_round_shift(t2);
+ output[4 * 8] = fdct_round_shift(t1);
+ output[6 * 8] = fdct_round_shift(t3);

// Stage 2
t0 = (s6 - s5) * cospi_16_64;
t1 = (s6 + s5) * cospi_16_64;
- t2 = dct_const_round_shift(t0);
- t3 = dct_const_round_shift(t1);
+ t2 = fdct_round_shift(t0);
+ t3 = fdct_round_shift(t1);

// Stage 3
x0 = s4 + t2;
x1 = s4 - t2;
x2 = s7 - t3;
x3 = s7 + t3;

// Stage 4
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
- output[1 * 8] = dct_const_round_shift(t0);
- output[3 * 8] = dct_const_round_shift(t2);
- output[5 * 8] = dct_const_round_shift(t1);
- output[7 * 8] = dct_const_round_shift(t3);
+ output[1 * 8] = fdct_round_shift(t0);
+ output[3 * 8] = fdct_round_shift(t2);
+ output[5 * 8] = fdct_round_shift(t1);
+ output[7 * 8] = fdct_round_shift(t3);
input++;
output++;
}
}

// Rows
for (i = 0; i < 8; ++i) {
fdct8(&intermediate[i * 8], &final_output[i * 8]);
for (j = 0; j < 8; ++j)
final_output[j + i * 8] /= 2;
(...skipping 78 matching lines...)

// fdct4(step, step);
x0 = s0 + s3;
x1 = s1 + s2;
x2 = s1 - s2;
x3 = s0 - s3;
t0 = (x0 + x1) * cospi_16_64;
t1 = (x0 - x1) * cospi_16_64;
t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
- out[0] = dct_const_round_shift(t0);
- out[4] = dct_const_round_shift(t2);
- out[8] = dct_const_round_shift(t1);
- out[12] = dct_const_round_shift(t3);
+ out[0] = fdct_round_shift(t0);
+ out[4] = fdct_round_shift(t2);
+ out[8] = fdct_round_shift(t1);
+ out[12] = fdct_round_shift(t3);

// Stage 2
t0 = (s6 - s5) * cospi_16_64;
t1 = (s6 + s5) * cospi_16_64;
- t2 = dct_const_round_shift(t0);
- t3 = dct_const_round_shift(t1);
+ t2 = fdct_round_shift(t0);
+ t3 = fdct_round_shift(t1);

// Stage 3
x0 = s4 + t2;
x1 = s4 - t2;
x2 = s7 - t3;
x3 = s7 + t3;

// Stage 4
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
- out[2] = dct_const_round_shift(t0);
- out[6] = dct_const_round_shift(t2);
- out[10] = dct_const_round_shift(t1);
- out[14] = dct_const_round_shift(t3);
+ out[2] = fdct_round_shift(t0);
+ out[6] = fdct_round_shift(t2);
+ out[10] = fdct_round_shift(t1);
+ out[14] = fdct_round_shift(t3);
}
// Work on the next eight values; step1 -> odd_results
{
// step 2
temp1 = (step1[5] - step1[2]) * cospi_16_64;
temp2 = (step1[4] - step1[3]) * cospi_16_64;
- step2[2] = dct_const_round_shift(temp1);
- step2[3] = dct_const_round_shift(temp2);
+ step2[2] = fdct_round_shift(temp1);
+ step2[3] = fdct_round_shift(temp2);
temp1 = (step1[4] + step1[3]) * cospi_16_64;
temp2 = (step1[5] + step1[2]) * cospi_16_64;
- step2[4] = dct_const_round_shift(temp1);
- step2[5] = dct_const_round_shift(temp2);
+ step2[4] = fdct_round_shift(temp1);
+ step2[5] = fdct_round_shift(temp2);
// step 3
step3[0] = step1[0] + step2[3];
step3[1] = step1[1] + step2[2];
step3[2] = step1[1] - step2[2];
step3[3] = step1[0] - step2[3];
step3[4] = step1[7] - step2[4];
step3[5] = step1[6] - step2[5];
step3[6] = step1[6] + step2[5];
step3[7] = step1[7] + step2[4];
// step 4
temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64;
- step2[1] = dct_const_round_shift(temp1);
- step2[2] = dct_const_round_shift(temp2);
+ step2[1] = fdct_round_shift(temp1);
+ step2[2] = fdct_round_shift(temp2);
temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64;
temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
- step2[5] = dct_const_round_shift(temp1);
- step2[6] = dct_const_round_shift(temp2);
+ step2[5] = fdct_round_shift(temp1);
+ step2[6] = fdct_round_shift(temp2);
// step 5
step1[0] = step3[0] + step2[1];
step1[1] = step3[0] - step2[1];
step1[2] = step3[3] - step2[2];
step1[3] = step3[3] + step2[2];
step1[4] = step3[4] + step2[5];
step1[5] = step3[4] - step2[5];
step1[6] = step3[7] - step2[6];
step1[7] = step3[7] + step2[6];
// step 6
temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
- out[1] = dct_const_round_shift(temp1);
- out[9] = dct_const_round_shift(temp2);
+ out[1] = fdct_round_shift(temp1);
+ out[9] = fdct_round_shift(temp2);
temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
- out[5] = dct_const_round_shift(temp1);
- out[13] = dct_const_round_shift(temp2);
+ out[5] = fdct_round_shift(temp1);
+ out[13] = fdct_round_shift(temp2);
temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
- out[3] = dct_const_round_shift(temp1);
- out[11] = dct_const_round_shift(temp2);
+ out[3] = fdct_round_shift(temp1);
+ out[11] = fdct_round_shift(temp2);
temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
- out[7] = dct_const_round_shift(temp1);
- out[15] = dct_const_round_shift(temp2);
+ out[7] = fdct_round_shift(temp1);
+ out[15] = fdct_round_shift(temp2);
}
// Do next column (which is a transposed row in second/horizontal pass)
in++;
out += 16;
}
// Setup in/out for next pass.
in = intermediate;
out = output;
}
}
(...skipping 13 matching lines...)
// stage 1
s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
s7 = cospi_6_64 * x6 - cospi_26_64 * x7;

- x0 = dct_const_round_shift(s0 + s4);
- x1 = dct_const_round_shift(s1 + s5);
- x2 = dct_const_round_shift(s2 + s6);
- x3 = dct_const_round_shift(s3 + s7);
- x4 = dct_const_round_shift(s0 - s4);
- x5 = dct_const_round_shift(s1 - s5);
- x6 = dct_const_round_shift(s2 - s6);
- x7 = dct_const_round_shift(s3 - s7);
+ x0 = fdct_round_shift(s0 + s4);
+ x1 = fdct_round_shift(s1 + s5);
+ x2 = fdct_round_shift(s2 + s6);
+ x3 = fdct_round_shift(s3 + s7);
+ x4 = fdct_round_shift(s0 - s4);
+ x5 = fdct_round_shift(s1 - s5);
+ x6 = fdct_round_shift(s2 - s6);
+ x7 = fdct_round_shift(s3 - s7);

// stage 2
s0 = x0;
s1 = x1;
s2 = x2;
s3 = x3;
s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
s6 = - cospi_24_64 * x6 + cospi_8_64 * x7;
s7 = cospi_8_64 * x6 + cospi_24_64 * x7;

x0 = s0 + s2;
x1 = s1 + s3;
x2 = s0 - s2;
x3 = s1 - s3;
- x4 = dct_const_round_shift(s4 + s6);
- x5 = dct_const_round_shift(s5 + s7);
- x6 = dct_const_round_shift(s4 - s6);
- x7 = dct_const_round_shift(s5 - s7);
+ x4 = fdct_round_shift(s4 + s6);
+ x5 = fdct_round_shift(s5 + s7);
+ x6 = fdct_round_shift(s4 - s6);
+ x7 = fdct_round_shift(s5 - s7);

// stage 3
s2 = cospi_16_64 * (x2 + x3);
s3 = cospi_16_64 * (x2 - x3);
s6 = cospi_16_64 * (x6 + x7);
s7 = cospi_16_64 * (x6 - x7);

- x2 = dct_const_round_shift(s2);
- x3 = dct_const_round_shift(s3);
- x6 = dct_const_round_shift(s6);
- x7 = dct_const_round_shift(s7);
+ x2 = fdct_round_shift(s2);
+ x3 = fdct_round_shift(s3);
+ x6 = fdct_round_shift(s6);
+ x7 = fdct_round_shift(s7);

output[0] = x0;
output[1] = - x4;
output[2] = x6;
output[3] = - x2;
output[4] = x3;
output[5] = - x7;
output[6] = x5;
output[7] = - x1;
}
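Each stage-1 pair in the fadst8 code above is a planar rotation/reflection butterfly; reading the cospi constants as fixed-point cosines as before, the pattern for i = 0..3 is:

\[
\begin{aligned}
s_{2i}   &= x_{2i}\cos\theta_i + x_{2i+1}\sin\theta_i,\\
s_{2i+1} &= x_{2i}\sin\theta_i - x_{2i+1}\cos\theta_i,
\end{aligned}
\qquad \theta_i = \frac{(4i+1)\pi}{32},
\]

followed by butterflies whose widened sums and differences are brought back to coefficient range by fdct_round_shift; it is at these reductions that the new helper's assert checks the rounded value still fits in int16_t.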
(...skipping 132 matching lines...)

// fdct4(step, step);
x0 = s0 + s3;
x1 = s1 + s2;
x2 = s1 - s2;
x3 = s0 - s3;
t0 = (x0 + x1) * cospi_16_64;
t1 = (x0 - x1) * cospi_16_64;
t2 = x3 * cospi_8_64 + x2 * cospi_24_64;
t3 = x3 * cospi_24_64 - x2 * cospi_8_64;
- out[0] = dct_const_round_shift(t0);
- out[4] = dct_const_round_shift(t2);
- out[8] = dct_const_round_shift(t1);
- out[12] = dct_const_round_shift(t3);
+ out[0] = fdct_round_shift(t0);
+ out[4] = fdct_round_shift(t2);
+ out[8] = fdct_round_shift(t1);
+ out[12] = fdct_round_shift(t3);

// Stage 2
t0 = (s6 - s5) * cospi_16_64;
t1 = (s6 + s5) * cospi_16_64;
- t2 = dct_const_round_shift(t0);
- t3 = dct_const_round_shift(t1);
+ t2 = fdct_round_shift(t0);
+ t3 = fdct_round_shift(t1);

// Stage 3
x0 = s4 + t2;
x1 = s4 - t2;
x2 = s7 - t3;
x3 = s7 + t3;

// Stage 4
t0 = x0 * cospi_28_64 + x3 * cospi_4_64;
t1 = x1 * cospi_12_64 + x2 * cospi_20_64;
t2 = x2 * cospi_12_64 + x1 * -cospi_20_64;
t3 = x3 * cospi_28_64 + x0 * -cospi_4_64;
- out[2] = dct_const_round_shift(t0);
- out[6] = dct_const_round_shift(t2);
- out[10] = dct_const_round_shift(t1);
- out[14] = dct_const_round_shift(t3);
+ out[2] = fdct_round_shift(t0);
+ out[6] = fdct_round_shift(t2);
+ out[10] = fdct_round_shift(t1);
+ out[14] = fdct_round_shift(t3);
}

// step 2
temp1 = (step1[5] - step1[2]) * cospi_16_64;
temp2 = (step1[4] - step1[3]) * cospi_16_64;
- step2[2] = dct_const_round_shift(temp1);
- step2[3] = dct_const_round_shift(temp2);
+ step2[2] = fdct_round_shift(temp1);
+ step2[3] = fdct_round_shift(temp2);
temp1 = (step1[4] + step1[3]) * cospi_16_64;
temp2 = (step1[5] + step1[2]) * cospi_16_64;
- step2[4] = dct_const_round_shift(temp1);
- step2[5] = dct_const_round_shift(temp2);
+ step2[4] = fdct_round_shift(temp1);
+ step2[5] = fdct_round_shift(temp2);

// step 3
step3[0] = step1[0] + step2[3];
step3[1] = step1[1] + step2[2];
step3[2] = step1[1] - step2[2];
step3[3] = step1[0] - step2[3];
step3[4] = step1[7] - step2[4];
step3[5] = step1[6] - step2[5];
step3[6] = step1[6] + step2[5];
step3[7] = step1[7] + step2[4];

// step 4
temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64;
temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64;
- step2[1] = dct_const_round_shift(temp1);
- step2[2] = dct_const_round_shift(temp2);
+ step2[1] = fdct_round_shift(temp1);
+ step2[2] = fdct_round_shift(temp2);
temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64;
temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64;
- step2[5] = dct_const_round_shift(temp1);
- step2[6] = dct_const_round_shift(temp2);
+ step2[5] = fdct_round_shift(temp1);
+ step2[6] = fdct_round_shift(temp2);

// step 5
step1[0] = step3[0] + step2[1];
step1[1] = step3[0] - step2[1];
step1[2] = step3[3] - step2[2];
step1[3] = step3[3] + step2[2];
step1[4] = step3[4] + step2[5];
step1[5] = step3[4] - step2[5];
step1[6] = step3[7] - step2[6];
step1[7] = step3[7] + step2[6];

// step 6
temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64;
temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64;
- out[1] = dct_const_round_shift(temp1);
- out[9] = dct_const_round_shift(temp2);
+ out[1] = fdct_round_shift(temp1);
+ out[9] = fdct_round_shift(temp2);

temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64;
temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64;
- out[5] = dct_const_round_shift(temp1);
- out[13] = dct_const_round_shift(temp2);
+ out[5] = fdct_round_shift(temp1);
+ out[13] = fdct_round_shift(temp2);

temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64;
temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64;
- out[3] = dct_const_round_shift(temp1);
- out[11] = dct_const_round_shift(temp2);
+ out[3] = fdct_round_shift(temp1);
+ out[11] = fdct_round_shift(temp2);

temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64;
temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64;
- out[7] = dct_const_round_shift(temp1);
- out[15] = dct_const_round_shift(temp2);
+ out[7] = fdct_round_shift(temp1);
+ out[15] = fdct_round_shift(temp2);
}

static void fadst16(const int16_t *input, int16_t *output) {
int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15;

int x0 = input[15];
int x1 = input[0];
int x2 = input[13];
int x3 = input[2];
int x4 = input[11];
(...skipping 20 matching lines...)
s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
s15 = x14 * cospi_3_64 - x15 * cospi_29_64;

- x0 = dct_const_round_shift(s0 + s8);
- x1 = dct_const_round_shift(s1 + s9);
- x2 = dct_const_round_shift(s2 + s10);
- x3 = dct_const_round_shift(s3 + s11);
- x4 = dct_const_round_shift(s4 + s12);
- x5 = dct_const_round_shift(s5 + s13);
- x6 = dct_const_round_shift(s6 + s14);
- x7 = dct_const_round_shift(s7 + s15);
- x8 = dct_const_round_shift(s0 - s8);
- x9 = dct_const_round_shift(s1 - s9);
- x10 = dct_const_round_shift(s2 - s10);
- x11 = dct_const_round_shift(s3 - s11);
- x12 = dct_const_round_shift(s4 - s12);
- x13 = dct_const_round_shift(s5 - s13);
- x14 = dct_const_round_shift(s6 - s14);
- x15 = dct_const_round_shift(s7 - s15);
+ x0 = fdct_round_shift(s0 + s8);
+ x1 = fdct_round_shift(s1 + s9);
+ x2 = fdct_round_shift(s2 + s10);
+ x3 = fdct_round_shift(s3 + s11);
+ x4 = fdct_round_shift(s4 + s12);
+ x5 = fdct_round_shift(s5 + s13);
+ x6 = fdct_round_shift(s6 + s14);
+ x7 = fdct_round_shift(s7 + s15);
+ x8 = fdct_round_shift(s0 - s8);
+ x9 = fdct_round_shift(s1 - s9);
+ x10 = fdct_round_shift(s2 - s10);
+ x11 = fdct_round_shift(s3 - s11);
+ x12 = fdct_round_shift(s4 - s12);
+ x13 = fdct_round_shift(s5 - s13);
+ x14 = fdct_round_shift(s6 - s14);
+ x15 = fdct_round_shift(s7 - s15);

// stage 2
s0 = x0;
s1 = x1;
s2 = x2;
s3 = x3;
s4 = x4;
s5 = x5;
s6 = x6;
s7 = x7;
s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
s12 = - x12 * cospi_28_64 + x13 * cospi_4_64;
s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
s14 = - x14 * cospi_12_64 + x15 * cospi_20_64;
s15 = x14 * cospi_20_64 + x15 * cospi_12_64;

x0 = s0 + s4;
x1 = s1 + s5;
x2 = s2 + s6;
x3 = s3 + s7;
x4 = s0 - s4;
x5 = s1 - s5;
x6 = s2 - s6;
x7 = s3 - s7;
- x8 = dct_const_round_shift(s8 + s12);
- x9 = dct_const_round_shift(s9 + s13);
- x10 = dct_const_round_shift(s10 + s14);
- x11 = dct_const_round_shift(s11 + s15);
- x12 = dct_const_round_shift(s8 - s12);
- x13 = dct_const_round_shift(s9 - s13);
- x14 = dct_const_round_shift(s10 - s14);
- x15 = dct_const_round_shift(s11 - s15);
+ x8 = fdct_round_shift(s8 + s12);
+ x9 = fdct_round_shift(s9 + s13);
+ x10 = fdct_round_shift(s10 + s14);
+ x11 = fdct_round_shift(s11 + s15);
+ x12 = fdct_round_shift(s8 - s12);
+ x13 = fdct_round_shift(s9 - s13);
+ x14 = fdct_round_shift(s10 - s14);
+ x15 = fdct_round_shift(s11 - s15);

// stage 3
s0 = x0;
s1 = x1;
s2 = x2;
s3 = x3;
s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
s6 = - x6 * cospi_24_64 + x7 * cospi_8_64;
s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
s8 = x8;
s9 = x9;
s10 = x10;
s11 = x11;
s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
s14 = - x14 * cospi_24_64 + x15 * cospi_8_64;
s15 = x14 * cospi_8_64 + x15 * cospi_24_64;

x0 = s0 + s2;
x1 = s1 + s3;
x2 = s0 - s2;
x3 = s1 - s3;
- x4 = dct_const_round_shift(s4 + s6);
- x5 = dct_const_round_shift(s5 + s7);
- x6 = dct_const_round_shift(s4 - s6);
- x7 = dct_const_round_shift(s5 - s7);
+ x4 = fdct_round_shift(s4 + s6);
+ x5 = fdct_round_shift(s5 + s7);
+ x6 = fdct_round_shift(s4 - s6);
+ x7 = fdct_round_shift(s5 - s7);
x8 = s8 + s10;
x9 = s9 + s11;
x10 = s8 - s10;
x11 = s9 - s11;
- x12 = dct_const_round_shift(s12 + s14);
- x13 = dct_const_round_shift(s13 + s15);
- x14 = dct_const_round_shift(s12 - s14);
- x15 = dct_const_round_shift(s13 - s15);
+ x12 = fdct_round_shift(s12 + s14);
+ x13 = fdct_round_shift(s13 + s15);
+ x14 = fdct_round_shift(s12 - s14);
+ x15 = fdct_round_shift(s13 - s15);

// stage 4
s2 = (- cospi_16_64) * (x2 + x3);
s3 = cospi_16_64 * (x2 - x3);
s6 = cospi_16_64 * (x6 + x7);
s7 = cospi_16_64 * (- x6 + x7);
s10 = cospi_16_64 * (x10 + x11);
s11 = cospi_16_64 * (- x10 + x11);
s14 = (- cospi_16_64) * (x14 + x15);
s15 = cospi_16_64 * (x14 - x15);

- x2 = dct_const_round_shift(s2);
- x3 = dct_const_round_shift(s3);
- x6 = dct_const_round_shift(s6);
- x7 = dct_const_round_shift(s7);
- x10 = dct_const_round_shift(s10);
- x11 = dct_const_round_shift(s11);
- x14 = dct_const_round_shift(s14);
- x15 = dct_const_round_shift(s15);
+ x2 = fdct_round_shift(s2);
+ x3 = fdct_round_shift(s3);
+ x6 = fdct_round_shift(s6);
+ x7 = fdct_round_shift(s7);
+ x10 = fdct_round_shift(s10);
+ x11 = fdct_round_shift(s11);
+ x14 = fdct_round_shift(s14);
+ x15 = fdct_round_shift(s15);

output[0] = x0;
output[1] = - x8;
output[2] = x12;
output[3] = - x4;
output[4] = x6;
output[5] = x14;
output[6] = x10;
output[7] = x2;
output[8] = x3;
(...skipping 447 matching lines...)
vp9_short_fht8x8(input, output, stride, tx_type);
}

void vp9_fht16x16(TX_TYPE tx_type, const int16_t *input, int16_t *output,
int stride) {
if (tx_type == DCT_DCT)
vp9_fdct16x16(input, output, stride);
else
vp9_short_fht16x16(input, output, stride, tx_type);
}
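Finally, a hedged usage sketch for the dispatch wrappers above. Only the signatures visible in this diff are relied on; the TX_TYPE/DCT_DCT definitions and the function prototypes are assumed to come from the headers this file already includes (vp9_blockd.h and vp9/encoder/vp9_dct.h), and the helper below is purely illustrative.

#include "vp9/common/vp9_blockd.h"   /* TX_TYPE, DCT_DCT (assumed location) */
#include "vp9/encoder/vp9_dct.h"     /* transform prototypes (assumed location) */

/* Illustrative only: forward-transform a flat 16x16 residual block. */
static void fht16x16_usage_sketch(void) {
  int16_t residual[16 * 16];
  int16_t coeffs[16 * 16];
  int i;
  for (i = 0; i < 16 * 16; ++i)
    residual[i] = 8;                 /* constant block: energy lands in DC */
  /* Per the wrapper above, DCT_DCT falls through to vp9_fdct16x16, so the
   * two calls below are equivalent; other tx_types use the hybrid path. */
  vp9_fht16x16(DCT_DCT, residual, coeffs, 16 /* stride in int16_t units */);
  vp9_fdct16x16(residual, coeffs, 16);
}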