OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 25 matching lines...) Expand all Loading... |
36 temp1 = (step[0] + step[1]) * cospi_16_64; | 36 temp1 = (step[0] + step[1]) * cospi_16_64; |
37 temp2 = (step[0] - step[1]) * cospi_16_64; | 37 temp2 = (step[0] - step[1]) * cospi_16_64; |
38 output[0] = fdct_round_shift(temp1); | 38 output[0] = fdct_round_shift(temp1); |
39 output[2] = fdct_round_shift(temp2); | 39 output[2] = fdct_round_shift(temp2); |
40 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; | 40 temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; |
41 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; | 41 temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; |
42 output[1] = fdct_round_shift(temp1); | 42 output[1] = fdct_round_shift(temp1); |
43 output[3] = fdct_round_shift(temp2); | 43 output[3] = fdct_round_shift(temp2); |
44 } | 44 } |
45 | 45 |
// Fast-path 4x4 forward transform that produces only the DC term:
// the sum of the 4x4 residual block, scaled by 2. output[1] is zeroed;
// callers presumably treat the remaining AC coefficients as zero — verify
// against call sites.
//
//   input:  4x4 block of residuals, rows separated by `stride` elements
//   output: output[0] = DC coefficient, output[1] = 0
void vp9_fdct4x4_1_c(const int16_t *input, int16_t *output, int stride) {
  int r, c;
  // Accumulate in a plain int: an int16_t accumulator can invoke
  // signed-overflow UB before the final scale.
  int sum = 0;
  for (r = 0; r < 4; ++r)
    for (c = 0; c < 4; ++c)
      sum += input[r * stride + c];

  // Multiply instead of `sum << 1`: left-shifting a negative signed value
  // is undefined behavior in C.
  output[0] = (int16_t)(sum * 2);
  output[1] = 0;
}
| 56 |
46 void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) { | 57 void vp9_fdct4x4_c(const int16_t *input, int16_t *output, int stride) { |
47 // The 2D transform is done with two passes which are actually pretty | 58 // The 2D transform is done with two passes which are actually pretty |
48 // similar. In the first one, we transform the columns and transpose | 59 // similar. In the first one, we transform the columns and transpose |
49 // the results. In the second one, we transform the rows. To achieve that, | 60 // the results. In the second one, we transform the rows. To achieve that, |
50 // as the first pass results are transposed, we transpose the columns (that | 61 // as the first pass results are transposed, we transpose the columns (that |
51 // is the transposed rows) and transpose the results (so that it goes back | 62 // is the transposed rows) and transpose the results (so that it goes back |
52 // in normal/row positions). | 63 // in normal/row positions). |
53 int pass; | 64 int pass; |
54 // We need an intermediate buffer between passes. | 65 // We need an intermediate buffer between passes. |
55 int16_t intermediate[4 * 4]; | 66 int16_t intermediate[4 * 4]; |
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
233 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; | 244 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; |
234 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; | 245 t1 = x1 * cospi_12_64 + x2 * cospi_20_64; |
235 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; | 246 t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; |
236 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; | 247 t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; |
237 output[1] = fdct_round_shift(t0); | 248 output[1] = fdct_round_shift(t0); |
238 output[3] = fdct_round_shift(t2); | 249 output[3] = fdct_round_shift(t2); |
239 output[5] = fdct_round_shift(t1); | 250 output[5] = fdct_round_shift(t1); |
240 output[7] = fdct_round_shift(t3); | 251 output[7] = fdct_round_shift(t3); |
241 } | 252 } |
242 | 253 |
// Fast-path 8x8 forward transform that produces only the DC term:
// the unscaled sum of the 8x8 residual block. output[1] is zeroed;
// callers presumably treat the remaining AC coefficients as zero — verify
// against call sites.
//
//   input:  8x8 block of residuals, rows separated by `stride` elements
//   output: output[0] = DC coefficient, output[1] = 0
void vp9_fdct8x8_1_c(const int16_t *input, int16_t *output, int stride) {
  int r, c;
  // Accumulate in a plain int: with arbitrary int16_t inputs a 64-term
  // int16_t sum can overflow (signed-overflow UB).
  int sum = 0;
  for (r = 0; r < 8; ++r)
    for (c = 0; c < 8; ++c)
      sum += input[r * stride + c];

  output[0] = (int16_t)sum;
  output[1] = 0;
}
| 264 |
243 void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) { | 265 void vp9_fdct8x8_c(const int16_t *input, int16_t *final_output, int stride) { |
244 int i, j; | 266 int i, j; |
245 int16_t intermediate[64]; | 267 int16_t intermediate[64]; |
246 | 268 |
247 // Transform columns | 269 // Transform columns |
248 { | 270 { |
249 int16_t *output = intermediate; | 271 int16_t *output = intermediate; |
250 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; | 272 /*canbe16*/ int s0, s1, s2, s3, s4, s5, s6, s7; |
251 /*needs32*/ int t0, t1, t2, t3; | 273 /*needs32*/ int t0, t1, t2, t3; |
252 /*canbe16*/ int x0, x1, x2, x3; | 274 /*canbe16*/ int x0, x1, x2, x3; |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
304 } | 326 } |
305 | 327 |
306 // Rows | 328 // Rows |
307 for (i = 0; i < 8; ++i) { | 329 for (i = 0; i < 8; ++i) { |
308 fdct8(&intermediate[i * 8], &final_output[i * 8]); | 330 fdct8(&intermediate[i * 8], &final_output[i * 8]); |
309 for (j = 0; j < 8; ++j) | 331 for (j = 0; j < 8; ++j) |
310 final_output[j + i * 8] /= 2; | 332 final_output[j + i * 8] /= 2; |
311 } | 333 } |
312 } | 334 } |
313 | 335 |
// Fast-path 16x16 forward transform that produces only the DC term:
// the sum of the 16x16 residual block, scaled by 1/2. output[1] is zeroed;
// callers presumably treat the remaining AC coefficients as zero — verify
// against call sites.
//
//   input:  16x16 block of residuals, rows separated by `stride` elements
//   output: output[0] = DC coefficient, output[1] = 0
void vp9_fdct16x16_1_c(const int16_t *input, int16_t *output, int stride) {
  int r, c;
  // Accumulate in a plain int: 256 residuals of magnitude 255 sum to
  // 65280, which overflows an int16_t accumulator (signed-overflow UB)
  // before the >> 1 can bring the value back into range.
  int sum = 0;
  for (r = 0; r < 16; ++r)
    for (c = 0; c < 16; ++c)
      sum += input[r * stride + c];

  output[0] = (int16_t)(sum >> 1);
  output[1] = 0;
}
| 346 |
314 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) { | 347 void vp9_fdct16x16_c(const int16_t *input, int16_t *output, int stride) { |
315 // The 2D transform is done with two passes which are actually pretty | 348 // The 2D transform is done with two passes which are actually pretty |
316 // similar. In the first one, we transform the columns and transpose | 349 // similar. In the first one, we transform the columns and transpose |
317 // the results. In the second one, we transform the rows. To achieve that, | 350 // the results. In the second one, we transform the rows. To achieve that, |
318 // as the first pass results are transposed, we transpose the columns (that | 351 // as the first pass results are transposed, we transpose the columns (that |
319 // is the transposed rows) and transpose the results (so that it goes back | 352 // is the transposed rows) and transpose the results (so that it goes back |
320 // in normal/row positions). | 353 // in normal/row positions). |
321 int pass; | 354 int pass; |
322 // We need an intermediate buffer between passes. | 355 // We need an intermediate buffer between passes. |
323 int16_t intermediate[256]; | 356 int16_t intermediate[256]; |
(...skipping 114 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
438 step3[0] = step1[0] + step2[3]; | 471 step3[0] = step1[0] + step2[3]; |
439 step3[1] = step1[1] + step2[2]; | 472 step3[1] = step1[1] + step2[2]; |
440 step3[2] = step1[1] - step2[2]; | 473 step3[2] = step1[1] - step2[2]; |
441 step3[3] = step1[0] - step2[3]; | 474 step3[3] = step1[0] - step2[3]; |
442 step3[4] = step1[7] - step2[4]; | 475 step3[4] = step1[7] - step2[4]; |
443 step3[5] = step1[6] - step2[5]; | 476 step3[5] = step1[6] - step2[5]; |
444 step3[6] = step1[6] + step2[5]; | 477 step3[6] = step1[6] + step2[5]; |
445 step3[7] = step1[7] + step2[4]; | 478 step3[7] = step1[7] + step2[4]; |
446 // step 4 | 479 // step 4 |
447 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; | 480 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; |
448 temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64; | 481 temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64; |
449 step2[1] = fdct_round_shift(temp1); | 482 step2[1] = fdct_round_shift(temp1); |
450 step2[2] = fdct_round_shift(temp2); | 483 step2[2] = fdct_round_shift(temp2); |
451 temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64; | 484 temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64; |
452 temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; | 485 temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; |
453 step2[5] = fdct_round_shift(temp1); | 486 step2[5] = fdct_round_shift(temp1); |
454 step2[6] = fdct_round_shift(temp2); | 487 step2[6] = fdct_round_shift(temp2); |
455 // step 5 | 488 // step 5 |
456 step1[0] = step3[0] + step2[1]; | 489 step1[0] = step3[0] + step2[1]; |
457 step1[1] = step3[0] - step2[1]; | 490 step1[1] = step3[0] - step2[1]; |
458 step1[2] = step3[3] - step2[2]; | 491 step1[2] = step3[3] + step2[2]; |
459 step1[3] = step3[3] + step2[2]; | 492 step1[3] = step3[3] - step2[2]; |
460 step1[4] = step3[4] + step2[5]; | 493 step1[4] = step3[4] - step2[5]; |
461 step1[5] = step3[4] - step2[5]; | 494 step1[5] = step3[4] + step2[5]; |
462 step1[6] = step3[7] - step2[6]; | 495 step1[6] = step3[7] - step2[6]; |
463 step1[7] = step3[7] + step2[6]; | 496 step1[7] = step3[7] + step2[6]; |
464 // step 6 | 497 // step 6 |
465 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; | 498 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; |
466 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; | 499 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; |
467 out[1] = fdct_round_shift(temp1); | 500 out[1] = fdct_round_shift(temp1); |
468 out[9] = fdct_round_shift(temp2); | 501 out[9] = fdct_round_shift(temp2); |
469 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; | 502 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; |
470 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; | 503 temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; |
471 out[5] = fdct_round_shift(temp1); | 504 out[5] = fdct_round_shift(temp1); |
(...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
748 step3[1] = step1[1] + step2[2]; | 781 step3[1] = step1[1] + step2[2]; |
749 step3[2] = step1[1] - step2[2]; | 782 step3[2] = step1[1] - step2[2]; |
750 step3[3] = step1[0] - step2[3]; | 783 step3[3] = step1[0] - step2[3]; |
751 step3[4] = step1[7] - step2[4]; | 784 step3[4] = step1[7] - step2[4]; |
752 step3[5] = step1[6] - step2[5]; | 785 step3[5] = step1[6] - step2[5]; |
753 step3[6] = step1[6] + step2[5]; | 786 step3[6] = step1[6] + step2[5]; |
754 step3[7] = step1[7] + step2[4]; | 787 step3[7] = step1[7] + step2[4]; |
755 | 788 |
756 // step 4 | 789 // step 4 |
757 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; | 790 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; |
758 temp2 = step3[2] * -cospi_24_64 - step3[5] * cospi_8_64; | 791 temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64; |
759 step2[1] = fdct_round_shift(temp1); | 792 step2[1] = fdct_round_shift(temp1); |
760 step2[2] = fdct_round_shift(temp2); | 793 step2[2] = fdct_round_shift(temp2); |
761 temp1 = step3[2] * -cospi_8_64 + step3[5] * cospi_24_64; | 794 temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64; |
762 temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; | 795 temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; |
763 step2[5] = fdct_round_shift(temp1); | 796 step2[5] = fdct_round_shift(temp1); |
764 step2[6] = fdct_round_shift(temp2); | 797 step2[6] = fdct_round_shift(temp2); |
765 | 798 |
766 // step 5 | 799 // step 5 |
767 step1[0] = step3[0] + step2[1]; | 800 step1[0] = step3[0] + step2[1]; |
768 step1[1] = step3[0] - step2[1]; | 801 step1[1] = step3[0] - step2[1]; |
769 step1[2] = step3[3] - step2[2]; | 802 step1[2] = step3[3] + step2[2]; |
770 step1[3] = step3[3] + step2[2]; | 803 step1[3] = step3[3] - step2[2]; |
771 step1[4] = step3[4] + step2[5]; | 804 step1[4] = step3[4] - step2[5]; |
772 step1[5] = step3[4] - step2[5]; | 805 step1[5] = step3[4] + step2[5]; |
773 step1[6] = step3[7] - step2[6]; | 806 step1[6] = step3[7] - step2[6]; |
774 step1[7] = step3[7] + step2[6]; | 807 step1[7] = step3[7] + step2[6]; |
775 | 808 |
776 // step 6 | 809 // step 6 |
777 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; | 810 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; |
778 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; | 811 temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; |
779 out[1] = fdct_round_shift(temp1); | 812 out[1] = fdct_round_shift(temp1); |
780 out[9] = fdct_round_shift(temp2); | 813 out[9] = fdct_round_shift(temp2); |
781 | 814 |
782 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; | 815 temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; |
(...skipping 539 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1322 output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64); | 1355 output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64); |
1323 output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64); | 1356 output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64); |
1324 output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64); | 1357 output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64); |
1325 output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64); | 1358 output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64); |
1326 output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64); | 1359 output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64); |
1327 output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64); | 1360 output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64); |
1328 output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64); | 1361 output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64); |
1329 output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64); | 1362 output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64); |
1330 } | 1363 } |
1331 | 1364 |
// Fast-path 32x32 forward transform that produces only the DC term:
// the sum of the 32x32 residual block, scaled by 1/8. output[1] is zeroed;
// callers presumably treat the remaining AC coefficients as zero — verify
// against call sites.
//
//   input:  32x32 block of residuals, rows separated by `stride` elements
//   output: output[0] = DC coefficient, output[1] = 0
void vp9_fdct32x32_1_c(const int16_t *input, int16_t *output, int stride) {
  int r, c;
  // Accumulate in a plain int: 1024 residuals of magnitude 255 sum to
  // 261120, far beyond int16_t range (signed-overflow UB) before the
  // >> 3 can bring the value back into range.
  int sum = 0;
  for (r = 0; r < 32; ++r)
    for (c = 0; c < 32; ++c)
      sum += input[r * stride + c];

  output[0] = (int16_t)(sum >> 3);
  output[1] = 0;
}
| 1375 |
1332 void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) { | 1376 void vp9_fdct32x32_c(const int16_t *input, int16_t *out, int stride) { |
1333 int i, j; | 1377 int i, j; |
1334 int output[32 * 32]; | 1378 int output[32 * 32]; |
1335 | 1379 |
1336 // Columns | 1380 // Columns |
1337 for (i = 0; i < 32; ++i) { | 1381 for (i = 0; i < 32; ++i) { |
1338 int temp_in[32], temp_out[32]; | 1382 int temp_in[32], temp_out[32]; |
1339 for (j = 0; j < 32; ++j) | 1383 for (j = 0; j < 32; ++j) |
1340 temp_in[j] = input[j * stride + i] * 4; | 1384 temp_in[j] = input[j * stride + i] * 4; |
1341 fdct32(temp_in, temp_out, 0); | 1385 fdct32(temp_in, temp_out, 0); |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1377 // Rows | 1421 // Rows |
1378 for (i = 0; i < 32; ++i) { | 1422 for (i = 0; i < 32; ++i) { |
1379 int temp_in[32], temp_out[32]; | 1423 int temp_in[32], temp_out[32]; |
1380 for (j = 0; j < 32; ++j) | 1424 for (j = 0; j < 32; ++j) |
1381 temp_in[j] = output[j + i * 32]; | 1425 temp_in[j] = output[j + i * 32]; |
1382 fdct32(temp_in, temp_out, 1); | 1426 fdct32(temp_in, temp_out, 1); |
1383 for (j = 0; j < 32; ++j) | 1427 for (j = 0; j < 32; ++j) |
1384 out[j + i * 32] = temp_out[j]; | 1428 out[j + i * 32] = temp_out[j]; |
1385 } | 1429 } |
1386 } | 1430 } |
OLD | NEW |