OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 16 matching lines...) Expand all Loading... |
27 // the C-implementation of the transform). | 27 // the C-implementation of the transform). |
28 // | 28 // |
29 // The particular WRAPLOW implementation below performs strict | 29 // The particular WRAPLOW implementation below performs strict |
30 // overflow wrapping to match common hardware implementations. | 30 // overflow wrapping to match common hardware implementations. |
31 // bd of 8 uses trans_low with 16bits, need to remove 16bits | 31 // bd of 8 uses trans_low with 16bits, need to remove 16bits |
32 // bd of 10 uses trans_low with 18bits, need to remove 14bits | 32 // bd of 10 uses trans_low with 18bits, need to remove 14bits |
33 // bd of 12 uses trans_low with 20bits, need to remove 12bits | 33 // bd of 12 uses trans_low with 20bits, need to remove 12bits |
34 // bd of x uses trans_low with 8+x bits, need to remove 24-x bits | 34 // bd of x uses trans_low with 8+x bits, need to remove 24-x bits |
35 #define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd)) | 35 #define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd)) |
36 #else | 36 #else |
37 #define WRAPLOW(x, bd) (x) | 37 #define WRAPLOW(x, bd) ((int32_t)(x)) |
38 #endif // CONFIG_EMULATE_HARDWARE | 38 #endif // CONFIG_EMULATE_HARDWARE |
39 | 39 |
40 #if CONFIG_VP9_HIGHBITDEPTH | 40 #if CONFIG_VP9_HIGHBITDEPTH |
41 static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, | 41 static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, |
42 int bd) { | 42 int bd) { |
43 trans = WRAPLOW(trans, bd); | 43 trans = WRAPLOW(trans, bd); |
44 return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd); | 44 return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd); |
45 } | 45 } |
46 #endif // CONFIG_VP9_HIGHBITDEPTH | 46 #endif // CONFIG_VP9_HIGHBITDEPTH |
47 | 47 |
(...skipping 221 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
269 for (j = 0; j < 8; ++j) { | 269 for (j = 0; j < 8; ++j) { |
270 for (i = 0; i < 8; ++i) | 270 for (i = 0; i < 8; ++i) |
271 dest[i] = clip_pixel_add(dest[i], a1); | 271 dest[i] = clip_pixel_add(dest[i], a1); |
272 dest += stride; | 272 dest += stride; |
273 } | 273 } |
274 } | 274 } |
275 | 275 |
276 static void iadst4(const tran_low_t *input, tran_low_t *output) { | 276 static void iadst4(const tran_low_t *input, tran_low_t *output) { |
277 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 277 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
278 | 278 |
279 tran_high_t x0 = input[0]; | 279 tran_low_t x0 = input[0]; |
280 tran_high_t x1 = input[1]; | 280 tran_low_t x1 = input[1]; |
281 tran_high_t x2 = input[2]; | 281 tran_low_t x2 = input[2]; |
282 tran_high_t x3 = input[3]; | 282 tran_low_t x3 = input[3]; |
283 | 283 |
284 if (!(x0 | x1 | x2 | x3)) { | 284 if (!(x0 | x1 | x2 | x3)) { |
285 output[0] = output[1] = output[2] = output[3] = 0; | 285 output[0] = output[1] = output[2] = output[3] = 0; |
286 return; | 286 return; |
287 } | 287 } |
288 | 288 |
289 s0 = sinpi_1_9 * x0; | 289 s0 = sinpi_1_9 * x0; |
290 s1 = sinpi_2_9 * x0; | 290 s1 = sinpi_2_9 * x0; |
291 s2 = sinpi_3_9 * x1; | 291 s2 = sinpi_3_9 * x1; |
292 s3 = sinpi_4_9 * x2; | 292 s3 = sinpi_4_9 * x2; |
293 s4 = sinpi_1_9 * x2; | 293 s4 = sinpi_1_9 * x2; |
294 s5 = sinpi_2_9 * x3; | 294 s5 = sinpi_2_9 * x3; |
295 s6 = sinpi_4_9 * x3; | 295 s6 = sinpi_4_9 * x3; |
296 s7 = x0 - x2 + x3; | 296 s7 = x0 - x2 + x3; |
297 | 297 |
298 x0 = s0 + s3 + s5; | 298 s0 = s0 + s3 + s5; |
299 x1 = s1 - s4 - s6; | 299 s1 = s1 - s4 - s6; |
300 x2 = sinpi_3_9 * s7; | 300 s3 = s2; |
301 x3 = s2; | 301 s2 = sinpi_3_9 * s7; |
302 | |
303 s0 = x0 + x3; | |
304 s1 = x1 + x3; | |
305 s2 = x2; | |
306 s3 = x0 + x1 - x3; | |
307 | 302 |
308 // 1-D transform scaling factor is sqrt(2). | 303 // 1-D transform scaling factor is sqrt(2). |
309 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) | 304 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
310 // + 1b (addition) = 29b. | 305 // + 1b (addition) = 29b. |
311 // Hence the output bit depth is 15b. | 306 // Hence the output bit depth is 15b. |
312 output[0] = WRAPLOW(dct_const_round_shift(s0), 8); | 307 output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), 8); |
313 output[1] = WRAPLOW(dct_const_round_shift(s1), 8); | 308 output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), 8); |
314 output[2] = WRAPLOW(dct_const_round_shift(s2), 8); | 309 output[2] = WRAPLOW(dct_const_round_shift(s2), 8); |
315 output[3] = WRAPLOW(dct_const_round_shift(s3), 8); | 310 output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), 8); |
316 } | 311 } |
317 | 312 |
318 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, | 313 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
319 int tx_type) { | 314 int tx_type) { |
320 const transform_2d IHT_4[] = { | 315 const transform_2d IHT_4[] = { |
321 { idct4, idct4 }, // DCT_DCT = 0 | 316 { idct4, idct4 }, // DCT_DCT = 0 |
322 { iadst4, idct4 }, // ADST_DCT = 1 | 317 { iadst4, idct4 }, // ADST_DCT = 1 |
323 { idct4, iadst4 }, // DCT_ADST = 2 | 318 { idct4, iadst4 }, // DCT_ADST = 2 |
324 { iadst4, iadst4 } // ADST_ADST = 3 | 319 { iadst4, iadst4 } // ADST_ADST = 3 |
325 }; | 320 }; |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
360 tran_high_t x6 = input[1]; | 355 tran_high_t x6 = input[1]; |
361 tran_high_t x7 = input[6]; | 356 tran_high_t x7 = input[6]; |
362 | 357 |
363 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { | 358 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { |
364 output[0] = output[1] = output[2] = output[3] = output[4] | 359 output[0] = output[1] = output[2] = output[3] = output[4] |
365 = output[5] = output[6] = output[7] = 0; | 360 = output[5] = output[6] = output[7] = 0; |
366 return; | 361 return; |
367 } | 362 } |
368 | 363 |
369 // stage 1 | 364 // stage 1 |
370 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; | 365 s0 = (int)(cospi_2_64 * x0 + cospi_30_64 * x1); |
371 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; | 366 s1 = (int)(cospi_30_64 * x0 - cospi_2_64 * x1); |
372 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; | 367 s2 = (int)(cospi_10_64 * x2 + cospi_22_64 * x3); |
373 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; | 368 s3 = (int)(cospi_22_64 * x2 - cospi_10_64 * x3); |
374 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; | 369 s4 = (int)(cospi_18_64 * x4 + cospi_14_64 * x5); |
375 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; | 370 s5 = (int)(cospi_14_64 * x4 - cospi_18_64 * x5); |
376 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; | 371 s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7); |
377 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; | 372 s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7); |
378 | 373 |
379 x0 = WRAPLOW(dct_const_round_shift(s0 + s4), 8); | 374 x0 = WRAPLOW(dct_const_round_shift(s0 + s4), 8); |
380 x1 = WRAPLOW(dct_const_round_shift(s1 + s5), 8); | 375 x1 = WRAPLOW(dct_const_round_shift(s1 + s5), 8); |
381 x2 = WRAPLOW(dct_const_round_shift(s2 + s6), 8); | 376 x2 = WRAPLOW(dct_const_round_shift(s2 + s6), 8); |
382 x3 = WRAPLOW(dct_const_round_shift(s3 + s7), 8); | 377 x3 = WRAPLOW(dct_const_round_shift(s3 + s7), 8); |
383 x4 = WRAPLOW(dct_const_round_shift(s0 - s4), 8); | 378 x4 = WRAPLOW(dct_const_round_shift(s0 - s4), 8); |
384 x5 = WRAPLOW(dct_const_round_shift(s1 - s5), 8); | 379 x5 = WRAPLOW(dct_const_round_shift(s1 - s5), 8); |
385 x6 = WRAPLOW(dct_const_round_shift(s2 - s6), 8); | 380 x6 = WRAPLOW(dct_const_round_shift(s2 - s6), 8); |
386 x7 = WRAPLOW(dct_const_round_shift(s3 - s7), 8); | 381 x7 = WRAPLOW(dct_const_round_shift(s3 - s7), 8); |
387 | 382 |
388 // stage 2 | 383 // stage 2 |
389 s0 = x0; | 384 s0 = (int)x0; |
390 s1 = x1; | 385 s1 = (int)x1; |
391 s2 = x2; | 386 s2 = (int)x2; |
392 s3 = x3; | 387 s3 = (int)x3; |
393 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; | 388 s4 = (int)(cospi_8_64 * x4 + cospi_24_64 * x5); |
394 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; | 389 s5 = (int)(cospi_24_64 * x4 - cospi_8_64 * x5); |
395 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; | 390 s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7); |
396 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; | 391 s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7); |
397 | 392 |
398 x0 = WRAPLOW(s0 + s2, 8); | 393 x0 = WRAPLOW(s0 + s2, 8); |
399 x1 = WRAPLOW(s1 + s3, 8); | 394 x1 = WRAPLOW(s1 + s3, 8); |
400 x2 = WRAPLOW(s0 - s2, 8); | 395 x2 = WRAPLOW(s0 - s2, 8); |
401 x3 = WRAPLOW(s1 - s3, 8); | 396 x3 = WRAPLOW(s1 - s3, 8); |
402 x4 = WRAPLOW(dct_const_round_shift(s4 + s6), 8); | 397 x4 = WRAPLOW(dct_const_round_shift(s4 + s6), 8); |
403 x5 = WRAPLOW(dct_const_round_shift(s5 + s7), 8); | 398 x5 = WRAPLOW(dct_const_round_shift(s5 + s7), 8); |
404 x6 = WRAPLOW(dct_const_round_shift(s4 - s6), 8); | 399 x6 = WRAPLOW(dct_const_round_shift(s4 - s6), 8); |
405 x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8); | 400 x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8); |
406 | 401 |
407 // stage 3 | 402 // stage 3 |
408 s2 = cospi_16_64 * (x2 + x3); | 403 s2 = (int)(cospi_16_64 * (x2 + x3)); |
409 s3 = cospi_16_64 * (x2 - x3); | 404 s3 = (int)(cospi_16_64 * (x2 - x3)); |
410 s6 = cospi_16_64 * (x6 + x7); | 405 s6 = (int)(cospi_16_64 * (x6 + x7)); |
411 s7 = cospi_16_64 * (x6 - x7); | 406 s7 = (int)(cospi_16_64 * (x6 - x7)); |
412 | 407 |
413 x2 = WRAPLOW(dct_const_round_shift(s2), 8); | 408 x2 = WRAPLOW(dct_const_round_shift(s2), 8); |
414 x3 = WRAPLOW(dct_const_round_shift(s3), 8); | 409 x3 = WRAPLOW(dct_const_round_shift(s3), 8); |
415 x6 = WRAPLOW(dct_const_round_shift(s6), 8); | 410 x6 = WRAPLOW(dct_const_round_shift(s6), 8); |
416 x7 = WRAPLOW(dct_const_round_shift(s7), 8); | 411 x7 = WRAPLOW(dct_const_round_shift(s7), 8); |
417 | 412 |
418 output[0] = WRAPLOW(x0, 8); | 413 output[0] = WRAPLOW(x0, 8); |
419 output[1] = WRAPLOW(-x4, 8); | 414 output[1] = WRAPLOW(-x4, 8); |
420 output[2] = WRAPLOW(x6, 8); | 415 output[2] = WRAPLOW(x6, 8); |
421 output[3] = WRAPLOW(-x2, 8); | 416 output[3] = WRAPLOW(-x2, 8); |
(...skipping 1273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1695 for (j = 0; j < 8; ++j) { | 1690 for (j = 0; j < 8; ++j) { |
1696 for (i = 0; i < 8; ++i) | 1691 for (i = 0; i < 8; ++i) |
1697 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 1692 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
1698 dest += stride; | 1693 dest += stride; |
1699 } | 1694 } |
1700 } | 1695 } |
1701 | 1696 |
1702 static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { | 1697 static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { |
1703 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 1698 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
1704 | 1699 |
1705 tran_high_t x0 = input[0]; | 1700 tran_low_t x0 = input[0]; |
1706 tran_high_t x1 = input[1]; | 1701 tran_low_t x1 = input[1]; |
1707 tran_high_t x2 = input[2]; | 1702 tran_low_t x2 = input[2]; |
1708 tran_high_t x3 = input[3]; | 1703 tran_low_t x3 = input[3]; |
1709 (void) bd; | 1704 (void) bd; |
1710 | 1705 |
1711 if (!(x0 | x1 | x2 | x3)) { | 1706 if (!(x0 | x1 | x2 | x3)) { |
1712 vpx_memset(output, 0, 4 * sizeof(*output)); | 1707 vpx_memset(output, 0, 4 * sizeof(*output)); |
1713 return; | 1708 return; |
1714 } | 1709 } |
1715 | 1710 |
1716 s0 = sinpi_1_9 * x0; | 1711 s0 = sinpi_1_9 * x0; |
1717 s1 = sinpi_2_9 * x0; | 1712 s1 = sinpi_2_9 * x0; |
1718 s2 = sinpi_3_9 * x1; | 1713 s2 = sinpi_3_9 * x1; |
1719 s3 = sinpi_4_9 * x2; | 1714 s3 = sinpi_4_9 * x2; |
1720 s4 = sinpi_1_9 * x2; | 1715 s4 = sinpi_1_9 * x2; |
1721 s5 = sinpi_2_9 * x3; | 1716 s5 = sinpi_2_9 * x3; |
1722 s6 = sinpi_4_9 * x3; | 1717 s6 = sinpi_4_9 * x3; |
1723 s7 = x0 - x2 + x3; | 1718 s7 = (tran_high_t)(x0 - x2 + x3); |
1724 | 1719 |
1725 x0 = s0 + s3 + s5; | 1720 s0 = s0 + s3 + s5; |
1726 x1 = s1 - s4 - s6; | 1721 s1 = s1 - s4 - s6; |
1727 x2 = sinpi_3_9 * s7; | 1722 s3 = s2; |
1728 x3 = s2; | 1723 s2 = sinpi_3_9 * s7; |
1729 | |
1730 s0 = x0 + x3; | |
1731 s1 = x1 + x3; | |
1732 s2 = x2; | |
1733 s3 = x0 + x1 - x3; | |
1734 | 1724 |
1735 // 1-D transform scaling factor is sqrt(2). | 1725 // 1-D transform scaling factor is sqrt(2). |
1736 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) | 1726 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
1737 // + 1b (addition) = 29b. | 1727 // + 1b (addition) = 29b. |
1738 // Hence the output bit depth is 15b. | 1728 // Hence the output bit depth is 15b. |
1739 output[0] = WRAPLOW(dct_const_round_shift(s0), bd); | 1729 output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), bd); |
1740 output[1] = WRAPLOW(dct_const_round_shift(s1), bd); | 1730 output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), bd); |
1741 output[2] = WRAPLOW(dct_const_round_shift(s2), bd); | 1731 output[2] = WRAPLOW(dct_const_round_shift(s2), bd); |
1742 output[3] = WRAPLOW(dct_const_round_shift(s3), bd); | 1732 output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), bd); |
1743 } | 1733 } |
1744 | 1734 |
1745 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1735 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
1746 int stride, int tx_type, int bd) { | 1736 int stride, int tx_type, int bd) { |
1747 const highbd_transform_2d IHT_4[] = { | 1737 const highbd_transform_2d IHT_4[] = { |
1748 { highbd_idct4, highbd_idct4 }, // DCT_DCT = 0 | 1738 { highbd_idct4, highbd_idct4 }, // DCT_DCT = 0 |
1749 { highbd_iadst4, highbd_idct4 }, // ADST_DCT = 1 | 1739 { highbd_iadst4, highbd_idct4 }, // ADST_DCT = 1 |
1750 { highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2 | 1740 { highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2 |
1751 { highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3 | 1741 { highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3 |
1752 }; | 1742 }; |
(...skipping 19 matching lines...) Expand all Loading... |
1772 for (j = 0; j < 4; ++j) { | 1762 for (j = 0; j < 4; ++j) { |
1773 dest[j * stride + i] = highbd_clip_pixel_add( | 1763 dest[j * stride + i] = highbd_clip_pixel_add( |
1774 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); | 1764 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); |
1775 } | 1765 } |
1776 } | 1766 } |
1777 } | 1767 } |
1778 | 1768 |
1779 static void highbd_iadst8(const tran_low_t *input, tran_low_t *output, int bd) { | 1769 static void highbd_iadst8(const tran_low_t *input, tran_low_t *output, int bd) { |
1780 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 1770 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
1781 | 1771 |
1782 tran_high_t x0 = input[7]; | 1772 tran_low_t x0 = input[7]; |
1783 tran_high_t x1 = input[0]; | 1773 tran_low_t x1 = input[0]; |
1784 tran_high_t x2 = input[5]; | 1774 tran_low_t x2 = input[5]; |
1785 tran_high_t x3 = input[2]; | 1775 tran_low_t x3 = input[2]; |
1786 tran_high_t x4 = input[3]; | 1776 tran_low_t x4 = input[3]; |
1787 tran_high_t x5 = input[4]; | 1777 tran_low_t x5 = input[4]; |
1788 tran_high_t x6 = input[1]; | 1778 tran_low_t x6 = input[1]; |
1789 tran_high_t x7 = input[6]; | 1779 tran_low_t x7 = input[6]; |
1790 (void) bd; | 1780 (void) bd; |
1791 | 1781 |
1792 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { | 1782 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { |
1793 vpx_memset(output, 0, 8 * sizeof(*output)); | 1783 vpx_memset(output, 0, 8 * sizeof(*output)); |
1794 return; | 1784 return; |
1795 } | 1785 } |
1796 | 1786 |
1797 // stage 1 | 1787 // stage 1 |
1798 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; | 1788 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; |
1799 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; | 1789 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; |
(...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2106 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2096 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
2107 } | 2097 } |
2108 } | 2098 } |
2109 } | 2099 } |
2110 | 2100 |
2111 static void highbd_iadst16(const tran_low_t *input, tran_low_t *output, | 2101 static void highbd_iadst16(const tran_low_t *input, tran_low_t *output, |
2112 int bd) { | 2102 int bd) { |
2113 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; | 2103 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; |
2114 tran_high_t s9, s10, s11, s12, s13, s14, s15; | 2104 tran_high_t s9, s10, s11, s12, s13, s14, s15; |
2115 | 2105 |
2116 tran_high_t x0 = input[15]; | 2106 tran_low_t x0 = input[15]; |
2117 tran_high_t x1 = input[0]; | 2107 tran_low_t x1 = input[0]; |
2118 tran_high_t x2 = input[13]; | 2108 tran_low_t x2 = input[13]; |
2119 tran_high_t x3 = input[2]; | 2109 tran_low_t x3 = input[2]; |
2120 tran_high_t x4 = input[11]; | 2110 tran_low_t x4 = input[11]; |
2121 tran_high_t x5 = input[4]; | 2111 tran_low_t x5 = input[4]; |
2122 tran_high_t x6 = input[9]; | 2112 tran_low_t x6 = input[9]; |
2123 tran_high_t x7 = input[6]; | 2113 tran_low_t x7 = input[6]; |
2124 tran_high_t x8 = input[7]; | 2114 tran_low_t x8 = input[7]; |
2125 tran_high_t x9 = input[8]; | 2115 tran_low_t x9 = input[8]; |
2126 tran_high_t x10 = input[5]; | 2116 tran_low_t x10 = input[5]; |
2127 tran_high_t x11 = input[10]; | 2117 tran_low_t x11 = input[10]; |
2128 tran_high_t x12 = input[3]; | 2118 tran_low_t x12 = input[3]; |
2129 tran_high_t x13 = input[12]; | 2119 tran_low_t x13 = input[12]; |
2130 tran_high_t x14 = input[1]; | 2120 tran_low_t x14 = input[1]; |
2131 tran_high_t x15 = input[14]; | 2121 tran_low_t x15 = input[14]; |
2132 (void) bd; | 2122 (void) bd; |
2133 | 2123 |
2134 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | 2124 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 |
2135 | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { | 2125 | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { |
2136 vpx_memset(output, 0, 16 * sizeof(*output)); | 2126 vpx_memset(output, 0, 16 * sizeof(*output)); |
2137 return; | 2127 return; |
2138 } | 2128 } |
2139 | 2129 |
2140 // stage 1 | 2130 // stage 1 |
2141 s0 = x0 * cospi_1_64 + x1 * cospi_31_64; | 2131 s0 = x0 * cospi_1_64 + x1 * cospi_31_64; |
(...skipping 751 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2893 | 2883 |
2894 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, | 2884 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, |
2895 uint8_t *dest, int stride, int eob, int bd) { | 2885 uint8_t *dest, int stride, int eob, int bd) { |
2896 if (tx_type == DCT_DCT) { | 2886 if (tx_type == DCT_DCT) { |
2897 vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); | 2887 vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); |
2898 } else { | 2888 } else { |
2899 vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); | 2889 vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); |
2900 } | 2890 } |
2901 } | 2891 } |
2902 #endif // CONFIG_VP9_HIGHBITDEPTH | 2892 #endif // CONFIG_VP9_HIGHBITDEPTH |
OLD | NEW |