| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 16 matching lines...) Expand all Loading... |
| 27 // the C-implementation of the transform). | 27 // the C-implementation of the transform). |
| 28 // | 28 // |
| 29 // The particular WRAPLOW implementation below performs strict | 29 // The particular WRAPLOW implementation below performs strict |
| 30 // overflow wrapping to match common hardware implementations. | 30 // overflow wrapping to match common hardware implementations. |
| 31 // bd of 8 uses trans_low with 16bits, need to remove 16bits | 31 // bd of 8 uses trans_low with 16bits, need to remove 16bits |
| 32 // bd of 10 uses trans_low with 18bits, need to remove 14bits | 32 // bd of 10 uses trans_low with 18bits, need to remove 14bits |
| 33 // bd of 12 uses trans_low with 20bits, need to remove 12bits | 33 // bd of 12 uses trans_low with 20bits, need to remove 12bits |
| 34 // bd of x uses trans_low with 8+x bits, need to remove 24-x bits | 34 // bd of x uses trans_low with 8+x bits, need to remove 24-x bits |
| 35 #define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd)) | 35 #define WRAPLOW(x, bd) ((((int32_t)(x)) << (24 - bd)) >> (24 - bd)) |
| 36 #else | 36 #else |
| 37 #define WRAPLOW(x, bd) (x) | 37 #define WRAPLOW(x, bd) ((int32_t)(x)) |
| 38 #endif // CONFIG_EMULATE_HARDWARE | 38 #endif // CONFIG_EMULATE_HARDWARE |
| 39 | 39 |
| 40 #if CONFIG_VP9_HIGHBITDEPTH | 40 #if CONFIG_VP9_HIGHBITDEPTH |
| 41 static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, | 41 static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, |
| 42 int bd) { | 42 int bd) { |
| 43 trans = WRAPLOW(trans, bd); | 43 trans = WRAPLOW(trans, bd); |
| 44 return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd); | 44 return clip_pixel_highbd(WRAPLOW(dest + trans, bd), bd); |
| 45 } | 45 } |
| 46 #endif // CONFIG_VP9_HIGHBITDEPTH | 46 #endif // CONFIG_VP9_HIGHBITDEPTH |
| 47 | 47 |
| (...skipping 221 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 269 for (j = 0; j < 8; ++j) { | 269 for (j = 0; j < 8; ++j) { |
| 270 for (i = 0; i < 8; ++i) | 270 for (i = 0; i < 8; ++i) |
| 271 dest[i] = clip_pixel_add(dest[i], a1); | 271 dest[i] = clip_pixel_add(dest[i], a1); |
| 272 dest += stride; | 272 dest += stride; |
| 273 } | 273 } |
| 274 } | 274 } |
| 275 | 275 |
| 276 static void iadst4(const tran_low_t *input, tran_low_t *output) { | 276 static void iadst4(const tran_low_t *input, tran_low_t *output) { |
| 277 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 277 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
| 278 | 278 |
| 279 tran_high_t x0 = input[0]; | 279 tran_low_t x0 = input[0]; |
| 280 tran_high_t x1 = input[1]; | 280 tran_low_t x1 = input[1]; |
| 281 tran_high_t x2 = input[2]; | 281 tran_low_t x2 = input[2]; |
| 282 tran_high_t x3 = input[3]; | 282 tran_low_t x3 = input[3]; |
| 283 | 283 |
| 284 if (!(x0 | x1 | x2 | x3)) { | 284 if (!(x0 | x1 | x2 | x3)) { |
| 285 output[0] = output[1] = output[2] = output[3] = 0; | 285 output[0] = output[1] = output[2] = output[3] = 0; |
| 286 return; | 286 return; |
| 287 } | 287 } |
| 288 | 288 |
| 289 s0 = sinpi_1_9 * x0; | 289 s0 = sinpi_1_9 * x0; |
| 290 s1 = sinpi_2_9 * x0; | 290 s1 = sinpi_2_9 * x0; |
| 291 s2 = sinpi_3_9 * x1; | 291 s2 = sinpi_3_9 * x1; |
| 292 s3 = sinpi_4_9 * x2; | 292 s3 = sinpi_4_9 * x2; |
| 293 s4 = sinpi_1_9 * x2; | 293 s4 = sinpi_1_9 * x2; |
| 294 s5 = sinpi_2_9 * x3; | 294 s5 = sinpi_2_9 * x3; |
| 295 s6 = sinpi_4_9 * x3; | 295 s6 = sinpi_4_9 * x3; |
| 296 s7 = x0 - x2 + x3; | 296 s7 = x0 - x2 + x3; |
| 297 | 297 |
| 298 x0 = s0 + s3 + s5; | 298 s0 = s0 + s3 + s5; |
| 299 x1 = s1 - s4 - s6; | 299 s1 = s1 - s4 - s6; |
| 300 x2 = sinpi_3_9 * s7; | 300 s3 = s2; |
| 301 x3 = s2; | 301 s2 = sinpi_3_9 * s7; |
| 302 | |
| 303 s0 = x0 + x3; | |
| 304 s1 = x1 + x3; | |
| 305 s2 = x2; | |
| 306 s3 = x0 + x1 - x3; | |
| 307 | 302 |
| 308 // 1-D transform scaling factor is sqrt(2). | 303 // 1-D transform scaling factor is sqrt(2). |
| 309 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) | 304 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
| 310 // + 1b (addition) = 29b. | 305 // + 1b (addition) = 29b. |
| 311 // Hence the output bit depth is 15b. | 306 // Hence the output bit depth is 15b. |
| 312 output[0] = WRAPLOW(dct_const_round_shift(s0), 8); | 307 output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), 8); |
| 313 output[1] = WRAPLOW(dct_const_round_shift(s1), 8); | 308 output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), 8); |
| 314 output[2] = WRAPLOW(dct_const_round_shift(s2), 8); | 309 output[2] = WRAPLOW(dct_const_round_shift(s2), 8); |
| 315 output[3] = WRAPLOW(dct_const_round_shift(s3), 8); | 310 output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), 8); |
| 316 } | 311 } |
| 317 | 312 |
| 318 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, | 313 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, |
| 319 int tx_type) { | 314 int tx_type) { |
| 320 const transform_2d IHT_4[] = { | 315 const transform_2d IHT_4[] = { |
| 321 { idct4, idct4 }, // DCT_DCT = 0 | 316 { idct4, idct4 }, // DCT_DCT = 0 |
| 322 { iadst4, idct4 }, // ADST_DCT = 1 | 317 { iadst4, idct4 }, // ADST_DCT = 1 |
| 323 { idct4, iadst4 }, // DCT_ADST = 2 | 318 { idct4, iadst4 }, // DCT_ADST = 2 |
| 324 { iadst4, iadst4 } // ADST_ADST = 3 | 319 { iadst4, iadst4 } // ADST_ADST = 3 |
| 325 }; | 320 }; |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 360 tran_high_t x6 = input[1]; | 355 tran_high_t x6 = input[1]; |
| 361 tran_high_t x7 = input[6]; | 356 tran_high_t x7 = input[6]; |
| 362 | 357 |
| 363 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { | 358 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { |
| 364 output[0] = output[1] = output[2] = output[3] = output[4] | 359 output[0] = output[1] = output[2] = output[3] = output[4] |
| 365 = output[5] = output[6] = output[7] = 0; | 360 = output[5] = output[6] = output[7] = 0; |
| 366 return; | 361 return; |
| 367 } | 362 } |
| 368 | 363 |
| 369 // stage 1 | 364 // stage 1 |
| 370 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; | 365 s0 = (int)(cospi_2_64 * x0 + cospi_30_64 * x1); |
| 371 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; | 366 s1 = (int)(cospi_30_64 * x0 - cospi_2_64 * x1); |
| 372 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; | 367 s2 = (int)(cospi_10_64 * x2 + cospi_22_64 * x3); |
| 373 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; | 368 s3 = (int)(cospi_22_64 * x2 - cospi_10_64 * x3); |
| 374 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; | 369 s4 = (int)(cospi_18_64 * x4 + cospi_14_64 * x5); |
| 375 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; | 370 s5 = (int)(cospi_14_64 * x4 - cospi_18_64 * x5); |
| 376 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; | 371 s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7); |
| 377 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; | 372 s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7); |
| 378 | 373 |
| 379 x0 = WRAPLOW(dct_const_round_shift(s0 + s4), 8); | 374 x0 = WRAPLOW(dct_const_round_shift(s0 + s4), 8); |
| 380 x1 = WRAPLOW(dct_const_round_shift(s1 + s5), 8); | 375 x1 = WRAPLOW(dct_const_round_shift(s1 + s5), 8); |
| 381 x2 = WRAPLOW(dct_const_round_shift(s2 + s6), 8); | 376 x2 = WRAPLOW(dct_const_round_shift(s2 + s6), 8); |
| 382 x3 = WRAPLOW(dct_const_round_shift(s3 + s7), 8); | 377 x3 = WRAPLOW(dct_const_round_shift(s3 + s7), 8); |
| 383 x4 = WRAPLOW(dct_const_round_shift(s0 - s4), 8); | 378 x4 = WRAPLOW(dct_const_round_shift(s0 - s4), 8); |
| 384 x5 = WRAPLOW(dct_const_round_shift(s1 - s5), 8); | 379 x5 = WRAPLOW(dct_const_round_shift(s1 - s5), 8); |
| 385 x6 = WRAPLOW(dct_const_round_shift(s2 - s6), 8); | 380 x6 = WRAPLOW(dct_const_round_shift(s2 - s6), 8); |
| 386 x7 = WRAPLOW(dct_const_round_shift(s3 - s7), 8); | 381 x7 = WRAPLOW(dct_const_round_shift(s3 - s7), 8); |
| 387 | 382 |
| 388 // stage 2 | 383 // stage 2 |
| 389 s0 = x0; | 384 s0 = (int)x0; |
| 390 s1 = x1; | 385 s1 = (int)x1; |
| 391 s2 = x2; | 386 s2 = (int)x2; |
| 392 s3 = x3; | 387 s3 = (int)x3; |
| 393 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; | 388 s4 = (int)(cospi_8_64 * x4 + cospi_24_64 * x5); |
| 394 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; | 389 s5 = (int)(cospi_24_64 * x4 - cospi_8_64 * x5); |
| 395 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; | 390 s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7); |
| 396 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; | 391 s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7); |
| 397 | 392 |
| 398 x0 = WRAPLOW(s0 + s2, 8); | 393 x0 = WRAPLOW(s0 + s2, 8); |
| 399 x1 = WRAPLOW(s1 + s3, 8); | 394 x1 = WRAPLOW(s1 + s3, 8); |
| 400 x2 = WRAPLOW(s0 - s2, 8); | 395 x2 = WRAPLOW(s0 - s2, 8); |
| 401 x3 = WRAPLOW(s1 - s3, 8); | 396 x3 = WRAPLOW(s1 - s3, 8); |
| 402 x4 = WRAPLOW(dct_const_round_shift(s4 + s6), 8); | 397 x4 = WRAPLOW(dct_const_round_shift(s4 + s6), 8); |
| 403 x5 = WRAPLOW(dct_const_round_shift(s5 + s7), 8); | 398 x5 = WRAPLOW(dct_const_round_shift(s5 + s7), 8); |
| 404 x6 = WRAPLOW(dct_const_round_shift(s4 - s6), 8); | 399 x6 = WRAPLOW(dct_const_round_shift(s4 - s6), 8); |
| 405 x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8); | 400 x7 = WRAPLOW(dct_const_round_shift(s5 - s7), 8); |
| 406 | 401 |
| 407 // stage 3 | 402 // stage 3 |
| 408 s2 = cospi_16_64 * (x2 + x3); | 403 s2 = (int)(cospi_16_64 * (x2 + x3)); |
| 409 s3 = cospi_16_64 * (x2 - x3); | 404 s3 = (int)(cospi_16_64 * (x2 - x3)); |
| 410 s6 = cospi_16_64 * (x6 + x7); | 405 s6 = (int)(cospi_16_64 * (x6 + x7)); |
| 411 s7 = cospi_16_64 * (x6 - x7); | 406 s7 = (int)(cospi_16_64 * (x6 - x7)); |
| 412 | 407 |
| 413 x2 = WRAPLOW(dct_const_round_shift(s2), 8); | 408 x2 = WRAPLOW(dct_const_round_shift(s2), 8); |
| 414 x3 = WRAPLOW(dct_const_round_shift(s3), 8); | 409 x3 = WRAPLOW(dct_const_round_shift(s3), 8); |
| 415 x6 = WRAPLOW(dct_const_round_shift(s6), 8); | 410 x6 = WRAPLOW(dct_const_round_shift(s6), 8); |
| 416 x7 = WRAPLOW(dct_const_round_shift(s7), 8); | 411 x7 = WRAPLOW(dct_const_round_shift(s7), 8); |
| 417 | 412 |
| 418 output[0] = WRAPLOW(x0, 8); | 413 output[0] = WRAPLOW(x0, 8); |
| 419 output[1] = WRAPLOW(-x4, 8); | 414 output[1] = WRAPLOW(-x4, 8); |
| 420 output[2] = WRAPLOW(x6, 8); | 415 output[2] = WRAPLOW(x6, 8); |
| 421 output[3] = WRAPLOW(-x2, 8); | 416 output[3] = WRAPLOW(-x2, 8); |
| (...skipping 1273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1695 for (j = 0; j < 8; ++j) { | 1690 for (j = 0; j < 8; ++j) { |
| 1696 for (i = 0; i < 8; ++i) | 1691 for (i = 0; i < 8; ++i) |
| 1697 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 1692 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
| 1698 dest += stride; | 1693 dest += stride; |
| 1699 } | 1694 } |
| 1700 } | 1695 } |
| 1701 | 1696 |
| 1702 static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { | 1697 static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { |
| 1703 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 1698 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
| 1704 | 1699 |
| 1705 tran_high_t x0 = input[0]; | 1700 tran_low_t x0 = input[0]; |
| 1706 tran_high_t x1 = input[1]; | 1701 tran_low_t x1 = input[1]; |
| 1707 tran_high_t x2 = input[2]; | 1702 tran_low_t x2 = input[2]; |
| 1708 tran_high_t x3 = input[3]; | 1703 tran_low_t x3 = input[3]; |
| 1709 (void) bd; | 1704 (void) bd; |
| 1710 | 1705 |
| 1711 if (!(x0 | x1 | x2 | x3)) { | 1706 if (!(x0 | x1 | x2 | x3)) { |
| 1712 vpx_memset(output, 0, 4 * sizeof(*output)); | 1707 vpx_memset(output, 0, 4 * sizeof(*output)); |
| 1713 return; | 1708 return; |
| 1714 } | 1709 } |
| 1715 | 1710 |
| 1716 s0 = sinpi_1_9 * x0; | 1711 s0 = sinpi_1_9 * x0; |
| 1717 s1 = sinpi_2_9 * x0; | 1712 s1 = sinpi_2_9 * x0; |
| 1718 s2 = sinpi_3_9 * x1; | 1713 s2 = sinpi_3_9 * x1; |
| 1719 s3 = sinpi_4_9 * x2; | 1714 s3 = sinpi_4_9 * x2; |
| 1720 s4 = sinpi_1_9 * x2; | 1715 s4 = sinpi_1_9 * x2; |
| 1721 s5 = sinpi_2_9 * x3; | 1716 s5 = sinpi_2_9 * x3; |
| 1722 s6 = sinpi_4_9 * x3; | 1717 s6 = sinpi_4_9 * x3; |
| 1723 s7 = x0 - x2 + x3; | 1718 s7 = (tran_high_t)(x0 - x2 + x3); |
| 1724 | 1719 |
| 1725 x0 = s0 + s3 + s5; | 1720 s0 = s0 + s3 + s5; |
| 1726 x1 = s1 - s4 - s6; | 1721 s1 = s1 - s4 - s6; |
| 1727 x2 = sinpi_3_9 * s7; | 1722 s3 = s2; |
| 1728 x3 = s2; | 1723 s2 = sinpi_3_9 * s7; |
| 1729 | |
| 1730 s0 = x0 + x3; | |
| 1731 s1 = x1 + x3; | |
| 1732 s2 = x2; | |
| 1733 s3 = x0 + x1 - x3; | |
| 1734 | 1724 |
| 1735 // 1-D transform scaling factor is sqrt(2). | 1725 // 1-D transform scaling factor is sqrt(2). |
| 1736 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) | 1726 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
| 1737 // + 1b (addition) = 29b. | 1727 // + 1b (addition) = 29b. |
| 1738 // Hence the output bit depth is 15b. | 1728 // Hence the output bit depth is 15b. |
| 1739 output[0] = WRAPLOW(dct_const_round_shift(s0), bd); | 1729 output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), bd); |
| 1740 output[1] = WRAPLOW(dct_const_round_shift(s1), bd); | 1730 output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), bd); |
| 1741 output[2] = WRAPLOW(dct_const_round_shift(s2), bd); | 1731 output[2] = WRAPLOW(dct_const_round_shift(s2), bd); |
| 1742 output[3] = WRAPLOW(dct_const_round_shift(s3), bd); | 1732 output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), bd); |
| 1743 } | 1733 } |
| 1744 | 1734 |
| 1745 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1735 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1746 int stride, int tx_type, int bd) { | 1736 int stride, int tx_type, int bd) { |
| 1747 const highbd_transform_2d IHT_4[] = { | 1737 const highbd_transform_2d IHT_4[] = { |
| 1748 { highbd_idct4, highbd_idct4 }, // DCT_DCT = 0 | 1738 { highbd_idct4, highbd_idct4 }, // DCT_DCT = 0 |
| 1749 { highbd_iadst4, highbd_idct4 }, // ADST_DCT = 1 | 1739 { highbd_iadst4, highbd_idct4 }, // ADST_DCT = 1 |
| 1750 { highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2 | 1740 { highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2 |
| 1751 { highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3 | 1741 { highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3 |
| 1752 }; | 1742 }; |
| (...skipping 19 matching lines...) Expand all Loading... |
| 1772 for (j = 0; j < 4; ++j) { | 1762 for (j = 0; j < 4; ++j) { |
| 1773 dest[j * stride + i] = highbd_clip_pixel_add( | 1763 dest[j * stride + i] = highbd_clip_pixel_add( |
| 1774 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); | 1764 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); |
| 1775 } | 1765 } |
| 1776 } | 1766 } |
| 1777 } | 1767 } |
| 1778 | 1768 |
| 1779 static void highbd_iadst8(const tran_low_t *input, tran_low_t *output, int bd) { | 1769 static void highbd_iadst8(const tran_low_t *input, tran_low_t *output, int bd) { |
| 1780 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 1770 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
| 1781 | 1771 |
| 1782 tran_high_t x0 = input[7]; | 1772 tran_low_t x0 = input[7]; |
| 1783 tran_high_t x1 = input[0]; | 1773 tran_low_t x1 = input[0]; |
| 1784 tran_high_t x2 = input[5]; | 1774 tran_low_t x2 = input[5]; |
| 1785 tran_high_t x3 = input[2]; | 1775 tran_low_t x3 = input[2]; |
| 1786 tran_high_t x4 = input[3]; | 1776 tran_low_t x4 = input[3]; |
| 1787 tran_high_t x5 = input[4]; | 1777 tran_low_t x5 = input[4]; |
| 1788 tran_high_t x6 = input[1]; | 1778 tran_low_t x6 = input[1]; |
| 1789 tran_high_t x7 = input[6]; | 1779 tran_low_t x7 = input[6]; |
| 1790 (void) bd; | 1780 (void) bd; |
| 1791 | 1781 |
| 1792 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { | 1782 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { |
| 1793 vpx_memset(output, 0, 8 * sizeof(*output)); | 1783 vpx_memset(output, 0, 8 * sizeof(*output)); |
| 1794 return; | 1784 return; |
| 1795 } | 1785 } |
| 1796 | 1786 |
| 1797 // stage 1 | 1787 // stage 1 |
| 1798 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; | 1788 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; |
| 1799 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; | 1789 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; |
| (...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2106 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2096 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 2107 } | 2097 } |
| 2108 } | 2098 } |
| 2109 } | 2099 } |
| 2110 | 2100 |
| 2111 static void highbd_iadst16(const tran_low_t *input, tran_low_t *output, | 2101 static void highbd_iadst16(const tran_low_t *input, tran_low_t *output, |
| 2112 int bd) { | 2102 int bd) { |
| 2113 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; | 2103 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; |
| 2114 tran_high_t s9, s10, s11, s12, s13, s14, s15; | 2104 tran_high_t s9, s10, s11, s12, s13, s14, s15; |
| 2115 | 2105 |
| 2116 tran_high_t x0 = input[15]; | 2106 tran_low_t x0 = input[15]; |
| 2117 tran_high_t x1 = input[0]; | 2107 tran_low_t x1 = input[0]; |
| 2118 tran_high_t x2 = input[13]; | 2108 tran_low_t x2 = input[13]; |
| 2119 tran_high_t x3 = input[2]; | 2109 tran_low_t x3 = input[2]; |
| 2120 tran_high_t x4 = input[11]; | 2110 tran_low_t x4 = input[11]; |
| 2121 tran_high_t x5 = input[4]; | 2111 tran_low_t x5 = input[4]; |
| 2122 tran_high_t x6 = input[9]; | 2112 tran_low_t x6 = input[9]; |
| 2123 tran_high_t x7 = input[6]; | 2113 tran_low_t x7 = input[6]; |
| 2124 tran_high_t x8 = input[7]; | 2114 tran_low_t x8 = input[7]; |
| 2125 tran_high_t x9 = input[8]; | 2115 tran_low_t x9 = input[8]; |
| 2126 tran_high_t x10 = input[5]; | 2116 tran_low_t x10 = input[5]; |
| 2127 tran_high_t x11 = input[10]; | 2117 tran_low_t x11 = input[10]; |
| 2128 tran_high_t x12 = input[3]; | 2118 tran_low_t x12 = input[3]; |
| 2129 tran_high_t x13 = input[12]; | 2119 tran_low_t x13 = input[12]; |
| 2130 tran_high_t x14 = input[1]; | 2120 tran_low_t x14 = input[1]; |
| 2131 tran_high_t x15 = input[14]; | 2121 tran_low_t x15 = input[14]; |
| 2132 (void) bd; | 2122 (void) bd; |
| 2133 | 2123 |
| 2134 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | 2124 if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 |
| 2135 | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { | 2125 | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { |
| 2136 vpx_memset(output, 0, 16 * sizeof(*output)); | 2126 vpx_memset(output, 0, 16 * sizeof(*output)); |
| 2137 return; | 2127 return; |
| 2138 } | 2128 } |
| 2139 | 2129 |
| 2140 // stage 1 | 2130 // stage 1 |
| 2141 s0 = x0 * cospi_1_64 + x1 * cospi_31_64; | 2131 s0 = x0 * cospi_1_64 + x1 * cospi_31_64; |
| (...skipping 751 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2893 | 2883 |
| 2894 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, | 2884 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, |
| 2895 uint8_t *dest, int stride, int eob, int bd) { | 2885 uint8_t *dest, int stride, int eob, int bd) { |
| 2896 if (tx_type == DCT_DCT) { | 2886 if (tx_type == DCT_DCT) { |
| 2897 vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); | 2887 vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); |
| 2898 } else { | 2888 } else { |
| 2899 vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); | 2889 vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); |
| 2900 } | 2890 } |
| 2901 } | 2891 } |
| 2902 #endif // CONFIG_VP9_HIGHBITDEPTH | 2892 #endif // CONFIG_VP9_HIGHBITDEPTH |
| OLD | NEW |