OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(... 1499 unchanged lines skipped ...)
1510 } | 1510 } |
1511 } | 1511 } |
1512 | 1512 |
1513 void vp9_highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd) { | 1513 void vp9_highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd) { |
1514 tran_low_t step[4]; | 1514 tran_low_t step[4]; |
1515 tran_high_t temp1, temp2; | 1515 tran_high_t temp1, temp2; |
1516 (void) bd; | 1516 (void) bd; |
1517 // stage 1 | 1517 // stage 1 |
1518 temp1 = (input[0] + input[2]) * cospi_16_64; | 1518 temp1 = (input[0] + input[2]) * cospi_16_64; |
1519 temp2 = (input[0] - input[2]) * cospi_16_64; | 1519 temp2 = (input[0] - input[2]) * cospi_16_64; |
1520 step[0] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1520 step[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1521 step[1] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1521 step[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1522 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; | 1522 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; |
1523 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; | 1523 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; |
1524 step[2] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1524 step[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1525 step[3] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1525 step[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1526 | 1526 |
1527 // stage 2 | 1527 // stage 2 |
1528 output[0] = WRAPLOW(step[0] + step[3], bd); | 1528 output[0] = WRAPLOW(step[0] + step[3], bd); |
1529 output[1] = WRAPLOW(step[1] + step[2], bd); | 1529 output[1] = WRAPLOW(step[1] + step[2], bd); |
1530 output[2] = WRAPLOW(step[1] - step[2], bd); | 1530 output[2] = WRAPLOW(step[1] - step[2], bd); |
1531 output[3] = WRAPLOW(step[0] - step[3], bd); | 1531 output[3] = WRAPLOW(step[0] - step[3], bd); |
1532 } | 1532 } |
1533 | 1533 |
1534 void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1534 void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
1535 int stride, int bd) { | 1535 int stride, int bd) { |
(... 19 unchanged lines skipped ...)
1555 dest[j * stride + i] = highbd_clip_pixel_add( | 1555 dest[j * stride + i] = highbd_clip_pixel_add( |
1556 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); | 1556 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); |
1557 } | 1557 } |
1558 } | 1558 } |
1559 } | 1559 } |
1560 | 1560 |
1561 void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, | 1561 void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, |
1562 int dest_stride, int bd) { | 1562 int dest_stride, int bd) { |
1563 int i; | 1563 int i; |
1564 tran_high_t a1; | 1564 tran_high_t a1; |
1565 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); | 1565 tran_low_t out = WRAPLOW( |
| 1566 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
1566 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1567 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1567 | 1568 |
1568 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); | 1569 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
1569 a1 = ROUND_POWER_OF_TWO(out, 4); | 1570 a1 = ROUND_POWER_OF_TWO(out, 4); |
1570 | 1571 |
1571 for (i = 0; i < 4; i++) { | 1572 for (i = 0; i < 4; i++) { |
1572 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); | 1573 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); |
1573 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); | 1574 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); |
1574 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); | 1575 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); |
1575 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); | 1576 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); |
1576 dest += dest_stride; | 1577 dest += dest_stride; |
1577 } | 1578 } |
1578 } | 1579 } |
1579 | 1580 |
1580 void vp9_highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) { | 1581 void vp9_highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) { |
1581 tran_low_t step1[8], step2[8]; | 1582 tran_low_t step1[8], step2[8]; |
1582 tran_high_t temp1, temp2; | 1583 tran_high_t temp1, temp2; |
1583 // stage 1 | 1584 // stage 1 |
1584 step1[0] = input[0]; | 1585 step1[0] = input[0]; |
1585 step1[2] = input[4]; | 1586 step1[2] = input[4]; |
1586 step1[1] = input[2]; | 1587 step1[1] = input[2]; |
1587 step1[3] = input[6]; | 1588 step1[3] = input[6]; |
1588 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; | 1589 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; |
1589 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; | 1590 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; |
1590 step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1591 step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1591 step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1592 step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1592 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; | 1593 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; |
1593 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; | 1594 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; |
1594 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1595 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1595 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1596 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1596 | 1597 |
1597 // stage 2 & stage 3 - even half | 1598 // stage 2 & stage 3 - even half |
1598 vp9_highbd_idct4(step1, step1, bd); | 1599 vp9_highbd_idct4(step1, step1, bd); |
1599 | 1600 |
1600 // stage 2 - odd half | 1601 // stage 2 - odd half |
1601 step2[4] = WRAPLOW(step1[4] + step1[5], bd); | 1602 step2[4] = WRAPLOW(step1[4] + step1[5], bd); |
1602 step2[5] = WRAPLOW(step1[4] - step1[5], bd); | 1603 step2[5] = WRAPLOW(step1[4] - step1[5], bd); |
1603 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); | 1604 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); |
1604 step2[7] = WRAPLOW(step1[6] + step1[7], bd); | 1605 step2[7] = WRAPLOW(step1[6] + step1[7], bd); |
1605 | 1606 |
1606 // stage 3 - odd half | 1607 // stage 3 - odd half |
1607 step1[4] = step2[4]; | 1608 step1[4] = step2[4]; |
1608 temp1 = (step2[6] - step2[5]) * cospi_16_64; | 1609 temp1 = (step2[6] - step2[5]) * cospi_16_64; |
1609 temp2 = (step2[5] + step2[6]) * cospi_16_64; | 1610 temp2 = (step2[5] + step2[6]) * cospi_16_64; |
1610 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1611 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1611 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1612 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1612 step1[7] = step2[7]; | 1613 step1[7] = step2[7]; |
1613 | 1614 |
1614 // stage 4 | 1615 // stage 4 |
1615 output[0] = WRAPLOW(step1[0] + step1[7], bd); | 1616 output[0] = WRAPLOW(step1[0] + step1[7], bd); |
1616 output[1] = WRAPLOW(step1[1] + step1[6], bd); | 1617 output[1] = WRAPLOW(step1[1] + step1[6], bd); |
1617 output[2] = WRAPLOW(step1[2] + step1[5], bd); | 1618 output[2] = WRAPLOW(step1[2] + step1[5], bd); |
1618 output[3] = WRAPLOW(step1[3] + step1[4], bd); | 1619 output[3] = WRAPLOW(step1[3] + step1[4], bd); |
1619 output[4] = WRAPLOW(step1[3] - step1[4], bd); | 1620 output[4] = WRAPLOW(step1[3] - step1[4], bd); |
1620 output[5] = WRAPLOW(step1[2] - step1[5], bd); | 1621 output[5] = WRAPLOW(step1[2] - step1[5], bd); |
1621 output[6] = WRAPLOW(step1[1] - step1[6], bd); | 1622 output[6] = WRAPLOW(step1[1] - step1[6], bd); |
(... 24 unchanged lines skipped ...)
1646 dest[j * stride + i] = highbd_clip_pixel_add( | 1647 dest[j * stride + i] = highbd_clip_pixel_add( |
1647 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); | 1648 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); |
1648 } | 1649 } |
1649 } | 1650 } |
1650 } | 1651 } |
1651 | 1652 |
1652 void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, | 1653 void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, |
1653 int stride, int bd) { | 1654 int stride, int bd) { |
1654 int i, j; | 1655 int i, j; |
1655 tran_high_t a1; | 1656 tran_high_t a1; |
1656 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); | 1657 tran_low_t out = WRAPLOW( |
| 1658 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
1657 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1659 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
1658 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); | 1660 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
1659 a1 = ROUND_POWER_OF_TWO(out, 5); | 1661 a1 = ROUND_POWER_OF_TWO(out, 5); |
1660 for (j = 0; j < 8; ++j) { | 1662 for (j = 0; j < 8; ++j) { |
1661 for (i = 0; i < 8; ++i) | 1663 for (i = 0; i < 8; ++i) |
1662 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 1664 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
1663 dest += stride; | 1665 dest += stride; |
1664 } | 1666 } |
1665 } | 1667 } |
1666 | 1668 |
1667 static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { | 1669 static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { |
1668 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 1670 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
(... 20 unchanged lines skipped ...)
1689 | 1691 |
1690 s0 = s0 + s3 + s5; | 1692 s0 = s0 + s3 + s5; |
1691 s1 = s1 - s4 - s6; | 1693 s1 = s1 - s4 - s6; |
1692 s3 = s2; | 1694 s3 = s2; |
1693 s2 = sinpi_3_9 * s7; | 1695 s2 = sinpi_3_9 * s7; |
1694 | 1696 |
1695 // 1-D transform scaling factor is sqrt(2). | 1697 // 1-D transform scaling factor is sqrt(2). |
1696 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) | 1698 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
1697 // + 1b (addition) = 29b. | 1699 // + 1b (addition) = 29b. |
1698 // Hence the output bit depth is 15b. | 1700 // Hence the output bit depth is 15b. |
1699 output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), bd); | 1701 output[0] = WRAPLOW(highbd_dct_const_round_shift(s0 + s3, bd), bd); |
1700 output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), bd); | 1702 output[1] = WRAPLOW(highbd_dct_const_round_shift(s1 + s3, bd), bd); |
1701 output[2] = WRAPLOW(dct_const_round_shift(s2), bd); | 1703 output[2] = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); |
1702 output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), bd); | 1704 output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd); |
1703 } | 1705 } |
1704 | 1706 |
1705 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1707 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
1706 int stride, int tx_type, int bd) { | 1708 int stride, int tx_type, int bd) { |
1707 const highbd_transform_2d IHT_4[] = { | 1709 const highbd_transform_2d IHT_4[] = { |
1708 { vp9_highbd_idct4, vp9_highbd_idct4 }, // DCT_DCT = 0 | 1710 { vp9_highbd_idct4, vp9_highbd_idct4 }, // DCT_DCT = 0 |
1709 { highbd_iadst4, vp9_highbd_idct4 }, // ADST_DCT = 1 | 1711 { highbd_iadst4, vp9_highbd_idct4 }, // ADST_DCT = 1 |
1710 { vp9_highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2 | 1712 { vp9_highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2 |
1711 { highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3 | 1713 { highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3 |
1712 }; | 1714 }; |
(... 44 unchanged lines skipped ...)
1757 // stage 1 | 1759 // stage 1 |
1758 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; | 1760 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; |
1759 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; | 1761 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; |
1760 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; | 1762 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; |
1761 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; | 1763 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; |
1762 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; | 1764 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; |
1763 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; | 1765 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; |
1764 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; | 1766 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; |
1765 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; | 1767 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; |
1766 | 1768 |
1767 x0 = WRAPLOW(dct_const_round_shift(s0 + s4), bd); | 1769 x0 = WRAPLOW(highbd_dct_const_round_shift(s0 + s4, bd), bd); |
1768 x1 = WRAPLOW(dct_const_round_shift(s1 + s5), bd); | 1770 x1 = WRAPLOW(highbd_dct_const_round_shift(s1 + s5, bd), bd); |
1769 x2 = WRAPLOW(dct_const_round_shift(s2 + s6), bd); | 1771 x2 = WRAPLOW(highbd_dct_const_round_shift(s2 + s6, bd), bd); |
1770 x3 = WRAPLOW(dct_const_round_shift(s3 + s7), bd); | 1772 x3 = WRAPLOW(highbd_dct_const_round_shift(s3 + s7, bd), bd); |
1771 x4 = WRAPLOW(dct_const_round_shift(s0 - s4), bd); | 1773 x4 = WRAPLOW(highbd_dct_const_round_shift(s0 - s4, bd), bd); |
1772 x5 = WRAPLOW(dct_const_round_shift(s1 - s5), bd); | 1774 x5 = WRAPLOW(highbd_dct_const_round_shift(s1 - s5, bd), bd); |
1773 x6 = WRAPLOW(dct_const_round_shift(s2 - s6), bd); | 1775 x6 = WRAPLOW(highbd_dct_const_round_shift(s2 - s6, bd), bd); |
1774 x7 = WRAPLOW(dct_const_round_shift(s3 - s7), bd); | 1776 x7 = WRAPLOW(highbd_dct_const_round_shift(s3 - s7, bd), bd); |
1775 | 1777 |
1776 // stage 2 | 1778 // stage 2 |
1777 s0 = x0; | 1779 s0 = x0; |
1778 s1 = x1; | 1780 s1 = x1; |
1779 s2 = x2; | 1781 s2 = x2; |
1780 s3 = x3; | 1782 s3 = x3; |
1781 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; | 1783 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; |
1782 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; | 1784 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; |
1783 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; | 1785 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; |
1784 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; | 1786 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; |
1785 | 1787 |
1786 x0 = WRAPLOW(s0 + s2, bd); | 1788 x0 = WRAPLOW(s0 + s2, bd); |
1787 x1 = WRAPLOW(s1 + s3, bd); | 1789 x1 = WRAPLOW(s1 + s3, bd); |
1788 x2 = WRAPLOW(s0 - s2, bd); | 1790 x2 = WRAPLOW(s0 - s2, bd); |
1789 x3 = WRAPLOW(s1 - s3, bd); | 1791 x3 = WRAPLOW(s1 - s3, bd); |
1790 x4 = WRAPLOW(dct_const_round_shift(s4 + s6), bd); | 1792 x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s6, bd), bd); |
1791 x5 = WRAPLOW(dct_const_round_shift(s5 + s7), bd); | 1793 x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s7, bd), bd); |
1792 x6 = WRAPLOW(dct_const_round_shift(s4 - s6), bd); | 1794 x6 = WRAPLOW(highbd_dct_const_round_shift(s4 - s6, bd), bd); |
1793 x7 = WRAPLOW(dct_const_round_shift(s5 - s7), bd); | 1795 x7 = WRAPLOW(highbd_dct_const_round_shift(s5 - s7, bd), bd); |
1794 | 1796 |
1795 // stage 3 | 1797 // stage 3 |
1796 s2 = cospi_16_64 * (x2 + x3); | 1798 s2 = cospi_16_64 * (x2 + x3); |
1797 s3 = cospi_16_64 * (x2 - x3); | 1799 s3 = cospi_16_64 * (x2 - x3); |
1798 s6 = cospi_16_64 * (x6 + x7); | 1800 s6 = cospi_16_64 * (x6 + x7); |
1799 s7 = cospi_16_64 * (x6 - x7); | 1801 s7 = cospi_16_64 * (x6 - x7); |
1800 | 1802 |
1801 x2 = WRAPLOW(dct_const_round_shift(s2), bd); | 1803 x2 = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); |
1802 x3 = WRAPLOW(dct_const_round_shift(s3), bd); | 1804 x3 = WRAPLOW(highbd_dct_const_round_shift(s3, bd), bd); |
1803 x6 = WRAPLOW(dct_const_round_shift(s6), bd); | 1805 x6 = WRAPLOW(highbd_dct_const_round_shift(s6, bd), bd); |
1804 x7 = WRAPLOW(dct_const_round_shift(s7), bd); | 1806 x7 = WRAPLOW(highbd_dct_const_round_shift(s7, bd), bd); |
1805 | 1807 |
1806 output[0] = WRAPLOW(x0, bd); | 1808 output[0] = WRAPLOW(x0, bd); |
1807 output[1] = WRAPLOW(-x4, bd); | 1809 output[1] = WRAPLOW(-x4, bd); |
1808 output[2] = WRAPLOW(x6, bd); | 1810 output[2] = WRAPLOW(x6, bd); |
1809 output[3] = WRAPLOW(-x2, bd); | 1811 output[3] = WRAPLOW(-x2, bd); |
1810 output[4] = WRAPLOW(x3, bd); | 1812 output[4] = WRAPLOW(x3, bd); |
1811 output[5] = WRAPLOW(-x7, bd); | 1813 output[5] = WRAPLOW(-x7, bd); |
1812 output[6] = WRAPLOW(x5, bd); | 1814 output[6] = WRAPLOW(x5, bd); |
1813 output[7] = WRAPLOW(-x1, bd); | 1815 output[7] = WRAPLOW(-x1, bd); |
1814 } | 1816 } |
(... 88 unchanged lines skipped ...)
1903 step2[1] = step1[1]; | 1905 step2[1] = step1[1]; |
1904 step2[2] = step1[2]; | 1906 step2[2] = step1[2]; |
1905 step2[3] = step1[3]; | 1907 step2[3] = step1[3]; |
1906 step2[4] = step1[4]; | 1908 step2[4] = step1[4]; |
1907 step2[5] = step1[5]; | 1909 step2[5] = step1[5]; |
1908 step2[6] = step1[6]; | 1910 step2[6] = step1[6]; |
1909 step2[7] = step1[7]; | 1911 step2[7] = step1[7]; |
1910 | 1912 |
1911 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; | 1913 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; |
1912 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; | 1914 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; |
1913 step2[8] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1915 step2[8] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1914 step2[15] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1916 step2[15] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1915 | 1917 |
1916 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; | 1918 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; |
1917 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; | 1919 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; |
1918 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1920 step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1919 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1921 step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1920 | 1922 |
1921 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; | 1923 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; |
1922 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; | 1924 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; |
1923 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1925 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1924 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1926 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1925 | 1927 |
1926 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; | 1928 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; |
1927 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; | 1929 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; |
1928 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1930 step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1929 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1931 step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1930 | 1932 |
1931 // stage 3 | 1933 // stage 3 |
1932 step1[0] = step2[0]; | 1934 step1[0] = step2[0]; |
1933 step1[1] = step2[1]; | 1935 step1[1] = step2[1]; |
1934 step1[2] = step2[2]; | 1936 step1[2] = step2[2]; |
1935 step1[3] = step2[3]; | 1937 step1[3] = step2[3]; |
1936 | 1938 |
1937 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; | 1939 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; |
1938 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; | 1940 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; |
1939 step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1941 step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1940 step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1942 step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1941 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; | 1943 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; |
1942 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; | 1944 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; |
1943 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1945 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1944 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1946 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1945 | 1947 |
1946 step1[8] = WRAPLOW(step2[8] + step2[9], bd); | 1948 step1[8] = WRAPLOW(step2[8] + step2[9], bd); |
1947 step1[9] = WRAPLOW(step2[8] - step2[9], bd); | 1949 step1[9] = WRAPLOW(step2[8] - step2[9], bd); |
1948 step1[10] = WRAPLOW(-step2[10] + step2[11], bd); | 1950 step1[10] = WRAPLOW(-step2[10] + step2[11], bd); |
1949 step1[11] = WRAPLOW(step2[10] + step2[11], bd); | 1951 step1[11] = WRAPLOW(step2[10] + step2[11], bd); |
1950 step1[12] = WRAPLOW(step2[12] + step2[13], bd); | 1952 step1[12] = WRAPLOW(step2[12] + step2[13], bd); |
1951 step1[13] = WRAPLOW(step2[12] - step2[13], bd); | 1953 step1[13] = WRAPLOW(step2[12] - step2[13], bd); |
1952 step1[14] = WRAPLOW(-step2[14] + step2[15], bd); | 1954 step1[14] = WRAPLOW(-step2[14] + step2[15], bd); |
1953 step1[15] = WRAPLOW(step2[14] + step2[15], bd); | 1955 step1[15] = WRAPLOW(step2[14] + step2[15], bd); |
1954 | 1956 |
1955 // stage 4 | 1957 // stage 4 |
1956 temp1 = (step1[0] + step1[1]) * cospi_16_64; | 1958 temp1 = (step1[0] + step1[1]) * cospi_16_64; |
1957 temp2 = (step1[0] - step1[1]) * cospi_16_64; | 1959 temp2 = (step1[0] - step1[1]) * cospi_16_64; |
1958 step2[0] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1960 step2[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1959 step2[1] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1961 step2[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1960 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; | 1962 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; |
1961 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; | 1963 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; |
1962 step2[2] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1964 step2[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1963 step2[3] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1965 step2[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1964 step2[4] = WRAPLOW(step1[4] + step1[5], bd); | 1966 step2[4] = WRAPLOW(step1[4] + step1[5], bd); |
1965 step2[5] = WRAPLOW(step1[4] - step1[5], bd); | 1967 step2[5] = WRAPLOW(step1[4] - step1[5], bd); |
1966 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); | 1968 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); |
1967 step2[7] = WRAPLOW(step1[6] + step1[7], bd); | 1969 step2[7] = WRAPLOW(step1[6] + step1[7], bd); |
1968 | 1970 |
1969 step2[8] = step1[8]; | 1971 step2[8] = step1[8]; |
1970 step2[15] = step1[15]; | 1972 step2[15] = step1[15]; |
1971 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; | 1973 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; |
1972 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; | 1974 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; |
1973 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1975 step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1974 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1976 step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1975 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; | 1977 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; |
1976 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; | 1978 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; |
1977 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1979 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1978 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1980 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1979 step2[11] = step1[11]; | 1981 step2[11] = step1[11]; |
1980 step2[12] = step1[12]; | 1982 step2[12] = step1[12]; |
1981 | 1983 |
1982 // stage 5 | 1984 // stage 5 |
1983 step1[0] = WRAPLOW(step2[0] + step2[3], bd); | 1985 step1[0] = WRAPLOW(step2[0] + step2[3], bd); |
1984 step1[1] = WRAPLOW(step2[1] + step2[2], bd); | 1986 step1[1] = WRAPLOW(step2[1] + step2[2], bd); |
1985 step1[2] = WRAPLOW(step2[1] - step2[2], bd); | 1987 step1[2] = WRAPLOW(step2[1] - step2[2], bd); |
1986 step1[3] = WRAPLOW(step2[0] - step2[3], bd); | 1988 step1[3] = WRAPLOW(step2[0] - step2[3], bd); |
1987 step1[4] = step2[4]; | 1989 step1[4] = step2[4]; |
1988 temp1 = (step2[6] - step2[5]) * cospi_16_64; | 1990 temp1 = (step2[6] - step2[5]) * cospi_16_64; |
1989 temp2 = (step2[5] + step2[6]) * cospi_16_64; | 1991 temp2 = (step2[5] + step2[6]) * cospi_16_64; |
1990 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1992 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
1991 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1993 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
1992 step1[7] = step2[7]; | 1994 step1[7] = step2[7]; |
1993 | 1995 |
1994 step1[8] = WRAPLOW(step2[8] + step2[11], bd); | 1996 step1[8] = WRAPLOW(step2[8] + step2[11], bd); |
1995 step1[9] = WRAPLOW(step2[9] + step2[10], bd); | 1997 step1[9] = WRAPLOW(step2[9] + step2[10], bd); |
1996 step1[10] = WRAPLOW(step2[9] - step2[10], bd); | 1998 step1[10] = WRAPLOW(step2[9] - step2[10], bd); |
1997 step1[11] = WRAPLOW(step2[8] - step2[11], bd); | 1999 step1[11] = WRAPLOW(step2[8] - step2[11], bd); |
1998 step1[12] = WRAPLOW(-step2[12] + step2[15], bd); | 2000 step1[12] = WRAPLOW(-step2[12] + step2[15], bd); |
1999 step1[13] = WRAPLOW(-step2[13] + step2[14], bd); | 2001 step1[13] = WRAPLOW(-step2[13] + step2[14], bd); |
2000 step1[14] = WRAPLOW(step2[13] + step2[14], bd); | 2002 step1[14] = WRAPLOW(step2[13] + step2[14], bd); |
2001 step1[15] = WRAPLOW(step2[12] + step2[15], bd); | 2003 step1[15] = WRAPLOW(step2[12] + step2[15], bd); |
2002 | 2004 |
2003 // stage 6 | 2005 // stage 6 |
2004 step2[0] = WRAPLOW(step1[0] + step1[7], bd); | 2006 step2[0] = WRAPLOW(step1[0] + step1[7], bd); |
2005 step2[1] = WRAPLOW(step1[1] + step1[6], bd); | 2007 step2[1] = WRAPLOW(step1[1] + step1[6], bd); |
2006 step2[2] = WRAPLOW(step1[2] + step1[5], bd); | 2008 step2[2] = WRAPLOW(step1[2] + step1[5], bd); |
2007 step2[3] = WRAPLOW(step1[3] + step1[4], bd); | 2009 step2[3] = WRAPLOW(step1[3] + step1[4], bd); |
2008 step2[4] = WRAPLOW(step1[3] - step1[4], bd); | 2010 step2[4] = WRAPLOW(step1[3] - step1[4], bd); |
2009 step2[5] = WRAPLOW(step1[2] - step1[5], bd); | 2011 step2[5] = WRAPLOW(step1[2] - step1[5], bd); |
2010 step2[6] = WRAPLOW(step1[1] - step1[6], bd); | 2012 step2[6] = WRAPLOW(step1[1] - step1[6], bd); |
2011 step2[7] = WRAPLOW(step1[0] - step1[7], bd); | 2013 step2[7] = WRAPLOW(step1[0] - step1[7], bd); |
2012 step2[8] = step1[8]; | 2014 step2[8] = step1[8]; |
2013 step2[9] = step1[9]; | 2015 step2[9] = step1[9]; |
2014 temp1 = (-step1[10] + step1[13]) * cospi_16_64; | 2016 temp1 = (-step1[10] + step1[13]) * cospi_16_64; |
2015 temp2 = (step1[10] + step1[13]) * cospi_16_64; | 2017 temp2 = (step1[10] + step1[13]) * cospi_16_64; |
2016 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2018 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2017 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2019 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2018 temp1 = (-step1[11] + step1[12]) * cospi_16_64; | 2020 temp1 = (-step1[11] + step1[12]) * cospi_16_64; |
2019 temp2 = (step1[11] + step1[12]) * cospi_16_64; | 2021 temp2 = (step1[11] + step1[12]) * cospi_16_64; |
2020 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2022 step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2021 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2023 step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2022 step2[14] = step1[14]; | 2024 step2[14] = step1[14]; |
2023 step2[15] = step1[15]; | 2025 step2[15] = step1[15]; |
2024 | 2026 |
2025 // stage 7 | 2027 // stage 7 |
2026 output[0] = WRAPLOW(step2[0] + step2[15], bd); | 2028 output[0] = WRAPLOW(step2[0] + step2[15], bd); |
2027 output[1] = WRAPLOW(step2[1] + step2[14], bd); | 2029 output[1] = WRAPLOW(step2[1] + step2[14], bd); |
2028 output[2] = WRAPLOW(step2[2] + step2[13], bd); | 2030 output[2] = WRAPLOW(step2[2] + step2[13], bd); |
2029 output[3] = WRAPLOW(step2[3] + step2[12], bd); | 2031 output[3] = WRAPLOW(step2[3] + step2[12], bd); |
2030 output[4] = WRAPLOW(step2[4] + step2[11], bd); | 2032 output[4] = WRAPLOW(step2[4] + step2[11], bd); |
2031 output[5] = WRAPLOW(step2[5] + step2[10], bd); | 2033 output[5] = WRAPLOW(step2[5] + step2[10], bd); |
(... 76 unchanged lines skipped ...)
2108 s7 = x6 * cospi_19_64 - x7 * cospi_13_64; | 2110 s7 = x6 * cospi_19_64 - x7 * cospi_13_64; |
2109 s8 = x8 * cospi_17_64 + x9 * cospi_15_64; | 2111 s8 = x8 * cospi_17_64 + x9 * cospi_15_64; |
2110 s9 = x8 * cospi_15_64 - x9 * cospi_17_64; | 2112 s9 = x8 * cospi_15_64 - x9 * cospi_17_64; |
2111 s10 = x10 * cospi_21_64 + x11 * cospi_11_64; | 2113 s10 = x10 * cospi_21_64 + x11 * cospi_11_64; |
2112 s11 = x10 * cospi_11_64 - x11 * cospi_21_64; | 2114 s11 = x10 * cospi_11_64 - x11 * cospi_21_64; |
2113 s12 = x12 * cospi_25_64 + x13 * cospi_7_64; | 2115 s12 = x12 * cospi_25_64 + x13 * cospi_7_64; |
2114 s13 = x12 * cospi_7_64 - x13 * cospi_25_64; | 2116 s13 = x12 * cospi_7_64 - x13 * cospi_25_64; |
2115 s14 = x14 * cospi_29_64 + x15 * cospi_3_64; | 2117 s14 = x14 * cospi_29_64 + x15 * cospi_3_64; |
2116 s15 = x14 * cospi_3_64 - x15 * cospi_29_64; | 2118 s15 = x14 * cospi_3_64 - x15 * cospi_29_64; |
2117 | 2119 |
2118 x0 = WRAPLOW(dct_const_round_shift(s0 + s8), bd); | 2120 x0 = WRAPLOW(highbd_dct_const_round_shift(s0 + s8, bd), bd); |
2119 x1 = WRAPLOW(dct_const_round_shift(s1 + s9), bd); | 2121 x1 = WRAPLOW(highbd_dct_const_round_shift(s1 + s9, bd), bd); |
2120 x2 = WRAPLOW(dct_const_round_shift(s2 + s10), bd); | 2122 x2 = WRAPLOW(highbd_dct_const_round_shift(s2 + s10, bd), bd); |
2121 x3 = WRAPLOW(dct_const_round_shift(s3 + s11), bd); | 2123 x3 = WRAPLOW(highbd_dct_const_round_shift(s3 + s11, bd), bd); |
2122 x4 = WRAPLOW(dct_const_round_shift(s4 + s12), bd); | 2124 x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s12, bd), bd); |
2123 x5 = WRAPLOW(dct_const_round_shift(s5 + s13), bd); | 2125 x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s13, bd), bd); |
2124 x6 = WRAPLOW(dct_const_round_shift(s6 + s14), bd); | 2126 x6 = WRAPLOW(highbd_dct_const_round_shift(s6 + s14, bd), bd); |
2125 x7 = WRAPLOW(dct_const_round_shift(s7 + s15), bd); | 2127 x7 = WRAPLOW(highbd_dct_const_round_shift(s7 + s15, bd), bd); |
2126 x8 = WRAPLOW(dct_const_round_shift(s0 - s8), bd); | 2128 x8 = WRAPLOW(highbd_dct_const_round_shift(s0 - s8, bd), bd); |
2127 x9 = WRAPLOW(dct_const_round_shift(s1 - s9), bd); | 2129 x9 = WRAPLOW(highbd_dct_const_round_shift(s1 - s9, bd), bd); |
2128 x10 = WRAPLOW(dct_const_round_shift(s2 - s10), bd); | 2130 x10 = WRAPLOW(highbd_dct_const_round_shift(s2 - s10, bd), bd); |
2129 x11 = WRAPLOW(dct_const_round_shift(s3 - s11), bd); | 2131 x11 = WRAPLOW(highbd_dct_const_round_shift(s3 - s11, bd), bd); |
2130 x12 = WRAPLOW(dct_const_round_shift(s4 - s12), bd); | 2132 x12 = WRAPLOW(highbd_dct_const_round_shift(s4 - s12, bd), bd); |
2131 x13 = WRAPLOW(dct_const_round_shift(s5 - s13), bd); | 2133 x13 = WRAPLOW(highbd_dct_const_round_shift(s5 - s13, bd), bd); |
2132 x14 = WRAPLOW(dct_const_round_shift(s6 - s14), bd); | 2134 x14 = WRAPLOW(highbd_dct_const_round_shift(s6 - s14, bd), bd); |
2133 x15 = WRAPLOW(dct_const_round_shift(s7 - s15), bd); | 2135 x15 = WRAPLOW(highbd_dct_const_round_shift(s7 - s15, bd), bd); |
2134 | 2136 |
2135 // stage 2 | 2137 // stage 2 |
2136 s0 = x0; | 2138 s0 = x0; |
2137 s1 = x1; | 2139 s1 = x1; |
2138 s2 = x2; | 2140 s2 = x2; |
2139 s3 = x3; | 2141 s3 = x3; |
2140 s4 = x4; | 2142 s4 = x4; |
2141 s5 = x5; | 2143 s5 = x5; |
2142 s6 = x6; | 2144 s6 = x6; |
2143 s7 = x7; | 2145 s7 = x7; |
2144 s8 = x8 * cospi_4_64 + x9 * cospi_28_64; | 2146 s8 = x8 * cospi_4_64 + x9 * cospi_28_64; |
2145 s9 = x8 * cospi_28_64 - x9 * cospi_4_64; | 2147 s9 = x8 * cospi_28_64 - x9 * cospi_4_64; |
2146 s10 = x10 * cospi_20_64 + x11 * cospi_12_64; | 2148 s10 = x10 * cospi_20_64 + x11 * cospi_12_64; |
2147 s11 = x10 * cospi_12_64 - x11 * cospi_20_64; | 2149 s11 = x10 * cospi_12_64 - x11 * cospi_20_64; |
2148 s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; | 2150 s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; |
2149 s13 = x12 * cospi_4_64 + x13 * cospi_28_64; | 2151 s13 = x12 * cospi_4_64 + x13 * cospi_28_64; |
2150 s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; | 2152 s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; |
2151 s15 = x14 * cospi_20_64 + x15 * cospi_12_64; | 2153 s15 = x14 * cospi_20_64 + x15 * cospi_12_64; |
2152 | 2154 |
2153 x0 = WRAPLOW(s0 + s4, bd); | 2155 x0 = WRAPLOW(s0 + s4, bd); |
2154 x1 = WRAPLOW(s1 + s5, bd); | 2156 x1 = WRAPLOW(s1 + s5, bd); |
2155 x2 = WRAPLOW(s2 + s6, bd); | 2157 x2 = WRAPLOW(s2 + s6, bd); |
2156 x3 = WRAPLOW(s3 + s7, bd); | 2158 x3 = WRAPLOW(s3 + s7, bd); |
2157 x4 = WRAPLOW(s0 - s4, bd); | 2159 x4 = WRAPLOW(s0 - s4, bd); |
2158 x5 = WRAPLOW(s1 - s5, bd); | 2160 x5 = WRAPLOW(s1 - s5, bd); |
2159 x6 = WRAPLOW(s2 - s6, bd); | 2161 x6 = WRAPLOW(s2 - s6, bd); |
2160 x7 = WRAPLOW(s3 - s7, bd); | 2162 x7 = WRAPLOW(s3 - s7, bd); |
2161 x8 = WRAPLOW(dct_const_round_shift(s8 + s12), bd); | 2163 x8 = WRAPLOW(highbd_dct_const_round_shift(s8 + s12, bd), bd); |
2162 x9 = WRAPLOW(dct_const_round_shift(s9 + s13), bd); | 2164 x9 = WRAPLOW(highbd_dct_const_round_shift(s9 + s13, bd), bd); |
2163 x10 = WRAPLOW(dct_const_round_shift(s10 + s14), bd); | 2165 x10 = WRAPLOW(highbd_dct_const_round_shift(s10 + s14, bd), bd); |
2164 x11 = WRAPLOW(dct_const_round_shift(s11 + s15), bd); | 2166 x11 = WRAPLOW(highbd_dct_const_round_shift(s11 + s15, bd), bd); |
2165 x12 = WRAPLOW(dct_const_round_shift(s8 - s12), bd); | 2167 x12 = WRAPLOW(highbd_dct_const_round_shift(s8 - s12, bd), bd); |
2166 x13 = WRAPLOW(dct_const_round_shift(s9 - s13), bd); | 2168 x13 = WRAPLOW(highbd_dct_const_round_shift(s9 - s13, bd), bd); |
2167 x14 = WRAPLOW(dct_const_round_shift(s10 - s14), bd); | 2169 x14 = WRAPLOW(highbd_dct_const_round_shift(s10 - s14, bd), bd); |
2168 x15 = WRAPLOW(dct_const_round_shift(s11 - s15), bd); | 2170 x15 = WRAPLOW(highbd_dct_const_round_shift(s11 - s15, bd), bd); |
2169 | 2171 |
2170 // stage 3 | 2172 // stage 3 |
2171 s0 = x0; | 2173 s0 = x0; |
2172 s1 = x1; | 2174 s1 = x1; |
2173 s2 = x2; | 2175 s2 = x2; |
2174 s3 = x3; | 2176 s3 = x3; |
2175 s4 = x4 * cospi_8_64 + x5 * cospi_24_64; | 2177 s4 = x4 * cospi_8_64 + x5 * cospi_24_64; |
2176 s5 = x4 * cospi_24_64 - x5 * cospi_8_64; | 2178 s5 = x4 * cospi_24_64 - x5 * cospi_8_64; |
2177 s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; | 2179 s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; |
2178 s7 = x6 * cospi_8_64 + x7 * cospi_24_64; | 2180 s7 = x6 * cospi_8_64 + x7 * cospi_24_64; |
2179 s8 = x8; | 2181 s8 = x8; |
2180 s9 = x9; | 2182 s9 = x9; |
2181 s10 = x10; | 2183 s10 = x10; |
2182 s11 = x11; | 2184 s11 = x11; |
2183 s12 = x12 * cospi_8_64 + x13 * cospi_24_64; | 2185 s12 = x12 * cospi_8_64 + x13 * cospi_24_64; |
2184 s13 = x12 * cospi_24_64 - x13 * cospi_8_64; | 2186 s13 = x12 * cospi_24_64 - x13 * cospi_8_64; |
2185 s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; | 2187 s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; |
2186 s15 = x14 * cospi_8_64 + x15 * cospi_24_64; | 2188 s15 = x14 * cospi_8_64 + x15 * cospi_24_64; |
2187 | 2189 |
2188 x0 = WRAPLOW(s0 + s2, bd); | 2190 x0 = WRAPLOW(s0 + s2, bd); |
2189 x1 = WRAPLOW(s1 + s3, bd); | 2191 x1 = WRAPLOW(s1 + s3, bd); |
2190 x2 = WRAPLOW(s0 - s2, bd); | 2192 x2 = WRAPLOW(s0 - s2, bd); |
2191 x3 = WRAPLOW(s1 - s3, bd); | 2193 x3 = WRAPLOW(s1 - s3, bd); |
2192 x4 = WRAPLOW(dct_const_round_shift(s4 + s6), bd); | 2194 x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s6, bd), bd); |
2193 x5 = WRAPLOW(dct_const_round_shift(s5 + s7), bd); | 2195 x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s7, bd), bd); |
2194 x6 = WRAPLOW(dct_const_round_shift(s4 - s6), bd); | 2196 x6 = WRAPLOW(highbd_dct_const_round_shift(s4 - s6, bd), bd); |
2195 x7 = WRAPLOW(dct_const_round_shift(s5 - s7), bd); | 2197 x7 = WRAPLOW(highbd_dct_const_round_shift(s5 - s7, bd), bd); |
2196 x8 = WRAPLOW(s8 + s10, bd); | 2198 x8 = WRAPLOW(s8 + s10, bd); |
2197 x9 = WRAPLOW(s9 + s11, bd); | 2199 x9 = WRAPLOW(s9 + s11, bd); |
2198 x10 = WRAPLOW(s8 - s10, bd); | 2200 x10 = WRAPLOW(s8 - s10, bd); |
2199 x11 = WRAPLOW(s9 - s11, bd); | 2201 x11 = WRAPLOW(s9 - s11, bd); |
2200 x12 = WRAPLOW(dct_const_round_shift(s12 + s14), bd); | 2202 x12 = WRAPLOW(highbd_dct_const_round_shift(s12 + s14, bd), bd); |
2201 x13 = WRAPLOW(dct_const_round_shift(s13 + s15), bd); | 2203 x13 = WRAPLOW(highbd_dct_const_round_shift(s13 + s15, bd), bd); |
2202 x14 = WRAPLOW(dct_const_round_shift(s12 - s14), bd); | 2204 x14 = WRAPLOW(highbd_dct_const_round_shift(s12 - s14, bd), bd); |
2203 x15 = WRAPLOW(dct_const_round_shift(s13 - s15), bd); | 2205 x15 = WRAPLOW(highbd_dct_const_round_shift(s13 - s15, bd), bd); |
2204 | 2206 |
2205 // stage 4 | 2207 // stage 4 |
2206 s2 = (- cospi_16_64) * (x2 + x3); | 2208 s2 = (- cospi_16_64) * (x2 + x3); |
2207 s3 = cospi_16_64 * (x2 - x3); | 2209 s3 = cospi_16_64 * (x2 - x3); |
2208 s6 = cospi_16_64 * (x6 + x7); | 2210 s6 = cospi_16_64 * (x6 + x7); |
2209 s7 = cospi_16_64 * (-x6 + x7); | 2211 s7 = cospi_16_64 * (-x6 + x7); |
2210 s10 = cospi_16_64 * (x10 + x11); | 2212 s10 = cospi_16_64 * (x10 + x11); |
2211 s11 = cospi_16_64 * (-x10 + x11); | 2213 s11 = cospi_16_64 * (-x10 + x11); |
2212 s14 = (- cospi_16_64) * (x14 + x15); | 2214 s14 = (- cospi_16_64) * (x14 + x15); |
2213 s15 = cospi_16_64 * (x14 - x15); | 2215 s15 = cospi_16_64 * (x14 - x15); |
2214 | 2216 |
2215 x2 = WRAPLOW(dct_const_round_shift(s2), bd); | 2217 x2 = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); |
2216 x3 = WRAPLOW(dct_const_round_shift(s3), bd); | 2218 x3 = WRAPLOW(highbd_dct_const_round_shift(s3, bd), bd); |
2217 x6 = WRAPLOW(dct_const_round_shift(s6), bd); | 2219 x6 = WRAPLOW(highbd_dct_const_round_shift(s6, bd), bd); |
2218 x7 = WRAPLOW(dct_const_round_shift(s7), bd); | 2220 x7 = WRAPLOW(highbd_dct_const_round_shift(s7, bd), bd); |
2219 x10 = WRAPLOW(dct_const_round_shift(s10), bd); | 2221 x10 = WRAPLOW(highbd_dct_const_round_shift(s10, bd), bd); |
2220 x11 = WRAPLOW(dct_const_round_shift(s11), bd); | 2222 x11 = WRAPLOW(highbd_dct_const_round_shift(s11, bd), bd); |
2221 x14 = WRAPLOW(dct_const_round_shift(s14), bd); | 2223 x14 = WRAPLOW(highbd_dct_const_round_shift(s14, bd), bd); |
2222 x15 = WRAPLOW(dct_const_round_shift(s15), bd); | 2224 x15 = WRAPLOW(highbd_dct_const_round_shift(s15, bd), bd); |
2223 | 2225 |
2224 output[0] = WRAPLOW(x0, bd); | 2226 output[0] = WRAPLOW(x0, bd); |
2225 output[1] = WRAPLOW(-x8, bd); | 2227 output[1] = WRAPLOW(-x8, bd); |
2226 output[2] = WRAPLOW(x12, bd); | 2228 output[2] = WRAPLOW(x12, bd); |
2227 output[3] = WRAPLOW(-x4, bd); | 2229 output[3] = WRAPLOW(-x4, bd); |
2228 output[4] = WRAPLOW(x6, bd); | 2230 output[4] = WRAPLOW(x6, bd); |
2229 output[5] = WRAPLOW(x14, bd); | 2231 output[5] = WRAPLOW(x14, bd); |
2230 output[6] = WRAPLOW(x10, bd); | 2232 output[6] = WRAPLOW(x10, bd); |
2231 output[7] = WRAPLOW(x2, bd); | 2233 output[7] = WRAPLOW(x2, bd); |
2232 output[8] = WRAPLOW(x3, bd); | 2234 output[8] = WRAPLOW(x3, bd); |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2299 dest[j * stride + i] = highbd_clip_pixel_add( | 2301 dest[j * stride + i] = highbd_clip_pixel_add( |
2300 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2302 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
2301 } | 2303 } |
2302 } | 2304 } |
2303 } | 2305 } |
2304 | 2306 |
2305 void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, | 2307 void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, |
2306 int stride, int bd) { | 2308 int stride, int bd) { |
2307 int i, j; | 2309 int i, j; |
2308 tran_high_t a1; | 2310 tran_high_t a1; |
2309 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); | 2311 tran_low_t out = WRAPLOW( |
| 2312 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
2310 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2313 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
2311 | 2314 |
2312 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); | 2315 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
2313 a1 = ROUND_POWER_OF_TWO(out, 6); | 2316 a1 = ROUND_POWER_OF_TWO(out, 6); |
2314 for (j = 0; j < 16; ++j) { | 2317 for (j = 0; j < 16; ++j) { |
2315 for (i = 0; i < 16; ++i) | 2318 for (i = 0; i < 16; ++i) |
2316 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 2319 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
2317 dest += stride; | 2320 dest += stride; |
2318 } | 2321 } |
2319 } | 2322 } |
2320 | 2323 |
2321 static void highbd_idct32(const tran_low_t *input, tran_low_t *output, int bd) { | 2324 static void highbd_idct32(const tran_low_t *input, tran_low_t *output, int bd) { |
2322 tran_low_t step1[32], step2[32]; | 2325 tran_low_t step1[32], step2[32]; |
(...skipping 13 matching lines...) Expand all Loading... |
2336 step1[9] = input[18]; | 2339 step1[9] = input[18]; |
2337 step1[10] = input[10]; | 2340 step1[10] = input[10]; |
2338 step1[11] = input[26]; | 2341 step1[11] = input[26]; |
2339 step1[12] = input[6]; | 2342 step1[12] = input[6]; |
2340 step1[13] = input[22]; | 2343 step1[13] = input[22]; |
2341 step1[14] = input[14]; | 2344 step1[14] = input[14]; |
2342 step1[15] = input[30]; | 2345 step1[15] = input[30]; |
2343 | 2346 |
2344 temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; | 2347 temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; |
2345 temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; | 2348 temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; |
2346 step1[16] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2349 step1[16] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2347 step1[31] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2350 step1[31] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2348 | 2351 |
2349 temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; | 2352 temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; |
2350 temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; | 2353 temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; |
2351 step1[17] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2354 step1[17] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2352 step1[30] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2355 step1[30] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2353 | 2356 |
2354 temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; | 2357 temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; |
2355 temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; | 2358 temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; |
2356 step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2359 step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2357 step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2360 step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2358 | 2361 |
2359 temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; | 2362 temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; |
2360 temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; | 2363 temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; |
2361 step1[19] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2364 step1[19] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2362 step1[28] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2365 step1[28] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2363 | 2366 |
2364 temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; | 2367 temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; |
2365 temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; | 2368 temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; |
2366 step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2369 step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2367 step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2370 step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2368 | 2371 |
2369 temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; | 2372 temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; |
2370 temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; | 2373 temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; |
2371 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2374 step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2372 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2375 step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2373 | 2376 |
2374 temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; | 2377 temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; |
2375 temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; | 2378 temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; |
2376 step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2379 step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2377 step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2380 step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2378 | 2381 |
2379 temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; | 2382 temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; |
2380 temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; | 2383 temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; |
2381 step1[23] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2384 step1[23] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2382 step1[24] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2385 step1[24] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2383 | 2386 |
2384 // stage 2 | 2387 // stage 2 |
2385 step2[0] = step1[0]; | 2388 step2[0] = step1[0]; |
2386 step2[1] = step1[1]; | 2389 step2[1] = step1[1]; |
2387 step2[2] = step1[2]; | 2390 step2[2] = step1[2]; |
2388 step2[3] = step1[3]; | 2391 step2[3] = step1[3]; |
2389 step2[4] = step1[4]; | 2392 step2[4] = step1[4]; |
2390 step2[5] = step1[5]; | 2393 step2[5] = step1[5]; |
2391 step2[6] = step1[6]; | 2394 step2[6] = step1[6]; |
2392 step2[7] = step1[7]; | 2395 step2[7] = step1[7]; |
2393 | 2396 |
2394 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; | 2397 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; |
2395 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; | 2398 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; |
2396 step2[8] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2399 step2[8] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2397 step2[15] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2400 step2[15] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2398 | 2401 |
2399 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; | 2402 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; |
2400 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; | 2403 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; |
2401 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2404 step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2402 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2405 step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2403 | 2406 |
2404 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; | 2407 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; |
2405 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; | 2408 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; |
2406 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2409 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2407 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2410 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2408 | 2411 |
2409 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; | 2412 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; |
2410 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; | 2413 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; |
2411 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2414 step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2412 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2415 step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2413 | 2416 |
2414 step2[16] = WRAPLOW(step1[16] + step1[17], bd); | 2417 step2[16] = WRAPLOW(step1[16] + step1[17], bd); |
2415 step2[17] = WRAPLOW(step1[16] - step1[17], bd); | 2418 step2[17] = WRAPLOW(step1[16] - step1[17], bd); |
2416 step2[18] = WRAPLOW(-step1[18] + step1[19], bd); | 2419 step2[18] = WRAPLOW(-step1[18] + step1[19], bd); |
2417 step2[19] = WRAPLOW(step1[18] + step1[19], bd); | 2420 step2[19] = WRAPLOW(step1[18] + step1[19], bd); |
2418 step2[20] = WRAPLOW(step1[20] + step1[21], bd); | 2421 step2[20] = WRAPLOW(step1[20] + step1[21], bd); |
2419 step2[21] = WRAPLOW(step1[20] - step1[21], bd); | 2422 step2[21] = WRAPLOW(step1[20] - step1[21], bd); |
2420 step2[22] = WRAPLOW(-step1[22] + step1[23], bd); | 2423 step2[22] = WRAPLOW(-step1[22] + step1[23], bd); |
2421 step2[23] = WRAPLOW(step1[22] + step1[23], bd); | 2424 step2[23] = WRAPLOW(step1[22] + step1[23], bd); |
2422 step2[24] = WRAPLOW(step1[24] + step1[25], bd); | 2425 step2[24] = WRAPLOW(step1[24] + step1[25], bd); |
2423 step2[25] = WRAPLOW(step1[24] - step1[25], bd); | 2426 step2[25] = WRAPLOW(step1[24] - step1[25], bd); |
2424 step2[26] = WRAPLOW(-step1[26] + step1[27], bd); | 2427 step2[26] = WRAPLOW(-step1[26] + step1[27], bd); |
2425 step2[27] = WRAPLOW(step1[26] + step1[27], bd); | 2428 step2[27] = WRAPLOW(step1[26] + step1[27], bd); |
2426 step2[28] = WRAPLOW(step1[28] + step1[29], bd); | 2429 step2[28] = WRAPLOW(step1[28] + step1[29], bd); |
2427 step2[29] = WRAPLOW(step1[28] - step1[29], bd); | 2430 step2[29] = WRAPLOW(step1[28] - step1[29], bd); |
2428 step2[30] = WRAPLOW(-step1[30] + step1[31], bd); | 2431 step2[30] = WRAPLOW(-step1[30] + step1[31], bd); |
2429 step2[31] = WRAPLOW(step1[30] + step1[31], bd); | 2432 step2[31] = WRAPLOW(step1[30] + step1[31], bd); |
2430 | 2433 |
2431 // stage 3 | 2434 // stage 3 |
2432 step1[0] = step2[0]; | 2435 step1[0] = step2[0]; |
2433 step1[1] = step2[1]; | 2436 step1[1] = step2[1]; |
2434 step1[2] = step2[2]; | 2437 step1[2] = step2[2]; |
2435 step1[3] = step2[3]; | 2438 step1[3] = step2[3]; |
2436 | 2439 |
2437 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; | 2440 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; |
2438 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; | 2441 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; |
2439 step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2442 step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2440 step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2443 step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2441 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; | 2444 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; |
2442 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; | 2445 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; |
2443 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2446 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2444 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2447 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2445 | 2448 |
2446 step1[8] = WRAPLOW(step2[8] + step2[9], bd); | 2449 step1[8] = WRAPLOW(step2[8] + step2[9], bd); |
2447 step1[9] = WRAPLOW(step2[8] - step2[9], bd); | 2450 step1[9] = WRAPLOW(step2[8] - step2[9], bd); |
2448 step1[10] = WRAPLOW(-step2[10] + step2[11], bd); | 2451 step1[10] = WRAPLOW(-step2[10] + step2[11], bd); |
2449 step1[11] = WRAPLOW(step2[10] + step2[11], bd); | 2452 step1[11] = WRAPLOW(step2[10] + step2[11], bd); |
2450 step1[12] = WRAPLOW(step2[12] + step2[13], bd); | 2453 step1[12] = WRAPLOW(step2[12] + step2[13], bd); |
2451 step1[13] = WRAPLOW(step2[12] - step2[13], bd); | 2454 step1[13] = WRAPLOW(step2[12] - step2[13], bd); |
2452 step1[14] = WRAPLOW(-step2[14] + step2[15], bd); | 2455 step1[14] = WRAPLOW(-step2[14] + step2[15], bd); |
2453 step1[15] = WRAPLOW(step2[14] + step2[15], bd); | 2456 step1[15] = WRAPLOW(step2[14] + step2[15], bd); |
2454 | 2457 |
2455 step1[16] = step2[16]; | 2458 step1[16] = step2[16]; |
2456 step1[31] = step2[31]; | 2459 step1[31] = step2[31]; |
2457 temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; | 2460 temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; |
2458 temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; | 2461 temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; |
2459 step1[17] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2462 step1[17] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2460 step1[30] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2463 step1[30] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2461 temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; | 2464 temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; |
2462 temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; | 2465 temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; |
2463 step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2466 step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2464 step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2467 step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2465 step1[19] = step2[19]; | 2468 step1[19] = step2[19]; |
2466 step1[20] = step2[20]; | 2469 step1[20] = step2[20]; |
2467 temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; | 2470 temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; |
2468 temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; | 2471 temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; |
2469 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2472 step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2470 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2473 step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2471 temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; | 2474 temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; |
2472 temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; | 2475 temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; |
2473 step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2476 step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2474 step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2477 step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2475 step1[23] = step2[23]; | 2478 step1[23] = step2[23]; |
2476 step1[24] = step2[24]; | 2479 step1[24] = step2[24]; |
2477 step1[27] = step2[27]; | 2480 step1[27] = step2[27]; |
2478 step1[28] = step2[28]; | 2481 step1[28] = step2[28]; |
2479 | 2482 |
2480 // stage 4 | 2483 // stage 4 |
2481 temp1 = (step1[0] + step1[1]) * cospi_16_64; | 2484 temp1 = (step1[0] + step1[1]) * cospi_16_64; |
2482 temp2 = (step1[0] - step1[1]) * cospi_16_64; | 2485 temp2 = (step1[0] - step1[1]) * cospi_16_64; |
2483 step2[0] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2486 step2[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2484 step2[1] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2487 step2[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2485 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; | 2488 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; |
2486 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; | 2489 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; |
2487 step2[2] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2490 step2[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2488 step2[3] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2491 step2[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2489 step2[4] = WRAPLOW(step1[4] + step1[5], bd); | 2492 step2[4] = WRAPLOW(step1[4] + step1[5], bd); |
2490 step2[5] = WRAPLOW(step1[4] - step1[5], bd); | 2493 step2[5] = WRAPLOW(step1[4] - step1[5], bd); |
2491 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); | 2494 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); |
2492 step2[7] = WRAPLOW(step1[6] + step1[7], bd); | 2495 step2[7] = WRAPLOW(step1[6] + step1[7], bd); |
2493 | 2496 |
2494 step2[8] = step1[8]; | 2497 step2[8] = step1[8]; |
2495 step2[15] = step1[15]; | 2498 step2[15] = step1[15]; |
2496 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; | 2499 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; |
2497 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; | 2500 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; |
2498 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2501 step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2499 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2502 step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2500 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; | 2503 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; |
2501 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; | 2504 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; |
2502 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2505 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2503 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2506 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2504 step2[11] = step1[11]; | 2507 step2[11] = step1[11]; |
2505 step2[12] = step1[12]; | 2508 step2[12] = step1[12]; |
2506 | 2509 |
2507 step2[16] = WRAPLOW(step1[16] + step1[19], bd); | 2510 step2[16] = WRAPLOW(step1[16] + step1[19], bd); |
2508 step2[17] = WRAPLOW(step1[17] + step1[18], bd); | 2511 step2[17] = WRAPLOW(step1[17] + step1[18], bd); |
2509 step2[18] = WRAPLOW(step1[17] - step1[18], bd); | 2512 step2[18] = WRAPLOW(step1[17] - step1[18], bd); |
2510 step2[19] = WRAPLOW(step1[16] - step1[19], bd); | 2513 step2[19] = WRAPLOW(step1[16] - step1[19], bd); |
2511 step2[20] = WRAPLOW(-step1[20] + step1[23], bd); | 2514 step2[20] = WRAPLOW(-step1[20] + step1[23], bd); |
2512 step2[21] = WRAPLOW(-step1[21] + step1[22], bd); | 2515 step2[21] = WRAPLOW(-step1[21] + step1[22], bd); |
2513 step2[22] = WRAPLOW(step1[21] + step1[22], bd); | 2516 step2[22] = WRAPLOW(step1[21] + step1[22], bd); |
2514 step2[23] = WRAPLOW(step1[20] + step1[23], bd); | 2517 step2[23] = WRAPLOW(step1[20] + step1[23], bd); |
2515 | 2518 |
2516 step2[24] = WRAPLOW(step1[24] + step1[27], bd); | 2519 step2[24] = WRAPLOW(step1[24] + step1[27], bd); |
2517 step2[25] = WRAPLOW(step1[25] + step1[26], bd); | 2520 step2[25] = WRAPLOW(step1[25] + step1[26], bd); |
2518 step2[26] = WRAPLOW(step1[25] - step1[26], bd); | 2521 step2[26] = WRAPLOW(step1[25] - step1[26], bd); |
2519 step2[27] = WRAPLOW(step1[24] - step1[27], bd); | 2522 step2[27] = WRAPLOW(step1[24] - step1[27], bd); |
2520 step2[28] = WRAPLOW(-step1[28] + step1[31], bd); | 2523 step2[28] = WRAPLOW(-step1[28] + step1[31], bd); |
2521 step2[29] = WRAPLOW(-step1[29] + step1[30], bd); | 2524 step2[29] = WRAPLOW(-step1[29] + step1[30], bd); |
2522 step2[30] = WRAPLOW(step1[29] + step1[30], bd); | 2525 step2[30] = WRAPLOW(step1[29] + step1[30], bd); |
2523 step2[31] = WRAPLOW(step1[28] + step1[31], bd); | 2526 step2[31] = WRAPLOW(step1[28] + step1[31], bd); |
2524 | 2527 |
2525 // stage 5 | 2528 // stage 5 |
2526 step1[0] = WRAPLOW(step2[0] + step2[3], bd); | 2529 step1[0] = WRAPLOW(step2[0] + step2[3], bd); |
2527 step1[1] = WRAPLOW(step2[1] + step2[2], bd); | 2530 step1[1] = WRAPLOW(step2[1] + step2[2], bd); |
2528 step1[2] = WRAPLOW(step2[1] - step2[2], bd); | 2531 step1[2] = WRAPLOW(step2[1] - step2[2], bd); |
2529 step1[3] = WRAPLOW(step2[0] - step2[3], bd); | 2532 step1[3] = WRAPLOW(step2[0] - step2[3], bd); |
2530 step1[4] = step2[4]; | 2533 step1[4] = step2[4]; |
2531 temp1 = (step2[6] - step2[5]) * cospi_16_64; | 2534 temp1 = (step2[6] - step2[5]) * cospi_16_64; |
2532 temp2 = (step2[5] + step2[6]) * cospi_16_64; | 2535 temp2 = (step2[5] + step2[6]) * cospi_16_64; |
2533 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2536 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2534 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2537 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2535 step1[7] = step2[7]; | 2538 step1[7] = step2[7]; |
2536 | 2539 |
2537 step1[8] = WRAPLOW(step2[8] + step2[11], bd); | 2540 step1[8] = WRAPLOW(step2[8] + step2[11], bd); |
2538 step1[9] = WRAPLOW(step2[9] + step2[10], bd); | 2541 step1[9] = WRAPLOW(step2[9] + step2[10], bd); |
2539 step1[10] = WRAPLOW(step2[9] - step2[10], bd); | 2542 step1[10] = WRAPLOW(step2[9] - step2[10], bd); |
2540 step1[11] = WRAPLOW(step2[8] - step2[11], bd); | 2543 step1[11] = WRAPLOW(step2[8] - step2[11], bd); |
2541 step1[12] = WRAPLOW(-step2[12] + step2[15], bd); | 2544 step1[12] = WRAPLOW(-step2[12] + step2[15], bd); |
2542 step1[13] = WRAPLOW(-step2[13] + step2[14], bd); | 2545 step1[13] = WRAPLOW(-step2[13] + step2[14], bd); |
2543 step1[14] = WRAPLOW(step2[13] + step2[14], bd); | 2546 step1[14] = WRAPLOW(step2[13] + step2[14], bd); |
2544 step1[15] = WRAPLOW(step2[12] + step2[15], bd); | 2547 step1[15] = WRAPLOW(step2[12] + step2[15], bd); |
2545 | 2548 |
2546 step1[16] = step2[16]; | 2549 step1[16] = step2[16]; |
2547 step1[17] = step2[17]; | 2550 step1[17] = step2[17]; |
2548 temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; | 2551 temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; |
2549 temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; | 2552 temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; |
2550 step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2553 step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2551 step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2554 step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2552 temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; | 2555 temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; |
2553 temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; | 2556 temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; |
2554 step1[19] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2557 step1[19] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2555 step1[28] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2558 step1[28] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2556 temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; | 2559 temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; |
2557 temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; | 2560 temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; |
2558 step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2561 step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2559 step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2562 step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2560 temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; | 2563 temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; |
2561 temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; | 2564 temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; |
2562 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2565 step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2563 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2566 step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2564 step1[22] = step2[22]; | 2567 step1[22] = step2[22]; |
2565 step1[23] = step2[23]; | 2568 step1[23] = step2[23]; |
2566 step1[24] = step2[24]; | 2569 step1[24] = step2[24]; |
2567 step1[25] = step2[25]; | 2570 step1[25] = step2[25]; |
2568 step1[30] = step2[30]; | 2571 step1[30] = step2[30]; |
2569 step1[31] = step2[31]; | 2572 step1[31] = step2[31]; |
2570 | 2573 |
2571 // stage 6 | 2574 // stage 6 |
2572 step2[0] = WRAPLOW(step1[0] + step1[7], bd); | 2575 step2[0] = WRAPLOW(step1[0] + step1[7], bd); |
2573 step2[1] = WRAPLOW(step1[1] + step1[6], bd); | 2576 step2[1] = WRAPLOW(step1[1] + step1[6], bd); |
2574 step2[2] = WRAPLOW(step1[2] + step1[5], bd); | 2577 step2[2] = WRAPLOW(step1[2] + step1[5], bd); |
2575 step2[3] = WRAPLOW(step1[3] + step1[4], bd); | 2578 step2[3] = WRAPLOW(step1[3] + step1[4], bd); |
2576 step2[4] = WRAPLOW(step1[3] - step1[4], bd); | 2579 step2[4] = WRAPLOW(step1[3] - step1[4], bd); |
2577 step2[5] = WRAPLOW(step1[2] - step1[5], bd); | 2580 step2[5] = WRAPLOW(step1[2] - step1[5], bd); |
2578 step2[6] = WRAPLOW(step1[1] - step1[6], bd); | 2581 step2[6] = WRAPLOW(step1[1] - step1[6], bd); |
2579 step2[7] = WRAPLOW(step1[0] - step1[7], bd); | 2582 step2[7] = WRAPLOW(step1[0] - step1[7], bd); |
2580 step2[8] = step1[8]; | 2583 step2[8] = step1[8]; |
2581 step2[9] = step1[9]; | 2584 step2[9] = step1[9]; |
2582 temp1 = (-step1[10] + step1[13]) * cospi_16_64; | 2585 temp1 = (-step1[10] + step1[13]) * cospi_16_64; |
2583 temp2 = (step1[10] + step1[13]) * cospi_16_64; | 2586 temp2 = (step1[10] + step1[13]) * cospi_16_64; |
2584 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2587 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2585 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2588 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2586 temp1 = (-step1[11] + step1[12]) * cospi_16_64; | 2589 temp1 = (-step1[11] + step1[12]) * cospi_16_64; |
2587 temp2 = (step1[11] + step1[12]) * cospi_16_64; | 2590 temp2 = (step1[11] + step1[12]) * cospi_16_64; |
2588 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2591 step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2589 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2592 step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2590 step2[14] = step1[14]; | 2593 step2[14] = step1[14]; |
2591 step2[15] = step1[15]; | 2594 step2[15] = step1[15]; |
2592 | 2595 |
2593 step2[16] = WRAPLOW(step1[16] + step1[23], bd); | 2596 step2[16] = WRAPLOW(step1[16] + step1[23], bd); |
2594 step2[17] = WRAPLOW(step1[17] + step1[22], bd); | 2597 step2[17] = WRAPLOW(step1[17] + step1[22], bd); |
2595 step2[18] = WRAPLOW(step1[18] + step1[21], bd); | 2598 step2[18] = WRAPLOW(step1[18] + step1[21], bd); |
2596 step2[19] = WRAPLOW(step1[19] + step1[20], bd); | 2599 step2[19] = WRAPLOW(step1[19] + step1[20], bd); |
2597 step2[20] = WRAPLOW(step1[19] - step1[20], bd); | 2600 step2[20] = WRAPLOW(step1[19] - step1[20], bd); |
2598 step2[21] = WRAPLOW(step1[18] - step1[21], bd); | 2601 step2[21] = WRAPLOW(step1[18] - step1[21], bd); |
2599 step2[22] = WRAPLOW(step1[17] - step1[22], bd); | 2602 step2[22] = WRAPLOW(step1[17] - step1[22], bd); |
(...skipping 25 matching lines...) Expand all Loading... |
2625 step1[13] = WRAPLOW(step2[2] - step2[13], bd); | 2628 step1[13] = WRAPLOW(step2[2] - step2[13], bd); |
2626 step1[14] = WRAPLOW(step2[1] - step2[14], bd); | 2629 step1[14] = WRAPLOW(step2[1] - step2[14], bd); |
2627 step1[15] = WRAPLOW(step2[0] - step2[15], bd); | 2630 step1[15] = WRAPLOW(step2[0] - step2[15], bd); |
2628 | 2631 |
2629 step1[16] = step2[16]; | 2632 step1[16] = step2[16]; |
2630 step1[17] = step2[17]; | 2633 step1[17] = step2[17]; |
2631 step1[18] = step2[18]; | 2634 step1[18] = step2[18]; |
2632 step1[19] = step2[19]; | 2635 step1[19] = step2[19]; |
2633 temp1 = (-step2[20] + step2[27]) * cospi_16_64; | 2636 temp1 = (-step2[20] + step2[27]) * cospi_16_64; |
2634 temp2 = (step2[20] + step2[27]) * cospi_16_64; | 2637 temp2 = (step2[20] + step2[27]) * cospi_16_64; |
2635 step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2638 step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2636 step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2639 step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2637 temp1 = (-step2[21] + step2[26]) * cospi_16_64; | 2640 temp1 = (-step2[21] + step2[26]) * cospi_16_64; |
2638 temp2 = (step2[21] + step2[26]) * cospi_16_64; | 2641 temp2 = (step2[21] + step2[26]) * cospi_16_64; |
2639 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2642 step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2640 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2643 step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2641 temp1 = (-step2[22] + step2[25]) * cospi_16_64; | 2644 temp1 = (-step2[22] + step2[25]) * cospi_16_64; |
2642 temp2 = (step2[22] + step2[25]) * cospi_16_64; | 2645 temp2 = (step2[22] + step2[25]) * cospi_16_64; |
2643 step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2646 step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2644 step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2647 step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2645 temp1 = (-step2[23] + step2[24]) * cospi_16_64; | 2648 temp1 = (-step2[23] + step2[24]) * cospi_16_64; |
2646 temp2 = (step2[23] + step2[24]) * cospi_16_64; | 2649 temp2 = (step2[23] + step2[24]) * cospi_16_64; |
2647 step1[23] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2650 step1[23] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
2648 step1[24] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2651 step1[24] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
2649 step1[28] = step2[28]; | 2652 step1[28] = step2[28]; |
2650 step1[29] = step2[29]; | 2653 step1[29] = step2[29]; |
2651 step1[30] = step2[30]; | 2654 step1[30] = step2[30]; |
2652 step1[31] = step2[31]; | 2655 step1[31] = step2[31]; |
2653 | 2656 |
2654 // final stage | 2657 // final stage |
2655 output[0] = WRAPLOW(step1[0] + step1[31], bd); | 2658 output[0] = WRAPLOW(step1[0] + step1[31], bd); |
2656 output[1] = WRAPLOW(step1[1] + step1[30], bd); | 2659 output[1] = WRAPLOW(step1[1] + step1[30], bd); |
2657 output[2] = WRAPLOW(step1[2] + step1[29], bd); | 2660 output[2] = WRAPLOW(step1[2] + step1[29], bd); |
2658 output[3] = WRAPLOW(step1[3] + step1[28], bd); | 2661 output[3] = WRAPLOW(step1[3] + step1[28], bd); |
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2752 } | 2755 } |
2753 } | 2756 } |
2754 } | 2757 } |
2755 | 2758 |
2756 void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, | 2759 void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, |
2757 int stride, int bd) { | 2760 int stride, int bd) { |
2758 int i, j; | 2761 int i, j; |
2759 int a1; | 2762 int a1; |
2760 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2763 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
2761 | 2764 |
2762 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); | 2765 tran_low_t out = WRAPLOW( |
2763 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); | 2766 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
| 2767 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
2764 a1 = ROUND_POWER_OF_TWO(out, 6); | 2768 a1 = ROUND_POWER_OF_TWO(out, 6); |
2765 | 2769 |
2766 for (j = 0; j < 32; ++j) { | 2770 for (j = 0; j < 32; ++j) { |
2767 for (i = 0; i < 32; ++i) | 2771 for (i = 0; i < 32; ++i) |
2768 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 2772 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
2769 dest += stride; | 2773 dest += stride; |
2770 } | 2774 } |
2771 } | 2775 } |
2772 | 2776 |
2773 // idct | 2777 // idct |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2853 | 2857 |
2854 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, | 2858 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, |
2855 uint8_t *dest, int stride, int eob, int bd) { | 2859 uint8_t *dest, int stride, int eob, int bd) { |
2856 if (tx_type == DCT_DCT) { | 2860 if (tx_type == DCT_DCT) { |
2857 vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); | 2861 vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); |
2858 } else { | 2862 } else { |
2859 vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); | 2863 vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); |
2860 } | 2864 } |
2861 } | 2865 } |
2862 #endif // CONFIG_VP9_HIGHBITDEPTH | 2866 #endif // CONFIG_VP9_HIGHBITDEPTH |
OLD | NEW |