| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 1499 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1510 } | 1510 } |
| 1511 } | 1511 } |
| 1512 | 1512 |
| 1513 void vp9_highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd) { | 1513 void vp9_highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd) { |
| 1514 tran_low_t step[4]; | 1514 tran_low_t step[4]; |
| 1515 tran_high_t temp1, temp2; | 1515 tran_high_t temp1, temp2; |
| 1516 (void) bd; | 1516 (void) bd; |
| 1517 // stage 1 | 1517 // stage 1 |
| 1518 temp1 = (input[0] + input[2]) * cospi_16_64; | 1518 temp1 = (input[0] + input[2]) * cospi_16_64; |
| 1519 temp2 = (input[0] - input[2]) * cospi_16_64; | 1519 temp2 = (input[0] - input[2]) * cospi_16_64; |
| 1520 step[0] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1520 step[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1521 step[1] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1521 step[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1522 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; | 1522 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; |
| 1523 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; | 1523 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; |
| 1524 step[2] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1524 step[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1525 step[3] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1525 step[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1526 | 1526 |
| 1527 // stage 2 | 1527 // stage 2 |
| 1528 output[0] = WRAPLOW(step[0] + step[3], bd); | 1528 output[0] = WRAPLOW(step[0] + step[3], bd); |
| 1529 output[1] = WRAPLOW(step[1] + step[2], bd); | 1529 output[1] = WRAPLOW(step[1] + step[2], bd); |
| 1530 output[2] = WRAPLOW(step[1] - step[2], bd); | 1530 output[2] = WRAPLOW(step[1] - step[2], bd); |
| 1531 output[3] = WRAPLOW(step[0] - step[3], bd); | 1531 output[3] = WRAPLOW(step[0] - step[3], bd); |
| 1532 } | 1532 } |
| 1533 | 1533 |
| 1534 void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1534 void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1535 int stride, int bd) { | 1535 int stride, int bd) { |
| (...skipping 19 matching lines...) Expand all Loading... |
| 1555 dest[j * stride + i] = highbd_clip_pixel_add( | 1555 dest[j * stride + i] = highbd_clip_pixel_add( |
| 1556 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); | 1556 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); |
| 1557 } | 1557 } |
| 1558 } | 1558 } |
| 1559 } | 1559 } |
| 1560 | 1560 |
| 1561 void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, | 1561 void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1562 int dest_stride, int bd) { | 1562 int dest_stride, int bd) { |
| 1563 int i; | 1563 int i; |
| 1564 tran_high_t a1; | 1564 tran_high_t a1; |
| 1565 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); | 1565 tran_low_t out = WRAPLOW( |
| 1566 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
| 1566 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1567 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1567 | 1568 |
| 1568 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); | 1569 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
| 1569 a1 = ROUND_POWER_OF_TWO(out, 4); | 1570 a1 = ROUND_POWER_OF_TWO(out, 4); |
| 1570 | 1571 |
| 1571 for (i = 0; i < 4; i++) { | 1572 for (i = 0; i < 4; i++) { |
| 1572 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); | 1573 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); |
| 1573 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); | 1574 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); |
| 1574 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); | 1575 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); |
| 1575 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); | 1576 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); |
| 1576 dest += dest_stride; | 1577 dest += dest_stride; |
| 1577 } | 1578 } |
| 1578 } | 1579 } |
| 1579 | 1580 |
| 1580 void vp9_highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) { | 1581 void vp9_highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) { |
| 1581 tran_low_t step1[8], step2[8]; | 1582 tran_low_t step1[8], step2[8]; |
| 1582 tran_high_t temp1, temp2; | 1583 tran_high_t temp1, temp2; |
| 1583 // stage 1 | 1584 // stage 1 |
| 1584 step1[0] = input[0]; | 1585 step1[0] = input[0]; |
| 1585 step1[2] = input[4]; | 1586 step1[2] = input[4]; |
| 1586 step1[1] = input[2]; | 1587 step1[1] = input[2]; |
| 1587 step1[3] = input[6]; | 1588 step1[3] = input[6]; |
| 1588 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; | 1589 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; |
| 1589 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; | 1590 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; |
| 1590 step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1591 step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1591 step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1592 step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1592 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; | 1593 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; |
| 1593 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; | 1594 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; |
| 1594 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1595 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1595 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1596 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1596 | 1597 |
| 1597 // stage 2 & stage 3 - even half | 1598 // stage 2 & stage 3 - even half |
| 1598 vp9_highbd_idct4(step1, step1, bd); | 1599 vp9_highbd_idct4(step1, step1, bd); |
| 1599 | 1600 |
| 1600 // stage 2 - odd half | 1601 // stage 2 - odd half |
| 1601 step2[4] = WRAPLOW(step1[4] + step1[5], bd); | 1602 step2[4] = WRAPLOW(step1[4] + step1[5], bd); |
| 1602 step2[5] = WRAPLOW(step1[4] - step1[5], bd); | 1603 step2[5] = WRAPLOW(step1[4] - step1[5], bd); |
| 1603 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); | 1604 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); |
| 1604 step2[7] = WRAPLOW(step1[6] + step1[7], bd); | 1605 step2[7] = WRAPLOW(step1[6] + step1[7], bd); |
| 1605 | 1606 |
| 1606 // stage 3 - odd half | 1607 // stage 3 - odd half |
| 1607 step1[4] = step2[4]; | 1608 step1[4] = step2[4]; |
| 1608 temp1 = (step2[6] - step2[5]) * cospi_16_64; | 1609 temp1 = (step2[6] - step2[5]) * cospi_16_64; |
| 1609 temp2 = (step2[5] + step2[6]) * cospi_16_64; | 1610 temp2 = (step2[5] + step2[6]) * cospi_16_64; |
| 1610 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1611 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1611 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1612 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1612 step1[7] = step2[7]; | 1613 step1[7] = step2[7]; |
| 1613 | 1614 |
| 1614 // stage 4 | 1615 // stage 4 |
| 1615 output[0] = WRAPLOW(step1[0] + step1[7], bd); | 1616 output[0] = WRAPLOW(step1[0] + step1[7], bd); |
| 1616 output[1] = WRAPLOW(step1[1] + step1[6], bd); | 1617 output[1] = WRAPLOW(step1[1] + step1[6], bd); |
| 1617 output[2] = WRAPLOW(step1[2] + step1[5], bd); | 1618 output[2] = WRAPLOW(step1[2] + step1[5], bd); |
| 1618 output[3] = WRAPLOW(step1[3] + step1[4], bd); | 1619 output[3] = WRAPLOW(step1[3] + step1[4], bd); |
| 1619 output[4] = WRAPLOW(step1[3] - step1[4], bd); | 1620 output[4] = WRAPLOW(step1[3] - step1[4], bd); |
| 1620 output[5] = WRAPLOW(step1[2] - step1[5], bd); | 1621 output[5] = WRAPLOW(step1[2] - step1[5], bd); |
| 1621 output[6] = WRAPLOW(step1[1] - step1[6], bd); | 1622 output[6] = WRAPLOW(step1[1] - step1[6], bd); |
| (...skipping 24 matching lines...) Expand all Loading... |
| 1646 dest[j * stride + i] = highbd_clip_pixel_add( | 1647 dest[j * stride + i] = highbd_clip_pixel_add( |
| 1647 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); | 1648 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); |
| 1648 } | 1649 } |
| 1649 } | 1650 } |
| 1650 } | 1651 } |
| 1651 | 1652 |
| 1652 void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, | 1653 void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1653 int stride, int bd) { | 1654 int stride, int bd) { |
| 1654 int i, j; | 1655 int i, j; |
| 1655 tran_high_t a1; | 1656 tran_high_t a1; |
| 1656 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); | 1657 tran_low_t out = WRAPLOW( |
| 1658 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
| 1657 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 1659 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 1658 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); | 1660 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
| 1659 a1 = ROUND_POWER_OF_TWO(out, 5); | 1661 a1 = ROUND_POWER_OF_TWO(out, 5); |
| 1660 for (j = 0; j < 8; ++j) { | 1662 for (j = 0; j < 8; ++j) { |
| 1661 for (i = 0; i < 8; ++i) | 1663 for (i = 0; i < 8; ++i) |
| 1662 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 1664 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
| 1663 dest += stride; | 1665 dest += stride; |
| 1664 } | 1666 } |
| 1665 } | 1667 } |
| 1666 | 1668 |
| 1667 static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { | 1669 static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { |
| 1668 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; | 1670 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; |
| (...skipping 20 matching lines...) Expand all Loading... |
| 1689 | 1691 |
| 1690 s0 = s0 + s3 + s5; | 1692 s0 = s0 + s3 + s5; |
| 1691 s1 = s1 - s4 - s6; | 1693 s1 = s1 - s4 - s6; |
| 1692 s3 = s2; | 1694 s3 = s2; |
| 1693 s2 = sinpi_3_9 * s7; | 1695 s2 = sinpi_3_9 * s7; |
| 1694 | 1696 |
| 1695 // 1-D transform scaling factor is sqrt(2). | 1697 // 1-D transform scaling factor is sqrt(2). |
| 1696 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) | 1698 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) |
| 1697 // + 1b (addition) = 29b. | 1699 // + 1b (addition) = 29b. |
| 1698 // Hence the output bit depth is 15b. | 1700 // Hence the output bit depth is 15b. |
| 1699 output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), bd); | 1701 output[0] = WRAPLOW(highbd_dct_const_round_shift(s0 + s3, bd), bd); |
| 1700 output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), bd); | 1702 output[1] = WRAPLOW(highbd_dct_const_round_shift(s1 + s3, bd), bd); |
| 1701 output[2] = WRAPLOW(dct_const_round_shift(s2), bd); | 1703 output[2] = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); |
| 1702 output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), bd); | 1704 output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd); |
| 1703 } | 1705 } |
| 1704 | 1706 |
| 1705 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, | 1707 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, |
| 1706 int stride, int tx_type, int bd) { | 1708 int stride, int tx_type, int bd) { |
| 1707 const highbd_transform_2d IHT_4[] = { | 1709 const highbd_transform_2d IHT_4[] = { |
| 1708 { vp9_highbd_idct4, vp9_highbd_idct4 }, // DCT_DCT = 0 | 1710 { vp9_highbd_idct4, vp9_highbd_idct4 }, // DCT_DCT = 0 |
| 1709 { highbd_iadst4, vp9_highbd_idct4 }, // ADST_DCT = 1 | 1711 { highbd_iadst4, vp9_highbd_idct4 }, // ADST_DCT = 1 |
| 1710 { vp9_highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2 | 1712 { vp9_highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2 |
| 1711 { highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3 | 1713 { highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3 |
| 1712 }; | 1714 }; |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1757 // stage 1 | 1759 // stage 1 |
| 1758 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; | 1760 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; |
| 1759 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; | 1761 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; |
| 1760 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; | 1762 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; |
| 1761 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; | 1763 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; |
| 1762 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; | 1764 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; |
| 1763 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; | 1765 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; |
| 1764 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; | 1766 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; |
| 1765 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; | 1767 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; |
| 1766 | 1768 |
| 1767 x0 = WRAPLOW(dct_const_round_shift(s0 + s4), bd); | 1769 x0 = WRAPLOW(highbd_dct_const_round_shift(s0 + s4, bd), bd); |
| 1768 x1 = WRAPLOW(dct_const_round_shift(s1 + s5), bd); | 1770 x1 = WRAPLOW(highbd_dct_const_round_shift(s1 + s5, bd), bd); |
| 1769 x2 = WRAPLOW(dct_const_round_shift(s2 + s6), bd); | 1771 x2 = WRAPLOW(highbd_dct_const_round_shift(s2 + s6, bd), bd); |
| 1770 x3 = WRAPLOW(dct_const_round_shift(s3 + s7), bd); | 1772 x3 = WRAPLOW(highbd_dct_const_round_shift(s3 + s7, bd), bd); |
| 1771 x4 = WRAPLOW(dct_const_round_shift(s0 - s4), bd); | 1773 x4 = WRAPLOW(highbd_dct_const_round_shift(s0 - s4, bd), bd); |
| 1772 x5 = WRAPLOW(dct_const_round_shift(s1 - s5), bd); | 1774 x5 = WRAPLOW(highbd_dct_const_round_shift(s1 - s5, bd), bd); |
| 1773 x6 = WRAPLOW(dct_const_round_shift(s2 - s6), bd); | 1775 x6 = WRAPLOW(highbd_dct_const_round_shift(s2 - s6, bd), bd); |
| 1774 x7 = WRAPLOW(dct_const_round_shift(s3 - s7), bd); | 1776 x7 = WRAPLOW(highbd_dct_const_round_shift(s3 - s7, bd), bd); |
| 1775 | 1777 |
| 1776 // stage 2 | 1778 // stage 2 |
| 1777 s0 = x0; | 1779 s0 = x0; |
| 1778 s1 = x1; | 1780 s1 = x1; |
| 1779 s2 = x2; | 1781 s2 = x2; |
| 1780 s3 = x3; | 1782 s3 = x3; |
| 1781 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; | 1783 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; |
| 1782 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; | 1784 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; |
| 1783 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; | 1785 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; |
| 1784 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; | 1786 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; |
| 1785 | 1787 |
| 1786 x0 = WRAPLOW(s0 + s2, bd); | 1788 x0 = WRAPLOW(s0 + s2, bd); |
| 1787 x1 = WRAPLOW(s1 + s3, bd); | 1789 x1 = WRAPLOW(s1 + s3, bd); |
| 1788 x2 = WRAPLOW(s0 - s2, bd); | 1790 x2 = WRAPLOW(s0 - s2, bd); |
| 1789 x3 = WRAPLOW(s1 - s3, bd); | 1791 x3 = WRAPLOW(s1 - s3, bd); |
| 1790 x4 = WRAPLOW(dct_const_round_shift(s4 + s6), bd); | 1792 x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s6, bd), bd); |
| 1791 x5 = WRAPLOW(dct_const_round_shift(s5 + s7), bd); | 1793 x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s7, bd), bd); |
| 1792 x6 = WRAPLOW(dct_const_round_shift(s4 - s6), bd); | 1794 x6 = WRAPLOW(highbd_dct_const_round_shift(s4 - s6, bd), bd); |
| 1793 x7 = WRAPLOW(dct_const_round_shift(s5 - s7), bd); | 1795 x7 = WRAPLOW(highbd_dct_const_round_shift(s5 - s7, bd), bd); |
| 1794 | 1796 |
| 1795 // stage 3 | 1797 // stage 3 |
| 1796 s2 = cospi_16_64 * (x2 + x3); | 1798 s2 = cospi_16_64 * (x2 + x3); |
| 1797 s3 = cospi_16_64 * (x2 - x3); | 1799 s3 = cospi_16_64 * (x2 - x3); |
| 1798 s6 = cospi_16_64 * (x6 + x7); | 1800 s6 = cospi_16_64 * (x6 + x7); |
| 1799 s7 = cospi_16_64 * (x6 - x7); | 1801 s7 = cospi_16_64 * (x6 - x7); |
| 1800 | 1802 |
| 1801 x2 = WRAPLOW(dct_const_round_shift(s2), bd); | 1803 x2 = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); |
| 1802 x3 = WRAPLOW(dct_const_round_shift(s3), bd); | 1804 x3 = WRAPLOW(highbd_dct_const_round_shift(s3, bd), bd); |
| 1803 x6 = WRAPLOW(dct_const_round_shift(s6), bd); | 1805 x6 = WRAPLOW(highbd_dct_const_round_shift(s6, bd), bd); |
| 1804 x7 = WRAPLOW(dct_const_round_shift(s7), bd); | 1806 x7 = WRAPLOW(highbd_dct_const_round_shift(s7, bd), bd); |
| 1805 | 1807 |
| 1806 output[0] = WRAPLOW(x0, bd); | 1808 output[0] = WRAPLOW(x0, bd); |
| 1807 output[1] = WRAPLOW(-x4, bd); | 1809 output[1] = WRAPLOW(-x4, bd); |
| 1808 output[2] = WRAPLOW(x6, bd); | 1810 output[2] = WRAPLOW(x6, bd); |
| 1809 output[3] = WRAPLOW(-x2, bd); | 1811 output[3] = WRAPLOW(-x2, bd); |
| 1810 output[4] = WRAPLOW(x3, bd); | 1812 output[4] = WRAPLOW(x3, bd); |
| 1811 output[5] = WRAPLOW(-x7, bd); | 1813 output[5] = WRAPLOW(-x7, bd); |
| 1812 output[6] = WRAPLOW(x5, bd); | 1814 output[6] = WRAPLOW(x5, bd); |
| 1813 output[7] = WRAPLOW(-x1, bd); | 1815 output[7] = WRAPLOW(-x1, bd); |
| 1814 } | 1816 } |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1903 step2[1] = step1[1]; | 1905 step2[1] = step1[1]; |
| 1904 step2[2] = step1[2]; | 1906 step2[2] = step1[2]; |
| 1905 step2[3] = step1[3]; | 1907 step2[3] = step1[3]; |
| 1906 step2[4] = step1[4]; | 1908 step2[4] = step1[4]; |
| 1907 step2[5] = step1[5]; | 1909 step2[5] = step1[5]; |
| 1908 step2[6] = step1[6]; | 1910 step2[6] = step1[6]; |
| 1909 step2[7] = step1[7]; | 1911 step2[7] = step1[7]; |
| 1910 | 1912 |
| 1911 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; | 1913 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; |
| 1912 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; | 1914 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; |
| 1913 step2[8] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1915 step2[8] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1914 step2[15] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1916 step2[15] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1915 | 1917 |
| 1916 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; | 1918 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; |
| 1917 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; | 1919 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; |
| 1918 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1920 step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1919 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1921 step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1920 | 1922 |
| 1921 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; | 1923 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; |
| 1922 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; | 1924 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; |
| 1923 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1925 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1924 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1926 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1925 | 1927 |
| 1926 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; | 1928 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; |
| 1927 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; | 1929 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; |
| 1928 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1930 step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1929 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1931 step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1930 | 1932 |
| 1931 // stage 3 | 1933 // stage 3 |
| 1932 step1[0] = step2[0]; | 1934 step1[0] = step2[0]; |
| 1933 step1[1] = step2[1]; | 1935 step1[1] = step2[1]; |
| 1934 step1[2] = step2[2]; | 1936 step1[2] = step2[2]; |
| 1935 step1[3] = step2[3]; | 1937 step1[3] = step2[3]; |
| 1936 | 1938 |
| 1937 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; | 1939 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; |
| 1938 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; | 1940 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; |
| 1939 step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1941 step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1940 step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1942 step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1941 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; | 1943 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; |
| 1942 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; | 1944 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; |
| 1943 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1945 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1944 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1946 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1945 | 1947 |
| 1946 step1[8] = WRAPLOW(step2[8] + step2[9], bd); | 1948 step1[8] = WRAPLOW(step2[8] + step2[9], bd); |
| 1947 step1[9] = WRAPLOW(step2[8] - step2[9], bd); | 1949 step1[9] = WRAPLOW(step2[8] - step2[9], bd); |
| 1948 step1[10] = WRAPLOW(-step2[10] + step2[11], bd); | 1950 step1[10] = WRAPLOW(-step2[10] + step2[11], bd); |
| 1949 step1[11] = WRAPLOW(step2[10] + step2[11], bd); | 1951 step1[11] = WRAPLOW(step2[10] + step2[11], bd); |
| 1950 step1[12] = WRAPLOW(step2[12] + step2[13], bd); | 1952 step1[12] = WRAPLOW(step2[12] + step2[13], bd); |
| 1951 step1[13] = WRAPLOW(step2[12] - step2[13], bd); | 1953 step1[13] = WRAPLOW(step2[12] - step2[13], bd); |
| 1952 step1[14] = WRAPLOW(-step2[14] + step2[15], bd); | 1954 step1[14] = WRAPLOW(-step2[14] + step2[15], bd); |
| 1953 step1[15] = WRAPLOW(step2[14] + step2[15], bd); | 1955 step1[15] = WRAPLOW(step2[14] + step2[15], bd); |
| 1954 | 1956 |
| 1955 // stage 4 | 1957 // stage 4 |
| 1956 temp1 = (step1[0] + step1[1]) * cospi_16_64; | 1958 temp1 = (step1[0] + step1[1]) * cospi_16_64; |
| 1957 temp2 = (step1[0] - step1[1]) * cospi_16_64; | 1959 temp2 = (step1[0] - step1[1]) * cospi_16_64; |
| 1958 step2[0] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1960 step2[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1959 step2[1] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1961 step2[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1960 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; | 1962 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; |
| 1961 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; | 1963 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; |
| 1962 step2[2] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1964 step2[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1963 step2[3] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1965 step2[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1964 step2[4] = WRAPLOW(step1[4] + step1[5], bd); | 1966 step2[4] = WRAPLOW(step1[4] + step1[5], bd); |
| 1965 step2[5] = WRAPLOW(step1[4] - step1[5], bd); | 1967 step2[5] = WRAPLOW(step1[4] - step1[5], bd); |
| 1966 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); | 1968 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); |
| 1967 step2[7] = WRAPLOW(step1[6] + step1[7], bd); | 1969 step2[7] = WRAPLOW(step1[6] + step1[7], bd); |
| 1968 | 1970 |
| 1969 step2[8] = step1[8]; | 1971 step2[8] = step1[8]; |
| 1970 step2[15] = step1[15]; | 1972 step2[15] = step1[15]; |
| 1971 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; | 1973 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; |
| 1972 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; | 1974 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; |
| 1973 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1975 step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1974 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1976 step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1975 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; | 1977 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; |
| 1976 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; | 1978 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; |
| 1977 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1979 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1978 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1980 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1979 step2[11] = step1[11]; | 1981 step2[11] = step1[11]; |
| 1980 step2[12] = step1[12]; | 1982 step2[12] = step1[12]; |
| 1981 | 1983 |
| 1982 // stage 5 | 1984 // stage 5 |
| 1983 step1[0] = WRAPLOW(step2[0] + step2[3], bd); | 1985 step1[0] = WRAPLOW(step2[0] + step2[3], bd); |
| 1984 step1[1] = WRAPLOW(step2[1] + step2[2], bd); | 1986 step1[1] = WRAPLOW(step2[1] + step2[2], bd); |
| 1985 step1[2] = WRAPLOW(step2[1] - step2[2], bd); | 1987 step1[2] = WRAPLOW(step2[1] - step2[2], bd); |
| 1986 step1[3] = WRAPLOW(step2[0] - step2[3], bd); | 1988 step1[3] = WRAPLOW(step2[0] - step2[3], bd); |
| 1987 step1[4] = step2[4]; | 1989 step1[4] = step2[4]; |
| 1988 temp1 = (step2[6] - step2[5]) * cospi_16_64; | 1990 temp1 = (step2[6] - step2[5]) * cospi_16_64; |
| 1989 temp2 = (step2[5] + step2[6]) * cospi_16_64; | 1991 temp2 = (step2[5] + step2[6]) * cospi_16_64; |
| 1990 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); | 1992 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 1991 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); | 1993 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 1992 step1[7] = step2[7]; | 1994 step1[7] = step2[7]; |
| 1993 | 1995 |
| 1994 step1[8] = WRAPLOW(step2[8] + step2[11], bd); | 1996 step1[8] = WRAPLOW(step2[8] + step2[11], bd); |
| 1995 step1[9] = WRAPLOW(step2[9] + step2[10], bd); | 1997 step1[9] = WRAPLOW(step2[9] + step2[10], bd); |
| 1996 step1[10] = WRAPLOW(step2[9] - step2[10], bd); | 1998 step1[10] = WRAPLOW(step2[9] - step2[10], bd); |
| 1997 step1[11] = WRAPLOW(step2[8] - step2[11], bd); | 1999 step1[11] = WRAPLOW(step2[8] - step2[11], bd); |
| 1998 step1[12] = WRAPLOW(-step2[12] + step2[15], bd); | 2000 step1[12] = WRAPLOW(-step2[12] + step2[15], bd); |
| 1999 step1[13] = WRAPLOW(-step2[13] + step2[14], bd); | 2001 step1[13] = WRAPLOW(-step2[13] + step2[14], bd); |
| 2000 step1[14] = WRAPLOW(step2[13] + step2[14], bd); | 2002 step1[14] = WRAPLOW(step2[13] + step2[14], bd); |
| 2001 step1[15] = WRAPLOW(step2[12] + step2[15], bd); | 2003 step1[15] = WRAPLOW(step2[12] + step2[15], bd); |
| 2002 | 2004 |
| 2003 // stage 6 | 2005 // stage 6 |
| 2004 step2[0] = WRAPLOW(step1[0] + step1[7], bd); | 2006 step2[0] = WRAPLOW(step1[0] + step1[7], bd); |
| 2005 step2[1] = WRAPLOW(step1[1] + step1[6], bd); | 2007 step2[1] = WRAPLOW(step1[1] + step1[6], bd); |
| 2006 step2[2] = WRAPLOW(step1[2] + step1[5], bd); | 2008 step2[2] = WRAPLOW(step1[2] + step1[5], bd); |
| 2007 step2[3] = WRAPLOW(step1[3] + step1[4], bd); | 2009 step2[3] = WRAPLOW(step1[3] + step1[4], bd); |
| 2008 step2[4] = WRAPLOW(step1[3] - step1[4], bd); | 2010 step2[4] = WRAPLOW(step1[3] - step1[4], bd); |
| 2009 step2[5] = WRAPLOW(step1[2] - step1[5], bd); | 2011 step2[5] = WRAPLOW(step1[2] - step1[5], bd); |
| 2010 step2[6] = WRAPLOW(step1[1] - step1[6], bd); | 2012 step2[6] = WRAPLOW(step1[1] - step1[6], bd); |
| 2011 step2[7] = WRAPLOW(step1[0] - step1[7], bd); | 2013 step2[7] = WRAPLOW(step1[0] - step1[7], bd); |
| 2012 step2[8] = step1[8]; | 2014 step2[8] = step1[8]; |
| 2013 step2[9] = step1[9]; | 2015 step2[9] = step1[9]; |
| 2014 temp1 = (-step1[10] + step1[13]) * cospi_16_64; | 2016 temp1 = (-step1[10] + step1[13]) * cospi_16_64; |
| 2015 temp2 = (step1[10] + step1[13]) * cospi_16_64; | 2017 temp2 = (step1[10] + step1[13]) * cospi_16_64; |
| 2016 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2018 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2017 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2019 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2018 temp1 = (-step1[11] + step1[12]) * cospi_16_64; | 2020 temp1 = (-step1[11] + step1[12]) * cospi_16_64; |
| 2019 temp2 = (step1[11] + step1[12]) * cospi_16_64; | 2021 temp2 = (step1[11] + step1[12]) * cospi_16_64; |
| 2020 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2022 step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2021 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2023 step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2022 step2[14] = step1[14]; | 2024 step2[14] = step1[14]; |
| 2023 step2[15] = step1[15]; | 2025 step2[15] = step1[15]; |
| 2024 | 2026 |
| 2025 // stage 7 | 2027 // stage 7 |
| 2026 output[0] = WRAPLOW(step2[0] + step2[15], bd); | 2028 output[0] = WRAPLOW(step2[0] + step2[15], bd); |
| 2027 output[1] = WRAPLOW(step2[1] + step2[14], bd); | 2029 output[1] = WRAPLOW(step2[1] + step2[14], bd); |
| 2028 output[2] = WRAPLOW(step2[2] + step2[13], bd); | 2030 output[2] = WRAPLOW(step2[2] + step2[13], bd); |
| 2029 output[3] = WRAPLOW(step2[3] + step2[12], bd); | 2031 output[3] = WRAPLOW(step2[3] + step2[12], bd); |
| 2030 output[4] = WRAPLOW(step2[4] + step2[11], bd); | 2032 output[4] = WRAPLOW(step2[4] + step2[11], bd); |
| 2031 output[5] = WRAPLOW(step2[5] + step2[10], bd); | 2033 output[5] = WRAPLOW(step2[5] + step2[10], bd); |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2108 s7 = x6 * cospi_19_64 - x7 * cospi_13_64; | 2110 s7 = x6 * cospi_19_64 - x7 * cospi_13_64; |
| 2109 s8 = x8 * cospi_17_64 + x9 * cospi_15_64; | 2111 s8 = x8 * cospi_17_64 + x9 * cospi_15_64; |
| 2110 s9 = x8 * cospi_15_64 - x9 * cospi_17_64; | 2112 s9 = x8 * cospi_15_64 - x9 * cospi_17_64; |
| 2111 s10 = x10 * cospi_21_64 + x11 * cospi_11_64; | 2113 s10 = x10 * cospi_21_64 + x11 * cospi_11_64; |
| 2112 s11 = x10 * cospi_11_64 - x11 * cospi_21_64; | 2114 s11 = x10 * cospi_11_64 - x11 * cospi_21_64; |
| 2113 s12 = x12 * cospi_25_64 + x13 * cospi_7_64; | 2115 s12 = x12 * cospi_25_64 + x13 * cospi_7_64; |
| 2114 s13 = x12 * cospi_7_64 - x13 * cospi_25_64; | 2116 s13 = x12 * cospi_7_64 - x13 * cospi_25_64; |
| 2115 s14 = x14 * cospi_29_64 + x15 * cospi_3_64; | 2117 s14 = x14 * cospi_29_64 + x15 * cospi_3_64; |
| 2116 s15 = x14 * cospi_3_64 - x15 * cospi_29_64; | 2118 s15 = x14 * cospi_3_64 - x15 * cospi_29_64; |
| 2117 | 2119 |
| 2118 x0 = WRAPLOW(dct_const_round_shift(s0 + s8), bd); | 2120 x0 = WRAPLOW(highbd_dct_const_round_shift(s0 + s8, bd), bd); |
| 2119 x1 = WRAPLOW(dct_const_round_shift(s1 + s9), bd); | 2121 x1 = WRAPLOW(highbd_dct_const_round_shift(s1 + s9, bd), bd); |
| 2120 x2 = WRAPLOW(dct_const_round_shift(s2 + s10), bd); | 2122 x2 = WRAPLOW(highbd_dct_const_round_shift(s2 + s10, bd), bd); |
| 2121 x3 = WRAPLOW(dct_const_round_shift(s3 + s11), bd); | 2123 x3 = WRAPLOW(highbd_dct_const_round_shift(s3 + s11, bd), bd); |
| 2122 x4 = WRAPLOW(dct_const_round_shift(s4 + s12), bd); | 2124 x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s12, bd), bd); |
| 2123 x5 = WRAPLOW(dct_const_round_shift(s5 + s13), bd); | 2125 x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s13, bd), bd); |
| 2124 x6 = WRAPLOW(dct_const_round_shift(s6 + s14), bd); | 2126 x6 = WRAPLOW(highbd_dct_const_round_shift(s6 + s14, bd), bd); |
| 2125 x7 = WRAPLOW(dct_const_round_shift(s7 + s15), bd); | 2127 x7 = WRAPLOW(highbd_dct_const_round_shift(s7 + s15, bd), bd); |
| 2126 x8 = WRAPLOW(dct_const_round_shift(s0 - s8), bd); | 2128 x8 = WRAPLOW(highbd_dct_const_round_shift(s0 - s8, bd), bd); |
| 2127 x9 = WRAPLOW(dct_const_round_shift(s1 - s9), bd); | 2129 x9 = WRAPLOW(highbd_dct_const_round_shift(s1 - s9, bd), bd); |
| 2128 x10 = WRAPLOW(dct_const_round_shift(s2 - s10), bd); | 2130 x10 = WRAPLOW(highbd_dct_const_round_shift(s2 - s10, bd), bd); |
| 2129 x11 = WRAPLOW(dct_const_round_shift(s3 - s11), bd); | 2131 x11 = WRAPLOW(highbd_dct_const_round_shift(s3 - s11, bd), bd); |
| 2130 x12 = WRAPLOW(dct_const_round_shift(s4 - s12), bd); | 2132 x12 = WRAPLOW(highbd_dct_const_round_shift(s4 - s12, bd), bd); |
| 2131 x13 = WRAPLOW(dct_const_round_shift(s5 - s13), bd); | 2133 x13 = WRAPLOW(highbd_dct_const_round_shift(s5 - s13, bd), bd); |
| 2132 x14 = WRAPLOW(dct_const_round_shift(s6 - s14), bd); | 2134 x14 = WRAPLOW(highbd_dct_const_round_shift(s6 - s14, bd), bd); |
| 2133 x15 = WRAPLOW(dct_const_round_shift(s7 - s15), bd); | 2135 x15 = WRAPLOW(highbd_dct_const_round_shift(s7 - s15, bd), bd); |
| 2134 | 2136 |
| 2135 // stage 2 | 2137 // stage 2 |
| 2136 s0 = x0; | 2138 s0 = x0; |
| 2137 s1 = x1; | 2139 s1 = x1; |
| 2138 s2 = x2; | 2140 s2 = x2; |
| 2139 s3 = x3; | 2141 s3 = x3; |
| 2140 s4 = x4; | 2142 s4 = x4; |
| 2141 s5 = x5; | 2143 s5 = x5; |
| 2142 s6 = x6; | 2144 s6 = x6; |
| 2143 s7 = x7; | 2145 s7 = x7; |
| 2144 s8 = x8 * cospi_4_64 + x9 * cospi_28_64; | 2146 s8 = x8 * cospi_4_64 + x9 * cospi_28_64; |
| 2145 s9 = x8 * cospi_28_64 - x9 * cospi_4_64; | 2147 s9 = x8 * cospi_28_64 - x9 * cospi_4_64; |
| 2146 s10 = x10 * cospi_20_64 + x11 * cospi_12_64; | 2148 s10 = x10 * cospi_20_64 + x11 * cospi_12_64; |
| 2147 s11 = x10 * cospi_12_64 - x11 * cospi_20_64; | 2149 s11 = x10 * cospi_12_64 - x11 * cospi_20_64; |
| 2148 s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; | 2150 s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; |
| 2149 s13 = x12 * cospi_4_64 + x13 * cospi_28_64; | 2151 s13 = x12 * cospi_4_64 + x13 * cospi_28_64; |
| 2150 s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; | 2152 s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; |
| 2151 s15 = x14 * cospi_20_64 + x15 * cospi_12_64; | 2153 s15 = x14 * cospi_20_64 + x15 * cospi_12_64; |
| 2152 | 2154 |
| 2153 x0 = WRAPLOW(s0 + s4, bd); | 2155 x0 = WRAPLOW(s0 + s4, bd); |
| 2154 x1 = WRAPLOW(s1 + s5, bd); | 2156 x1 = WRAPLOW(s1 + s5, bd); |
| 2155 x2 = WRAPLOW(s2 + s6, bd); | 2157 x2 = WRAPLOW(s2 + s6, bd); |
| 2156 x3 = WRAPLOW(s3 + s7, bd); | 2158 x3 = WRAPLOW(s3 + s7, bd); |
| 2157 x4 = WRAPLOW(s0 - s4, bd); | 2159 x4 = WRAPLOW(s0 - s4, bd); |
| 2158 x5 = WRAPLOW(s1 - s5, bd); | 2160 x5 = WRAPLOW(s1 - s5, bd); |
| 2159 x6 = WRAPLOW(s2 - s6, bd); | 2161 x6 = WRAPLOW(s2 - s6, bd); |
| 2160 x7 = WRAPLOW(s3 - s7, bd); | 2162 x7 = WRAPLOW(s3 - s7, bd); |
| 2161 x8 = WRAPLOW(dct_const_round_shift(s8 + s12), bd); | 2163 x8 = WRAPLOW(highbd_dct_const_round_shift(s8 + s12, bd), bd); |
| 2162 x9 = WRAPLOW(dct_const_round_shift(s9 + s13), bd); | 2164 x9 = WRAPLOW(highbd_dct_const_round_shift(s9 + s13, bd), bd); |
| 2163 x10 = WRAPLOW(dct_const_round_shift(s10 + s14), bd); | 2165 x10 = WRAPLOW(highbd_dct_const_round_shift(s10 + s14, bd), bd); |
| 2164 x11 = WRAPLOW(dct_const_round_shift(s11 + s15), bd); | 2166 x11 = WRAPLOW(highbd_dct_const_round_shift(s11 + s15, bd), bd); |
| 2165 x12 = WRAPLOW(dct_const_round_shift(s8 - s12), bd); | 2167 x12 = WRAPLOW(highbd_dct_const_round_shift(s8 - s12, bd), bd); |
| 2166 x13 = WRAPLOW(dct_const_round_shift(s9 - s13), bd); | 2168 x13 = WRAPLOW(highbd_dct_const_round_shift(s9 - s13, bd), bd); |
| 2167 x14 = WRAPLOW(dct_const_round_shift(s10 - s14), bd); | 2169 x14 = WRAPLOW(highbd_dct_const_round_shift(s10 - s14, bd), bd); |
| 2168 x15 = WRAPLOW(dct_const_round_shift(s11 - s15), bd); | 2170 x15 = WRAPLOW(highbd_dct_const_round_shift(s11 - s15, bd), bd); |
| 2169 | 2171 |
| 2170 // stage 3 | 2172 // stage 3 |
| 2171 s0 = x0; | 2173 s0 = x0; |
| 2172 s1 = x1; | 2174 s1 = x1; |
| 2173 s2 = x2; | 2175 s2 = x2; |
| 2174 s3 = x3; | 2176 s3 = x3; |
| 2175 s4 = x4 * cospi_8_64 + x5 * cospi_24_64; | 2177 s4 = x4 * cospi_8_64 + x5 * cospi_24_64; |
| 2176 s5 = x4 * cospi_24_64 - x5 * cospi_8_64; | 2178 s5 = x4 * cospi_24_64 - x5 * cospi_8_64; |
| 2177 s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; | 2179 s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; |
| 2178 s7 = x6 * cospi_8_64 + x7 * cospi_24_64; | 2180 s7 = x6 * cospi_8_64 + x7 * cospi_24_64; |
| 2179 s8 = x8; | 2181 s8 = x8; |
| 2180 s9 = x9; | 2182 s9 = x9; |
| 2181 s10 = x10; | 2183 s10 = x10; |
| 2182 s11 = x11; | 2184 s11 = x11; |
| 2183 s12 = x12 * cospi_8_64 + x13 * cospi_24_64; | 2185 s12 = x12 * cospi_8_64 + x13 * cospi_24_64; |
| 2184 s13 = x12 * cospi_24_64 - x13 * cospi_8_64; | 2186 s13 = x12 * cospi_24_64 - x13 * cospi_8_64; |
| 2185 s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; | 2187 s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; |
| 2186 s15 = x14 * cospi_8_64 + x15 * cospi_24_64; | 2188 s15 = x14 * cospi_8_64 + x15 * cospi_24_64; |
| 2187 | 2189 |
| 2188 x0 = WRAPLOW(s0 + s2, bd); | 2190 x0 = WRAPLOW(s0 + s2, bd); |
| 2189 x1 = WRAPLOW(s1 + s3, bd); | 2191 x1 = WRAPLOW(s1 + s3, bd); |
| 2190 x2 = WRAPLOW(s0 - s2, bd); | 2192 x2 = WRAPLOW(s0 - s2, bd); |
| 2191 x3 = WRAPLOW(s1 - s3, bd); | 2193 x3 = WRAPLOW(s1 - s3, bd); |
| 2192 x4 = WRAPLOW(dct_const_round_shift(s4 + s6), bd); | 2194 x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s6, bd), bd); |
| 2193 x5 = WRAPLOW(dct_const_round_shift(s5 + s7), bd); | 2195 x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s7, bd), bd); |
| 2194 x6 = WRAPLOW(dct_const_round_shift(s4 - s6), bd); | 2196 x6 = WRAPLOW(highbd_dct_const_round_shift(s4 - s6, bd), bd); |
| 2195 x7 = WRAPLOW(dct_const_round_shift(s5 - s7), bd); | 2197 x7 = WRAPLOW(highbd_dct_const_round_shift(s5 - s7, bd), bd); |
| 2196 x8 = WRAPLOW(s8 + s10, bd); | 2198 x8 = WRAPLOW(s8 + s10, bd); |
| 2197 x9 = WRAPLOW(s9 + s11, bd); | 2199 x9 = WRAPLOW(s9 + s11, bd); |
| 2198 x10 = WRAPLOW(s8 - s10, bd); | 2200 x10 = WRAPLOW(s8 - s10, bd); |
| 2199 x11 = WRAPLOW(s9 - s11, bd); | 2201 x11 = WRAPLOW(s9 - s11, bd); |
| 2200 x12 = WRAPLOW(dct_const_round_shift(s12 + s14), bd); | 2202 x12 = WRAPLOW(highbd_dct_const_round_shift(s12 + s14, bd), bd); |
| 2201 x13 = WRAPLOW(dct_const_round_shift(s13 + s15), bd); | 2203 x13 = WRAPLOW(highbd_dct_const_round_shift(s13 + s15, bd), bd); |
| 2202 x14 = WRAPLOW(dct_const_round_shift(s12 - s14), bd); | 2204 x14 = WRAPLOW(highbd_dct_const_round_shift(s12 - s14, bd), bd); |
| 2203 x15 = WRAPLOW(dct_const_round_shift(s13 - s15), bd); | 2205 x15 = WRAPLOW(highbd_dct_const_round_shift(s13 - s15, bd), bd); |
| 2204 | 2206 |
| 2205 // stage 4 | 2207 // stage 4 |
| 2206 s2 = (- cospi_16_64) * (x2 + x3); | 2208 s2 = (- cospi_16_64) * (x2 + x3); |
| 2207 s3 = cospi_16_64 * (x2 - x3); | 2209 s3 = cospi_16_64 * (x2 - x3); |
| 2208 s6 = cospi_16_64 * (x6 + x7); | 2210 s6 = cospi_16_64 * (x6 + x7); |
| 2209 s7 = cospi_16_64 * (-x6 + x7); | 2211 s7 = cospi_16_64 * (-x6 + x7); |
| 2210 s10 = cospi_16_64 * (x10 + x11); | 2212 s10 = cospi_16_64 * (x10 + x11); |
| 2211 s11 = cospi_16_64 * (-x10 + x11); | 2213 s11 = cospi_16_64 * (-x10 + x11); |
| 2212 s14 = (- cospi_16_64) * (x14 + x15); | 2214 s14 = (- cospi_16_64) * (x14 + x15); |
| 2213 s15 = cospi_16_64 * (x14 - x15); | 2215 s15 = cospi_16_64 * (x14 - x15); |
| 2214 | 2216 |
| 2215 x2 = WRAPLOW(dct_const_round_shift(s2), bd); | 2217 x2 = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd); |
| 2216 x3 = WRAPLOW(dct_const_round_shift(s3), bd); | 2218 x3 = WRAPLOW(highbd_dct_const_round_shift(s3, bd), bd); |
| 2217 x6 = WRAPLOW(dct_const_round_shift(s6), bd); | 2219 x6 = WRAPLOW(highbd_dct_const_round_shift(s6, bd), bd); |
| 2218 x7 = WRAPLOW(dct_const_round_shift(s7), bd); | 2220 x7 = WRAPLOW(highbd_dct_const_round_shift(s7, bd), bd); |
| 2219 x10 = WRAPLOW(dct_const_round_shift(s10), bd); | 2221 x10 = WRAPLOW(highbd_dct_const_round_shift(s10, bd), bd); |
| 2220 x11 = WRAPLOW(dct_const_round_shift(s11), bd); | 2222 x11 = WRAPLOW(highbd_dct_const_round_shift(s11, bd), bd); |
| 2221 x14 = WRAPLOW(dct_const_round_shift(s14), bd); | 2223 x14 = WRAPLOW(highbd_dct_const_round_shift(s14, bd), bd); |
| 2222 x15 = WRAPLOW(dct_const_round_shift(s15), bd); | 2224 x15 = WRAPLOW(highbd_dct_const_round_shift(s15, bd), bd); |
| 2223 | 2225 |
| 2224 output[0] = WRAPLOW(x0, bd); | 2226 output[0] = WRAPLOW(x0, bd); |
| 2225 output[1] = WRAPLOW(-x8, bd); | 2227 output[1] = WRAPLOW(-x8, bd); |
| 2226 output[2] = WRAPLOW(x12, bd); | 2228 output[2] = WRAPLOW(x12, bd); |
| 2227 output[3] = WRAPLOW(-x4, bd); | 2229 output[3] = WRAPLOW(-x4, bd); |
| 2228 output[4] = WRAPLOW(x6, bd); | 2230 output[4] = WRAPLOW(x6, bd); |
| 2229 output[5] = WRAPLOW(x14, bd); | 2231 output[5] = WRAPLOW(x14, bd); |
| 2230 output[6] = WRAPLOW(x10, bd); | 2232 output[6] = WRAPLOW(x10, bd); |
| 2231 output[7] = WRAPLOW(x2, bd); | 2233 output[7] = WRAPLOW(x2, bd); |
| 2232 output[8] = WRAPLOW(x3, bd); | 2234 output[8] = WRAPLOW(x3, bd); |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2299 dest[j * stride + i] = highbd_clip_pixel_add( | 2301 dest[j * stride + i] = highbd_clip_pixel_add( |
| 2300 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); | 2302 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); |
| 2301 } | 2303 } |
| 2302 } | 2304 } |
| 2303 } | 2305 } |
| 2304 | 2306 |
| 2305 void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, | 2307 void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, |
| 2306 int stride, int bd) { | 2308 int stride, int bd) { |
| 2307 int i, j; | 2309 int i, j; |
| 2308 tran_high_t a1; | 2310 tran_high_t a1; |
| 2309 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); | 2311 tran_low_t out = WRAPLOW( |
| 2312 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
| 2310 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2313 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 2311 | 2314 |
| 2312 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); | 2315 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
| 2313 a1 = ROUND_POWER_OF_TWO(out, 6); | 2316 a1 = ROUND_POWER_OF_TWO(out, 6); |
| 2314 for (j = 0; j < 16; ++j) { | 2317 for (j = 0; j < 16; ++j) { |
| 2315 for (i = 0; i < 16; ++i) | 2318 for (i = 0; i < 16; ++i) |
| 2316 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 2319 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
| 2317 dest += stride; | 2320 dest += stride; |
| 2318 } | 2321 } |
| 2319 } | 2322 } |
| 2320 | 2323 |
| 2321 static void highbd_idct32(const tran_low_t *input, tran_low_t *output, int bd) { | 2324 static void highbd_idct32(const tran_low_t *input, tran_low_t *output, int bd) { |
| 2322 tran_low_t step1[32], step2[32]; | 2325 tran_low_t step1[32], step2[32]; |
| (...skipping 13 matching lines...) Expand all Loading... |
| 2336 step1[9] = input[18]; | 2339 step1[9] = input[18]; |
| 2337 step1[10] = input[10]; | 2340 step1[10] = input[10]; |
| 2338 step1[11] = input[26]; | 2341 step1[11] = input[26]; |
| 2339 step1[12] = input[6]; | 2342 step1[12] = input[6]; |
| 2340 step1[13] = input[22]; | 2343 step1[13] = input[22]; |
| 2341 step1[14] = input[14]; | 2344 step1[14] = input[14]; |
| 2342 step1[15] = input[30]; | 2345 step1[15] = input[30]; |
| 2343 | 2346 |
| 2344 temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; | 2347 temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; |
| 2345 temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; | 2348 temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; |
| 2346 step1[16] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2349 step1[16] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2347 step1[31] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2350 step1[31] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2348 | 2351 |
| 2349 temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; | 2352 temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; |
| 2350 temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; | 2353 temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; |
| 2351 step1[17] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2354 step1[17] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2352 step1[30] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2355 step1[30] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2353 | 2356 |
| 2354 temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; | 2357 temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; |
| 2355 temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; | 2358 temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; |
| 2356 step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2359 step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2357 step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2360 step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2358 | 2361 |
| 2359 temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; | 2362 temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; |
| 2360 temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; | 2363 temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; |
| 2361 step1[19] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2364 step1[19] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2362 step1[28] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2365 step1[28] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2363 | 2366 |
| 2364 temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; | 2367 temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; |
| 2365 temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; | 2368 temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; |
| 2366 step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2369 step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2367 step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2370 step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2368 | 2371 |
| 2369 temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; | 2372 temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; |
| 2370 temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; | 2373 temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; |
| 2371 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2374 step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2372 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2375 step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2373 | 2376 |
| 2374 temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; | 2377 temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; |
| 2375 temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; | 2378 temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; |
| 2376 step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2379 step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2377 step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2380 step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2378 | 2381 |
| 2379 temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; | 2382 temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; |
| 2380 temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; | 2383 temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; |
| 2381 step1[23] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2384 step1[23] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2382 step1[24] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2385 step1[24] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2383 | 2386 |
| 2384 // stage 2 | 2387 // stage 2 |
| 2385 step2[0] = step1[0]; | 2388 step2[0] = step1[0]; |
| 2386 step2[1] = step1[1]; | 2389 step2[1] = step1[1]; |
| 2387 step2[2] = step1[2]; | 2390 step2[2] = step1[2]; |
| 2388 step2[3] = step1[3]; | 2391 step2[3] = step1[3]; |
| 2389 step2[4] = step1[4]; | 2392 step2[4] = step1[4]; |
| 2390 step2[5] = step1[5]; | 2393 step2[5] = step1[5]; |
| 2391 step2[6] = step1[6]; | 2394 step2[6] = step1[6]; |
| 2392 step2[7] = step1[7]; | 2395 step2[7] = step1[7]; |
| 2393 | 2396 |
| 2394 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; | 2397 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; |
| 2395 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; | 2398 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; |
| 2396 step2[8] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2399 step2[8] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2397 step2[15] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2400 step2[15] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2398 | 2401 |
| 2399 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; | 2402 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; |
| 2400 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; | 2403 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; |
| 2401 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2404 step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2402 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2405 step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2403 | 2406 |
| 2404 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; | 2407 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; |
| 2405 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; | 2408 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; |
| 2406 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2409 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2407 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2410 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2408 | 2411 |
| 2409 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; | 2412 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; |
| 2410 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; | 2413 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; |
| 2411 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2414 step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2412 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2415 step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2413 | 2416 |
| 2414 step2[16] = WRAPLOW(step1[16] + step1[17], bd); | 2417 step2[16] = WRAPLOW(step1[16] + step1[17], bd); |
| 2415 step2[17] = WRAPLOW(step1[16] - step1[17], bd); | 2418 step2[17] = WRAPLOW(step1[16] - step1[17], bd); |
| 2416 step2[18] = WRAPLOW(-step1[18] + step1[19], bd); | 2419 step2[18] = WRAPLOW(-step1[18] + step1[19], bd); |
| 2417 step2[19] = WRAPLOW(step1[18] + step1[19], bd); | 2420 step2[19] = WRAPLOW(step1[18] + step1[19], bd); |
| 2418 step2[20] = WRAPLOW(step1[20] + step1[21], bd); | 2421 step2[20] = WRAPLOW(step1[20] + step1[21], bd); |
| 2419 step2[21] = WRAPLOW(step1[20] - step1[21], bd); | 2422 step2[21] = WRAPLOW(step1[20] - step1[21], bd); |
| 2420 step2[22] = WRAPLOW(-step1[22] + step1[23], bd); | 2423 step2[22] = WRAPLOW(-step1[22] + step1[23], bd); |
| 2421 step2[23] = WRAPLOW(step1[22] + step1[23], bd); | 2424 step2[23] = WRAPLOW(step1[22] + step1[23], bd); |
| 2422 step2[24] = WRAPLOW(step1[24] + step1[25], bd); | 2425 step2[24] = WRAPLOW(step1[24] + step1[25], bd); |
| 2423 step2[25] = WRAPLOW(step1[24] - step1[25], bd); | 2426 step2[25] = WRAPLOW(step1[24] - step1[25], bd); |
| 2424 step2[26] = WRAPLOW(-step1[26] + step1[27], bd); | 2427 step2[26] = WRAPLOW(-step1[26] + step1[27], bd); |
| 2425 step2[27] = WRAPLOW(step1[26] + step1[27], bd); | 2428 step2[27] = WRAPLOW(step1[26] + step1[27], bd); |
| 2426 step2[28] = WRAPLOW(step1[28] + step1[29], bd); | 2429 step2[28] = WRAPLOW(step1[28] + step1[29], bd); |
| 2427 step2[29] = WRAPLOW(step1[28] - step1[29], bd); | 2430 step2[29] = WRAPLOW(step1[28] - step1[29], bd); |
| 2428 step2[30] = WRAPLOW(-step1[30] + step1[31], bd); | 2431 step2[30] = WRAPLOW(-step1[30] + step1[31], bd); |
| 2429 step2[31] = WRAPLOW(step1[30] + step1[31], bd); | 2432 step2[31] = WRAPLOW(step1[30] + step1[31], bd); |
| 2430 | 2433 |
| 2431 // stage 3 | 2434 // stage 3 |
| 2432 step1[0] = step2[0]; | 2435 step1[0] = step2[0]; |
| 2433 step1[1] = step2[1]; | 2436 step1[1] = step2[1]; |
| 2434 step1[2] = step2[2]; | 2437 step1[2] = step2[2]; |
| 2435 step1[3] = step2[3]; | 2438 step1[3] = step2[3]; |
| 2436 | 2439 |
| 2437 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; | 2440 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; |
| 2438 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; | 2441 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; |
| 2439 step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2442 step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2440 step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2443 step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2441 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; | 2444 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; |
| 2442 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; | 2445 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; |
| 2443 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2446 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2444 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2447 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2445 | 2448 |
| 2446 step1[8] = WRAPLOW(step2[8] + step2[9], bd); | 2449 step1[8] = WRAPLOW(step2[8] + step2[9], bd); |
| 2447 step1[9] = WRAPLOW(step2[8] - step2[9], bd); | 2450 step1[9] = WRAPLOW(step2[8] - step2[9], bd); |
| 2448 step1[10] = WRAPLOW(-step2[10] + step2[11], bd); | 2451 step1[10] = WRAPLOW(-step2[10] + step2[11], bd); |
| 2449 step1[11] = WRAPLOW(step2[10] + step2[11], bd); | 2452 step1[11] = WRAPLOW(step2[10] + step2[11], bd); |
| 2450 step1[12] = WRAPLOW(step2[12] + step2[13], bd); | 2453 step1[12] = WRAPLOW(step2[12] + step2[13], bd); |
| 2451 step1[13] = WRAPLOW(step2[12] - step2[13], bd); | 2454 step1[13] = WRAPLOW(step2[12] - step2[13], bd); |
| 2452 step1[14] = WRAPLOW(-step2[14] + step2[15], bd); | 2455 step1[14] = WRAPLOW(-step2[14] + step2[15], bd); |
| 2453 step1[15] = WRAPLOW(step2[14] + step2[15], bd); | 2456 step1[15] = WRAPLOW(step2[14] + step2[15], bd); |
| 2454 | 2457 |
| 2455 step1[16] = step2[16]; | 2458 step1[16] = step2[16]; |
| 2456 step1[31] = step2[31]; | 2459 step1[31] = step2[31]; |
| 2457 temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; | 2460 temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; |
| 2458 temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; | 2461 temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; |
| 2459 step1[17] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2462 step1[17] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2460 step1[30] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2463 step1[30] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2461 temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; | 2464 temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; |
| 2462 temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; | 2465 temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; |
| 2463 step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2466 step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2464 step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2467 step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2465 step1[19] = step2[19]; | 2468 step1[19] = step2[19]; |
| 2466 step1[20] = step2[20]; | 2469 step1[20] = step2[20]; |
| 2467 temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; | 2470 temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; |
| 2468 temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; | 2471 temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; |
| 2469 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2472 step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2470 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2473 step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2471 temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; | 2474 temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; |
| 2472 temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; | 2475 temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; |
| 2473 step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2476 step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2474 step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2477 step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2475 step1[23] = step2[23]; | 2478 step1[23] = step2[23]; |
| 2476 step1[24] = step2[24]; | 2479 step1[24] = step2[24]; |
| 2477 step1[27] = step2[27]; | 2480 step1[27] = step2[27]; |
| 2478 step1[28] = step2[28]; | 2481 step1[28] = step2[28]; |
| 2479 | 2482 |
| 2480 // stage 4 | 2483 // stage 4 |
| 2481 temp1 = (step1[0] + step1[1]) * cospi_16_64; | 2484 temp1 = (step1[0] + step1[1]) * cospi_16_64; |
| 2482 temp2 = (step1[0] - step1[1]) * cospi_16_64; | 2485 temp2 = (step1[0] - step1[1]) * cospi_16_64; |
| 2483 step2[0] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2486 step2[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2484 step2[1] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2487 step2[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2485 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; | 2488 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; |
| 2486 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; | 2489 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; |
| 2487 step2[2] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2490 step2[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2488 step2[3] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2491 step2[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2489 step2[4] = WRAPLOW(step1[4] + step1[5], bd); | 2492 step2[4] = WRAPLOW(step1[4] + step1[5], bd); |
| 2490 step2[5] = WRAPLOW(step1[4] - step1[5], bd); | 2493 step2[5] = WRAPLOW(step1[4] - step1[5], bd); |
| 2491 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); | 2494 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); |
| 2492 step2[7] = WRAPLOW(step1[6] + step1[7], bd); | 2495 step2[7] = WRAPLOW(step1[6] + step1[7], bd); |
| 2493 | 2496 |
| 2494 step2[8] = step1[8]; | 2497 step2[8] = step1[8]; |
| 2495 step2[15] = step1[15]; | 2498 step2[15] = step1[15]; |
| 2496 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; | 2499 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; |
| 2497 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; | 2500 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; |
| 2498 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2501 step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2499 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2502 step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2500 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; | 2503 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; |
| 2501 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; | 2504 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; |
| 2502 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2505 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2503 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2506 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2504 step2[11] = step1[11]; | 2507 step2[11] = step1[11]; |
| 2505 step2[12] = step1[12]; | 2508 step2[12] = step1[12]; |
| 2506 | 2509 |
| 2507 step2[16] = WRAPLOW(step1[16] + step1[19], bd); | 2510 step2[16] = WRAPLOW(step1[16] + step1[19], bd); |
| 2508 step2[17] = WRAPLOW(step1[17] + step1[18], bd); | 2511 step2[17] = WRAPLOW(step1[17] + step1[18], bd); |
| 2509 step2[18] = WRAPLOW(step1[17] - step1[18], bd); | 2512 step2[18] = WRAPLOW(step1[17] - step1[18], bd); |
| 2510 step2[19] = WRAPLOW(step1[16] - step1[19], bd); | 2513 step2[19] = WRAPLOW(step1[16] - step1[19], bd); |
| 2511 step2[20] = WRAPLOW(-step1[20] + step1[23], bd); | 2514 step2[20] = WRAPLOW(-step1[20] + step1[23], bd); |
| 2512 step2[21] = WRAPLOW(-step1[21] + step1[22], bd); | 2515 step2[21] = WRAPLOW(-step1[21] + step1[22], bd); |
| 2513 step2[22] = WRAPLOW(step1[21] + step1[22], bd); | 2516 step2[22] = WRAPLOW(step1[21] + step1[22], bd); |
| 2514 step2[23] = WRAPLOW(step1[20] + step1[23], bd); | 2517 step2[23] = WRAPLOW(step1[20] + step1[23], bd); |
| 2515 | 2518 |
| 2516 step2[24] = WRAPLOW(step1[24] + step1[27], bd); | 2519 step2[24] = WRAPLOW(step1[24] + step1[27], bd); |
| 2517 step2[25] = WRAPLOW(step1[25] + step1[26], bd); | 2520 step2[25] = WRAPLOW(step1[25] + step1[26], bd); |
| 2518 step2[26] = WRAPLOW(step1[25] - step1[26], bd); | 2521 step2[26] = WRAPLOW(step1[25] - step1[26], bd); |
| 2519 step2[27] = WRAPLOW(step1[24] - step1[27], bd); | 2522 step2[27] = WRAPLOW(step1[24] - step1[27], bd); |
| 2520 step2[28] = WRAPLOW(-step1[28] + step1[31], bd); | 2523 step2[28] = WRAPLOW(-step1[28] + step1[31], bd); |
| 2521 step2[29] = WRAPLOW(-step1[29] + step1[30], bd); | 2524 step2[29] = WRAPLOW(-step1[29] + step1[30], bd); |
| 2522 step2[30] = WRAPLOW(step1[29] + step1[30], bd); | 2525 step2[30] = WRAPLOW(step1[29] + step1[30], bd); |
| 2523 step2[31] = WRAPLOW(step1[28] + step1[31], bd); | 2526 step2[31] = WRAPLOW(step1[28] + step1[31], bd); |
| 2524 | 2527 |
| 2525 // stage 5 | 2528 // stage 5 |
| 2526 step1[0] = WRAPLOW(step2[0] + step2[3], bd); | 2529 step1[0] = WRAPLOW(step2[0] + step2[3], bd); |
| 2527 step1[1] = WRAPLOW(step2[1] + step2[2], bd); | 2530 step1[1] = WRAPLOW(step2[1] + step2[2], bd); |
| 2528 step1[2] = WRAPLOW(step2[1] - step2[2], bd); | 2531 step1[2] = WRAPLOW(step2[1] - step2[2], bd); |
| 2529 step1[3] = WRAPLOW(step2[0] - step2[3], bd); | 2532 step1[3] = WRAPLOW(step2[0] - step2[3], bd); |
| 2530 step1[4] = step2[4]; | 2533 step1[4] = step2[4]; |
| 2531 temp1 = (step2[6] - step2[5]) * cospi_16_64; | 2534 temp1 = (step2[6] - step2[5]) * cospi_16_64; |
| 2532 temp2 = (step2[5] + step2[6]) * cospi_16_64; | 2535 temp2 = (step2[5] + step2[6]) * cospi_16_64; |
| 2533 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2536 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2534 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2537 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2535 step1[7] = step2[7]; | 2538 step1[7] = step2[7]; |
| 2536 | 2539 |
| 2537 step1[8] = WRAPLOW(step2[8] + step2[11], bd); | 2540 step1[8] = WRAPLOW(step2[8] + step2[11], bd); |
| 2538 step1[9] = WRAPLOW(step2[9] + step2[10], bd); | 2541 step1[9] = WRAPLOW(step2[9] + step2[10], bd); |
| 2539 step1[10] = WRAPLOW(step2[9] - step2[10], bd); | 2542 step1[10] = WRAPLOW(step2[9] - step2[10], bd); |
| 2540 step1[11] = WRAPLOW(step2[8] - step2[11], bd); | 2543 step1[11] = WRAPLOW(step2[8] - step2[11], bd); |
| 2541 step1[12] = WRAPLOW(-step2[12] + step2[15], bd); | 2544 step1[12] = WRAPLOW(-step2[12] + step2[15], bd); |
| 2542 step1[13] = WRAPLOW(-step2[13] + step2[14], bd); | 2545 step1[13] = WRAPLOW(-step2[13] + step2[14], bd); |
| 2543 step1[14] = WRAPLOW(step2[13] + step2[14], bd); | 2546 step1[14] = WRAPLOW(step2[13] + step2[14], bd); |
| 2544 step1[15] = WRAPLOW(step2[12] + step2[15], bd); | 2547 step1[15] = WRAPLOW(step2[12] + step2[15], bd); |
| 2545 | 2548 |
| 2546 step1[16] = step2[16]; | 2549 step1[16] = step2[16]; |
| 2547 step1[17] = step2[17]; | 2550 step1[17] = step2[17]; |
| 2548 temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; | 2551 temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; |
| 2549 temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; | 2552 temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; |
| 2550 step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2553 step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2551 step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2554 step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2552 temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; | 2555 temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; |
| 2553 temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; | 2556 temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; |
| 2554 step1[19] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2557 step1[19] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2555 step1[28] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2558 step1[28] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2556 temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; | 2559 temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; |
| 2557 temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; | 2560 temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; |
| 2558 step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2561 step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2559 step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2562 step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2560 temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; | 2563 temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; |
| 2561 temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; | 2564 temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; |
| 2562 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2565 step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2563 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2566 step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2564 step1[22] = step2[22]; | 2567 step1[22] = step2[22]; |
| 2565 step1[23] = step2[23]; | 2568 step1[23] = step2[23]; |
| 2566 step1[24] = step2[24]; | 2569 step1[24] = step2[24]; |
| 2567 step1[25] = step2[25]; | 2570 step1[25] = step2[25]; |
| 2568 step1[30] = step2[30]; | 2571 step1[30] = step2[30]; |
| 2569 step1[31] = step2[31]; | 2572 step1[31] = step2[31]; |
| 2570 | 2573 |
| 2571 // stage 6 | 2574 // stage 6 |
| 2572 step2[0] = WRAPLOW(step1[0] + step1[7], bd); | 2575 step2[0] = WRAPLOW(step1[0] + step1[7], bd); |
| 2573 step2[1] = WRAPLOW(step1[1] + step1[6], bd); | 2576 step2[1] = WRAPLOW(step1[1] + step1[6], bd); |
| 2574 step2[2] = WRAPLOW(step1[2] + step1[5], bd); | 2577 step2[2] = WRAPLOW(step1[2] + step1[5], bd); |
| 2575 step2[3] = WRAPLOW(step1[3] + step1[4], bd); | 2578 step2[3] = WRAPLOW(step1[3] + step1[4], bd); |
| 2576 step2[4] = WRAPLOW(step1[3] - step1[4], bd); | 2579 step2[4] = WRAPLOW(step1[3] - step1[4], bd); |
| 2577 step2[5] = WRAPLOW(step1[2] - step1[5], bd); | 2580 step2[5] = WRAPLOW(step1[2] - step1[5], bd); |
| 2578 step2[6] = WRAPLOW(step1[1] - step1[6], bd); | 2581 step2[6] = WRAPLOW(step1[1] - step1[6], bd); |
| 2579 step2[7] = WRAPLOW(step1[0] - step1[7], bd); | 2582 step2[7] = WRAPLOW(step1[0] - step1[7], bd); |
| 2580 step2[8] = step1[8]; | 2583 step2[8] = step1[8]; |
| 2581 step2[9] = step1[9]; | 2584 step2[9] = step1[9]; |
| 2582 temp1 = (-step1[10] + step1[13]) * cospi_16_64; | 2585 temp1 = (-step1[10] + step1[13]) * cospi_16_64; |
| 2583 temp2 = (step1[10] + step1[13]) * cospi_16_64; | 2586 temp2 = (step1[10] + step1[13]) * cospi_16_64; |
| 2584 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2587 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2585 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2588 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2586 temp1 = (-step1[11] + step1[12]) * cospi_16_64; | 2589 temp1 = (-step1[11] + step1[12]) * cospi_16_64; |
| 2587 temp2 = (step1[11] + step1[12]) * cospi_16_64; | 2590 temp2 = (step1[11] + step1[12]) * cospi_16_64; |
| 2588 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2591 step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2589 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2592 step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2590 step2[14] = step1[14]; | 2593 step2[14] = step1[14]; |
| 2591 step2[15] = step1[15]; | 2594 step2[15] = step1[15]; |
| 2592 | 2595 |
| 2593 step2[16] = WRAPLOW(step1[16] + step1[23], bd); | 2596 step2[16] = WRAPLOW(step1[16] + step1[23], bd); |
| 2594 step2[17] = WRAPLOW(step1[17] + step1[22], bd); | 2597 step2[17] = WRAPLOW(step1[17] + step1[22], bd); |
| 2595 step2[18] = WRAPLOW(step1[18] + step1[21], bd); | 2598 step2[18] = WRAPLOW(step1[18] + step1[21], bd); |
| 2596 step2[19] = WRAPLOW(step1[19] + step1[20], bd); | 2599 step2[19] = WRAPLOW(step1[19] + step1[20], bd); |
| 2597 step2[20] = WRAPLOW(step1[19] - step1[20], bd); | 2600 step2[20] = WRAPLOW(step1[19] - step1[20], bd); |
| 2598 step2[21] = WRAPLOW(step1[18] - step1[21], bd); | 2601 step2[21] = WRAPLOW(step1[18] - step1[21], bd); |
| 2599 step2[22] = WRAPLOW(step1[17] - step1[22], bd); | 2602 step2[22] = WRAPLOW(step1[17] - step1[22], bd); |
| (...skipping 25 matching lines...) Expand all Loading... |
| 2625 step1[13] = WRAPLOW(step2[2] - step2[13], bd); | 2628 step1[13] = WRAPLOW(step2[2] - step2[13], bd); |
| 2626 step1[14] = WRAPLOW(step2[1] - step2[14], bd); | 2629 step1[14] = WRAPLOW(step2[1] - step2[14], bd); |
| 2627 step1[15] = WRAPLOW(step2[0] - step2[15], bd); | 2630 step1[15] = WRAPLOW(step2[0] - step2[15], bd); |
| 2628 | 2631 |
| 2629 step1[16] = step2[16]; | 2632 step1[16] = step2[16]; |
| 2630 step1[17] = step2[17]; | 2633 step1[17] = step2[17]; |
| 2631 step1[18] = step2[18]; | 2634 step1[18] = step2[18]; |
| 2632 step1[19] = step2[19]; | 2635 step1[19] = step2[19]; |
| 2633 temp1 = (-step2[20] + step2[27]) * cospi_16_64; | 2636 temp1 = (-step2[20] + step2[27]) * cospi_16_64; |
| 2634 temp2 = (step2[20] + step2[27]) * cospi_16_64; | 2637 temp2 = (step2[20] + step2[27]) * cospi_16_64; |
| 2635 step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2638 step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2636 step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2639 step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2637 temp1 = (-step2[21] + step2[26]) * cospi_16_64; | 2640 temp1 = (-step2[21] + step2[26]) * cospi_16_64; |
| 2638 temp2 = (step2[21] + step2[26]) * cospi_16_64; | 2641 temp2 = (step2[21] + step2[26]) * cospi_16_64; |
| 2639 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2642 step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2640 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2643 step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2641 temp1 = (-step2[22] + step2[25]) * cospi_16_64; | 2644 temp1 = (-step2[22] + step2[25]) * cospi_16_64; |
| 2642 temp2 = (step2[22] + step2[25]) * cospi_16_64; | 2645 temp2 = (step2[22] + step2[25]) * cospi_16_64; |
| 2643 step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2646 step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2644 step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2647 step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2645 temp1 = (-step2[23] + step2[24]) * cospi_16_64; | 2648 temp1 = (-step2[23] + step2[24]) * cospi_16_64; |
| 2646 temp2 = (step2[23] + step2[24]) * cospi_16_64; | 2649 temp2 = (step2[23] + step2[24]) * cospi_16_64; |
| 2647 step1[23] = WRAPLOW(dct_const_round_shift(temp1), bd); | 2650 step1[23] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd); |
| 2648 step1[24] = WRAPLOW(dct_const_round_shift(temp2), bd); | 2651 step1[24] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd); |
| 2649 step1[28] = step2[28]; | 2652 step1[28] = step2[28]; |
| 2650 step1[29] = step2[29]; | 2653 step1[29] = step2[29]; |
| 2651 step1[30] = step2[30]; | 2654 step1[30] = step2[30]; |
| 2652 step1[31] = step2[31]; | 2655 step1[31] = step2[31]; |
| 2653 | 2656 |
| 2654 // final stage | 2657 // final stage |
| 2655 output[0] = WRAPLOW(step1[0] + step1[31], bd); | 2658 output[0] = WRAPLOW(step1[0] + step1[31], bd); |
| 2656 output[1] = WRAPLOW(step1[1] + step1[30], bd); | 2659 output[1] = WRAPLOW(step1[1] + step1[30], bd); |
| 2657 output[2] = WRAPLOW(step1[2] + step1[29], bd); | 2660 output[2] = WRAPLOW(step1[2] + step1[29], bd); |
| 2658 output[3] = WRAPLOW(step1[3] + step1[28], bd); | 2661 output[3] = WRAPLOW(step1[3] + step1[28], bd); |
| (...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2752 } | 2755 } |
| 2753 } | 2756 } |
| 2754 } | 2757 } |
| 2755 | 2758 |
| 2756 void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, | 2759 void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, |
| 2757 int stride, int bd) { | 2760 int stride, int bd) { |
| 2758 int i, j; | 2761 int i, j; |
| 2759 int a1; | 2762 int a1; |
| 2760 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); | 2763 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); |
| 2761 | 2764 |
| 2762 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); | 2765 tran_low_t out = WRAPLOW( |
| 2763 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); | 2766 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd); |
| 2767 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd); |
| 2764 a1 = ROUND_POWER_OF_TWO(out, 6); | 2768 a1 = ROUND_POWER_OF_TWO(out, 6); |
| 2765 | 2769 |
| 2766 for (j = 0; j < 32; ++j) { | 2770 for (j = 0; j < 32; ++j) { |
| 2767 for (i = 0; i < 32; ++i) | 2771 for (i = 0; i < 32; ++i) |
| 2768 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); | 2772 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); |
| 2769 dest += stride; | 2773 dest += stride; |
| 2770 } | 2774 } |
| 2771 } | 2775 } |
| 2772 | 2776 |
| 2773 // idct | 2777 // idct |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2853 | 2857 |
| 2854 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, | 2858 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, |
| 2855 uint8_t *dest, int stride, int eob, int bd) { | 2859 uint8_t *dest, int stride, int eob, int bd) { |
| 2856 if (tx_type == DCT_DCT) { | 2860 if (tx_type == DCT_DCT) { |
| 2857 vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); | 2861 vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); |
| 2858 } else { | 2862 } else { |
| 2859 vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); | 2863 vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); |
| 2860 } | 2864 } |
| 2861 } | 2865 } |
| 2862 #endif // CONFIG_VP9_HIGHBITDEPTH | 2866 #endif // CONFIG_VP9_HIGHBITDEPTH |
| OLD | NEW |