Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(478)

Side by Side Diff: source/libvpx/vp9/common/vp9_idct.c

Issue 812033011: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « source/libvpx/vp9/common/vp9_idct.h ('k') | source/libvpx/vp9/common/vp9_loopfilter.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 1499 matching lines...) Expand 10 before | Expand all | Expand 10 after
1510 } 1510 }
1511 } 1511 }
1512 1512
1513 void vp9_highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd) { 1513 void vp9_highbd_idct4(const tran_low_t *input, tran_low_t *output, int bd) {
1514 tran_low_t step[4]; 1514 tran_low_t step[4];
1515 tran_high_t temp1, temp2; 1515 tran_high_t temp1, temp2;
1516 (void) bd; 1516 (void) bd;
1517 // stage 1 1517 // stage 1
1518 temp1 = (input[0] + input[2]) * cospi_16_64; 1518 temp1 = (input[0] + input[2]) * cospi_16_64;
1519 temp2 = (input[0] - input[2]) * cospi_16_64; 1519 temp2 = (input[0] - input[2]) * cospi_16_64;
1520 step[0] = WRAPLOW(dct_const_round_shift(temp1), bd); 1520 step[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1521 step[1] = WRAPLOW(dct_const_round_shift(temp2), bd); 1521 step[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1522 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; 1522 temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64;
1523 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; 1523 temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64;
1524 step[2] = WRAPLOW(dct_const_round_shift(temp1), bd); 1524 step[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1525 step[3] = WRAPLOW(dct_const_round_shift(temp2), bd); 1525 step[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1526 1526
1527 // stage 2 1527 // stage 2
1528 output[0] = WRAPLOW(step[0] + step[3], bd); 1528 output[0] = WRAPLOW(step[0] + step[3], bd);
1529 output[1] = WRAPLOW(step[1] + step[2], bd); 1529 output[1] = WRAPLOW(step[1] + step[2], bd);
1530 output[2] = WRAPLOW(step[1] - step[2], bd); 1530 output[2] = WRAPLOW(step[1] - step[2], bd);
1531 output[3] = WRAPLOW(step[0] - step[3], bd); 1531 output[3] = WRAPLOW(step[0] - step[3], bd);
1532 } 1532 }
1533 1533
1534 void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, 1534 void vp9_highbd_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
1535 int stride, int bd) { 1535 int stride, int bd) {
(...skipping 19 matching lines...) Expand all
1555 dest[j * stride + i] = highbd_clip_pixel_add( 1555 dest[j * stride + i] = highbd_clip_pixel_add(
1556 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); 1556 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
1557 } 1557 }
1558 } 1558 }
1559 } 1559 }
1560 1560
1561 void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8, 1561 void vp9_highbd_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest8,
1562 int dest_stride, int bd) { 1562 int dest_stride, int bd) {
1563 int i; 1563 int i;
1564 tran_high_t a1; 1564 tran_high_t a1;
1565 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); 1565 tran_low_t out = WRAPLOW(
1566 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd);
1566 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 1567 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1567 1568
1568 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); 1569 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd);
1569 a1 = ROUND_POWER_OF_TWO(out, 4); 1570 a1 = ROUND_POWER_OF_TWO(out, 4);
1570 1571
1571 for (i = 0; i < 4; i++) { 1572 for (i = 0; i < 4; i++) {
1572 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); 1573 dest[0] = highbd_clip_pixel_add(dest[0], a1, bd);
1573 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); 1574 dest[1] = highbd_clip_pixel_add(dest[1], a1, bd);
1574 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); 1575 dest[2] = highbd_clip_pixel_add(dest[2], a1, bd);
1575 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); 1576 dest[3] = highbd_clip_pixel_add(dest[3], a1, bd);
1576 dest += dest_stride; 1577 dest += dest_stride;
1577 } 1578 }
1578 } 1579 }
1579 1580
1580 void vp9_highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) { 1581 void vp9_highbd_idct8(const tran_low_t *input, tran_low_t *output, int bd) {
1581 tran_low_t step1[8], step2[8]; 1582 tran_low_t step1[8], step2[8];
1582 tran_high_t temp1, temp2; 1583 tran_high_t temp1, temp2;
1583 // stage 1 1584 // stage 1
1584 step1[0] = input[0]; 1585 step1[0] = input[0];
1585 step1[2] = input[4]; 1586 step1[2] = input[4];
1586 step1[1] = input[2]; 1587 step1[1] = input[2];
1587 step1[3] = input[6]; 1588 step1[3] = input[6];
1588 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; 1589 temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
1589 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; 1590 temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
1590 step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd); 1591 step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1591 step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd); 1592 step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1592 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; 1593 temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
1593 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; 1594 temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
1594 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); 1595 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1595 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); 1596 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1596 1597
1597 // stage 2 & stage 3 - even half 1598 // stage 2 & stage 3 - even half
1598 vp9_highbd_idct4(step1, step1, bd); 1599 vp9_highbd_idct4(step1, step1, bd);
1599 1600
1600 // stage 2 - odd half 1601 // stage 2 - odd half
1601 step2[4] = WRAPLOW(step1[4] + step1[5], bd); 1602 step2[4] = WRAPLOW(step1[4] + step1[5], bd);
1602 step2[5] = WRAPLOW(step1[4] - step1[5], bd); 1603 step2[5] = WRAPLOW(step1[4] - step1[5], bd);
1603 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); 1604 step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
1604 step2[7] = WRAPLOW(step1[6] + step1[7], bd); 1605 step2[7] = WRAPLOW(step1[6] + step1[7], bd);
1605 1606
1606 // stage 3 - odd half 1607 // stage 3 - odd half
1607 step1[4] = step2[4]; 1608 step1[4] = step2[4];
1608 temp1 = (step2[6] - step2[5]) * cospi_16_64; 1609 temp1 = (step2[6] - step2[5]) * cospi_16_64;
1609 temp2 = (step2[5] + step2[6]) * cospi_16_64; 1610 temp2 = (step2[5] + step2[6]) * cospi_16_64;
1610 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); 1611 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1611 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); 1612 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1612 step1[7] = step2[7]; 1613 step1[7] = step2[7];
1613 1614
1614 // stage 4 1615 // stage 4
1615 output[0] = WRAPLOW(step1[0] + step1[7], bd); 1616 output[0] = WRAPLOW(step1[0] + step1[7], bd);
1616 output[1] = WRAPLOW(step1[1] + step1[6], bd); 1617 output[1] = WRAPLOW(step1[1] + step1[6], bd);
1617 output[2] = WRAPLOW(step1[2] + step1[5], bd); 1618 output[2] = WRAPLOW(step1[2] + step1[5], bd);
1618 output[3] = WRAPLOW(step1[3] + step1[4], bd); 1619 output[3] = WRAPLOW(step1[3] + step1[4], bd);
1619 output[4] = WRAPLOW(step1[3] - step1[4], bd); 1620 output[4] = WRAPLOW(step1[3] - step1[4], bd);
1620 output[5] = WRAPLOW(step1[2] - step1[5], bd); 1621 output[5] = WRAPLOW(step1[2] - step1[5], bd);
1621 output[6] = WRAPLOW(step1[1] - step1[6], bd); 1622 output[6] = WRAPLOW(step1[1] - step1[6], bd);
(...skipping 24 matching lines...) Expand all
1646 dest[j * stride + i] = highbd_clip_pixel_add( 1647 dest[j * stride + i] = highbd_clip_pixel_add(
1647 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); 1648 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
1648 } 1649 }
1649 } 1650 }
1650 } 1651 }
1651 1652
1652 void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8, 1653 void vp9_highbd_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest8,
1653 int stride, int bd) { 1654 int stride, int bd) {
1654 int i, j; 1655 int i, j;
1655 tran_high_t a1; 1656 tran_high_t a1;
1656 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); 1657 tran_low_t out = WRAPLOW(
1658 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd);
1657 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 1659 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
1658 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); 1660 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd);
1659 a1 = ROUND_POWER_OF_TWO(out, 5); 1661 a1 = ROUND_POWER_OF_TWO(out, 5);
1660 for (j = 0; j < 8; ++j) { 1662 for (j = 0; j < 8; ++j) {
1661 for (i = 0; i < 8; ++i) 1663 for (i = 0; i < 8; ++i)
1662 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); 1664 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
1663 dest += stride; 1665 dest += stride;
1664 } 1666 }
1665 } 1667 }
1666 1668
1667 static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) { 1669 static void highbd_iadst4(const tran_low_t *input, tran_low_t *output, int bd) {
1668 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; 1670 tran_high_t s0, s1, s2, s3, s4, s5, s6, s7;
(...skipping 20 matching lines...) Expand all
1689 1691
1690 s0 = s0 + s3 + s5; 1692 s0 = s0 + s3 + s5;
1691 s1 = s1 - s4 - s6; 1693 s1 = s1 - s4 - s6;
1692 s3 = s2; 1694 s3 = s2;
1693 s2 = sinpi_3_9 * s7; 1695 s2 = sinpi_3_9 * s7;
1694 1696
1695 // 1-D transform scaling factor is sqrt(2). 1697 // 1-D transform scaling factor is sqrt(2).
1696 // The overall dynamic range is 14b (input) + 14b (multiplication scaling) 1698 // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
1697 // + 1b (addition) = 29b. 1699 // + 1b (addition) = 29b.
1698 // Hence the output bit depth is 15b. 1700 // Hence the output bit depth is 15b.
1699 output[0] = WRAPLOW(dct_const_round_shift(s0 + s3), bd); 1701 output[0] = WRAPLOW(highbd_dct_const_round_shift(s0 + s3, bd), bd);
1700 output[1] = WRAPLOW(dct_const_round_shift(s1 + s3), bd); 1702 output[1] = WRAPLOW(highbd_dct_const_round_shift(s1 + s3, bd), bd);
1701 output[2] = WRAPLOW(dct_const_round_shift(s2), bd); 1703 output[2] = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd);
1702 output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3), bd); 1704 output[3] = WRAPLOW(highbd_dct_const_round_shift(s0 + s1 - s3, bd), bd);
1703 } 1705 }
1704 1706
1705 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, 1707 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
1706 int stride, int tx_type, int bd) { 1708 int stride, int tx_type, int bd) {
1707 const highbd_transform_2d IHT_4[] = { 1709 const highbd_transform_2d IHT_4[] = {
1708 { vp9_highbd_idct4, vp9_highbd_idct4 }, // DCT_DCT = 0 1710 { vp9_highbd_idct4, vp9_highbd_idct4 }, // DCT_DCT = 0
1709 { highbd_iadst4, vp9_highbd_idct4 }, // ADST_DCT = 1 1711 { highbd_iadst4, vp9_highbd_idct4 }, // ADST_DCT = 1
1710 { vp9_highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2 1712 { vp9_highbd_idct4, highbd_iadst4 }, // DCT_ADST = 2
1711 { highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3 1713 { highbd_iadst4, highbd_iadst4 } // ADST_ADST = 3
1712 }; 1714 };
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
1757 // stage 1 1759 // stage 1
1758 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; 1760 s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
1759 s1 = cospi_30_64 * x0 - cospi_2_64 * x1; 1761 s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
1760 s2 = cospi_10_64 * x2 + cospi_22_64 * x3; 1762 s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
1761 s3 = cospi_22_64 * x2 - cospi_10_64 * x3; 1763 s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
1762 s4 = cospi_18_64 * x4 + cospi_14_64 * x5; 1764 s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
1763 s5 = cospi_14_64 * x4 - cospi_18_64 * x5; 1765 s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
1764 s6 = cospi_26_64 * x6 + cospi_6_64 * x7; 1766 s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
1765 s7 = cospi_6_64 * x6 - cospi_26_64 * x7; 1767 s7 = cospi_6_64 * x6 - cospi_26_64 * x7;
1766 1768
1767 x0 = WRAPLOW(dct_const_round_shift(s0 + s4), bd); 1769 x0 = WRAPLOW(highbd_dct_const_round_shift(s0 + s4, bd), bd);
1768 x1 = WRAPLOW(dct_const_round_shift(s1 + s5), bd); 1770 x1 = WRAPLOW(highbd_dct_const_round_shift(s1 + s5, bd), bd);
1769 x2 = WRAPLOW(dct_const_round_shift(s2 + s6), bd); 1771 x2 = WRAPLOW(highbd_dct_const_round_shift(s2 + s6, bd), bd);
1770 x3 = WRAPLOW(dct_const_round_shift(s3 + s7), bd); 1772 x3 = WRAPLOW(highbd_dct_const_round_shift(s3 + s7, bd), bd);
1771 x4 = WRAPLOW(dct_const_round_shift(s0 - s4), bd); 1773 x4 = WRAPLOW(highbd_dct_const_round_shift(s0 - s4, bd), bd);
1772 x5 = WRAPLOW(dct_const_round_shift(s1 - s5), bd); 1774 x5 = WRAPLOW(highbd_dct_const_round_shift(s1 - s5, bd), bd);
1773 x6 = WRAPLOW(dct_const_round_shift(s2 - s6), bd); 1775 x6 = WRAPLOW(highbd_dct_const_round_shift(s2 - s6, bd), bd);
1774 x7 = WRAPLOW(dct_const_round_shift(s3 - s7), bd); 1776 x7 = WRAPLOW(highbd_dct_const_round_shift(s3 - s7, bd), bd);
1775 1777
1776 // stage 2 1778 // stage 2
1777 s0 = x0; 1779 s0 = x0;
1778 s1 = x1; 1780 s1 = x1;
1779 s2 = x2; 1781 s2 = x2;
1780 s3 = x3; 1782 s3 = x3;
1781 s4 = cospi_8_64 * x4 + cospi_24_64 * x5; 1783 s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
1782 s5 = cospi_24_64 * x4 - cospi_8_64 * x5; 1784 s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
1783 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; 1785 s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
1784 s7 = cospi_8_64 * x6 + cospi_24_64 * x7; 1786 s7 = cospi_8_64 * x6 + cospi_24_64 * x7;
1785 1787
1786 x0 = WRAPLOW(s0 + s2, bd); 1788 x0 = WRAPLOW(s0 + s2, bd);
1787 x1 = WRAPLOW(s1 + s3, bd); 1789 x1 = WRAPLOW(s1 + s3, bd);
1788 x2 = WRAPLOW(s0 - s2, bd); 1790 x2 = WRAPLOW(s0 - s2, bd);
1789 x3 = WRAPLOW(s1 - s3, bd); 1791 x3 = WRAPLOW(s1 - s3, bd);
1790 x4 = WRAPLOW(dct_const_round_shift(s4 + s6), bd); 1792 x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s6, bd), bd);
1791 x5 = WRAPLOW(dct_const_round_shift(s5 + s7), bd); 1793 x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s7, bd), bd);
1792 x6 = WRAPLOW(dct_const_round_shift(s4 - s6), bd); 1794 x6 = WRAPLOW(highbd_dct_const_round_shift(s4 - s6, bd), bd);
1793 x7 = WRAPLOW(dct_const_round_shift(s5 - s7), bd); 1795 x7 = WRAPLOW(highbd_dct_const_round_shift(s5 - s7, bd), bd);
1794 1796
1795 // stage 3 1797 // stage 3
1796 s2 = cospi_16_64 * (x2 + x3); 1798 s2 = cospi_16_64 * (x2 + x3);
1797 s3 = cospi_16_64 * (x2 - x3); 1799 s3 = cospi_16_64 * (x2 - x3);
1798 s6 = cospi_16_64 * (x6 + x7); 1800 s6 = cospi_16_64 * (x6 + x7);
1799 s7 = cospi_16_64 * (x6 - x7); 1801 s7 = cospi_16_64 * (x6 - x7);
1800 1802
1801 x2 = WRAPLOW(dct_const_round_shift(s2), bd); 1803 x2 = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd);
1802 x3 = WRAPLOW(dct_const_round_shift(s3), bd); 1804 x3 = WRAPLOW(highbd_dct_const_round_shift(s3, bd), bd);
1803 x6 = WRAPLOW(dct_const_round_shift(s6), bd); 1805 x6 = WRAPLOW(highbd_dct_const_round_shift(s6, bd), bd);
1804 x7 = WRAPLOW(dct_const_round_shift(s7), bd); 1806 x7 = WRAPLOW(highbd_dct_const_round_shift(s7, bd), bd);
1805 1807
1806 output[0] = WRAPLOW(x0, bd); 1808 output[0] = WRAPLOW(x0, bd);
1807 output[1] = WRAPLOW(-x4, bd); 1809 output[1] = WRAPLOW(-x4, bd);
1808 output[2] = WRAPLOW(x6, bd); 1810 output[2] = WRAPLOW(x6, bd);
1809 output[3] = WRAPLOW(-x2, bd); 1811 output[3] = WRAPLOW(-x2, bd);
1810 output[4] = WRAPLOW(x3, bd); 1812 output[4] = WRAPLOW(x3, bd);
1811 output[5] = WRAPLOW(-x7, bd); 1813 output[5] = WRAPLOW(-x7, bd);
1812 output[6] = WRAPLOW(x5, bd); 1814 output[6] = WRAPLOW(x5, bd);
1813 output[7] = WRAPLOW(-x1, bd); 1815 output[7] = WRAPLOW(-x1, bd);
1814 } 1816 }
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
1903 step2[1] = step1[1]; 1905 step2[1] = step1[1];
1904 step2[2] = step1[2]; 1906 step2[2] = step1[2];
1905 step2[3] = step1[3]; 1907 step2[3] = step1[3];
1906 step2[4] = step1[4]; 1908 step2[4] = step1[4];
1907 step2[5] = step1[5]; 1909 step2[5] = step1[5];
1908 step2[6] = step1[6]; 1910 step2[6] = step1[6];
1909 step2[7] = step1[7]; 1911 step2[7] = step1[7];
1910 1912
1911 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; 1913 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
1912 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; 1914 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
1913 step2[8] = WRAPLOW(dct_const_round_shift(temp1), bd); 1915 step2[8] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1914 step2[15] = WRAPLOW(dct_const_round_shift(temp2), bd); 1916 step2[15] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1915 1917
1916 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; 1918 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
1917 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; 1919 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
1918 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); 1920 step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1919 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); 1921 step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1920 1922
1921 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; 1923 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
1922 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; 1924 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
1923 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); 1925 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1924 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); 1926 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1925 1927
1926 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; 1928 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
1927 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; 1929 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
1928 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); 1930 step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1929 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); 1931 step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1930 1932
1931 // stage 3 1933 // stage 3
1932 step1[0] = step2[0]; 1934 step1[0] = step2[0];
1933 step1[1] = step2[1]; 1935 step1[1] = step2[1];
1934 step1[2] = step2[2]; 1936 step1[2] = step2[2];
1935 step1[3] = step2[3]; 1937 step1[3] = step2[3];
1936 1938
1937 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; 1939 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
1938 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; 1940 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
1939 step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd); 1941 step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1940 step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd); 1942 step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1941 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; 1943 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
1942 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; 1944 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
1943 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); 1945 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1944 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); 1946 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1945 1947
1946 step1[8] = WRAPLOW(step2[8] + step2[9], bd); 1948 step1[8] = WRAPLOW(step2[8] + step2[9], bd);
1947 step1[9] = WRAPLOW(step2[8] - step2[9], bd); 1949 step1[9] = WRAPLOW(step2[8] - step2[9], bd);
1948 step1[10] = WRAPLOW(-step2[10] + step2[11], bd); 1950 step1[10] = WRAPLOW(-step2[10] + step2[11], bd);
1949 step1[11] = WRAPLOW(step2[10] + step2[11], bd); 1951 step1[11] = WRAPLOW(step2[10] + step2[11], bd);
1950 step1[12] = WRAPLOW(step2[12] + step2[13], bd); 1952 step1[12] = WRAPLOW(step2[12] + step2[13], bd);
1951 step1[13] = WRAPLOW(step2[12] - step2[13], bd); 1953 step1[13] = WRAPLOW(step2[12] - step2[13], bd);
1952 step1[14] = WRAPLOW(-step2[14] + step2[15], bd); 1954 step1[14] = WRAPLOW(-step2[14] + step2[15], bd);
1953 step1[15] = WRAPLOW(step2[14] + step2[15], bd); 1955 step1[15] = WRAPLOW(step2[14] + step2[15], bd);
1954 1956
1955 // stage 4 1957 // stage 4
1956 temp1 = (step1[0] + step1[1]) * cospi_16_64; 1958 temp1 = (step1[0] + step1[1]) * cospi_16_64;
1957 temp2 = (step1[0] - step1[1]) * cospi_16_64; 1959 temp2 = (step1[0] - step1[1]) * cospi_16_64;
1958 step2[0] = WRAPLOW(dct_const_round_shift(temp1), bd); 1960 step2[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1959 step2[1] = WRAPLOW(dct_const_round_shift(temp2), bd); 1961 step2[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1960 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; 1962 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
1961 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; 1963 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
1962 step2[2] = WRAPLOW(dct_const_round_shift(temp1), bd); 1964 step2[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1963 step2[3] = WRAPLOW(dct_const_round_shift(temp2), bd); 1965 step2[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1964 step2[4] = WRAPLOW(step1[4] + step1[5], bd); 1966 step2[4] = WRAPLOW(step1[4] + step1[5], bd);
1965 step2[5] = WRAPLOW(step1[4] - step1[5], bd); 1967 step2[5] = WRAPLOW(step1[4] - step1[5], bd);
1966 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); 1968 step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
1967 step2[7] = WRAPLOW(step1[6] + step1[7], bd); 1969 step2[7] = WRAPLOW(step1[6] + step1[7], bd);
1968 1970
1969 step2[8] = step1[8]; 1971 step2[8] = step1[8];
1970 step2[15] = step1[15]; 1972 step2[15] = step1[15];
1971 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; 1973 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
1972 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; 1974 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
1973 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); 1975 step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1974 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); 1976 step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1975 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; 1977 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
1976 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; 1978 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
1977 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); 1979 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1978 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); 1980 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1979 step2[11] = step1[11]; 1981 step2[11] = step1[11];
1980 step2[12] = step1[12]; 1982 step2[12] = step1[12];
1981 1983
1982 // stage 5 1984 // stage 5
1983 step1[0] = WRAPLOW(step2[0] + step2[3], bd); 1985 step1[0] = WRAPLOW(step2[0] + step2[3], bd);
1984 step1[1] = WRAPLOW(step2[1] + step2[2], bd); 1986 step1[1] = WRAPLOW(step2[1] + step2[2], bd);
1985 step1[2] = WRAPLOW(step2[1] - step2[2], bd); 1987 step1[2] = WRAPLOW(step2[1] - step2[2], bd);
1986 step1[3] = WRAPLOW(step2[0] - step2[3], bd); 1988 step1[3] = WRAPLOW(step2[0] - step2[3], bd);
1987 step1[4] = step2[4]; 1989 step1[4] = step2[4];
1988 temp1 = (step2[6] - step2[5]) * cospi_16_64; 1990 temp1 = (step2[6] - step2[5]) * cospi_16_64;
1989 temp2 = (step2[5] + step2[6]) * cospi_16_64; 1991 temp2 = (step2[5] + step2[6]) * cospi_16_64;
1990 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); 1992 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
1991 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); 1993 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
1992 step1[7] = step2[7]; 1994 step1[7] = step2[7];
1993 1995
1994 step1[8] = WRAPLOW(step2[8] + step2[11], bd); 1996 step1[8] = WRAPLOW(step2[8] + step2[11], bd);
1995 step1[9] = WRAPLOW(step2[9] + step2[10], bd); 1997 step1[9] = WRAPLOW(step2[9] + step2[10], bd);
1996 step1[10] = WRAPLOW(step2[9] - step2[10], bd); 1998 step1[10] = WRAPLOW(step2[9] - step2[10], bd);
1997 step1[11] = WRAPLOW(step2[8] - step2[11], bd); 1999 step1[11] = WRAPLOW(step2[8] - step2[11], bd);
1998 step1[12] = WRAPLOW(-step2[12] + step2[15], bd); 2000 step1[12] = WRAPLOW(-step2[12] + step2[15], bd);
1999 step1[13] = WRAPLOW(-step2[13] + step2[14], bd); 2001 step1[13] = WRAPLOW(-step2[13] + step2[14], bd);
2000 step1[14] = WRAPLOW(step2[13] + step2[14], bd); 2002 step1[14] = WRAPLOW(step2[13] + step2[14], bd);
2001 step1[15] = WRAPLOW(step2[12] + step2[15], bd); 2003 step1[15] = WRAPLOW(step2[12] + step2[15], bd);
2002 2004
2003 // stage 6 2005 // stage 6
2004 step2[0] = WRAPLOW(step1[0] + step1[7], bd); 2006 step2[0] = WRAPLOW(step1[0] + step1[7], bd);
2005 step2[1] = WRAPLOW(step1[1] + step1[6], bd); 2007 step2[1] = WRAPLOW(step1[1] + step1[6], bd);
2006 step2[2] = WRAPLOW(step1[2] + step1[5], bd); 2008 step2[2] = WRAPLOW(step1[2] + step1[5], bd);
2007 step2[3] = WRAPLOW(step1[3] + step1[4], bd); 2009 step2[3] = WRAPLOW(step1[3] + step1[4], bd);
2008 step2[4] = WRAPLOW(step1[3] - step1[4], bd); 2010 step2[4] = WRAPLOW(step1[3] - step1[4], bd);
2009 step2[5] = WRAPLOW(step1[2] - step1[5], bd); 2011 step2[5] = WRAPLOW(step1[2] - step1[5], bd);
2010 step2[6] = WRAPLOW(step1[1] - step1[6], bd); 2012 step2[6] = WRAPLOW(step1[1] - step1[6], bd);
2011 step2[7] = WRAPLOW(step1[0] - step1[7], bd); 2013 step2[7] = WRAPLOW(step1[0] - step1[7], bd);
2012 step2[8] = step1[8]; 2014 step2[8] = step1[8];
2013 step2[9] = step1[9]; 2015 step2[9] = step1[9];
2014 temp1 = (-step1[10] + step1[13]) * cospi_16_64; 2016 temp1 = (-step1[10] + step1[13]) * cospi_16_64;
2015 temp2 = (step1[10] + step1[13]) * cospi_16_64; 2017 temp2 = (step1[10] + step1[13]) * cospi_16_64;
2016 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); 2018 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2017 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); 2019 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2018 temp1 = (-step1[11] + step1[12]) * cospi_16_64; 2020 temp1 = (-step1[11] + step1[12]) * cospi_16_64;
2019 temp2 = (step1[11] + step1[12]) * cospi_16_64; 2021 temp2 = (step1[11] + step1[12]) * cospi_16_64;
2020 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); 2022 step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2021 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); 2023 step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2022 step2[14] = step1[14]; 2024 step2[14] = step1[14];
2023 step2[15] = step1[15]; 2025 step2[15] = step1[15];
2024 2026
2025 // stage 7 2027 // stage 7
2026 output[0] = WRAPLOW(step2[0] + step2[15], bd); 2028 output[0] = WRAPLOW(step2[0] + step2[15], bd);
2027 output[1] = WRAPLOW(step2[1] + step2[14], bd); 2029 output[1] = WRAPLOW(step2[1] + step2[14], bd);
2028 output[2] = WRAPLOW(step2[2] + step2[13], bd); 2030 output[2] = WRAPLOW(step2[2] + step2[13], bd);
2029 output[3] = WRAPLOW(step2[3] + step2[12], bd); 2031 output[3] = WRAPLOW(step2[3] + step2[12], bd);
2030 output[4] = WRAPLOW(step2[4] + step2[11], bd); 2032 output[4] = WRAPLOW(step2[4] + step2[11], bd);
2031 output[5] = WRAPLOW(step2[5] + step2[10], bd); 2033 output[5] = WRAPLOW(step2[5] + step2[10], bd);
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
2108 s7 = x6 * cospi_19_64 - x7 * cospi_13_64; 2110 s7 = x6 * cospi_19_64 - x7 * cospi_13_64;
2109 s8 = x8 * cospi_17_64 + x9 * cospi_15_64; 2111 s8 = x8 * cospi_17_64 + x9 * cospi_15_64;
2110 s9 = x8 * cospi_15_64 - x9 * cospi_17_64; 2112 s9 = x8 * cospi_15_64 - x9 * cospi_17_64;
2111 s10 = x10 * cospi_21_64 + x11 * cospi_11_64; 2113 s10 = x10 * cospi_21_64 + x11 * cospi_11_64;
2112 s11 = x10 * cospi_11_64 - x11 * cospi_21_64; 2114 s11 = x10 * cospi_11_64 - x11 * cospi_21_64;
2113 s12 = x12 * cospi_25_64 + x13 * cospi_7_64; 2115 s12 = x12 * cospi_25_64 + x13 * cospi_7_64;
2114 s13 = x12 * cospi_7_64 - x13 * cospi_25_64; 2116 s13 = x12 * cospi_7_64 - x13 * cospi_25_64;
2115 s14 = x14 * cospi_29_64 + x15 * cospi_3_64; 2117 s14 = x14 * cospi_29_64 + x15 * cospi_3_64;
2116 s15 = x14 * cospi_3_64 - x15 * cospi_29_64; 2118 s15 = x14 * cospi_3_64 - x15 * cospi_29_64;
2117 2119
2118 x0 = WRAPLOW(dct_const_round_shift(s0 + s8), bd); 2120 x0 = WRAPLOW(highbd_dct_const_round_shift(s0 + s8, bd), bd);
2119 x1 = WRAPLOW(dct_const_round_shift(s1 + s9), bd); 2121 x1 = WRAPLOW(highbd_dct_const_round_shift(s1 + s9, bd), bd);
2120 x2 = WRAPLOW(dct_const_round_shift(s2 + s10), bd); 2122 x2 = WRAPLOW(highbd_dct_const_round_shift(s2 + s10, bd), bd);
2121 x3 = WRAPLOW(dct_const_round_shift(s3 + s11), bd); 2123 x3 = WRAPLOW(highbd_dct_const_round_shift(s3 + s11, bd), bd);
2122 x4 = WRAPLOW(dct_const_round_shift(s4 + s12), bd); 2124 x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s12, bd), bd);
2123 x5 = WRAPLOW(dct_const_round_shift(s5 + s13), bd); 2125 x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s13, bd), bd);
2124 x6 = WRAPLOW(dct_const_round_shift(s6 + s14), bd); 2126 x6 = WRAPLOW(highbd_dct_const_round_shift(s6 + s14, bd), bd);
2125 x7 = WRAPLOW(dct_const_round_shift(s7 + s15), bd); 2127 x7 = WRAPLOW(highbd_dct_const_round_shift(s7 + s15, bd), bd);
2126 x8 = WRAPLOW(dct_const_round_shift(s0 - s8), bd); 2128 x8 = WRAPLOW(highbd_dct_const_round_shift(s0 - s8, bd), bd);
2127 x9 = WRAPLOW(dct_const_round_shift(s1 - s9), bd); 2129 x9 = WRAPLOW(highbd_dct_const_round_shift(s1 - s9, bd), bd);
2128 x10 = WRAPLOW(dct_const_round_shift(s2 - s10), bd); 2130 x10 = WRAPLOW(highbd_dct_const_round_shift(s2 - s10, bd), bd);
2129 x11 = WRAPLOW(dct_const_round_shift(s3 - s11), bd); 2131 x11 = WRAPLOW(highbd_dct_const_round_shift(s3 - s11, bd), bd);
2130 x12 = WRAPLOW(dct_const_round_shift(s4 - s12), bd); 2132 x12 = WRAPLOW(highbd_dct_const_round_shift(s4 - s12, bd), bd);
2131 x13 = WRAPLOW(dct_const_round_shift(s5 - s13), bd); 2133 x13 = WRAPLOW(highbd_dct_const_round_shift(s5 - s13, bd), bd);
2132 x14 = WRAPLOW(dct_const_round_shift(s6 - s14), bd); 2134 x14 = WRAPLOW(highbd_dct_const_round_shift(s6 - s14, bd), bd);
2133 x15 = WRAPLOW(dct_const_round_shift(s7 - s15), bd); 2135 x15 = WRAPLOW(highbd_dct_const_round_shift(s7 - s15, bd), bd);
2134 2136
2135 // stage 2 2137 // stage 2
2136 s0 = x0; 2138 s0 = x0;
2137 s1 = x1; 2139 s1 = x1;
2138 s2 = x2; 2140 s2 = x2;
2139 s3 = x3; 2141 s3 = x3;
2140 s4 = x4; 2142 s4 = x4;
2141 s5 = x5; 2143 s5 = x5;
2142 s6 = x6; 2144 s6 = x6;
2143 s7 = x7; 2145 s7 = x7;
2144 s8 = x8 * cospi_4_64 + x9 * cospi_28_64; 2146 s8 = x8 * cospi_4_64 + x9 * cospi_28_64;
2145 s9 = x8 * cospi_28_64 - x9 * cospi_4_64; 2147 s9 = x8 * cospi_28_64 - x9 * cospi_4_64;
2146 s10 = x10 * cospi_20_64 + x11 * cospi_12_64; 2148 s10 = x10 * cospi_20_64 + x11 * cospi_12_64;
2147 s11 = x10 * cospi_12_64 - x11 * cospi_20_64; 2149 s11 = x10 * cospi_12_64 - x11 * cospi_20_64;
2148 s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; 2150 s12 = -x12 * cospi_28_64 + x13 * cospi_4_64;
2149 s13 = x12 * cospi_4_64 + x13 * cospi_28_64; 2151 s13 = x12 * cospi_4_64 + x13 * cospi_28_64;
2150 s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; 2152 s14 = -x14 * cospi_12_64 + x15 * cospi_20_64;
2151 s15 = x14 * cospi_20_64 + x15 * cospi_12_64; 2153 s15 = x14 * cospi_20_64 + x15 * cospi_12_64;
2152 2154
2153 x0 = WRAPLOW(s0 + s4, bd); 2155 x0 = WRAPLOW(s0 + s4, bd);
2154 x1 = WRAPLOW(s1 + s5, bd); 2156 x1 = WRAPLOW(s1 + s5, bd);
2155 x2 = WRAPLOW(s2 + s6, bd); 2157 x2 = WRAPLOW(s2 + s6, bd);
2156 x3 = WRAPLOW(s3 + s7, bd); 2158 x3 = WRAPLOW(s3 + s7, bd);
2157 x4 = WRAPLOW(s0 - s4, bd); 2159 x4 = WRAPLOW(s0 - s4, bd);
2158 x5 = WRAPLOW(s1 - s5, bd); 2160 x5 = WRAPLOW(s1 - s5, bd);
2159 x6 = WRAPLOW(s2 - s6, bd); 2161 x6 = WRAPLOW(s2 - s6, bd);
2160 x7 = WRAPLOW(s3 - s7, bd); 2162 x7 = WRAPLOW(s3 - s7, bd);
2161 x8 = WRAPLOW(dct_const_round_shift(s8 + s12), bd); 2163 x8 = WRAPLOW(highbd_dct_const_round_shift(s8 + s12, bd), bd);
2162 x9 = WRAPLOW(dct_const_round_shift(s9 + s13), bd); 2164 x9 = WRAPLOW(highbd_dct_const_round_shift(s9 + s13, bd), bd);
2163 x10 = WRAPLOW(dct_const_round_shift(s10 + s14), bd); 2165 x10 = WRAPLOW(highbd_dct_const_round_shift(s10 + s14, bd), bd);
2164 x11 = WRAPLOW(dct_const_round_shift(s11 + s15), bd); 2166 x11 = WRAPLOW(highbd_dct_const_round_shift(s11 + s15, bd), bd);
2165 x12 = WRAPLOW(dct_const_round_shift(s8 - s12), bd); 2167 x12 = WRAPLOW(highbd_dct_const_round_shift(s8 - s12, bd), bd);
2166 x13 = WRAPLOW(dct_const_round_shift(s9 - s13), bd); 2168 x13 = WRAPLOW(highbd_dct_const_round_shift(s9 - s13, bd), bd);
2167 x14 = WRAPLOW(dct_const_round_shift(s10 - s14), bd); 2169 x14 = WRAPLOW(highbd_dct_const_round_shift(s10 - s14, bd), bd);
2168 x15 = WRAPLOW(dct_const_round_shift(s11 - s15), bd); 2170 x15 = WRAPLOW(highbd_dct_const_round_shift(s11 - s15, bd), bd);
2169 2171
2170 // stage 3 2172 // stage 3
2171 s0 = x0; 2173 s0 = x0;
2172 s1 = x1; 2174 s1 = x1;
2173 s2 = x2; 2175 s2 = x2;
2174 s3 = x3; 2176 s3 = x3;
2175 s4 = x4 * cospi_8_64 + x5 * cospi_24_64; 2177 s4 = x4 * cospi_8_64 + x5 * cospi_24_64;
2176 s5 = x4 * cospi_24_64 - x5 * cospi_8_64; 2178 s5 = x4 * cospi_24_64 - x5 * cospi_8_64;
2177 s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; 2179 s6 = -x6 * cospi_24_64 + x7 * cospi_8_64;
2178 s7 = x6 * cospi_8_64 + x7 * cospi_24_64; 2180 s7 = x6 * cospi_8_64 + x7 * cospi_24_64;
2179 s8 = x8; 2181 s8 = x8;
2180 s9 = x9; 2182 s9 = x9;
2181 s10 = x10; 2183 s10 = x10;
2182 s11 = x11; 2184 s11 = x11;
2183 s12 = x12 * cospi_8_64 + x13 * cospi_24_64; 2185 s12 = x12 * cospi_8_64 + x13 * cospi_24_64;
2184 s13 = x12 * cospi_24_64 - x13 * cospi_8_64; 2186 s13 = x12 * cospi_24_64 - x13 * cospi_8_64;
2185 s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; 2187 s14 = -x14 * cospi_24_64 + x15 * cospi_8_64;
2186 s15 = x14 * cospi_8_64 + x15 * cospi_24_64; 2188 s15 = x14 * cospi_8_64 + x15 * cospi_24_64;
2187 2189
2188 x0 = WRAPLOW(s0 + s2, bd); 2190 x0 = WRAPLOW(s0 + s2, bd);
2189 x1 = WRAPLOW(s1 + s3, bd); 2191 x1 = WRAPLOW(s1 + s3, bd);
2190 x2 = WRAPLOW(s0 - s2, bd); 2192 x2 = WRAPLOW(s0 - s2, bd);
2191 x3 = WRAPLOW(s1 - s3, bd); 2193 x3 = WRAPLOW(s1 - s3, bd);
2192 x4 = WRAPLOW(dct_const_round_shift(s4 + s6), bd); 2194 x4 = WRAPLOW(highbd_dct_const_round_shift(s4 + s6, bd), bd);
2193 x5 = WRAPLOW(dct_const_round_shift(s5 + s7), bd); 2195 x5 = WRAPLOW(highbd_dct_const_round_shift(s5 + s7, bd), bd);
2194 x6 = WRAPLOW(dct_const_round_shift(s4 - s6), bd); 2196 x6 = WRAPLOW(highbd_dct_const_round_shift(s4 - s6, bd), bd);
2195 x7 = WRAPLOW(dct_const_round_shift(s5 - s7), bd); 2197 x7 = WRAPLOW(highbd_dct_const_round_shift(s5 - s7, bd), bd);
2196 x8 = WRAPLOW(s8 + s10, bd); 2198 x8 = WRAPLOW(s8 + s10, bd);
2197 x9 = WRAPLOW(s9 + s11, bd); 2199 x9 = WRAPLOW(s9 + s11, bd);
2198 x10 = WRAPLOW(s8 - s10, bd); 2200 x10 = WRAPLOW(s8 - s10, bd);
2199 x11 = WRAPLOW(s9 - s11, bd); 2201 x11 = WRAPLOW(s9 - s11, bd);
2200 x12 = WRAPLOW(dct_const_round_shift(s12 + s14), bd); 2202 x12 = WRAPLOW(highbd_dct_const_round_shift(s12 + s14, bd), bd);
2201 x13 = WRAPLOW(dct_const_round_shift(s13 + s15), bd); 2203 x13 = WRAPLOW(highbd_dct_const_round_shift(s13 + s15, bd), bd);
2202 x14 = WRAPLOW(dct_const_round_shift(s12 - s14), bd); 2204 x14 = WRAPLOW(highbd_dct_const_round_shift(s12 - s14, bd), bd);
2203 x15 = WRAPLOW(dct_const_round_shift(s13 - s15), bd); 2205 x15 = WRAPLOW(highbd_dct_const_round_shift(s13 - s15, bd), bd);
2204 2206
2205 // stage 4 2207 // stage 4
2206 s2 = (- cospi_16_64) * (x2 + x3); 2208 s2 = (- cospi_16_64) * (x2 + x3);
2207 s3 = cospi_16_64 * (x2 - x3); 2209 s3 = cospi_16_64 * (x2 - x3);
2208 s6 = cospi_16_64 * (x6 + x7); 2210 s6 = cospi_16_64 * (x6 + x7);
2209 s7 = cospi_16_64 * (-x6 + x7); 2211 s7 = cospi_16_64 * (-x6 + x7);
2210 s10 = cospi_16_64 * (x10 + x11); 2212 s10 = cospi_16_64 * (x10 + x11);
2211 s11 = cospi_16_64 * (-x10 + x11); 2213 s11 = cospi_16_64 * (-x10 + x11);
2212 s14 = (- cospi_16_64) * (x14 + x15); 2214 s14 = (- cospi_16_64) * (x14 + x15);
2213 s15 = cospi_16_64 * (x14 - x15); 2215 s15 = cospi_16_64 * (x14 - x15);
2214 2216
2215 x2 = WRAPLOW(dct_const_round_shift(s2), bd); 2217 x2 = WRAPLOW(highbd_dct_const_round_shift(s2, bd), bd);
2216 x3 = WRAPLOW(dct_const_round_shift(s3), bd); 2218 x3 = WRAPLOW(highbd_dct_const_round_shift(s3, bd), bd);
2217 x6 = WRAPLOW(dct_const_round_shift(s6), bd); 2219 x6 = WRAPLOW(highbd_dct_const_round_shift(s6, bd), bd);
2218 x7 = WRAPLOW(dct_const_round_shift(s7), bd); 2220 x7 = WRAPLOW(highbd_dct_const_round_shift(s7, bd), bd);
2219 x10 = WRAPLOW(dct_const_round_shift(s10), bd); 2221 x10 = WRAPLOW(highbd_dct_const_round_shift(s10, bd), bd);
2220 x11 = WRAPLOW(dct_const_round_shift(s11), bd); 2222 x11 = WRAPLOW(highbd_dct_const_round_shift(s11, bd), bd);
2221 x14 = WRAPLOW(dct_const_round_shift(s14), bd); 2223 x14 = WRAPLOW(highbd_dct_const_round_shift(s14, bd), bd);
2222 x15 = WRAPLOW(dct_const_round_shift(s15), bd); 2224 x15 = WRAPLOW(highbd_dct_const_round_shift(s15, bd), bd);
2223 2225
2224 output[0] = WRAPLOW(x0, bd); 2226 output[0] = WRAPLOW(x0, bd);
2225 output[1] = WRAPLOW(-x8, bd); 2227 output[1] = WRAPLOW(-x8, bd);
2226 output[2] = WRAPLOW(x12, bd); 2228 output[2] = WRAPLOW(x12, bd);
2227 output[3] = WRAPLOW(-x4, bd); 2229 output[3] = WRAPLOW(-x4, bd);
2228 output[4] = WRAPLOW(x6, bd); 2230 output[4] = WRAPLOW(x6, bd);
2229 output[5] = WRAPLOW(x14, bd); 2231 output[5] = WRAPLOW(x14, bd);
2230 output[6] = WRAPLOW(x10, bd); 2232 output[6] = WRAPLOW(x10, bd);
2231 output[7] = WRAPLOW(x2, bd); 2233 output[7] = WRAPLOW(x2, bd);
2232 output[8] = WRAPLOW(x3, bd); 2234 output[8] = WRAPLOW(x3, bd);
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
2299 dest[j * stride + i] = highbd_clip_pixel_add( 2301 dest[j * stride + i] = highbd_clip_pixel_add(
2300 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); 2302 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
2301 } 2303 }
2302 } 2304 }
2303 } 2305 }
2304 2306
// NOTE: this is a side-by-side diff rendering. Every row below shows the old
// line number and old text, followed by the new line number and new text.
// The only change in this function is that dct_const_round_shift(x) becomes
// highbd_dct_const_round_shift(x, bd).
//
// vp9_highbd_idct16x16_1_add_c: uses only input[0] to derive one offset (a1)
// and adds that same offset to every sample of a 16x16 destination area.
// NOTE(review): the "_1" in the name suggests this is the DC-only path of the
// 16x16 inverse DCT -- confirm against the callers.
//   input  - coefficient buffer; only element 0 is read here.
//   dest8  - destination handle; turned into a uint16_t pointer with
//            CONVERT_TO_SHORTPTR before any sample is touched.
//   stride - number of uint16_t elements added to dest after each row.
//   bd     - forwarded to WRAPLOW, the round-shift helper and
//            highbd_clip_pixel_add (presumably the bit depth -- confirm).
2305 void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8, 2307 void vp9_highbd_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest8,
2306 int stride, int bd) { 2308 int stride, int bd) {
2307 int i, j; 2309 int i, j;
2308 tran_high_t a1; 2310 tran_high_t a1;
// First pass: scale input[0] by cospi_16_64 and round-shift the product.
2309 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); 2311 tran_low_t out = WRAPLOW(
2312 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd);
2310 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 2313 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
2311 2314
// Second pass: the same scale-and-round step applied to the first result.
2312 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); 2315 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd);
// Final value passed through ROUND_POWER_OF_TWO(out, 6); this is the offset
// added to every sample below.
2313 a1 = ROUND_POWER_OF_TWO(out, 6); 2316 a1 = ROUND_POWER_OF_TWO(out, 6);
// 16 rows (j) of 16 samples (i); dest advances by stride after each row.
2314 for (j = 0; j < 16; ++j) { 2317 for (j = 0; j < 16; ++j) {
2315 for (i = 0; i < 16; ++i) 2318 for (i = 0; i < 16; ++i)
2316 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); 2319 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
2317 dest += stride; 2320 dest += stride;
2318 } 2321 }
2319 } 2322 }
2320 2323
2321 static void highbd_idct32(const tran_low_t *input, tran_low_t *output, int bd) { 2324 static void highbd_idct32(const tran_low_t *input, tran_low_t *output, int bd) {
2322 tran_low_t step1[32], step2[32]; 2325 tran_low_t step1[32], step2[32];
(...skipping 13 matching lines...) Expand all
2336 step1[9] = input[18]; 2339 step1[9] = input[18];
2337 step1[10] = input[10]; 2340 step1[10] = input[10];
2338 step1[11] = input[26]; 2341 step1[11] = input[26];
2339 step1[12] = input[6]; 2342 step1[12] = input[6];
2340 step1[13] = input[22]; 2343 step1[13] = input[22];
2341 step1[14] = input[14]; 2344 step1[14] = input[14];
2342 step1[15] = input[30]; 2345 step1[15] = input[30];
2343 2346
2344 temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64; 2347 temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;
2345 temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64; 2348 temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
2346 step1[16] = WRAPLOW(dct_const_round_shift(temp1), bd); 2349 step1[16] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2347 step1[31] = WRAPLOW(dct_const_round_shift(temp2), bd); 2350 step1[31] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2348 2351
2349 temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64; 2352 temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
2350 temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64; 2353 temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
2351 step1[17] = WRAPLOW(dct_const_round_shift(temp1), bd); 2354 step1[17] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2352 step1[30] = WRAPLOW(dct_const_round_shift(temp2), bd); 2355 step1[30] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2353 2356
2354 temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64; 2357 temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
2355 temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64; 2358 temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
2356 step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd); 2359 step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2357 step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd); 2360 step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2358 2361
2359 temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64; 2362 temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
2360 temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64; 2363 temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
2361 step1[19] = WRAPLOW(dct_const_round_shift(temp1), bd); 2364 step1[19] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2362 step1[28] = WRAPLOW(dct_const_round_shift(temp2), bd); 2365 step1[28] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2363 2366
2364 temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64; 2367 temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
2365 temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64; 2368 temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
2366 step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd); 2369 step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2367 step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd); 2370 step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2368 2371
2369 temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64; 2372 temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
2370 temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64; 2373 temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
2371 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); 2374 step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2372 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); 2375 step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2373 2376
2374 temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64; 2377 temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
2375 temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64; 2378 temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
2376 step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd); 2379 step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2377 step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd); 2380 step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2378 2381
2379 temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64; 2382 temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
2380 temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64; 2383 temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
2381 step1[23] = WRAPLOW(dct_const_round_shift(temp1), bd); 2384 step1[23] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2382 step1[24] = WRAPLOW(dct_const_round_shift(temp2), bd); 2385 step1[24] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2383 2386
2384 // stage 2 2387 // stage 2
2385 step2[0] = step1[0]; 2388 step2[0] = step1[0];
2386 step2[1] = step1[1]; 2389 step2[1] = step1[1];
2387 step2[2] = step1[2]; 2390 step2[2] = step1[2];
2388 step2[3] = step1[3]; 2391 step2[3] = step1[3];
2389 step2[4] = step1[4]; 2392 step2[4] = step1[4];
2390 step2[5] = step1[5]; 2393 step2[5] = step1[5];
2391 step2[6] = step1[6]; 2394 step2[6] = step1[6];
2392 step2[7] = step1[7]; 2395 step2[7] = step1[7];
2393 2396
2394 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; 2397 temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
2395 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; 2398 temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
2396 step2[8] = WRAPLOW(dct_const_round_shift(temp1), bd); 2399 step2[8] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2397 step2[15] = WRAPLOW(dct_const_round_shift(temp2), bd); 2400 step2[15] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2398 2401
2399 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; 2402 temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
2400 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; 2403 temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
2401 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); 2404 step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2402 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); 2405 step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2403 2406
2404 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; 2407 temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
2405 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; 2408 temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
2406 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); 2409 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2407 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); 2410 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2408 2411
2409 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; 2412 temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
2410 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; 2413 temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
2411 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); 2414 step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2412 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); 2415 step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2413 2416
2414 step2[16] = WRAPLOW(step1[16] + step1[17], bd); 2417 step2[16] = WRAPLOW(step1[16] + step1[17], bd);
2415 step2[17] = WRAPLOW(step1[16] - step1[17], bd); 2418 step2[17] = WRAPLOW(step1[16] - step1[17], bd);
2416 step2[18] = WRAPLOW(-step1[18] + step1[19], bd); 2419 step2[18] = WRAPLOW(-step1[18] + step1[19], bd);
2417 step2[19] = WRAPLOW(step1[18] + step1[19], bd); 2420 step2[19] = WRAPLOW(step1[18] + step1[19], bd);
2418 step2[20] = WRAPLOW(step1[20] + step1[21], bd); 2421 step2[20] = WRAPLOW(step1[20] + step1[21], bd);
2419 step2[21] = WRAPLOW(step1[20] - step1[21], bd); 2422 step2[21] = WRAPLOW(step1[20] - step1[21], bd);
2420 step2[22] = WRAPLOW(-step1[22] + step1[23], bd); 2423 step2[22] = WRAPLOW(-step1[22] + step1[23], bd);
2421 step2[23] = WRAPLOW(step1[22] + step1[23], bd); 2424 step2[23] = WRAPLOW(step1[22] + step1[23], bd);
2422 step2[24] = WRAPLOW(step1[24] + step1[25], bd); 2425 step2[24] = WRAPLOW(step1[24] + step1[25], bd);
2423 step2[25] = WRAPLOW(step1[24] - step1[25], bd); 2426 step2[25] = WRAPLOW(step1[24] - step1[25], bd);
2424 step2[26] = WRAPLOW(-step1[26] + step1[27], bd); 2427 step2[26] = WRAPLOW(-step1[26] + step1[27], bd);
2425 step2[27] = WRAPLOW(step1[26] + step1[27], bd); 2428 step2[27] = WRAPLOW(step1[26] + step1[27], bd);
2426 step2[28] = WRAPLOW(step1[28] + step1[29], bd); 2429 step2[28] = WRAPLOW(step1[28] + step1[29], bd);
2427 step2[29] = WRAPLOW(step1[28] - step1[29], bd); 2430 step2[29] = WRAPLOW(step1[28] - step1[29], bd);
2428 step2[30] = WRAPLOW(-step1[30] + step1[31], bd); 2431 step2[30] = WRAPLOW(-step1[30] + step1[31], bd);
2429 step2[31] = WRAPLOW(step1[30] + step1[31], bd); 2432 step2[31] = WRAPLOW(step1[30] + step1[31], bd);
2430 2433
2431 // stage 3 2434 // stage 3
2432 step1[0] = step2[0]; 2435 step1[0] = step2[0];
2433 step1[1] = step2[1]; 2436 step1[1] = step2[1];
2434 step1[2] = step2[2]; 2437 step1[2] = step2[2];
2435 step1[3] = step2[3]; 2438 step1[3] = step2[3];
2436 2439
2437 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; 2440 temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
2438 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; 2441 temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
2439 step1[4] = WRAPLOW(dct_const_round_shift(temp1), bd); 2442 step1[4] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2440 step1[7] = WRAPLOW(dct_const_round_shift(temp2), bd); 2443 step1[7] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2441 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; 2444 temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
2442 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; 2445 temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
2443 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); 2446 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2444 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); 2447 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2445 2448
2446 step1[8] = WRAPLOW(step2[8] + step2[9], bd); 2449 step1[8] = WRAPLOW(step2[8] + step2[9], bd);
2447 step1[9] = WRAPLOW(step2[8] - step2[9], bd); 2450 step1[9] = WRAPLOW(step2[8] - step2[9], bd);
2448 step1[10] = WRAPLOW(-step2[10] + step2[11], bd); 2451 step1[10] = WRAPLOW(-step2[10] + step2[11], bd);
2449 step1[11] = WRAPLOW(step2[10] + step2[11], bd); 2452 step1[11] = WRAPLOW(step2[10] + step2[11], bd);
2450 step1[12] = WRAPLOW(step2[12] + step2[13], bd); 2453 step1[12] = WRAPLOW(step2[12] + step2[13], bd);
2451 step1[13] = WRAPLOW(step2[12] - step2[13], bd); 2454 step1[13] = WRAPLOW(step2[12] - step2[13], bd);
2452 step1[14] = WRAPLOW(-step2[14] + step2[15], bd); 2455 step1[14] = WRAPLOW(-step2[14] + step2[15], bd);
2453 step1[15] = WRAPLOW(step2[14] + step2[15], bd); 2456 step1[15] = WRAPLOW(step2[14] + step2[15], bd);
2454 2457
2455 step1[16] = step2[16]; 2458 step1[16] = step2[16];
2456 step1[31] = step2[31]; 2459 step1[31] = step2[31];
2457 temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; 2460 temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
2458 temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; 2461 temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
2459 step1[17] = WRAPLOW(dct_const_round_shift(temp1), bd); 2462 step1[17] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2460 step1[30] = WRAPLOW(dct_const_round_shift(temp2), bd); 2463 step1[30] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2461 temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; 2464 temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
2462 temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; 2465 temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
2463 step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd); 2466 step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2464 step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd); 2467 step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2465 step1[19] = step2[19]; 2468 step1[19] = step2[19];
2466 step1[20] = step2[20]; 2469 step1[20] = step2[20];
2467 temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; 2470 temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
2468 temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; 2471 temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
2469 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); 2472 step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2470 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); 2473 step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2471 temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; 2474 temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
2472 temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; 2475 temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
2473 step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd); 2476 step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2474 step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd); 2477 step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2475 step1[23] = step2[23]; 2478 step1[23] = step2[23];
2476 step1[24] = step2[24]; 2479 step1[24] = step2[24];
2477 step1[27] = step2[27]; 2480 step1[27] = step2[27];
2478 step1[28] = step2[28]; 2481 step1[28] = step2[28];
2479 2482
2480 // stage 4 2483 // stage 4
2481 temp1 = (step1[0] + step1[1]) * cospi_16_64; 2484 temp1 = (step1[0] + step1[1]) * cospi_16_64;
2482 temp2 = (step1[0] - step1[1]) * cospi_16_64; 2485 temp2 = (step1[0] - step1[1]) * cospi_16_64;
2483 step2[0] = WRAPLOW(dct_const_round_shift(temp1), bd); 2486 step2[0] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2484 step2[1] = WRAPLOW(dct_const_round_shift(temp2), bd); 2487 step2[1] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2485 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; 2488 temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
2486 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; 2489 temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
2487 step2[2] = WRAPLOW(dct_const_round_shift(temp1), bd); 2490 step2[2] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2488 step2[3] = WRAPLOW(dct_const_round_shift(temp2), bd); 2491 step2[3] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2489 step2[4] = WRAPLOW(step1[4] + step1[5], bd); 2492 step2[4] = WRAPLOW(step1[4] + step1[5], bd);
2490 step2[5] = WRAPLOW(step1[4] - step1[5], bd); 2493 step2[5] = WRAPLOW(step1[4] - step1[5], bd);
2491 step2[6] = WRAPLOW(-step1[6] + step1[7], bd); 2494 step2[6] = WRAPLOW(-step1[6] + step1[7], bd);
2492 step2[7] = WRAPLOW(step1[6] + step1[7], bd); 2495 step2[7] = WRAPLOW(step1[6] + step1[7], bd);
2493 2496
2494 step2[8] = step1[8]; 2497 step2[8] = step1[8];
2495 step2[15] = step1[15]; 2498 step2[15] = step1[15];
2496 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; 2499 temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
2497 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; 2500 temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
2498 step2[9] = WRAPLOW(dct_const_round_shift(temp1), bd); 2501 step2[9] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2499 step2[14] = WRAPLOW(dct_const_round_shift(temp2), bd); 2502 step2[14] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2500 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; 2503 temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
2501 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; 2504 temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
2502 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); 2505 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2503 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); 2506 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2504 step2[11] = step1[11]; 2507 step2[11] = step1[11];
2505 step2[12] = step1[12]; 2508 step2[12] = step1[12];
2506 2509
2507 step2[16] = WRAPLOW(step1[16] + step1[19], bd); 2510 step2[16] = WRAPLOW(step1[16] + step1[19], bd);
2508 step2[17] = WRAPLOW(step1[17] + step1[18], bd); 2511 step2[17] = WRAPLOW(step1[17] + step1[18], bd);
2509 step2[18] = WRAPLOW(step1[17] - step1[18], bd); 2512 step2[18] = WRAPLOW(step1[17] - step1[18], bd);
2510 step2[19] = WRAPLOW(step1[16] - step1[19], bd); 2513 step2[19] = WRAPLOW(step1[16] - step1[19], bd);
2511 step2[20] = WRAPLOW(-step1[20] + step1[23], bd); 2514 step2[20] = WRAPLOW(-step1[20] + step1[23], bd);
2512 step2[21] = WRAPLOW(-step1[21] + step1[22], bd); 2515 step2[21] = WRAPLOW(-step1[21] + step1[22], bd);
2513 step2[22] = WRAPLOW(step1[21] + step1[22], bd); 2516 step2[22] = WRAPLOW(step1[21] + step1[22], bd);
2514 step2[23] = WRAPLOW(step1[20] + step1[23], bd); 2517 step2[23] = WRAPLOW(step1[20] + step1[23], bd);
2515 2518
2516 step2[24] = WRAPLOW(step1[24] + step1[27], bd); 2519 step2[24] = WRAPLOW(step1[24] + step1[27], bd);
2517 step2[25] = WRAPLOW(step1[25] + step1[26], bd); 2520 step2[25] = WRAPLOW(step1[25] + step1[26], bd);
2518 step2[26] = WRAPLOW(step1[25] - step1[26], bd); 2521 step2[26] = WRAPLOW(step1[25] - step1[26], bd);
2519 step2[27] = WRAPLOW(step1[24] - step1[27], bd); 2522 step2[27] = WRAPLOW(step1[24] - step1[27], bd);
2520 step2[28] = WRAPLOW(-step1[28] + step1[31], bd); 2523 step2[28] = WRAPLOW(-step1[28] + step1[31], bd);
2521 step2[29] = WRAPLOW(-step1[29] + step1[30], bd); 2524 step2[29] = WRAPLOW(-step1[29] + step1[30], bd);
2522 step2[30] = WRAPLOW(step1[29] + step1[30], bd); 2525 step2[30] = WRAPLOW(step1[29] + step1[30], bd);
2523 step2[31] = WRAPLOW(step1[28] + step1[31], bd); 2526 step2[31] = WRAPLOW(step1[28] + step1[31], bd);
2524 2527
2525 // stage 5 2528 // stage 5
2526 step1[0] = WRAPLOW(step2[0] + step2[3], bd); 2529 step1[0] = WRAPLOW(step2[0] + step2[3], bd);
2527 step1[1] = WRAPLOW(step2[1] + step2[2], bd); 2530 step1[1] = WRAPLOW(step2[1] + step2[2], bd);
2528 step1[2] = WRAPLOW(step2[1] - step2[2], bd); 2531 step1[2] = WRAPLOW(step2[1] - step2[2], bd);
2529 step1[3] = WRAPLOW(step2[0] - step2[3], bd); 2532 step1[3] = WRAPLOW(step2[0] - step2[3], bd);
2530 step1[4] = step2[4]; 2533 step1[4] = step2[4];
2531 temp1 = (step2[6] - step2[5]) * cospi_16_64; 2534 temp1 = (step2[6] - step2[5]) * cospi_16_64;
2532 temp2 = (step2[5] + step2[6]) * cospi_16_64; 2535 temp2 = (step2[5] + step2[6]) * cospi_16_64;
2533 step1[5] = WRAPLOW(dct_const_round_shift(temp1), bd); 2536 step1[5] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2534 step1[6] = WRAPLOW(dct_const_round_shift(temp2), bd); 2537 step1[6] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2535 step1[7] = step2[7]; 2538 step1[7] = step2[7];
2536 2539
2537 step1[8] = WRAPLOW(step2[8] + step2[11], bd); 2540 step1[8] = WRAPLOW(step2[8] + step2[11], bd);
2538 step1[9] = WRAPLOW(step2[9] + step2[10], bd); 2541 step1[9] = WRAPLOW(step2[9] + step2[10], bd);
2539 step1[10] = WRAPLOW(step2[9] - step2[10], bd); 2542 step1[10] = WRAPLOW(step2[9] - step2[10], bd);
2540 step1[11] = WRAPLOW(step2[8] - step2[11], bd); 2543 step1[11] = WRAPLOW(step2[8] - step2[11], bd);
2541 step1[12] = WRAPLOW(-step2[12] + step2[15], bd); 2544 step1[12] = WRAPLOW(-step2[12] + step2[15], bd);
2542 step1[13] = WRAPLOW(-step2[13] + step2[14], bd); 2545 step1[13] = WRAPLOW(-step2[13] + step2[14], bd);
2543 step1[14] = WRAPLOW(step2[13] + step2[14], bd); 2546 step1[14] = WRAPLOW(step2[13] + step2[14], bd);
2544 step1[15] = WRAPLOW(step2[12] + step2[15], bd); 2547 step1[15] = WRAPLOW(step2[12] + step2[15], bd);
2545 2548
2546 step1[16] = step2[16]; 2549 step1[16] = step2[16];
2547 step1[17] = step2[17]; 2550 step1[17] = step2[17];
2548 temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; 2551 temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
2549 temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; 2552 temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
2550 step1[18] = WRAPLOW(dct_const_round_shift(temp1), bd); 2553 step1[18] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2551 step1[29] = WRAPLOW(dct_const_round_shift(temp2), bd); 2554 step1[29] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2552 temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; 2555 temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
2553 temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; 2556 temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
2554 step1[19] = WRAPLOW(dct_const_round_shift(temp1), bd); 2557 step1[19] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2555 step1[28] = WRAPLOW(dct_const_round_shift(temp2), bd); 2558 step1[28] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2556 temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; 2559 temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
2557 temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; 2560 temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
2558 step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd); 2561 step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2559 step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd); 2562 step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2560 temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; 2563 temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
2561 temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; 2564 temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
2562 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); 2565 step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2563 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); 2566 step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2564 step1[22] = step2[22]; 2567 step1[22] = step2[22];
2565 step1[23] = step2[23]; 2568 step1[23] = step2[23];
2566 step1[24] = step2[24]; 2569 step1[24] = step2[24];
2567 step1[25] = step2[25]; 2570 step1[25] = step2[25];
2568 step1[30] = step2[30]; 2571 step1[30] = step2[30];
2569 step1[31] = step2[31]; 2572 step1[31] = step2[31];
2570 2573
2571 // stage 6 2574 // stage 6
2572 step2[0] = WRAPLOW(step1[0] + step1[7], bd); 2575 step2[0] = WRAPLOW(step1[0] + step1[7], bd);
2573 step2[1] = WRAPLOW(step1[1] + step1[6], bd); 2576 step2[1] = WRAPLOW(step1[1] + step1[6], bd);
2574 step2[2] = WRAPLOW(step1[2] + step1[5], bd); 2577 step2[2] = WRAPLOW(step1[2] + step1[5], bd);
2575 step2[3] = WRAPLOW(step1[3] + step1[4], bd); 2578 step2[3] = WRAPLOW(step1[3] + step1[4], bd);
2576 step2[4] = WRAPLOW(step1[3] - step1[4], bd); 2579 step2[4] = WRAPLOW(step1[3] - step1[4], bd);
2577 step2[5] = WRAPLOW(step1[2] - step1[5], bd); 2580 step2[5] = WRAPLOW(step1[2] - step1[5], bd);
2578 step2[6] = WRAPLOW(step1[1] - step1[6], bd); 2581 step2[6] = WRAPLOW(step1[1] - step1[6], bd);
2579 step2[7] = WRAPLOW(step1[0] - step1[7], bd); 2582 step2[7] = WRAPLOW(step1[0] - step1[7], bd);
2580 step2[8] = step1[8]; 2583 step2[8] = step1[8];
2581 step2[9] = step1[9]; 2584 step2[9] = step1[9];
2582 temp1 = (-step1[10] + step1[13]) * cospi_16_64; 2585 temp1 = (-step1[10] + step1[13]) * cospi_16_64;
2583 temp2 = (step1[10] + step1[13]) * cospi_16_64; 2586 temp2 = (step1[10] + step1[13]) * cospi_16_64;
2584 step2[10] = WRAPLOW(dct_const_round_shift(temp1), bd); 2587 step2[10] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2585 step2[13] = WRAPLOW(dct_const_round_shift(temp2), bd); 2588 step2[13] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2586 temp1 = (-step1[11] + step1[12]) * cospi_16_64; 2589 temp1 = (-step1[11] + step1[12]) * cospi_16_64;
2587 temp2 = (step1[11] + step1[12]) * cospi_16_64; 2590 temp2 = (step1[11] + step1[12]) * cospi_16_64;
2588 step2[11] = WRAPLOW(dct_const_round_shift(temp1), bd); 2591 step2[11] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2589 step2[12] = WRAPLOW(dct_const_round_shift(temp2), bd); 2592 step2[12] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2590 step2[14] = step1[14]; 2593 step2[14] = step1[14];
2591 step2[15] = step1[15]; 2594 step2[15] = step1[15];
2592 2595
2593 step2[16] = WRAPLOW(step1[16] + step1[23], bd); 2596 step2[16] = WRAPLOW(step1[16] + step1[23], bd);
2594 step2[17] = WRAPLOW(step1[17] + step1[22], bd); 2597 step2[17] = WRAPLOW(step1[17] + step1[22], bd);
2595 step2[18] = WRAPLOW(step1[18] + step1[21], bd); 2598 step2[18] = WRAPLOW(step1[18] + step1[21], bd);
2596 step2[19] = WRAPLOW(step1[19] + step1[20], bd); 2599 step2[19] = WRAPLOW(step1[19] + step1[20], bd);
2597 step2[20] = WRAPLOW(step1[19] - step1[20], bd); 2600 step2[20] = WRAPLOW(step1[19] - step1[20], bd);
2598 step2[21] = WRAPLOW(step1[18] - step1[21], bd); 2601 step2[21] = WRAPLOW(step1[18] - step1[21], bd);
2599 step2[22] = WRAPLOW(step1[17] - step1[22], bd); 2602 step2[22] = WRAPLOW(step1[17] - step1[22], bd);
(...skipping 25 matching lines...) Expand all
2625 step1[13] = WRAPLOW(step2[2] - step2[13], bd); 2628 step1[13] = WRAPLOW(step2[2] - step2[13], bd);
2626 step1[14] = WRAPLOW(step2[1] - step2[14], bd); 2629 step1[14] = WRAPLOW(step2[1] - step2[14], bd);
2627 step1[15] = WRAPLOW(step2[0] - step2[15], bd); 2630 step1[15] = WRAPLOW(step2[0] - step2[15], bd);
2628 2631
2629 step1[16] = step2[16]; 2632 step1[16] = step2[16];
2630 step1[17] = step2[17]; 2633 step1[17] = step2[17];
2631 step1[18] = step2[18]; 2634 step1[18] = step2[18];
2632 step1[19] = step2[19]; 2635 step1[19] = step2[19];
2633 temp1 = (-step2[20] + step2[27]) * cospi_16_64; 2636 temp1 = (-step2[20] + step2[27]) * cospi_16_64;
2634 temp2 = (step2[20] + step2[27]) * cospi_16_64; 2637 temp2 = (step2[20] + step2[27]) * cospi_16_64;
2635 step1[20] = WRAPLOW(dct_const_round_shift(temp1), bd); 2638 step1[20] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2636 step1[27] = WRAPLOW(dct_const_round_shift(temp2), bd); 2639 step1[27] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2637 temp1 = (-step2[21] + step2[26]) * cospi_16_64; 2640 temp1 = (-step2[21] + step2[26]) * cospi_16_64;
2638 temp2 = (step2[21] + step2[26]) * cospi_16_64; 2641 temp2 = (step2[21] + step2[26]) * cospi_16_64;
2639 step1[21] = WRAPLOW(dct_const_round_shift(temp1), bd); 2642 step1[21] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2640 step1[26] = WRAPLOW(dct_const_round_shift(temp2), bd); 2643 step1[26] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2641 temp1 = (-step2[22] + step2[25]) * cospi_16_64; 2644 temp1 = (-step2[22] + step2[25]) * cospi_16_64;
2642 temp2 = (step2[22] + step2[25]) * cospi_16_64; 2645 temp2 = (step2[22] + step2[25]) * cospi_16_64;
2643 step1[22] = WRAPLOW(dct_const_round_shift(temp1), bd); 2646 step1[22] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2644 step1[25] = WRAPLOW(dct_const_round_shift(temp2), bd); 2647 step1[25] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2645 temp1 = (-step2[23] + step2[24]) * cospi_16_64; 2648 temp1 = (-step2[23] + step2[24]) * cospi_16_64;
2646 temp2 = (step2[23] + step2[24]) * cospi_16_64; 2649 temp2 = (step2[23] + step2[24]) * cospi_16_64;
2647 step1[23] = WRAPLOW(dct_const_round_shift(temp1), bd); 2650 step1[23] = WRAPLOW(highbd_dct_const_round_shift(temp1, bd), bd);
2648 step1[24] = WRAPLOW(dct_const_round_shift(temp2), bd); 2651 step1[24] = WRAPLOW(highbd_dct_const_round_shift(temp2, bd), bd);
2649 step1[28] = step2[28]; 2652 step1[28] = step2[28];
2650 step1[29] = step2[29]; 2653 step1[29] = step2[29];
2651 step1[30] = step2[30]; 2654 step1[30] = step2[30];
2652 step1[31] = step2[31]; 2655 step1[31] = step2[31];
2653 2656
2654 // final stage 2657 // final stage
2655 output[0] = WRAPLOW(step1[0] + step1[31], bd); 2658 output[0] = WRAPLOW(step1[0] + step1[31], bd);
2656 output[1] = WRAPLOW(step1[1] + step1[30], bd); 2659 output[1] = WRAPLOW(step1[1] + step1[30], bd);
2657 output[2] = WRAPLOW(step1[2] + step1[29], bd); 2660 output[2] = WRAPLOW(step1[2] + step1[29], bd);
2658 output[3] = WRAPLOW(step1[3] + step1[28], bd); 2661 output[3] = WRAPLOW(step1[3] + step1[28], bd);
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
2752 } 2755 }
2753 } 2756 }
2754 } 2757 }
2755 2758
2756 void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8, 2759 void vp9_highbd_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest8,
2757 int stride, int bd) { 2760 int stride, int bd) {
2758 int i, j; 2761 int i, j;
2759 int a1; 2762 int a1;
2760 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 2763 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
2761 2764
2762 tran_low_t out = WRAPLOW(dct_const_round_shift(input[0] * cospi_16_64), bd); 2765 tran_low_t out = WRAPLOW(
2763 out = WRAPLOW(dct_const_round_shift(out * cospi_16_64), bd); 2766 highbd_dct_const_round_shift(input[0] * cospi_16_64, bd), bd);
2767 out = WRAPLOW(highbd_dct_const_round_shift(out * cospi_16_64, bd), bd);
2764 a1 = ROUND_POWER_OF_TWO(out, 6); 2768 a1 = ROUND_POWER_OF_TWO(out, 6);
2765 2769
2766 for (j = 0; j < 32; ++j) { 2770 for (j = 0; j < 32; ++j) {
2767 for (i = 0; i < 32; ++i) 2771 for (i = 0; i < 32; ++i)
2768 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); 2772 dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
2769 dest += stride; 2773 dest += stride;
2770 } 2774 }
2771 } 2775 }
2772 2776
2773 // idct 2777 // idct
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
2853 2857
2854 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, 2858 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
2855 uint8_t *dest, int stride, int eob, int bd) { 2859 uint8_t *dest, int stride, int eob, int bd) {
2856 if (tx_type == DCT_DCT) { 2860 if (tx_type == DCT_DCT) {
2857 vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); 2861 vp9_highbd_idct16x16_add(input, dest, stride, eob, bd);
2858 } else { 2862 } else {
2859 vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); 2863 vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
2860 } 2864 }
2861 } 2865 }
2862 #endif // CONFIG_VP9_HIGHBITDEPTH 2866 #endif // CONFIG_VP9_HIGHBITDEPTH
OLDNEW
« no previous file with comments | « source/libvpx/vp9/common/vp9_idct.h ('k') | source/libvpx/vp9/common/vp9_loopfilter.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698