OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 1578 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1589 "punpcklwd %%xmm1,%%xmm5 \n" \ | 1589 "punpcklwd %%xmm1,%%xmm5 \n" \ |
1590 "punpckhwd %%xmm1,%%xmm0 \n" \ | 1590 "punpckhwd %%xmm1,%%xmm0 \n" \ |
1591 "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \ | 1591 "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \ |
1592 "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ | 1592 "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ |
1593 "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n" | 1593 "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n" |
1594 | 1594 |
1595 void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, | 1595 void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, |
1596 const uint8* u_buf, | 1596 const uint8* u_buf, |
1597 const uint8* v_buf, | 1597 const uint8* v_buf, |
1598 uint8* dst_argb, | 1598 uint8* dst_argb, |
1599 struct YuvConstants* yuvconstants, | 1599 const struct YuvConstants* yuvconstants, |
1600 int width) { | 1600 int width) { |
1601 asm volatile ( | 1601 asm volatile ( |
1602 "sub %[u_buf],%[v_buf] \n" | 1602 "sub %[u_buf],%[v_buf] \n" |
1603 "pcmpeqb %%xmm5,%%xmm5 \n" | 1603 "pcmpeqb %%xmm5,%%xmm5 \n" |
1604 LABELALIGN | 1604 LABELALIGN |
1605 "1: \n" | 1605 "1: \n" |
1606 READYUV444 | 1606 READYUV444 |
1607 YUVTORGB(yuvconstants) | 1607 YUVTORGB(yuvconstants) |
1608 STOREARGB | 1608 STOREARGB |
1609 "sub $0x8,%[width] \n" | 1609 "sub $0x8,%[width] \n" |
1610 "jg 1b \n" | 1610 "jg 1b \n" |
1611 : [y_buf]"+r"(y_buf), // %[y_buf] | 1611 : [y_buf]"+r"(y_buf), // %[y_buf] |
1612 [u_buf]"+r"(u_buf), // %[u_buf] | 1612 [u_buf]"+r"(u_buf), // %[u_buf] |
1613 [v_buf]"+r"(v_buf), // %[v_buf] | 1613 [v_buf]"+r"(v_buf), // %[v_buf] |
1614 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1614 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1615 [width]"+rm"(width) // %[width] | 1615 [width]"+rm"(width) // %[width] |
1616 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1616 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1617 : "memory", "cc", NACL_R14 | 1617 : "memory", "cc", NACL_R14 |
1618 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1618 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1619 ); | 1619 ); |
1620 } | 1620 } |
1621 | 1621 |
1622 void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf, | 1622 void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf, |
1623 const uint8* u_buf, | 1623 const uint8* u_buf, |
1624 const uint8* v_buf, | 1624 const uint8* v_buf, |
1625 uint8* dst_abgr, | 1625 uint8* dst_abgr, |
1626 struct YuvConstants* yuvconstants, | 1626 const struct YuvConstants* yuvconstants, |
1627 int width) { | 1627 int width) { |
1628 asm volatile ( | 1628 asm volatile ( |
1629 "sub %[u_buf],%[v_buf] \n" | 1629 "sub %[u_buf],%[v_buf] \n" |
1630 "pcmpeqb %%xmm5,%%xmm5 \n" | 1630 "pcmpeqb %%xmm5,%%xmm5 \n" |
1631 LABELALIGN | 1631 LABELALIGN |
1632 "1: \n" | 1632 "1: \n" |
1633 READYUV444 | 1633 READYUV444 |
1634 YUVTORGB(yuvconstants) | 1634 YUVTORGB(yuvconstants) |
1635 STOREABGR | 1635 STOREABGR |
1636 "sub $0x8,%[width] \n" | 1636 "sub $0x8,%[width] \n" |
1637 "jg 1b \n" | 1637 "jg 1b \n" |
1638 : [y_buf]"+r"(y_buf), // %[y_buf] | 1638 : [y_buf]"+r"(y_buf), // %[y_buf] |
1639 [u_buf]"+r"(u_buf), // %[u_buf] | 1639 [u_buf]"+r"(u_buf), // %[u_buf] |
1640 [v_buf]"+r"(v_buf), // %[v_buf] | 1640 [v_buf]"+r"(v_buf), // %[v_buf] |
1641 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] | 1641 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] |
1642 [width]"+rm"(width) // %[width] | 1642 [width]"+rm"(width) // %[width] |
1643 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1643 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1644 : "memory", "cc", NACL_R14 | 1644 : "memory", "cc", NACL_R14 |
1645 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1645 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1646 ); | 1646 ); |
1647 } | 1647 } |
1648 | 1648 |
1649 void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, | 1649 void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, |
1650 const uint8* u_buf, | 1650 const uint8* u_buf, |
1651 const uint8* v_buf, | 1651 const uint8* v_buf, |
1652 uint8* dst_rgb24, | 1652 uint8* dst_rgb24, |
1653 struct YuvConstants* yuvconstants, | 1653 const struct YuvConstants* yuvconstants, |
1654 int width) { | 1654 int width) { |
1655 asm volatile ( | 1655 asm volatile ( |
1656 "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" | 1656 "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" |
1657 "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" | 1657 "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" |
1658 "sub %[u_buf],%[v_buf] \n" | 1658 "sub %[u_buf],%[v_buf] \n" |
1659 LABELALIGN | 1659 LABELALIGN |
1660 "1: \n" | 1660 "1: \n" |
1661 READYUV422 | 1661 READYUV422 |
1662 YUVTORGB(yuvconstants) | 1662 YUVTORGB(yuvconstants) |
1663 "punpcklbw %%xmm1,%%xmm0 \n" | 1663 "punpcklbw %%xmm1,%%xmm0 \n" |
(...skipping 23 matching lines...) Expand all Loading... |
1687 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) | 1687 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) |
1688 : "memory", "cc", NACL_R14 | 1688 : "memory", "cc", NACL_R14 |
1689 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" | 1689 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
1690 ); | 1690 ); |
1691 } | 1691 } |
1692 | 1692 |
1693 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, | 1693 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, |
1694 const uint8* u_buf, | 1694 const uint8* u_buf, |
1695 const uint8* v_buf, | 1695 const uint8* v_buf, |
1696 uint8* dst_raw, | 1696 uint8* dst_raw, |
1697 struct YuvConstants* yuvconstants, | 1697 const struct YuvConstants* yuvconstants, |
1698 int width) { | 1698 int width) { |
1699 asm volatile ( | 1699 asm volatile ( |
1700 "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n" | 1700 "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n" |
1701 "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n" | 1701 "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n" |
1702 "sub %[u_buf],%[v_buf] \n" | 1702 "sub %[u_buf],%[v_buf] \n" |
1703 LABELALIGN | 1703 LABELALIGN |
1704 "1: \n" | 1704 "1: \n" |
1705 READYUV422 | 1705 READYUV422 |
1706 YUVTORGB(yuvconstants) | 1706 YUVTORGB(yuvconstants) |
1707 "punpcklbw %%xmm1,%%xmm0 \n" | 1707 "punpcklbw %%xmm1,%%xmm0 \n" |
(...skipping 23 matching lines...) Expand all Loading... |
1731 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) | 1731 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) |
1732 : "memory", "cc", NACL_R14 | 1732 : "memory", "cc", NACL_R14 |
1733 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" | 1733 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
1734 ); | 1734 ); |
1735 } | 1735 } |
1736 | 1736 |
1737 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, | 1737 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, |
1738 const uint8* u_buf, | 1738 const uint8* u_buf, |
1739 const uint8* v_buf, | 1739 const uint8* v_buf, |
1740 uint8* dst_argb, | 1740 uint8* dst_argb, |
1741 struct YuvConstants* yuvconstants, | 1741 const struct YuvConstants* yuvconstants, |
1742 int width) { | 1742 int width) { |
1743 asm volatile ( | 1743 asm volatile ( |
1744 "sub %[u_buf],%[v_buf] \n" | 1744 "sub %[u_buf],%[v_buf] \n" |
1745 "pcmpeqb %%xmm5,%%xmm5 \n" | 1745 "pcmpeqb %%xmm5,%%xmm5 \n" |
1746 LABELALIGN | 1746 LABELALIGN |
1747 "1: \n" | 1747 "1: \n" |
1748 READYUV422 | 1748 READYUV422 |
1749 YUVTORGB(yuvconstants) | 1749 YUVTORGB(yuvconstants) |
1750 STOREARGB | 1750 STOREARGB |
1751 "sub $0x8,%[width] \n" | 1751 "sub $0x8,%[width] \n" |
1752 "jg 1b \n" | 1752 "jg 1b \n" |
1753 : [y_buf]"+r"(y_buf), // %[y_buf] | 1753 : [y_buf]"+r"(y_buf), // %[y_buf] |
1754 [u_buf]"+r"(u_buf), // %[u_buf] | 1754 [u_buf]"+r"(u_buf), // %[u_buf] |
1755 [v_buf]"+r"(v_buf), // %[v_buf] | 1755 [v_buf]"+r"(v_buf), // %[v_buf] |
1756 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1756 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1757 [width]"+rm"(width) // %[width] | 1757 [width]"+rm"(width) // %[width] |
1758 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1758 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1759 : "memory", "cc", NACL_R14 | 1759 : "memory", "cc", NACL_R14 |
1760 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1760 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1761 ); | 1761 ); |
1762 } | 1762 } |
1763 | 1763 |
1764 void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, | 1764 void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, |
1765 const uint8* u_buf, | 1765 const uint8* u_buf, |
1766 const uint8* v_buf, | 1766 const uint8* v_buf, |
1767 const uint8* a_buf, | 1767 const uint8* a_buf, |
1768 uint8* dst_argb, | 1768 uint8* dst_argb, |
1769 struct YuvConstants* yuvconstants, | 1769 const struct YuvConstants* yuvconstants, |
1770 int width) { | 1770 int width) { |
1771 asm volatile ( | 1771 asm volatile ( |
1772 "sub %[u_buf],%[v_buf] \n" | 1772 "sub %[u_buf],%[v_buf] \n" |
1773 LABELALIGN | 1773 LABELALIGN |
1774 "1: \n" | 1774 "1: \n" |
1775 READYUVA422 | 1775 READYUVA422 |
1776 YUVTORGB(yuvconstants) | 1776 YUVTORGB(yuvconstants) |
1777 STOREARGB | 1777 STOREARGB |
1778 "subl $0x8,%[width] \n" | 1778 "subl $0x8,%[width] \n" |
1779 "jg 1b \n" | 1779 "jg 1b \n" |
(...skipping 11 matching lines...) Expand all Loading... |
1791 : "memory", "cc", NACL_R14 | 1791 : "memory", "cc", NACL_R14 |
1792 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1792 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1793 ); | 1793 ); |
1794 } | 1794 } |
1795 | 1795 |
1796 void OMITFP I422AlphaToABGRRow_SSSE3(const uint8* y_buf, | 1796 void OMITFP I422AlphaToABGRRow_SSSE3(const uint8* y_buf, |
1797 const uint8* u_buf, | 1797 const uint8* u_buf, |
1798 const uint8* v_buf, | 1798 const uint8* v_buf, |
1799 const uint8* a_buf, | 1799 const uint8* a_buf, |
1800 uint8* dst_abgr, | 1800 uint8* dst_abgr, |
1801 struct YuvConstants* yuvconstants, | 1801 const struct YuvConstants* yuvconstants, |
1802 int width) { | 1802 int width) { |
1803 asm volatile ( | 1803 asm volatile ( |
1804 "sub %[u_buf],%[v_buf] \n" | 1804 "sub %[u_buf],%[v_buf] \n" |
1805 LABELALIGN | 1805 LABELALIGN |
1806 "1: \n" | 1806 "1: \n" |
1807 READYUVA422 | 1807 READYUVA422 |
1808 YUVTORGB(yuvconstants) | 1808 YUVTORGB(yuvconstants) |
1809 STOREABGR | 1809 STOREABGR |
1810 "subl $0x8,%[width] \n" | 1810 "subl $0x8,%[width] \n" |
1811 "jg 1b \n" | 1811 "jg 1b \n" |
(...skipping 10 matching lines...) Expand all Loading... |
1822 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1822 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1823 : "memory", "cc", NACL_R14 | 1823 : "memory", "cc", NACL_R14 |
1824 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1824 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1825 ); | 1825 ); |
1826 } | 1826 } |
1827 | 1827 |
1828 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, | 1828 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, |
1829 const uint8* u_buf, | 1829 const uint8* u_buf, |
1830 const uint8* v_buf, | 1830 const uint8* v_buf, |
1831 uint8* dst_argb, | 1831 uint8* dst_argb, |
1832 struct YuvConstants* yuvconstants, | 1832 const struct YuvConstants* yuvconstants, |
1833 int width) { | 1833 int width) { |
1834 asm volatile ( | 1834 asm volatile ( |
1835 "sub %[u_buf],%[v_buf] \n" | 1835 "sub %[u_buf],%[v_buf] \n" |
1836 "pcmpeqb %%xmm5,%%xmm5 \n" | 1836 "pcmpeqb %%xmm5,%%xmm5 \n" |
1837 LABELALIGN | 1837 LABELALIGN |
1838 "1: \n" | 1838 "1: \n" |
1839 READYUV411 | 1839 READYUV411 |
1840 YUVTORGB(yuvconstants) | 1840 YUVTORGB(yuvconstants) |
1841 STOREARGB | 1841 STOREARGB |
1842 "sub $0x8,%[width] \n" | 1842 "sub $0x8,%[width] \n" |
1843 "jg 1b \n" | 1843 "jg 1b \n" |
1844 : [y_buf]"+r"(y_buf), // %[y_buf] | 1844 : [y_buf]"+r"(y_buf), // %[y_buf] |
1845 [u_buf]"+r"(u_buf), // %[u_buf] | 1845 [u_buf]"+r"(u_buf), // %[u_buf] |
1846 [v_buf]"+r"(v_buf), // %[v_buf] | 1846 [v_buf]"+r"(v_buf), // %[v_buf] |
1847 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1847 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1848 [width]"+rm"(width) // %[width] | 1848 [width]"+rm"(width) // %[width] |
1849 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1849 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1850 : "memory", "cc", NACL_R14 | 1850 : "memory", "cc", NACL_R14 |
1851 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1851 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1852 ); | 1852 ); |
1853 } | 1853 } |
1854 | 1854 |
1855 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, | 1855 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, |
1856 const uint8* uv_buf, | 1856 const uint8* uv_buf, |
1857 uint8* dst_argb, | 1857 uint8* dst_argb, |
1858 struct YuvConstants* yuvconstants, | 1858 const struct YuvConstants* yuvconstants, |
1859 int width) { | 1859 int width) { |
1860 asm volatile ( | 1860 asm volatile ( |
1861 "pcmpeqb %%xmm5,%%xmm5 \n" | 1861 "pcmpeqb %%xmm5,%%xmm5 \n" |
1862 LABELALIGN | 1862 LABELALIGN |
1863 "1: \n" | 1863 "1: \n" |
1864 READNV12 | 1864 READNV12 |
1865 YUVTORGB(yuvconstants) | 1865 YUVTORGB(yuvconstants) |
1866 STOREARGB | 1866 STOREARGB |
1867 "sub $0x8,%[width] \n" | 1867 "sub $0x8,%[width] \n" |
1868 "jg 1b \n" | 1868 "jg 1b \n" |
1869 : [y_buf]"+r"(y_buf), // %[y_buf] | 1869 : [y_buf]"+r"(y_buf), // %[y_buf] |
1870 [uv_buf]"+r"(uv_buf), // %[uv_buf] | 1870 [uv_buf]"+r"(uv_buf), // %[uv_buf] |
1871 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1871 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1872 [width]"+rm"(width) // %[width] | 1872 [width]"+rm"(width) // %[width] |
1873 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1873 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1874 // Does not use r14. | 1874 // Does not use r14. |
1875 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1875 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1876 ); | 1876 ); |
1877 } | 1877 } |
1878 | 1878 |
1879 void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, | 1879 void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, |
1880 const uint8* vu_buf, | 1880 const uint8* vu_buf, |
1881 uint8* dst_argb, | 1881 uint8* dst_argb, |
1882 struct YuvConstants* yuvconstants, | 1882 const struct YuvConstants* yuvconstants, |
1883 int width) { | 1883 int width) { |
1884 asm volatile ( | 1884 asm volatile ( |
1885 "pcmpeqb %%xmm5,%%xmm5 \n" | 1885 "pcmpeqb %%xmm5,%%xmm5 \n" |
1886 LABELALIGN | 1886 LABELALIGN |
1887 "1: \n" | 1887 "1: \n" |
1888 READNV21 | 1888 READNV21 |
1889 YUVTORGB(yuvconstants) | 1889 YUVTORGB(yuvconstants) |
1890 STOREARGB | 1890 STOREARGB |
1891 "sub $0x8,%[width] \n" | 1891 "sub $0x8,%[width] \n" |
1892 "jg 1b \n" | 1892 "jg 1b \n" |
1893 : [y_buf]"+r"(y_buf), // %[y_buf] | 1893 : [y_buf]"+r"(y_buf), // %[y_buf] |
1894 [vu_buf]"+r"(vu_buf), // %[vu_buf] | 1894 [vu_buf]"+r"(vu_buf), // %[vu_buf] |
1895 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1895 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1896 [width]"+rm"(width) // %[width] | 1896 [width]"+rm"(width) // %[width] |
1897 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1897 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
1898 [kShuffleNV21]"m"(kShuffleNV21) | 1898 [kShuffleNV21]"m"(kShuffleNV21) |
1899 // Does not use r14. | 1899 // Does not use r14. |
1900 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1900 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1901 ); | 1901 ); |
1902 } | 1902 } |
1903 | 1903 |
1904 void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, | 1904 void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, |
1905 uint8* dst_argb, | 1905 uint8* dst_argb, |
1906 struct YuvConstants* yuvconstants, | 1906 const struct YuvConstants* yuvconstants, |
1907 int width) { | 1907 int width) { |
1908 asm volatile ( | 1908 asm volatile ( |
1909 "pcmpeqb %%xmm5,%%xmm5 \n" | 1909 "pcmpeqb %%xmm5,%%xmm5 \n" |
1910 LABELALIGN | 1910 LABELALIGN |
1911 "1: \n" | 1911 "1: \n" |
1912 READYUY2 | 1912 READYUY2 |
1913 YUVTORGB(yuvconstants) | 1913 YUVTORGB(yuvconstants) |
1914 STOREARGB | 1914 STOREARGB |
1915 "sub $0x8,%[width] \n" | 1915 "sub $0x8,%[width] \n" |
1916 "jg 1b \n" | 1916 "jg 1b \n" |
1917 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf] | 1917 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf] |
1918 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1918 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1919 [width]"+rm"(width) // %[width] | 1919 [width]"+rm"(width) // %[width] |
1920 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1920 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
1921 [kShuffleYUY2Y]"m"(kShuffleYUY2Y), | 1921 [kShuffleYUY2Y]"m"(kShuffleYUY2Y), |
1922 [kShuffleYUY2UV]"m"(kShuffleYUY2UV) | 1922 [kShuffleYUY2UV]"m"(kShuffleYUY2UV) |
1923 // Does not use r14. | 1923 // Does not use r14. |
1924 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1924 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1925 ); | 1925 ); |
1926 } | 1926 } |
1927 | 1927 |
1928 void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, | 1928 void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, |
1929 uint8* dst_argb, | 1929 uint8* dst_argb, |
1930 struct YuvConstants* yuvconstants, | 1930 const struct YuvConstants* yuvconstants, |
1931 int width) { | 1931 int width) { |
1932 asm volatile ( | 1932 asm volatile ( |
1933 "pcmpeqb %%xmm5,%%xmm5 \n" | 1933 "pcmpeqb %%xmm5,%%xmm5 \n" |
1934 LABELALIGN | 1934 LABELALIGN |
1935 "1: \n" | 1935 "1: \n" |
1936 READUYVY | 1936 READUYVY |
1937 YUVTORGB(yuvconstants) | 1937 YUVTORGB(yuvconstants) |
1938 STOREARGB | 1938 STOREARGB |
1939 "sub $0x8,%[width] \n" | 1939 "sub $0x8,%[width] \n" |
1940 "jg 1b \n" | 1940 "jg 1b \n" |
1941 : [uyvy_buf]"+r"(uyvy_buf), // %[uyvy_buf] | 1941 : [uyvy_buf]"+r"(uyvy_buf), // %[uyvy_buf] |
1942 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1942 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1943 [width]"+rm"(width) // %[width] | 1943 [width]"+rm"(width) // %[width] |
1944 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1944 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
1945 [kShuffleUYVYY]"m"(kShuffleUYVYY), | 1945 [kShuffleUYVYY]"m"(kShuffleUYVYY), |
1946 [kShuffleUYVYUV]"m"(kShuffleUYVYUV) | 1946 [kShuffleUYVYUV]"m"(kShuffleUYVYUV) |
1947 // Does not use r14. | 1947 // Does not use r14. |
1948 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1948 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1949 ); | 1949 ); |
1950 } | 1950 } |
1951 | 1951 |
1952 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, | 1952 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, |
1953 const uint8* u_buf, | 1953 const uint8* u_buf, |
1954 const uint8* v_buf, | 1954 const uint8* v_buf, |
1955 uint8* dst_bgra, | 1955 uint8* dst_bgra, |
1956 struct YuvConstants* yuvconstants, | 1956 const struct YuvConstants* yuvconstants, |
1957 int width) { | 1957 int width) { |
1958 asm volatile ( | 1958 asm volatile ( |
1959 "sub %[u_buf],%[v_buf] \n" | 1959 "sub %[u_buf],%[v_buf] \n" |
1960 "pcmpeqb %%xmm5,%%xmm5 \n" | 1960 "pcmpeqb %%xmm5,%%xmm5 \n" |
1961 LABELALIGN | 1961 LABELALIGN |
1962 "1: \n" | 1962 "1: \n" |
1963 READYUV422 | 1963 READYUV422 |
1964 YUVTORGB(yuvconstants) | 1964 YUVTORGB(yuvconstants) |
1965 STOREBGRA | 1965 STOREBGRA |
1966 "sub $0x8,%[width] \n" | 1966 "sub $0x8,%[width] \n" |
1967 "jg 1b \n" | 1967 "jg 1b \n" |
1968 : [y_buf]"+r"(y_buf), // %[y_buf] | 1968 : [y_buf]"+r"(y_buf), // %[y_buf] |
1969 [u_buf]"+r"(u_buf), // %[u_buf] | 1969 [u_buf]"+r"(u_buf), // %[u_buf] |
1970 [v_buf]"+r"(v_buf), // %[v_buf] | 1970 [v_buf]"+r"(v_buf), // %[v_buf] |
1971 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] | 1971 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] |
1972 [width]"+rm"(width) // %[width] | 1972 [width]"+rm"(width) // %[width] |
1973 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1973 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1974 : "memory", "cc", NACL_R14 | 1974 : "memory", "cc", NACL_R14 |
1975 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1975 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1976 ); | 1976 ); |
1977 } | 1977 } |
1978 | 1978 |
1979 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, | 1979 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, |
1980 const uint8* u_buf, | 1980 const uint8* u_buf, |
1981 const uint8* v_buf, | 1981 const uint8* v_buf, |
1982 uint8* dst_abgr, | 1982 uint8* dst_abgr, |
1983 struct YuvConstants* yuvconstants, | 1983 const struct YuvConstants* yuvconstants, |
1984 int width) { | 1984 int width) { |
1985 asm volatile ( | 1985 asm volatile ( |
1986 "sub %[u_buf],%[v_buf] \n" | 1986 "sub %[u_buf],%[v_buf] \n" |
1987 "pcmpeqb %%xmm5,%%xmm5 \n" | 1987 "pcmpeqb %%xmm5,%%xmm5 \n" |
1988 LABELALIGN | 1988 LABELALIGN |
1989 "1: \n" | 1989 "1: \n" |
1990 READYUV422 | 1990 READYUV422 |
1991 YUVTORGB(yuvconstants) | 1991 YUVTORGB(yuvconstants) |
1992 STOREABGR | 1992 STOREABGR |
1993 "sub $0x8,%[width] \n" | 1993 "sub $0x8,%[width] \n" |
1994 "jg 1b \n" | 1994 "jg 1b \n" |
1995 : [y_buf]"+r"(y_buf), // %[y_buf] | 1995 : [y_buf]"+r"(y_buf), // %[y_buf] |
1996 [u_buf]"+r"(u_buf), // %[u_buf] | 1996 [u_buf]"+r"(u_buf), // %[u_buf] |
1997 [v_buf]"+r"(v_buf), // %[v_buf] | 1997 [v_buf]"+r"(v_buf), // %[v_buf] |
1998 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] | 1998 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] |
1999 [width]"+rm"(width) // %[width] | 1999 [width]"+rm"(width) // %[width] |
2000 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 2000 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
2001 : "memory", "cc", NACL_R14 | 2001 : "memory", "cc", NACL_R14 |
2002 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2002 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
2003 ); | 2003 ); |
2004 } | 2004 } |
2005 | 2005 |
2006 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, | 2006 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, |
2007 const uint8* u_buf, | 2007 const uint8* u_buf, |
2008 const uint8* v_buf, | 2008 const uint8* v_buf, |
2009 uint8* dst_rgba, | 2009 uint8* dst_rgba, |
2010 struct YuvConstants* yuvconstants, | 2010 const struct YuvConstants* yuvconstants, |
2011 int width) { | 2011 int width) { |
2012 asm volatile ( | 2012 asm volatile ( |
2013 "sub %[u_buf],%[v_buf] \n" | 2013 "sub %[u_buf],%[v_buf] \n" |
2014 "pcmpeqb %%xmm5,%%xmm5 \n" | 2014 "pcmpeqb %%xmm5,%%xmm5 \n" |
2015 LABELALIGN | 2015 LABELALIGN |
2016 "1: \n" | 2016 "1: \n" |
2017 READYUV422 | 2017 READYUV422 |
2018 YUVTORGB(yuvconstants) | 2018 YUVTORGB(yuvconstants) |
2019 STORERGBA | 2019 STORERGBA |
2020 "sub $0x8,%[width] \n" | 2020 "sub $0x8,%[width] \n" |
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2145 "vmovdqu %%ymm1," MEMACCESS2(0x20, [dst_abgr]) " \n" \ | 2145 "vmovdqu %%ymm1," MEMACCESS2(0x20, [dst_abgr]) " \n" \ |
2146 "lea " MEMLEA(0x40, [dst_abgr]) ", %[dst_abgr] \n" | 2146 "lea " MEMLEA(0x40, [dst_abgr]) ", %[dst_abgr] \n" |
2147 | 2147 |
2148 #if defined(HAS_I422TOBGRAROW_AVX2) | 2148 #if defined(HAS_I422TOBGRAROW_AVX2) |
2149 // 16 pixels | 2149 // 16 pixels |
2150 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). | 2150 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). |
2151 void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, | 2151 void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, |
2152 const uint8* u_buf, | 2152 const uint8* u_buf, |
2153 const uint8* v_buf, | 2153 const uint8* v_buf, |
2154 uint8* dst_bgra, | 2154 uint8* dst_bgra, |
2155 struct YuvConstants* yuvconstants, | 2155 const struct YuvConstants* yuvconstants, |
2156 int width) { | 2156 int width) { |
2157 asm volatile ( | 2157 asm volatile ( |
2158 "sub %[u_buf],%[v_buf] \n" | 2158 "sub %[u_buf],%[v_buf] \n" |
2159 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2159 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
2160 LABELALIGN | 2160 LABELALIGN |
2161 "1: \n" | 2161 "1: \n" |
2162 READYUV422_AVX2 | 2162 READYUV422_AVX2 |
2163 YUVTORGB_AVX2(yuvconstants) | 2163 YUVTORGB_AVX2(yuvconstants) |
2164 | 2164 |
2165 // Step 3: Weave into BGRA | 2165 // Step 3: Weave into BGRA |
(...skipping 21 matching lines...) Expand all Loading... |
2187 } | 2187 } |
2188 #endif // HAS_I422TOBGRAROW_AVX2 | 2188 #endif // HAS_I422TOBGRAROW_AVX2 |
2189 | 2189 |
2190 #if defined(HAS_I422TOARGBROW_AVX2) | 2190 #if defined(HAS_I422TOARGBROW_AVX2) |
2191 // 16 pixels | 2191 // 16 pixels |
2192 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2192 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
2193 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, | 2193 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, |
2194 const uint8* u_buf, | 2194 const uint8* u_buf, |
2195 const uint8* v_buf, | 2195 const uint8* v_buf, |
2196 uint8* dst_argb, | 2196 uint8* dst_argb, |
2197 struct YuvConstants* yuvconstants, | 2197 const struct YuvConstants* yuvconstants, |
2198 int width) { | 2198 int width) { |
2199 asm volatile ( | 2199 asm volatile ( |
2200 "sub %[u_buf],%[v_buf] \n" | 2200 "sub %[u_buf],%[v_buf] \n" |
2201 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2201 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
2202 LABELALIGN | 2202 LABELALIGN |
2203 "1: \n" | 2203 "1: \n" |
2204 READYUV422_AVX2 | 2204 READYUV422_AVX2 |
2205 YUVTORGB_AVX2(yuvconstants) | 2205 YUVTORGB_AVX2(yuvconstants) |
2206 STOREARGB_AVX2 | 2206 STOREARGB_AVX2 |
2207 "sub $0x10,%[width] \n" | 2207 "sub $0x10,%[width] \n" |
(...skipping 12 matching lines...) Expand all Loading... |
2220 #endif // HAS_I422TOARGBROW_AVX2 | 2220 #endif // HAS_I422TOARGBROW_AVX2 |
2221 | 2221 |
2222 #if defined(HAS_I422ALPHATOARGBROW_AVX2) | 2222 #if defined(HAS_I422ALPHATOARGBROW_AVX2) |
2223 // 16 pixels | 2223 // 16 pixels |
2224 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. | 2224 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. |
2225 void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, | 2225 void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, |
2226 const uint8* u_buf, | 2226 const uint8* u_buf, |
2227 const uint8* v_buf, | 2227 const uint8* v_buf, |
2228 const uint8* a_buf, | 2228 const uint8* a_buf, |
2229 uint8* dst_argb, | 2229 uint8* dst_argb, |
2230 struct YuvConstants* yuvconstants, | 2230 const struct YuvConstants* yuvconstants, |
2231 int width) { | 2231 int width) { |
2232 asm volatile ( | 2232 asm volatile ( |
2233 "sub %[u_buf],%[v_buf] \n" | 2233 "sub %[u_buf],%[v_buf] \n" |
2234 LABELALIGN | 2234 LABELALIGN |
2235 "1: \n" | 2235 "1: \n" |
2236 READYUVA422_AVX2 | 2236 READYUVA422_AVX2 |
2237 YUVTORGB_AVX2(yuvconstants) | 2237 YUVTORGB_AVX2(yuvconstants) |
2238 STOREARGB_AVX2 | 2238 STOREARGB_AVX2 |
2239 "subl $0x10,%[width] \n" | 2239 "subl $0x10,%[width] \n" |
2240 "jg 1b \n" | 2240 "jg 1b \n" |
(...skipping 16 matching lines...) Expand all Loading... |
2257 #endif // HAS_I422ALPHATOARGBROW_AVX2 | 2257 #endif // HAS_I422ALPHATOARGBROW_AVX2 |
2258 | 2258 |
2259 #if defined(HAS_I422ALPHATOABGRROW_AVX2) | 2259 #if defined(HAS_I422ALPHATOABGRROW_AVX2) |
2260 // 16 pixels | 2260 // 16 pixels |
2261 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR. | 2261 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR. |
2262 void OMITFP I422AlphaToABGRRow_AVX2(const uint8* y_buf, | 2262 void OMITFP I422AlphaToABGRRow_AVX2(const uint8* y_buf, |
2263 const uint8* u_buf, | 2263 const uint8* u_buf, |
2264 const uint8* v_buf, | 2264 const uint8* v_buf, |
2265 const uint8* a_buf, | 2265 const uint8* a_buf, |
2266 uint8* dst_abgr, | 2266 uint8* dst_abgr, |
2267 struct YuvConstants* yuvconstants, | 2267 const struct YuvConstants* yuvconstants, |
2268 int width) { | 2268 int width) { |
2269 asm volatile ( | 2269 asm volatile ( |
2270 "sub %[u_buf],%[v_buf] \n" | 2270 "sub %[u_buf],%[v_buf] \n" |
2271 LABELALIGN | 2271 LABELALIGN |
2272 "1: \n" | 2272 "1: \n" |
2273 READYUVA422_AVX2 | 2273 READYUVA422_AVX2 |
2274 YUVTORGB_AVX2(yuvconstants) | 2274 YUVTORGB_AVX2(yuvconstants) |
2275 STOREABGR_AVX2 | 2275 STOREABGR_AVX2 |
2276 "subl $0x10,%[width] \n" | 2276 "subl $0x10,%[width] \n" |
2277 "jg 1b \n" | 2277 "jg 1b \n" |
(...skipping 15 matching lines...) Expand all Loading... |
2293 } | 2293 } |
2294 #endif // HAS_I422ALPHATOABGRROW_AVX2 | 2294 #endif // HAS_I422ALPHATOABGRROW_AVX2 |
2295 | 2295 |
2296 #if defined(HAS_I422TOABGRROW_AVX2) | 2296 #if defined(HAS_I422TOABGRROW_AVX2) |
2297 // 16 pixels | 2297 // 16 pixels |
2298 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). | 2298 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). |
2299 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf, | 2299 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf, |
2300 const uint8* u_buf, | 2300 const uint8* u_buf, |
2301 const uint8* v_buf, | 2301 const uint8* v_buf, |
2302 uint8* dst_abgr, | 2302 uint8* dst_abgr, |
2303 struct YuvConstants* yuvconstants, | 2303 const struct YuvConstants* yuvconstants, |
2304 int width) { | 2304 int width) { |
2305 asm volatile ( | 2305 asm volatile ( |
2306 "sub %[u_buf],%[v_buf] \n" | 2306 "sub %[u_buf],%[v_buf] \n" |
2307 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2307 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
2308 LABELALIGN | 2308 LABELALIGN |
2309 "1: \n" | 2309 "1: \n" |
2310 READYUV422_AVX2 | 2310 READYUV422_AVX2 |
2311 YUVTORGB_AVX2(yuvconstants) | 2311 YUVTORGB_AVX2(yuvconstants) |
2312 STOREABGR_AVX2 | 2312 STOREABGR_AVX2 |
2313 "sub $0x10,%[width] \n" | 2313 "sub $0x10,%[width] \n" |
(...skipping 11 matching lines...) Expand all Loading... |
2325 } | 2325 } |
2326 #endif // HAS_I422TOABGRROW_AVX2 | 2326 #endif // HAS_I422TOABGRROW_AVX2 |
2327 | 2327 |
2328 #if defined(HAS_I422TORGBAROW_AVX2) | 2328 #if defined(HAS_I422TORGBAROW_AVX2) |
2329 // 16 pixels | 2329 // 16 pixels |
2330 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). | 2330 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). |
2331 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, | 2331 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, |
2332 const uint8* u_buf, | 2332 const uint8* u_buf, |
2333 const uint8* v_buf, | 2333 const uint8* v_buf, |
2334 uint8* dst_argb, | 2334 uint8* dst_argb, |
2335 struct YuvConstants* yuvconstants, | 2335 const struct YuvConstants* yuvconstants, |
2336 int width) { | 2336 int width) { |
2337 asm volatile ( | 2337 asm volatile ( |
2338 "sub %[u_buf],%[v_buf] \n" | 2338 "sub %[u_buf],%[v_buf] \n" |
2339 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2339 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
2340 LABELALIGN | 2340 LABELALIGN |
2341 "1: \n" | 2341 "1: \n" |
2342 READYUV422_AVX2 | 2342 READYUV422_AVX2 |
2343 YUVTORGB_AVX2(yuvconstants) | 2343 YUVTORGB_AVX2(yuvconstants) |
2344 | 2344 |
2345 // Step 3: Weave into RGBA | 2345 // Step 3: Weave into RGBA |
(...skipping 20 matching lines...) Expand all Loading... |
2366 ); | 2366 ); |
2367 } | 2367 } |
2368 #endif // HAS_I422TORGBAROW_AVX2 | 2368 #endif // HAS_I422TORGBAROW_AVX2 |
2369 | 2369 |
2370 #if defined(HAS_NV12TOARGBROW_AVX2) | 2370 #if defined(HAS_NV12TOARGBROW_AVX2) |
2371 // 16 pixels. | 2371 // 16 pixels. |
2372 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2372 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
// Row converter written as a single GCC extended-asm loop.
//   y_buf, uv_buf, dst_argb - bound as read/write ("+r") register operands, so
//                  the asm may modify them (presumably advanced by the
//                  READNV12_AVX2 / STOREARGB_AVX2 macros, which are defined
//                  outside this view - confirm against their definitions).
//   yuvconstants - input-only ("r") operand handed to YUVTORGB_AVX2; the NEW
//                  column of this diff const-qualifies the pointer.
//   width        - loop counter ("+rm"), reduced by 0x10 per iteration.
// NOTE(review): the count is tested only after the loop body, so one full
// iteration always executes; presumably callers pass width > 0 and handle
// widths that are not a multiple of 16 elsewhere - TODO confirm.
2373 void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, | 2373 void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, |
2374 const uint8* uv_buf, | 2374 const uint8* uv_buf, |
2375 uint8* dst_argb, | 2375 uint8* dst_argb, |
2376 struct YuvConstants* yuvconstants, | 2376 const struct YuvConstants* yuvconstants, |
2377 int width) { | 2377 int width) { |
2378 asm volatile ( | 2378 asm volatile ( |
// Comparing ymm5 with itself sets every byte of ymm5 to 0xFF.
// Presumably consumed by STOREARGB_AVX2 as the alpha channel - confirm.
2379 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2379 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
2380 LABELALIGN | 2380 LABELALIGN |
2381 "1: \n" | 2381 "1: \n" |
// Loop body: read, convert, store - each step is a macro defined elsewhere.
2382 READNV12_AVX2 | 2382 READNV12_AVX2 |
2383 YUVTORGB_AVX2(yuvconstants) | 2383 YUVTORGB_AVX2(yuvconstants) |
2384 STOREARGB_AVX2 | 2384 STOREARGB_AVX2 |
// Subtract 16 from the remaining width and loop while the signed result is > 0.
2385 "sub $0x10,%[width] \n" | 2385 "sub $0x10,%[width] \n" |
2386 "jg 1b \n" | 2386 "jg 1b \n" |
// Zero the upper 128 bits of the ymm registers before leaving the asm block.
2387 "vzeroupper \n" | 2387 "vzeroupper \n" |
2388 : [y_buf]"+r"(y_buf), // %[y_buf] | 2388 : [y_buf]"+r"(y_buf), // %[y_buf] |
2389 [uv_buf]"+r"(uv_buf), // %[uv_buf] | 2389 [uv_buf]"+r"(uv_buf), // %[uv_buf] |
2390 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 2390 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
2391 [width]"+rm"(width) // %[width] | 2391 [width]"+rm"(width) // %[width] |
2392 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 2392 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
2393 // Does not use r14. | 2393 // Does not use r14. |
// Clobbers: memory, condition codes, and vector registers 0 through 5.
2394 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2394 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
2395 ); | 2395 ); |
2396 } | 2396 } |
2397 #endif // HAS_NV12TOARGBROW_AVX2 | 2397 #endif // HAS_NV12TOARGBROW_AVX2 |
2398 | 2398 |
2399 #if defined(HAS_NV21TOARGBROW_AVX2) | 2399 #if defined(HAS_NV21TOARGBROW_AVX2) |
2400 // 16 pixels. | 2400 // 16 pixels. |
2401 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2401 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
// Row converter written as a single GCC extended-asm loop.
//   y_buf, vu_buf, dst_argb - bound as read/write ("+r") register operands, so
//                  the asm may modify them (presumably advanced by the
//                  READNV21_AVX2 / STOREARGB_AVX2 macros, which are defined
//                  outside this view - confirm against their definitions).
//   yuvconstants - input-only ("r") operand handed to YUVTORGB_AVX2; the NEW
//                  column of this diff const-qualifies the pointer.
//   width        - loop counter ("+rm"), reduced by 0x10 per iteration.
// NOTE(review): the count is tested only after the loop body, so one full
// iteration always executes; presumably callers pass width > 0 and handle
// widths that are not a multiple of 16 elsewhere - TODO confirm.
2402 void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, | 2402 void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, |
2403 const uint8* vu_buf, | 2403 const uint8* vu_buf, |
2404 uint8* dst_argb, | 2404 uint8* dst_argb, |
2405 struct YuvConstants* yuvconstants, | 2405 const struct YuvConstants* yuvconstants, |
2406 int width) { | 2406 int width) { |
2407 asm volatile ( | 2407 asm volatile ( |
// Comparing ymm5 with itself sets every byte of ymm5 to 0xFF.
// Presumably consumed by STOREARGB_AVX2 as the alpha channel - confirm.
2408 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2408 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
2409 LABELALIGN | 2409 LABELALIGN |
2410 "1: \n" | 2410 "1: \n" |
// Loop body: read, convert, store - each step is a macro defined elsewhere.
2411 READNV21_AVX2 | 2411 READNV21_AVX2 |
2412 YUVTORGB_AVX2(yuvconstants) | 2412 YUVTORGB_AVX2(yuvconstants) |
2413 STOREARGB_AVX2 | 2413 STOREARGB_AVX2 |
// Subtract 16 from the remaining width and loop while the signed result is > 0.
2414 "sub $0x10,%[width] \n" | 2414 "sub $0x10,%[width] \n" |
2415 "jg 1b \n" | 2415 "jg 1b \n" |
// Zero the upper 128 bits of the ymm registers before leaving the asm block.
2416 "vzeroupper \n" | 2416 "vzeroupper \n" |
2417 : [y_buf]"+r"(y_buf), // %[y_buf] | 2417 : [y_buf]"+r"(y_buf), // %[y_buf] |
2418 [vu_buf]"+r"(vu_buf), // %[vu_buf] | 2418 [vu_buf]"+r"(vu_buf), // %[vu_buf] |
2419 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 2419 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
2420 [width]"+rm"(width) // %[width] | 2420 [width]"+rm"(width) // %[width] |
2421 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 2421 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
// kShuffleNV21 is supplied as a memory ("m") input operand. Presumably the
// shuffle mask READNV21_AVX2 uses to reorder VU into UV - confirm.
2422 [kShuffleNV21]"m"(kShuffleNV21) | 2422 [kShuffleNV21]"m"(kShuffleNV21) |
2423 // Does not use r14. | 2423 // Does not use r14. |
// Clobbers: memory, condition codes, and vector registers 0 through 5.
2424 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2424 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
2425 ); | 2425 ); |
2426 } | 2426 } |
2427 #endif // HAS_NV21TOARGBROW_AVX2 | 2427 #endif // HAS_NV21TOARGBROW_AVX2 |
2428 | 2428 |
2429 #if defined(HAS_YUY2TOARGBROW_AVX2) | 2429 #if defined(HAS_YUY2TOARGBROW_AVX2) |
2430 // 16 pixels. | 2430 // 16 pixels. |
2431 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). | 2431 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). |
// Row converter written as a single GCC extended-asm loop.
//   yuy2_buf, dst_argb - bound as read/write ("+r") register operands, so the
//                  asm may modify them (presumably advanced by the
//                  READYUY2_AVX2 / STOREARGB_AVX2 macros, which are defined
//                  outside this view - confirm against their definitions).
//   yuvconstants - input-only ("r") operand handed to YUVTORGB_AVX2; the NEW
//                  column of this diff const-qualifies the pointer.
//   width        - loop counter ("+rm"), reduced by 0x10 per iteration.
// NOTE(review): the count is tested only after the loop body, so one full
// iteration always executes; presumably callers pass width > 0 and handle
// widths that are not a multiple of 16 elsewhere - TODO confirm.
2432 void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, | 2432 void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, |
2433 uint8* dst_argb, | 2433 uint8* dst_argb, |
2434 struct YuvConstants* yuvconstants, | 2434 const struct YuvConstants* yuvconstants, |
2435 int width) { | 2435 int width) { |
2436 asm volatile ( | 2436 asm volatile ( |
// Comparing ymm5 with itself sets every byte of ymm5 to 0xFF.
// Presumably consumed by STOREARGB_AVX2 as the alpha channel - confirm.
2437 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2437 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
2438 LABELALIGN | 2438 LABELALIGN |
2439 "1: \n" | 2439 "1: \n" |
// Loop body: read, convert, store - each step is a macro defined elsewhere.
2440 READYUY2_AVX2 | 2440 READYUY2_AVX2 |
2441 YUVTORGB_AVX2(yuvconstants) | 2441 YUVTORGB_AVX2(yuvconstants) |
2442 STOREARGB_AVX2 | 2442 STOREARGB_AVX2 |
// Subtract 16 from the remaining width and loop while the signed result is > 0.
2443 "sub $0x10,%[width] \n" | 2443 "sub $0x10,%[width] \n" |
2444 "jg 1b \n" | 2444 "jg 1b \n" |
// Zero the upper 128 bits of the ymm registers before leaving the asm block.
2445 "vzeroupper \n" | 2445 "vzeroupper \n" |
2446 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf] | 2446 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf] |
2447 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 2447 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
2448 [width]"+rm"(width) // %[width] | 2448 [width]"+rm"(width) // %[width] |
2449 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 2449 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
// Two memory ("m") input operands. Presumably the shuffle masks READYUY2_AVX2
// uses to separate the Y and UV bytes of the packed input - confirm.
2450 [kShuffleYUY2Y]"m"(kShuffleYUY2Y), | 2450 [kShuffleYUY2Y]"m"(kShuffleYUY2Y), |
2451 [kShuffleYUY2UV]"m"(kShuffleYUY2UV) | 2451 [kShuffleYUY2UV]"m"(kShuffleYUY2UV) |
2452 // Does not use r14. | 2452 // Does not use r14. |
// Clobbers: memory, condition codes, and vector registers 0 through 5.
2453 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2453 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
2454 ); | 2454 ); |
2455 } | 2455 } |
2456 #endif // HAS_YUY2TOARGBROW_AVX2 | 2456 #endif // HAS_YUY2TOARGBROW_AVX2 |
2457 | 2457 |
2458 #if defined(HAS_UYVYTOARGBROW_AVX2) | 2458 #if defined(HAS_UYVYTOARGBROW_AVX2) |
2459 // 16 pixels. | 2459 // 16 pixels. |
2460 // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes). | 2460 // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes). |
2461 void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, | 2461 void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, |
2462 uint8* dst_argb, | 2462 uint8* dst_argb, |
2463 struct YuvConstants* yuvconstants, | 2463 const struct YuvConstants* yuvconstants, |
2464 int width) { | 2464 int width) { |
2465 asm volatile ( | 2465 asm volatile ( |
2466 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2466 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
2467 LABELALIGN | 2467 LABELALIGN |
2468 "1: \n" | 2468 "1: \n" |
2469 READUYVY_AVX2 | 2469 READUYVY_AVX2 |
2470 YUVTORGB_AVX2(yuvconstants) | 2470 YUVTORGB_AVX2(yuvconstants) |
2471 STOREARGB_AVX2 | 2471 STOREARGB_AVX2 |
2472 "sub $0x10,%[width] \n" | 2472 "sub $0x10,%[width] \n" |
2473 "jg 1b \n" | 2473 "jg 1b \n" |
(...skipping 3110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5584 ); | 5584 ); |
5585 } | 5585 } |
5586 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5586 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5587 | 5587 |
5588 #endif // defined(__x86_64__) || defined(__i386__) | 5588 #endif // defined(__x86_64__) || defined(__i386__) |
5589 | 5589 |
5590 #ifdef __cplusplus | 5590 #ifdef __cplusplus |
5591 } // extern "C" | 5591 } // extern "C" |
5592 } // namespace libyuv | 5592 } // namespace libyuv |
5593 #endif | 5593 #endif |
OLD | NEW |