| OLD | NEW |
| 1 // VERSION 2 | 1 // VERSION 2 |
| 2 /* | 2 /* |
| 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
| 6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
| 7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
| 8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
| 9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
| 10 */ | 10 */ |
| (...skipping 1578 matching lines...) | (...skipping 1578 matching lines...) |
| 1589 "punpcklwd %%xmm1,%%xmm5 \n" \ | 1589 "punpcklwd %%xmm1,%%xmm5 \n" \ |
| 1590 "punpckhwd %%xmm1,%%xmm0 \n" \ | 1590 "punpckhwd %%xmm1,%%xmm0 \n" \ |
| 1591 "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \ | 1591 "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \ |
| 1592 "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ | 1592 "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ |
| 1593 "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n" | 1593 "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n" |
| 1594 | 1594 |
| 1595 void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, | 1595 void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, |
| 1596 const uint8* u_buf, | 1596 const uint8* u_buf, |
| 1597 const uint8* v_buf, | 1597 const uint8* v_buf, |
| 1598 uint8* dst_argb, | 1598 uint8* dst_argb, |
| 1599 struct YuvConstants* yuvconstants, | 1599 const struct YuvConstants* yuvconstants, |
| 1600 int width) { | 1600 int width) { |
| 1601 asm volatile ( | 1601 asm volatile ( |
| 1602 "sub %[u_buf],%[v_buf] \n" | 1602 "sub %[u_buf],%[v_buf] \n" |
| 1603 "pcmpeqb %%xmm5,%%xmm5 \n" | 1603 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1604 LABELALIGN | 1604 LABELALIGN |
| 1605 "1: \n" | 1605 "1: \n" |
| 1606 READYUV444 | 1606 READYUV444 |
| 1607 YUVTORGB(yuvconstants) | 1607 YUVTORGB(yuvconstants) |
| 1608 STOREARGB | 1608 STOREARGB |
| 1609 "sub $0x8,%[width] \n" | 1609 "sub $0x8,%[width] \n" |
| 1610 "jg 1b \n" | 1610 "jg 1b \n" |
| 1611 : [y_buf]"+r"(y_buf), // %[y_buf] | 1611 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1612 [u_buf]"+r"(u_buf), // %[u_buf] | 1612 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1613 [v_buf]"+r"(v_buf), // %[v_buf] | 1613 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1614 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1614 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1615 [width]"+rm"(width) // %[width] | 1615 [width]"+rm"(width) // %[width] |
| 1616 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1616 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1617 : "memory", "cc", NACL_R14 | 1617 : "memory", "cc", NACL_R14 |
| 1618 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1618 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1619 ); | 1619 ); |
| 1620 } | 1620 } |
| 1621 | 1621 |
| 1622 void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf, | 1622 void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf, |
| 1623 const uint8* u_buf, | 1623 const uint8* u_buf, |
| 1624 const uint8* v_buf, | 1624 const uint8* v_buf, |
| 1625 uint8* dst_abgr, | 1625 uint8* dst_abgr, |
| 1626 struct YuvConstants* yuvconstants, | 1626 const struct YuvConstants* yuvconstants, |
| 1627 int width) { | 1627 int width) { |
| 1628 asm volatile ( | 1628 asm volatile ( |
| 1629 "sub %[u_buf],%[v_buf] \n" | 1629 "sub %[u_buf],%[v_buf] \n" |
| 1630 "pcmpeqb %%xmm5,%%xmm5 \n" | 1630 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1631 LABELALIGN | 1631 LABELALIGN |
| 1632 "1: \n" | 1632 "1: \n" |
| 1633 READYUV444 | 1633 READYUV444 |
| 1634 YUVTORGB(yuvconstants) | 1634 YUVTORGB(yuvconstants) |
| 1635 STOREABGR | 1635 STOREABGR |
| 1636 "sub $0x8,%[width] \n" | 1636 "sub $0x8,%[width] \n" |
| 1637 "jg 1b \n" | 1637 "jg 1b \n" |
| 1638 : [y_buf]"+r"(y_buf), // %[y_buf] | 1638 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1639 [u_buf]"+r"(u_buf), // %[u_buf] | 1639 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1640 [v_buf]"+r"(v_buf), // %[v_buf] | 1640 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1641 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] | 1641 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] |
| 1642 [width]"+rm"(width) // %[width] | 1642 [width]"+rm"(width) // %[width] |
| 1643 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1643 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1644 : "memory", "cc", NACL_R14 | 1644 : "memory", "cc", NACL_R14 |
| 1645 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1645 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1646 ); | 1646 ); |
| 1647 } | 1647 } |
| 1648 | 1648 |
| 1649 void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, | 1649 void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, |
| 1650 const uint8* u_buf, | 1650 const uint8* u_buf, |
| 1651 const uint8* v_buf, | 1651 const uint8* v_buf, |
| 1652 uint8* dst_rgb24, | 1652 uint8* dst_rgb24, |
| 1653 struct YuvConstants* yuvconstants, | 1653 const struct YuvConstants* yuvconstants, |
| 1654 int width) { | 1654 int width) { |
| 1655 asm volatile ( | 1655 asm volatile ( |
| 1656 "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" | 1656 "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" |
| 1657 "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" | 1657 "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" |
| 1658 "sub %[u_buf],%[v_buf] \n" | 1658 "sub %[u_buf],%[v_buf] \n" |
| 1659 LABELALIGN | 1659 LABELALIGN |
| 1660 "1: \n" | 1660 "1: \n" |
| 1661 READYUV422 | 1661 READYUV422 |
| 1662 YUVTORGB(yuvconstants) | 1662 YUVTORGB(yuvconstants) |
| 1663 "punpcklbw %%xmm1,%%xmm0 \n" | 1663 "punpcklbw %%xmm1,%%xmm0 \n" |
| (...skipping 23 matching lines...) | (...skipping 23 matching lines...) |
| 1687 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) | 1687 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) |
| 1688 : "memory", "cc", NACL_R14 | 1688 : "memory", "cc", NACL_R14 |
| 1689 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" | 1689 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
| 1690 ); | 1690 ); |
| 1691 } | 1691 } |
| 1692 | 1692 |
| 1693 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, | 1693 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, |
| 1694 const uint8* u_buf, | 1694 const uint8* u_buf, |
| 1695 const uint8* v_buf, | 1695 const uint8* v_buf, |
| 1696 uint8* dst_raw, | 1696 uint8* dst_raw, |
| 1697 struct YuvConstants* yuvconstants, | 1697 const struct YuvConstants* yuvconstants, |
| 1698 int width) { | 1698 int width) { |
| 1699 asm volatile ( | 1699 asm volatile ( |
| 1700 "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n" | 1700 "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n" |
| 1701 "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n" | 1701 "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n" |
| 1702 "sub %[u_buf],%[v_buf] \n" | 1702 "sub %[u_buf],%[v_buf] \n" |
| 1703 LABELALIGN | 1703 LABELALIGN |
| 1704 "1: \n" | 1704 "1: \n" |
| 1705 READYUV422 | 1705 READYUV422 |
| 1706 YUVTORGB(yuvconstants) | 1706 YUVTORGB(yuvconstants) |
| 1707 "punpcklbw %%xmm1,%%xmm0 \n" | 1707 "punpcklbw %%xmm1,%%xmm0 \n" |
| (...skipping 23 matching lines...) | (...skipping 23 matching lines...) |
| 1731 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) | 1731 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) |
| 1732 : "memory", "cc", NACL_R14 | 1732 : "memory", "cc", NACL_R14 |
| 1733 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" | 1733 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
| 1734 ); | 1734 ); |
| 1735 } | 1735 } |
| 1736 | 1736 |
| 1737 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, | 1737 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, |
| 1738 const uint8* u_buf, | 1738 const uint8* u_buf, |
| 1739 const uint8* v_buf, | 1739 const uint8* v_buf, |
| 1740 uint8* dst_argb, | 1740 uint8* dst_argb, |
| 1741 struct YuvConstants* yuvconstants, | 1741 const struct YuvConstants* yuvconstants, |
| 1742 int width) { | 1742 int width) { |
| 1743 asm volatile ( | 1743 asm volatile ( |
| 1744 "sub %[u_buf],%[v_buf] \n" | 1744 "sub %[u_buf],%[v_buf] \n" |
| 1745 "pcmpeqb %%xmm5,%%xmm5 \n" | 1745 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1746 LABELALIGN | 1746 LABELALIGN |
| 1747 "1: \n" | 1747 "1: \n" |
| 1748 READYUV422 | 1748 READYUV422 |
| 1749 YUVTORGB(yuvconstants) | 1749 YUVTORGB(yuvconstants) |
| 1750 STOREARGB | 1750 STOREARGB |
| 1751 "sub $0x8,%[width] \n" | 1751 "sub $0x8,%[width] \n" |
| 1752 "jg 1b \n" | 1752 "jg 1b \n" |
| 1753 : [y_buf]"+r"(y_buf), // %[y_buf] | 1753 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1754 [u_buf]"+r"(u_buf), // %[u_buf] | 1754 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1755 [v_buf]"+r"(v_buf), // %[v_buf] | 1755 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1756 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1756 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1757 [width]"+rm"(width) // %[width] | 1757 [width]"+rm"(width) // %[width] |
| 1758 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1758 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1759 : "memory", "cc", NACL_R14 | 1759 : "memory", "cc", NACL_R14 |
| 1760 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1760 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1761 ); | 1761 ); |
| 1762 } | 1762 } |
| 1763 | 1763 |
| 1764 void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, | 1764 void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, |
| 1765 const uint8* u_buf, | 1765 const uint8* u_buf, |
| 1766 const uint8* v_buf, | 1766 const uint8* v_buf, |
| 1767 const uint8* a_buf, | 1767 const uint8* a_buf, |
| 1768 uint8* dst_argb, | 1768 uint8* dst_argb, |
| 1769 struct YuvConstants* yuvconstants, | 1769 const struct YuvConstants* yuvconstants, |
| 1770 int width) { | 1770 int width) { |
| 1771 asm volatile ( | 1771 asm volatile ( |
| 1772 "sub %[u_buf],%[v_buf] \n" | 1772 "sub %[u_buf],%[v_buf] \n" |
| 1773 LABELALIGN | 1773 LABELALIGN |
| 1774 "1: \n" | 1774 "1: \n" |
| 1775 READYUVA422 | 1775 READYUVA422 |
| 1776 YUVTORGB(yuvconstants) | 1776 YUVTORGB(yuvconstants) |
| 1777 STOREARGB | 1777 STOREARGB |
| 1778 "subl $0x8,%[width] \n" | 1778 "subl $0x8,%[width] \n" |
| 1779 "jg 1b \n" | 1779 "jg 1b \n" |
| (...skipping 11 matching lines...) | (...skipping 11 matching lines...) |
| 1791 : "memory", "cc", NACL_R14 | 1791 : "memory", "cc", NACL_R14 |
| 1792 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1792 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1793 ); | 1793 ); |
| 1794 } | 1794 } |
| 1795 | 1795 |
| 1796 void OMITFP I422AlphaToABGRRow_SSSE3(const uint8* y_buf, | 1796 void OMITFP I422AlphaToABGRRow_SSSE3(const uint8* y_buf, |
| 1797 const uint8* u_buf, | 1797 const uint8* u_buf, |
| 1798 const uint8* v_buf, | 1798 const uint8* v_buf, |
| 1799 const uint8* a_buf, | 1799 const uint8* a_buf, |
| 1800 uint8* dst_abgr, | 1800 uint8* dst_abgr, |
| 1801 struct YuvConstants* yuvconstants, | 1801 const struct YuvConstants* yuvconstants, |
| 1802 int width) { | 1802 int width) { |
| 1803 asm volatile ( | 1803 asm volatile ( |
| 1804 "sub %[u_buf],%[v_buf] \n" | 1804 "sub %[u_buf],%[v_buf] \n" |
| 1805 LABELALIGN | 1805 LABELALIGN |
| 1806 "1: \n" | 1806 "1: \n" |
| 1807 READYUVA422 | 1807 READYUVA422 |
| 1808 YUVTORGB(yuvconstants) | 1808 YUVTORGB(yuvconstants) |
| 1809 STOREABGR | 1809 STOREABGR |
| 1810 "subl $0x8,%[width] \n" | 1810 "subl $0x8,%[width] \n" |
| 1811 "jg 1b \n" | 1811 "jg 1b \n" |
| (...skipping 10 matching lines...) | (...skipping 10 matching lines...) |
| 1822 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1822 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1823 : "memory", "cc", NACL_R14 | 1823 : "memory", "cc", NACL_R14 |
| 1824 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1824 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1825 ); | 1825 ); |
| 1826 } | 1826 } |
| 1827 | 1827 |
| 1828 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, | 1828 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, |
| 1829 const uint8* u_buf, | 1829 const uint8* u_buf, |
| 1830 const uint8* v_buf, | 1830 const uint8* v_buf, |
| 1831 uint8* dst_argb, | 1831 uint8* dst_argb, |
| 1832 struct YuvConstants* yuvconstants, | 1832 const struct YuvConstants* yuvconstants, |
| 1833 int width) { | 1833 int width) { |
| 1834 asm volatile ( | 1834 asm volatile ( |
| 1835 "sub %[u_buf],%[v_buf] \n" | 1835 "sub %[u_buf],%[v_buf] \n" |
| 1836 "pcmpeqb %%xmm5,%%xmm5 \n" | 1836 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1837 LABELALIGN | 1837 LABELALIGN |
| 1838 "1: \n" | 1838 "1: \n" |
| 1839 READYUV411 | 1839 READYUV411 |
| 1840 YUVTORGB(yuvconstants) | 1840 YUVTORGB(yuvconstants) |
| 1841 STOREARGB | 1841 STOREARGB |
| 1842 "sub $0x8,%[width] \n" | 1842 "sub $0x8,%[width] \n" |
| 1843 "jg 1b \n" | 1843 "jg 1b \n" |
| 1844 : [y_buf]"+r"(y_buf), // %[y_buf] | 1844 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1845 [u_buf]"+r"(u_buf), // %[u_buf] | 1845 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1846 [v_buf]"+r"(v_buf), // %[v_buf] | 1846 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1847 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1847 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1848 [width]"+rm"(width) // %[width] | 1848 [width]"+rm"(width) // %[width] |
| 1849 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1849 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1850 : "memory", "cc", NACL_R14 | 1850 : "memory", "cc", NACL_R14 |
| 1851 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1851 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1852 ); | 1852 ); |
| 1853 } | 1853 } |
| 1854 | 1854 |
| 1855 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, | 1855 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, |
| 1856 const uint8* uv_buf, | 1856 const uint8* uv_buf, |
| 1857 uint8* dst_argb, | 1857 uint8* dst_argb, |
| 1858 struct YuvConstants* yuvconstants, | 1858 const struct YuvConstants* yuvconstants, |
| 1859 int width) { | 1859 int width) { |
| 1860 asm volatile ( | 1860 asm volatile ( |
| 1861 "pcmpeqb %%xmm5,%%xmm5 \n" | 1861 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1862 LABELALIGN | 1862 LABELALIGN |
| 1863 "1: \n" | 1863 "1: \n" |
| 1864 READNV12 | 1864 READNV12 |
| 1865 YUVTORGB(yuvconstants) | 1865 YUVTORGB(yuvconstants) |
| 1866 STOREARGB | 1866 STOREARGB |
| 1867 "sub $0x8,%[width] \n" | 1867 "sub $0x8,%[width] \n" |
| 1868 "jg 1b \n" | 1868 "jg 1b \n" |
| 1869 : [y_buf]"+r"(y_buf), // %[y_buf] | 1869 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1870 [uv_buf]"+r"(uv_buf), // %[uv_buf] | 1870 [uv_buf]"+r"(uv_buf), // %[uv_buf] |
| 1871 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1871 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1872 [width]"+rm"(width) // %[width] | 1872 [width]"+rm"(width) // %[width] |
| 1873 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1873 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1874 // Does not use r14. | 1874 // Does not use r14. |
| 1875 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1875 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1876 ); | 1876 ); |
| 1877 } | 1877 } |
| 1878 | 1878 |
| 1879 void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, | 1879 void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, |
| 1880 const uint8* vu_buf, | 1880 const uint8* vu_buf, |
| 1881 uint8* dst_argb, | 1881 uint8* dst_argb, |
| 1882 struct YuvConstants* yuvconstants, | 1882 const struct YuvConstants* yuvconstants, |
| 1883 int width) { | 1883 int width) { |
| 1884 asm volatile ( | 1884 asm volatile ( |
| 1885 "pcmpeqb %%xmm5,%%xmm5 \n" | 1885 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1886 LABELALIGN | 1886 LABELALIGN |
| 1887 "1: \n" | 1887 "1: \n" |
| 1888 READNV21 | 1888 READNV21 |
| 1889 YUVTORGB(yuvconstants) | 1889 YUVTORGB(yuvconstants) |
| 1890 STOREARGB | 1890 STOREARGB |
| 1891 "sub $0x8,%[width] \n" | 1891 "sub $0x8,%[width] \n" |
| 1892 "jg 1b \n" | 1892 "jg 1b \n" |
| 1893 : [y_buf]"+r"(y_buf), // %[y_buf] | 1893 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1894 [vu_buf]"+r"(vu_buf), // %[vu_buf] | 1894 [vu_buf]"+r"(vu_buf), // %[vu_buf] |
| 1895 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1895 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1896 [width]"+rm"(width) // %[width] | 1896 [width]"+rm"(width) // %[width] |
| 1897 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1897 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
| 1898 [kShuffleNV21]"m"(kShuffleNV21) | 1898 [kShuffleNV21]"m"(kShuffleNV21) |
| 1899 // Does not use r14. | 1899 // Does not use r14. |
| 1900 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1900 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1901 ); | 1901 ); |
| 1902 } | 1902 } |
| 1903 | 1903 |
| 1904 void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, | 1904 void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, |
| 1905 uint8* dst_argb, | 1905 uint8* dst_argb, |
| 1906 struct YuvConstants* yuvconstants, | 1906 const struct YuvConstants* yuvconstants, |
| 1907 int width) { | 1907 int width) { |
| 1908 asm volatile ( | 1908 asm volatile ( |
| 1909 "pcmpeqb %%xmm5,%%xmm5 \n" | 1909 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1910 LABELALIGN | 1910 LABELALIGN |
| 1911 "1: \n" | 1911 "1: \n" |
| 1912 READYUY2 | 1912 READYUY2 |
| 1913 YUVTORGB(yuvconstants) | 1913 YUVTORGB(yuvconstants) |
| 1914 STOREARGB | 1914 STOREARGB |
| 1915 "sub $0x8,%[width] \n" | 1915 "sub $0x8,%[width] \n" |
| 1916 "jg 1b \n" | 1916 "jg 1b \n" |
| 1917 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf] | 1917 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf] |
| 1918 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1918 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1919 [width]"+rm"(width) // %[width] | 1919 [width]"+rm"(width) // %[width] |
| 1920 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1920 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
| 1921 [kShuffleYUY2Y]"m"(kShuffleYUY2Y), | 1921 [kShuffleYUY2Y]"m"(kShuffleYUY2Y), |
| 1922 [kShuffleYUY2UV]"m"(kShuffleYUY2UV) | 1922 [kShuffleYUY2UV]"m"(kShuffleYUY2UV) |
| 1923 // Does not use r14. | 1923 // Does not use r14. |
| 1924 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1924 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1925 ); | 1925 ); |
| 1926 } | 1926 } |
| 1927 | 1927 |
| 1928 void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, | 1928 void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, |
| 1929 uint8* dst_argb, | 1929 uint8* dst_argb, |
| 1930 struct YuvConstants* yuvconstants, | 1930 const struct YuvConstants* yuvconstants, |
| 1931 int width) { | 1931 int width) { |
| 1932 asm volatile ( | 1932 asm volatile ( |
| 1933 "pcmpeqb %%xmm5,%%xmm5 \n" | 1933 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1934 LABELALIGN | 1934 LABELALIGN |
| 1935 "1: \n" | 1935 "1: \n" |
| 1936 READUYVY | 1936 READUYVY |
| 1937 YUVTORGB(yuvconstants) | 1937 YUVTORGB(yuvconstants) |
| 1938 STOREARGB | 1938 STOREARGB |
| 1939 "sub $0x8,%[width] \n" | 1939 "sub $0x8,%[width] \n" |
| 1940 "jg 1b \n" | 1940 "jg 1b \n" |
| 1941 : [uyvy_buf]"+r"(uyvy_buf), // %[uyvy_buf] | 1941 : [uyvy_buf]"+r"(uyvy_buf), // %[uyvy_buf] |
| 1942 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1942 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1943 [width]"+rm"(width) // %[width] | 1943 [width]"+rm"(width) // %[width] |
| 1944 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1944 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
| 1945 [kShuffleUYVYY]"m"(kShuffleUYVYY), | 1945 [kShuffleUYVYY]"m"(kShuffleUYVYY), |
| 1946 [kShuffleUYVYUV]"m"(kShuffleUYVYUV) | 1946 [kShuffleUYVYUV]"m"(kShuffleUYVYUV) |
| 1947 // Does not use r14. | 1947 // Does not use r14. |
| 1948 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1948 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1949 ); | 1949 ); |
| 1950 } | 1950 } |
| 1951 | 1951 |
| 1952 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, | 1952 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, |
| 1953 const uint8* u_buf, | 1953 const uint8* u_buf, |
| 1954 const uint8* v_buf, | 1954 const uint8* v_buf, |
| 1955 uint8* dst_bgra, | 1955 uint8* dst_bgra, |
| 1956 struct YuvConstants* yuvconstants, | 1956 const struct YuvConstants* yuvconstants, |
| 1957 int width) { | 1957 int width) { |
| 1958 asm volatile ( | 1958 asm volatile ( |
| 1959 "sub %[u_buf],%[v_buf] \n" | 1959 "sub %[u_buf],%[v_buf] \n" |
| 1960 "pcmpeqb %%xmm5,%%xmm5 \n" | 1960 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1961 LABELALIGN | 1961 LABELALIGN |
| 1962 "1: \n" | 1962 "1: \n" |
| 1963 READYUV422 | 1963 READYUV422 |
| 1964 YUVTORGB(yuvconstants) | 1964 YUVTORGB(yuvconstants) |
| 1965 STOREBGRA | 1965 STOREBGRA |
| 1966 "sub $0x8,%[width] \n" | 1966 "sub $0x8,%[width] \n" |
| 1967 "jg 1b \n" | 1967 "jg 1b \n" |
| 1968 : [y_buf]"+r"(y_buf), // %[y_buf] | 1968 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1969 [u_buf]"+r"(u_buf), // %[u_buf] | 1969 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1970 [v_buf]"+r"(v_buf), // %[v_buf] | 1970 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1971 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] | 1971 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] |
| 1972 [width]"+rm"(width) // %[width] | 1972 [width]"+rm"(width) // %[width] |
| 1973 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1973 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1974 : "memory", "cc", NACL_R14 | 1974 : "memory", "cc", NACL_R14 |
| 1975 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1975 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1976 ); | 1976 ); |
| 1977 } | 1977 } |
| 1978 | 1978 |
| 1979 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, | 1979 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, |
| 1980 const uint8* u_buf, | 1980 const uint8* u_buf, |
| 1981 const uint8* v_buf, | 1981 const uint8* v_buf, |
| 1982 uint8* dst_abgr, | 1982 uint8* dst_abgr, |
| 1983 struct YuvConstants* yuvconstants, | 1983 const struct YuvConstants* yuvconstants, |
| 1984 int width) { | 1984 int width) { |
| 1985 asm volatile ( | 1985 asm volatile ( |
| 1986 "sub %[u_buf],%[v_buf] \n" | 1986 "sub %[u_buf],%[v_buf] \n" |
| 1987 "pcmpeqb %%xmm5,%%xmm5 \n" | 1987 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 1988 LABELALIGN | 1988 LABELALIGN |
| 1989 "1: \n" | 1989 "1: \n" |
| 1990 READYUV422 | 1990 READYUV422 |
| 1991 YUVTORGB(yuvconstants) | 1991 YUVTORGB(yuvconstants) |
| 1992 STOREABGR | 1992 STOREABGR |
| 1993 "sub $0x8,%[width] \n" | 1993 "sub $0x8,%[width] \n" |
| 1994 "jg 1b \n" | 1994 "jg 1b \n" |
| 1995 : [y_buf]"+r"(y_buf), // %[y_buf] | 1995 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1996 [u_buf]"+r"(u_buf), // %[u_buf] | 1996 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1997 [v_buf]"+r"(v_buf), // %[v_buf] | 1997 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1998 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] | 1998 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] |
| 1999 [width]"+rm"(width) // %[width] | 1999 [width]"+rm"(width) // %[width] |
| 2000 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 2000 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 2001 : "memory", "cc", NACL_R14 | 2001 : "memory", "cc", NACL_R14 |
| 2002 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2002 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 2003 ); | 2003 ); |
| 2004 } | 2004 } |
| 2005 | 2005 |
| 2006 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, | 2006 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, |
| 2007 const uint8* u_buf, | 2007 const uint8* u_buf, |
| 2008 const uint8* v_buf, | 2008 const uint8* v_buf, |
| 2009 uint8* dst_rgba, | 2009 uint8* dst_rgba, |
| 2010 struct YuvConstants* yuvconstants, | 2010 const struct YuvConstants* yuvconstants, |
| 2011 int width) { | 2011 int width) { |
| 2012 asm volatile ( | 2012 asm volatile ( |
| 2013 "sub %[u_buf],%[v_buf] \n" | 2013 "sub %[u_buf],%[v_buf] \n" |
| 2014 "pcmpeqb %%xmm5,%%xmm5 \n" | 2014 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 2015 LABELALIGN | 2015 LABELALIGN |
| 2016 "1: \n" | 2016 "1: \n" |
| 2017 READYUV422 | 2017 READYUV422 |
| 2018 YUVTORGB(yuvconstants) | 2018 YUVTORGB(yuvconstants) |
| 2019 STORERGBA | 2019 STORERGBA |
| 2020 "sub $0x8,%[width] \n" | 2020 "sub $0x8,%[width] \n" |
| (...skipping 124 matching lines...) | (...skipping 124 matching lines...) |
| 2145 "vmovdqu %%ymm1," MEMACCESS2(0x20, [dst_abgr]) " \n" \ | 2145 "vmovdqu %%ymm1," MEMACCESS2(0x20, [dst_abgr]) " \n" \ |
| 2146 "lea " MEMLEA(0x40, [dst_abgr]) ", %[dst_abgr] \n" | 2146 "lea " MEMLEA(0x40, [dst_abgr]) ", %[dst_abgr] \n" |
| 2147 | 2147 |
| 2148 #if defined(HAS_I422TOBGRAROW_AVX2) | 2148 #if defined(HAS_I422TOBGRAROW_AVX2) |
| 2149 // 16 pixels | 2149 // 16 pixels |
| 2150 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). | 2150 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). |
| 2151 void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, | 2151 void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, |
| 2152 const uint8* u_buf, | 2152 const uint8* u_buf, |
| 2153 const uint8* v_buf, | 2153 const uint8* v_buf, |
| 2154 uint8* dst_bgra, | 2154 uint8* dst_bgra, |
| 2155 struct YuvConstants* yuvconstants, | 2155 const struct YuvConstants* yuvconstants, |
| 2156 int width) { | 2156 int width) { |
| 2157 asm volatile ( | 2157 asm volatile ( |
| 2158 "sub %[u_buf],%[v_buf] \n" | 2158 "sub %[u_buf],%[v_buf] \n" |
| 2159 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2159 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
| 2160 LABELALIGN | 2160 LABELALIGN |
| 2161 "1: \n" | 2161 "1: \n" |
| 2162 READYUV422_AVX2 | 2162 READYUV422_AVX2 |
| 2163 YUVTORGB_AVX2(yuvconstants) | 2163 YUVTORGB_AVX2(yuvconstants) |
| 2164 | 2164 |
| 2165 // Step 3: Weave into BGRA | 2165 // Step 3: Weave into BGRA |
| (...skipping 21 matching lines...) | (...skipping 21 matching lines...) |
| 2187 } | 2187 } |
| 2188 #endif // HAS_I422TOBGRAROW_AVX2 | 2188 #endif // HAS_I422TOBGRAROW_AVX2 |
| 2189 | 2189 |
| 2190 #if defined(HAS_I422TOARGBROW_AVX2) | 2190 #if defined(HAS_I422TOARGBROW_AVX2) |
| 2191 // 16 pixels | 2191 // 16 pixels |
| 2192 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2192 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
| 2193 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, | 2193 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, |
| 2194 const uint8* u_buf, | 2194 const uint8* u_buf, |
| 2195 const uint8* v_buf, | 2195 const uint8* v_buf, |
| 2196 uint8* dst_argb, | 2196 uint8* dst_argb, |
| 2197 struct YuvConstants* yuvconstants, | 2197 const struct YuvConstants* yuvconstants, |
| 2198 int width) { | 2198 int width) { |
| 2199 asm volatile ( | 2199 asm volatile ( |
| 2200 "sub %[u_buf],%[v_buf] \n" | 2200 "sub %[u_buf],%[v_buf] \n" |
| 2201 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2201 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
| 2202 LABELALIGN | 2202 LABELALIGN |
| 2203 "1: \n" | 2203 "1: \n" |
| 2204 READYUV422_AVX2 | 2204 READYUV422_AVX2 |
| 2205 YUVTORGB_AVX2(yuvconstants) | 2205 YUVTORGB_AVX2(yuvconstants) |
| 2206 STOREARGB_AVX2 | 2206 STOREARGB_AVX2 |
| 2207 "sub $0x10,%[width] \n" | 2207 "sub $0x10,%[width] \n" |
| (...skipping 12 matching lines...) | (...skipping 12 matching lines...) |
| 2220 #endif // HAS_I422TOARGBROW_AVX2 | 2220 #endif // HAS_I422TOARGBROW_AVX2 |
| 2221 | 2221 |
| 2222 #if defined(HAS_I422ALPHATOARGBROW_AVX2) | 2222 #if defined(HAS_I422ALPHATOARGBROW_AVX2) |
| 2223 // 16 pixels | 2223 // 16 pixels |
| 2224 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. | 2224 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. |
| 2225 void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, | 2225 void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, |
| 2226 const uint8* u_buf, | 2226 const uint8* u_buf, |
| 2227 const uint8* v_buf, | 2227 const uint8* v_buf, |
| 2228 const uint8* a_buf, | 2228 const uint8* a_buf, |
| 2229 uint8* dst_argb, | 2229 uint8* dst_argb, |
| 2230 struct YuvConstants* yuvconstants, | 2230 const struct YuvConstants* yuvconstants, |
| 2231 int width) { | 2231 int width) { |
| 2232 asm volatile ( | 2232 asm volatile ( |
| 2233 "sub %[u_buf],%[v_buf] \n" | 2233 "sub %[u_buf],%[v_buf] \n" |
| 2234 LABELALIGN | 2234 LABELALIGN |
| 2235 "1: \n" | 2235 "1: \n" |
| 2236 READYUVA422_AVX2 | 2236 READYUVA422_AVX2 |
| 2237 YUVTORGB_AVX2(yuvconstants) | 2237 YUVTORGB_AVX2(yuvconstants) |
| 2238 STOREARGB_AVX2 | 2238 STOREARGB_AVX2 |
| 2239 "subl $0x10,%[width] \n" | 2239 "subl $0x10,%[width] \n" |
| 2240 "jg 1b \n" | 2240 "jg 1b \n" |
| (...skipping 16 matching lines...) | (...skipping 16 matching lines...) |
| 2257 #endif // HAS_I422ALPHATOARGBROW_AVX2 | 2257 #endif // HAS_I422ALPHATOARGBROW_AVX2 |
| 2258 | 2258 |
| 2259 #if defined(HAS_I422ALPHATOABGRROW_AVX2) | 2259 #if defined(HAS_I422ALPHATOABGRROW_AVX2) |
| 2260 // 16 pixels | 2260 // 16 pixels |
| 2261 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR. | 2261 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR. |
| 2262 void OMITFP I422AlphaToABGRRow_AVX2(const uint8* y_buf, | 2262 void OMITFP I422AlphaToABGRRow_AVX2(const uint8* y_buf, |
| 2263 const uint8* u_buf, | 2263 const uint8* u_buf, |
| 2264 const uint8* v_buf, | 2264 const uint8* v_buf, |
| 2265 const uint8* a_buf, | 2265 const uint8* a_buf, |
| 2266 uint8* dst_abgr, | 2266 uint8* dst_abgr, |
| 2267 struct YuvConstants* yuvconstants, | 2267 const struct YuvConstants* yuvconstants, |
| 2268 int width) { | 2268 int width) { |
| 2269 asm volatile ( | 2269 asm volatile ( |
| 2270 "sub %[u_buf],%[v_buf] \n" | 2270 "sub %[u_buf],%[v_buf] \n" |
| 2271 LABELALIGN | 2271 LABELALIGN |
| 2272 "1: \n" | 2272 "1: \n" |
| 2273 READYUVA422_AVX2 | 2273 READYUVA422_AVX2 |
| 2274 YUVTORGB_AVX2(yuvconstants) | 2274 YUVTORGB_AVX2(yuvconstants) |
| 2275 STOREABGR_AVX2 | 2275 STOREABGR_AVX2 |
| 2276 "subl $0x10,%[width] \n" | 2276 "subl $0x10,%[width] \n" |
| 2277 "jg 1b \n" | 2277 "jg 1b \n" |
| (...skipping 15 matching lines...) | (...skipping 15 matching lines...) |
| 2293 } | 2293 } |
| 2294 #endif // HAS_I422ALPHATOABGRROW_AVX2 | 2294 #endif // HAS_I422ALPHATOABGRROW_AVX2 |
| 2295 | 2295 |
| 2296 #if defined(HAS_I422TOABGRROW_AVX2) | 2296 #if defined(HAS_I422TOABGRROW_AVX2) |
| 2297 // 16 pixels | 2297 // 16 pixels |
| 2298 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). | 2298 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). |
| 2299 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf, | 2299 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf, |
| 2300 const uint8* u_buf, | 2300 const uint8* u_buf, |
| 2301 const uint8* v_buf, | 2301 const uint8* v_buf, |
| 2302 uint8* dst_abgr, | 2302 uint8* dst_abgr, |
| 2303 struct YuvConstants* yuvconstants, | 2303 const struct YuvConstants* yuvconstants, |
| 2304 int width) { | 2304 int width) { |
| 2305 asm volatile ( | 2305 asm volatile ( |
| 2306 "sub %[u_buf],%[v_buf] \n" | 2306 "sub %[u_buf],%[v_buf] \n" |
| 2307 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2307 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
| 2308 LABELALIGN | 2308 LABELALIGN |
| 2309 "1: \n" | 2309 "1: \n" |
| 2310 READYUV422_AVX2 | 2310 READYUV422_AVX2 |
| 2311 YUVTORGB_AVX2(yuvconstants) | 2311 YUVTORGB_AVX2(yuvconstants) |
| 2312 STOREABGR_AVX2 | 2312 STOREABGR_AVX2 |
| 2313 "sub $0x10,%[width] \n" | 2313 "sub $0x10,%[width] \n" |
| (...skipping 11 matching lines...) | (...skipping 11 matching lines...) |
| 2325 } | 2325 } |
| 2326 #endif // HAS_I422TOABGRROW_AVX2 | 2326 #endif // HAS_I422TOABGRROW_AVX2 |
| 2327 | 2327 |
| 2328 #if defined(HAS_I422TORGBAROW_AVX2) | 2328 #if defined(HAS_I422TORGBAROW_AVX2) |
| 2329 // 16 pixels | 2329 // 16 pixels |
| 2330 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). | 2330 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). |
| 2331 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, | 2331 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, |
| 2332 const uint8* u_buf, | 2332 const uint8* u_buf, |
| 2333 const uint8* v_buf, | 2333 const uint8* v_buf, |
| 2334 uint8* dst_argb, | 2334 uint8* dst_argb, |
| 2335 struct YuvConstants* yuvconstants, | 2335 const struct YuvConstants* yuvconstants, |
| 2336 int width) { | 2336 int width) { |
| 2337 asm volatile ( | 2337 asm volatile ( |
| 2338 "sub %[u_buf],%[v_buf] \n" | 2338 "sub %[u_buf],%[v_buf] \n" |
| 2339 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2339 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
| 2340 LABELALIGN | 2340 LABELALIGN |
| 2341 "1: \n" | 2341 "1: \n" |
| 2342 READYUV422_AVX2 | 2342 READYUV422_AVX2 |
| 2343 YUVTORGB_AVX2(yuvconstants) | 2343 YUVTORGB_AVX2(yuvconstants) |
| 2344 | 2344 |
| 2345 // Step 3: Weave into RGBA | 2345 // Step 3: Weave into RGBA |
| (...skipping 20 matching lines...) Expand all Loading... |
| 2366 ); | 2366 ); |
| 2367 } | 2367 } |
| 2368 #endif // HAS_I422TORGBAROW_AVX2 | 2368 #endif // HAS_I422TORGBAROW_AVX2 |
| 2369 | 2369 |
| 2370 #if defined(HAS_NV12TOARGBROW_AVX2) | 2370 #if defined(HAS_NV12TOARGBROW_AVX2) |
| 2371 // 16 pixels. | 2371 // 16 pixels. |
| 2372 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2372 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
| 2373 void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, | 2373 void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, |
| 2374 const uint8* uv_buf, | 2374 const uint8* uv_buf, |
| 2375 uint8* dst_argb, | 2375 uint8* dst_argb, |
| 2376 struct YuvConstants* yuvconstants, | 2376 const struct YuvConstants* yuvconstants, |
| 2377 int width) { | 2377 int width) { |
| 2378 asm volatile ( | 2378 asm volatile ( |
| 2379 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2379 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
| 2380 LABELALIGN | 2380 LABELALIGN |
| 2381 "1: \n" | 2381 "1: \n" |
| 2382 READNV12_AVX2 | 2382 READNV12_AVX2 |
| 2383 YUVTORGB_AVX2(yuvconstants) | 2383 YUVTORGB_AVX2(yuvconstants) |
| 2384 STOREARGB_AVX2 | 2384 STOREARGB_AVX2 |
| 2385 "sub $0x10,%[width] \n" | 2385 "sub $0x10,%[width] \n" |
| 2386 "jg 1b \n" | 2386 "jg 1b \n" |
| 2387 "vzeroupper \n" | 2387 "vzeroupper \n" |
| 2388 : [y_buf]"+r"(y_buf), // %[y_buf] | 2388 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 2389 [uv_buf]"+r"(uv_buf), // %[uv_buf] | 2389 [uv_buf]"+r"(uv_buf), // %[uv_buf] |
| 2390 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 2390 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 2391 [width]"+rm"(width) // %[width] | 2391 [width]"+rm"(width) // %[width] |
| 2392 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 2392 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 2393 // Does not use r14. | 2393 // Does not use r14. |
| 2394 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2394 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 2395 ); | 2395 ); |
| 2396 } | 2396 } |
| 2397 #endif // HAS_NV12TOARGBROW_AVX2 | 2397 #endif // HAS_NV12TOARGBROW_AVX2 |
| 2398 | 2398 |
| 2399 #if defined(HAS_NV21TOARGBROW_AVX2) | 2399 #if defined(HAS_NV21TOARGBROW_AVX2) |
| 2400 // 16 pixels. | 2400 // 16 pixels. |
| 2401 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2401 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
| 2402 void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, | 2402 void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, |
| 2403 const uint8* vu_buf, | 2403 const uint8* vu_buf, |
| 2404 uint8* dst_argb, | 2404 uint8* dst_argb, |
| 2405 struct YuvConstants* yuvconstants, | 2405 const struct YuvConstants* yuvconstants, |
| 2406 int width) { | 2406 int width) { |
| 2407 asm volatile ( | 2407 asm volatile ( |
| 2408 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2408 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
| 2409 LABELALIGN | 2409 LABELALIGN |
| 2410 "1: \n" | 2410 "1: \n" |
| 2411 READNV21_AVX2 | 2411 READNV21_AVX2 |
| 2412 YUVTORGB_AVX2(yuvconstants) | 2412 YUVTORGB_AVX2(yuvconstants) |
| 2413 STOREARGB_AVX2 | 2413 STOREARGB_AVX2 |
| 2414 "sub $0x10,%[width] \n" | 2414 "sub $0x10,%[width] \n" |
| 2415 "jg 1b \n" | 2415 "jg 1b \n" |
| 2416 "vzeroupper \n" | 2416 "vzeroupper \n" |
| 2417 : [y_buf]"+r"(y_buf), // %[y_buf] | 2417 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 2418 [vu_buf]"+r"(vu_buf), // %[vu_buf] | 2418 [vu_buf]"+r"(vu_buf), // %[vu_buf] |
| 2419 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 2419 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 2420 [width]"+rm"(width) // %[width] | 2420 [width]"+rm"(width) // %[width] |
| 2421 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 2421 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
| 2422 [kShuffleNV21]"m"(kShuffleNV21) | 2422 [kShuffleNV21]"m"(kShuffleNV21) |
| 2423 // Does not use r14. | 2423 // Does not use r14. |
| 2424 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2424 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 2425 ); | 2425 ); |
| 2426 } | 2426 } |
| 2427 #endif // HAS_NV21TOARGBROW_AVX2 | 2427 #endif // HAS_NV21TOARGBROW_AVX2 |
| 2428 | 2428 |
| 2429 #if defined(HAS_YUY2TOARGBROW_AVX2) | 2429 #if defined(HAS_YUY2TOARGBROW_AVX2) |
| 2430 // 16 pixels. | 2430 // 16 pixels. |
| 2431 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). | 2431 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). |
| 2432 void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, | 2432 void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, |
| 2433 uint8* dst_argb, | 2433 uint8* dst_argb, |
| 2434 struct YuvConstants* yuvconstants, | 2434 const struct YuvConstants* yuvconstants, |
| 2435 int width) { | 2435 int width) { |
| 2436 asm volatile ( | 2436 asm volatile ( |
| 2437 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2437 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
| 2438 LABELALIGN | 2438 LABELALIGN |
| 2439 "1: \n" | 2439 "1: \n" |
| 2440 READYUY2_AVX2 | 2440 READYUY2_AVX2 |
| 2441 YUVTORGB_AVX2(yuvconstants) | 2441 YUVTORGB_AVX2(yuvconstants) |
| 2442 STOREARGB_AVX2 | 2442 STOREARGB_AVX2 |
| 2443 "sub $0x10,%[width] \n" | 2443 "sub $0x10,%[width] \n" |
| 2444 "jg 1b \n" | 2444 "jg 1b \n" |
| 2445 "vzeroupper \n" | 2445 "vzeroupper \n" |
| 2446 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf] | 2446 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf] |
| 2447 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 2447 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 2448 [width]"+rm"(width) // %[width] | 2448 [width]"+rm"(width) // %[width] |
| 2449 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 2449 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
| 2450 [kShuffleYUY2Y]"m"(kShuffleYUY2Y), | 2450 [kShuffleYUY2Y]"m"(kShuffleYUY2Y), |
| 2451 [kShuffleYUY2UV]"m"(kShuffleYUY2UV) | 2451 [kShuffleYUY2UV]"m"(kShuffleYUY2UV) |
| 2452 // Does not use r14. | 2452 // Does not use r14. |
| 2453 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2453 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 2454 ); | 2454 ); |
| 2455 } | 2455 } |
| 2456 #endif // HAS_YUY2TOARGBROW_AVX2 | 2456 #endif // HAS_YUY2TOARGBROW_AVX2 |
| 2457 | 2457 |
| 2458 #if defined(HAS_UYVYTOARGBROW_AVX2) | 2458 #if defined(HAS_UYVYTOARGBROW_AVX2) |
| 2459 // 16 pixels. | 2459 // 16 pixels. |
| 2460 // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes). | 2460 // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes). |
| 2461 void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, | 2461 void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, |
| 2462 uint8* dst_argb, | 2462 uint8* dst_argb, |
| 2463 struct YuvConstants* yuvconstants, | 2463 const struct YuvConstants* yuvconstants, |
| 2464 int width) { | 2464 int width) { |
| 2465 asm volatile ( | 2465 asm volatile ( |
| 2466 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | 2466 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" |
| 2467 LABELALIGN | 2467 LABELALIGN |
| 2468 "1: \n" | 2468 "1: \n" |
| 2469 READUYVY_AVX2 | 2469 READUYVY_AVX2 |
| 2470 YUVTORGB_AVX2(yuvconstants) | 2470 YUVTORGB_AVX2(yuvconstants) |
| 2471 STOREARGB_AVX2 | 2471 STOREARGB_AVX2 |
| 2472 "sub $0x10,%[width] \n" | 2472 "sub $0x10,%[width] \n" |
| 2473 "jg 1b \n" | 2473 "jg 1b \n" |
| (...skipping 3110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5584 ); | 5584 ); |
| 5585 } | 5585 } |
| 5586 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5586 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
| 5587 | 5587 |
| 5588 #endif // defined(__x86_64__) || defined(__i386__) | 5588 #endif // defined(__x86_64__) || defined(__i386__) |
| 5589 | 5589 |
| 5590 #ifdef __cplusplus | 5590 #ifdef __cplusplus |
| 5591 } // extern "C" | 5591 } // extern "C" |
| 5592 } // namespace libyuv | 5592 } // namespace libyuv |
| 5593 #endif | 5593 #endif |
| OLD | NEW |