Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1060)

Side by Side Diff: source/row_gcc.cc

Issue 1398623002: fix jpeg and bt.709 yuvconstants for neon64. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_common.cc ('k') | source/row_mips.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // VERSION 2 1 // VERSION 2
2 /* 2 /*
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * 4 *
5 * Use of this source code is governed by a BSD-style license 5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source 6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found 7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may 8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree. 9 * be found in the AUTHORS file in the root of the source tree.
10 */ 10 */
(...skipping 1578 matching lines...) Expand 10 before | Expand all | Expand 10 after
1589 "punpcklwd %%xmm1,%%xmm5 \n" \ 1589 "punpcklwd %%xmm1,%%xmm5 \n" \
1590 "punpckhwd %%xmm1,%%xmm0 \n" \ 1590 "punpckhwd %%xmm1,%%xmm0 \n" \
1591 "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \ 1591 "movdqu %%xmm5," MEMACCESS([dst_rgba]) " \n" \
1592 "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \ 1592 "movdqu %%xmm0," MEMACCESS2(0x10, [dst_rgba]) " \n" \
1593 "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n" 1593 "lea " MEMLEA(0x20, [dst_rgba]) ",%[dst_rgba] \n"
1594 1594
1595 void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf, 1595 void OMITFP I444ToARGBRow_SSSE3(const uint8* y_buf,
1596 const uint8* u_buf, 1596 const uint8* u_buf,
1597 const uint8* v_buf, 1597 const uint8* v_buf,
1598 uint8* dst_argb, 1598 uint8* dst_argb,
1599 struct YuvConstants* yuvconstants, 1599 const struct YuvConstants* yuvconstants,
1600 int width) { 1600 int width) {
1601 asm volatile ( 1601 asm volatile (
1602 "sub %[u_buf],%[v_buf] \n" 1602 "sub %[u_buf],%[v_buf] \n"
1603 "pcmpeqb %%xmm5,%%xmm5 \n" 1603 "pcmpeqb %%xmm5,%%xmm5 \n"
1604 LABELALIGN 1604 LABELALIGN
1605 "1: \n" 1605 "1: \n"
1606 READYUV444 1606 READYUV444
1607 YUVTORGB(yuvconstants) 1607 YUVTORGB(yuvconstants)
1608 STOREARGB 1608 STOREARGB
1609 "sub $0x8,%[width] \n" 1609 "sub $0x8,%[width] \n"
1610 "jg 1b \n" 1610 "jg 1b \n"
1611 : [y_buf]"+r"(y_buf), // %[y_buf] 1611 : [y_buf]"+r"(y_buf), // %[y_buf]
1612 [u_buf]"+r"(u_buf), // %[u_buf] 1612 [u_buf]"+r"(u_buf), // %[u_buf]
1613 [v_buf]"+r"(v_buf), // %[v_buf] 1613 [v_buf]"+r"(v_buf), // %[v_buf]
1614 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1614 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1615 [width]"+rm"(width) // %[width] 1615 [width]"+rm"(width) // %[width]
1616 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1616 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1617 : "memory", "cc", NACL_R14 1617 : "memory", "cc", NACL_R14
1618 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1618 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1619 ); 1619 );
1620 } 1620 }
1621 1621
1622 void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf, 1622 void OMITFP I444ToABGRRow_SSSE3(const uint8* y_buf,
1623 const uint8* u_buf, 1623 const uint8* u_buf,
1624 const uint8* v_buf, 1624 const uint8* v_buf,
1625 uint8* dst_abgr, 1625 uint8* dst_abgr,
1626 struct YuvConstants* yuvconstants, 1626 const struct YuvConstants* yuvconstants,
1627 int width) { 1627 int width) {
1628 asm volatile ( 1628 asm volatile (
1629 "sub %[u_buf],%[v_buf] \n" 1629 "sub %[u_buf],%[v_buf] \n"
1630 "pcmpeqb %%xmm5,%%xmm5 \n" 1630 "pcmpeqb %%xmm5,%%xmm5 \n"
1631 LABELALIGN 1631 LABELALIGN
1632 "1: \n" 1632 "1: \n"
1633 READYUV444 1633 READYUV444
1634 YUVTORGB(yuvconstants) 1634 YUVTORGB(yuvconstants)
1635 STOREABGR 1635 STOREABGR
1636 "sub $0x8,%[width] \n" 1636 "sub $0x8,%[width] \n"
1637 "jg 1b \n" 1637 "jg 1b \n"
1638 : [y_buf]"+r"(y_buf), // %[y_buf] 1638 : [y_buf]"+r"(y_buf), // %[y_buf]
1639 [u_buf]"+r"(u_buf), // %[u_buf] 1639 [u_buf]"+r"(u_buf), // %[u_buf]
1640 [v_buf]"+r"(v_buf), // %[v_buf] 1640 [v_buf]"+r"(v_buf), // %[v_buf]
1641 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] 1641 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
1642 [width]"+rm"(width) // %[width] 1642 [width]"+rm"(width) // %[width]
1643 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1643 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1644 : "memory", "cc", NACL_R14 1644 : "memory", "cc", NACL_R14
1645 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1645 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1646 ); 1646 );
1647 } 1647 }
1648 1648
1649 void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf, 1649 void OMITFP I422ToRGB24Row_SSSE3(const uint8* y_buf,
1650 const uint8* u_buf, 1650 const uint8* u_buf,
1651 const uint8* v_buf, 1651 const uint8* v_buf,
1652 uint8* dst_rgb24, 1652 uint8* dst_rgb24,
1653 struct YuvConstants* yuvconstants, 1653 const struct YuvConstants* yuvconstants,
1654 int width) { 1654 int width) {
1655 asm volatile ( 1655 asm volatile (
1656 "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" 1656 "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n"
1657 "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" 1657 "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n"
1658 "sub %[u_buf],%[v_buf] \n" 1658 "sub %[u_buf],%[v_buf] \n"
1659 LABELALIGN 1659 LABELALIGN
1660 "1: \n" 1660 "1: \n"
1661 READYUV422 1661 READYUV422
1662 YUVTORGB(yuvconstants) 1662 YUVTORGB(yuvconstants)
1663 "punpcklbw %%xmm1,%%xmm0 \n" 1663 "punpcklbw %%xmm1,%%xmm0 \n"
(...skipping 23 matching lines...) Expand all
1687 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) 1687 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
1688 : "memory", "cc", NACL_R14 1688 : "memory", "cc", NACL_R14
1689 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" 1689 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
1690 ); 1690 );
1691 } 1691 }
1692 1692
1693 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, 1693 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
1694 const uint8* u_buf, 1694 const uint8* u_buf,
1695 const uint8* v_buf, 1695 const uint8* v_buf,
1696 uint8* dst_raw, 1696 uint8* dst_raw,
1697 struct YuvConstants* yuvconstants, 1697 const struct YuvConstants* yuvconstants,
1698 int width) { 1698 int width) {
1699 asm volatile ( 1699 asm volatile (
1700 "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n" 1700 "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
1701 "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n" 1701 "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n"
1702 "sub %[u_buf],%[v_buf] \n" 1702 "sub %[u_buf],%[v_buf] \n"
1703 LABELALIGN 1703 LABELALIGN
1704 "1: \n" 1704 "1: \n"
1705 READYUV422 1705 READYUV422
1706 YUVTORGB(yuvconstants) 1706 YUVTORGB(yuvconstants)
1707 "punpcklbw %%xmm1,%%xmm0 \n" 1707 "punpcklbw %%xmm1,%%xmm0 \n"
(...skipping 23 matching lines...) Expand all
1731 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) 1731 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
1732 : "memory", "cc", NACL_R14 1732 : "memory", "cc", NACL_R14
1733 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" 1733 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
1734 ); 1734 );
1735 } 1735 }
1736 1736
1737 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, 1737 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
1738 const uint8* u_buf, 1738 const uint8* u_buf,
1739 const uint8* v_buf, 1739 const uint8* v_buf,
1740 uint8* dst_argb, 1740 uint8* dst_argb,
1741 struct YuvConstants* yuvconstants, 1741 const struct YuvConstants* yuvconstants,
1742 int width) { 1742 int width) {
1743 asm volatile ( 1743 asm volatile (
1744 "sub %[u_buf],%[v_buf] \n" 1744 "sub %[u_buf],%[v_buf] \n"
1745 "pcmpeqb %%xmm5,%%xmm5 \n" 1745 "pcmpeqb %%xmm5,%%xmm5 \n"
1746 LABELALIGN 1746 LABELALIGN
1747 "1: \n" 1747 "1: \n"
1748 READYUV422 1748 READYUV422
1749 YUVTORGB(yuvconstants) 1749 YUVTORGB(yuvconstants)
1750 STOREARGB 1750 STOREARGB
1751 "sub $0x8,%[width] \n" 1751 "sub $0x8,%[width] \n"
1752 "jg 1b \n" 1752 "jg 1b \n"
1753 : [y_buf]"+r"(y_buf), // %[y_buf] 1753 : [y_buf]"+r"(y_buf), // %[y_buf]
1754 [u_buf]"+r"(u_buf), // %[u_buf] 1754 [u_buf]"+r"(u_buf), // %[u_buf]
1755 [v_buf]"+r"(v_buf), // %[v_buf] 1755 [v_buf]"+r"(v_buf), // %[v_buf]
1756 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1756 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1757 [width]"+rm"(width) // %[width] 1757 [width]"+rm"(width) // %[width]
1758 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1758 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1759 : "memory", "cc", NACL_R14 1759 : "memory", "cc", NACL_R14
1760 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1760 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1761 ); 1761 );
1762 } 1762 }
1763 1763
1764 void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf, 1764 void OMITFP I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
1765 const uint8* u_buf, 1765 const uint8* u_buf,
1766 const uint8* v_buf, 1766 const uint8* v_buf,
1767 const uint8* a_buf, 1767 const uint8* a_buf,
1768 uint8* dst_argb, 1768 uint8* dst_argb,
1769 struct YuvConstants* yuvconstants, 1769 const struct YuvConstants* yuvconstants,
1770 int width) { 1770 int width) {
1771 asm volatile ( 1771 asm volatile (
1772 "sub %[u_buf],%[v_buf] \n" 1772 "sub %[u_buf],%[v_buf] \n"
1773 LABELALIGN 1773 LABELALIGN
1774 "1: \n" 1774 "1: \n"
1775 READYUVA422 1775 READYUVA422
1776 YUVTORGB(yuvconstants) 1776 YUVTORGB(yuvconstants)
1777 STOREARGB 1777 STOREARGB
1778 "subl $0x8,%[width] \n" 1778 "subl $0x8,%[width] \n"
1779 "jg 1b \n" 1779 "jg 1b \n"
(...skipping 11 matching lines...) Expand all
1791 : "memory", "cc", NACL_R14 1791 : "memory", "cc", NACL_R14
1792 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1792 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1793 ); 1793 );
1794 } 1794 }
1795 1795
1796 void OMITFP I422AlphaToABGRRow_SSSE3(const uint8* y_buf, 1796 void OMITFP I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
1797 const uint8* u_buf, 1797 const uint8* u_buf,
1798 const uint8* v_buf, 1798 const uint8* v_buf,
1799 const uint8* a_buf, 1799 const uint8* a_buf,
1800 uint8* dst_abgr, 1800 uint8* dst_abgr,
1801 struct YuvConstants* yuvconstants, 1801 const struct YuvConstants* yuvconstants,
1802 int width) { 1802 int width) {
1803 asm volatile ( 1803 asm volatile (
1804 "sub %[u_buf],%[v_buf] \n" 1804 "sub %[u_buf],%[v_buf] \n"
1805 LABELALIGN 1805 LABELALIGN
1806 "1: \n" 1806 "1: \n"
1807 READYUVA422 1807 READYUVA422
1808 YUVTORGB(yuvconstants) 1808 YUVTORGB(yuvconstants)
1809 STOREABGR 1809 STOREABGR
1810 "subl $0x8,%[width] \n" 1810 "subl $0x8,%[width] \n"
1811 "jg 1b \n" 1811 "jg 1b \n"
(...skipping 10 matching lines...) Expand all
1822 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1822 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1823 : "memory", "cc", NACL_R14 1823 : "memory", "cc", NACL_R14
1824 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1824 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1825 ); 1825 );
1826 } 1826 }
1827 1827
1828 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, 1828 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
1829 const uint8* u_buf, 1829 const uint8* u_buf,
1830 const uint8* v_buf, 1830 const uint8* v_buf,
1831 uint8* dst_argb, 1831 uint8* dst_argb,
1832 struct YuvConstants* yuvconstants, 1832 const struct YuvConstants* yuvconstants,
1833 int width) { 1833 int width) {
1834 asm volatile ( 1834 asm volatile (
1835 "sub %[u_buf],%[v_buf] \n" 1835 "sub %[u_buf],%[v_buf] \n"
1836 "pcmpeqb %%xmm5,%%xmm5 \n" 1836 "pcmpeqb %%xmm5,%%xmm5 \n"
1837 LABELALIGN 1837 LABELALIGN
1838 "1: \n" 1838 "1: \n"
1839 READYUV411 1839 READYUV411
1840 YUVTORGB(yuvconstants) 1840 YUVTORGB(yuvconstants)
1841 STOREARGB 1841 STOREARGB
1842 "sub $0x8,%[width] \n" 1842 "sub $0x8,%[width] \n"
1843 "jg 1b \n" 1843 "jg 1b \n"
1844 : [y_buf]"+r"(y_buf), // %[y_buf] 1844 : [y_buf]"+r"(y_buf), // %[y_buf]
1845 [u_buf]"+r"(u_buf), // %[u_buf] 1845 [u_buf]"+r"(u_buf), // %[u_buf]
1846 [v_buf]"+r"(v_buf), // %[v_buf] 1846 [v_buf]"+r"(v_buf), // %[v_buf]
1847 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1847 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1848 [width]"+rm"(width) // %[width] 1848 [width]"+rm"(width) // %[width]
1849 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1849 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1850 : "memory", "cc", NACL_R14 1850 : "memory", "cc", NACL_R14
1851 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1851 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1852 ); 1852 );
1853 } 1853 }
1854 1854
1855 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, 1855 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
1856 const uint8* uv_buf, 1856 const uint8* uv_buf,
1857 uint8* dst_argb, 1857 uint8* dst_argb,
1858 struct YuvConstants* yuvconstants, 1858 const struct YuvConstants* yuvconstants,
1859 int width) { 1859 int width) {
1860 asm volatile ( 1860 asm volatile (
1861 "pcmpeqb %%xmm5,%%xmm5 \n" 1861 "pcmpeqb %%xmm5,%%xmm5 \n"
1862 LABELALIGN 1862 LABELALIGN
1863 "1: \n" 1863 "1: \n"
1864 READNV12 1864 READNV12
1865 YUVTORGB(yuvconstants) 1865 YUVTORGB(yuvconstants)
1866 STOREARGB 1866 STOREARGB
1867 "sub $0x8,%[width] \n" 1867 "sub $0x8,%[width] \n"
1868 "jg 1b \n" 1868 "jg 1b \n"
1869 : [y_buf]"+r"(y_buf), // %[y_buf] 1869 : [y_buf]"+r"(y_buf), // %[y_buf]
1870 [uv_buf]"+r"(uv_buf), // %[uv_buf] 1870 [uv_buf]"+r"(uv_buf), // %[uv_buf]
1871 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1871 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1872 [width]"+rm"(width) // %[width] 1872 [width]"+rm"(width) // %[width]
1873 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1873 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1874 // Does not use r14. 1874 // Does not use r14.
1875 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1875 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1876 ); 1876 );
1877 } 1877 }
1878 1878
1879 void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf, 1879 void OMITFP NV21ToARGBRow_SSSE3(const uint8* y_buf,
1880 const uint8* vu_buf, 1880 const uint8* vu_buf,
1881 uint8* dst_argb, 1881 uint8* dst_argb,
1882 struct YuvConstants* yuvconstants, 1882 const struct YuvConstants* yuvconstants,
1883 int width) { 1883 int width) {
1884 asm volatile ( 1884 asm volatile (
1885 "pcmpeqb %%xmm5,%%xmm5 \n" 1885 "pcmpeqb %%xmm5,%%xmm5 \n"
1886 LABELALIGN 1886 LABELALIGN
1887 "1: \n" 1887 "1: \n"
1888 READNV21 1888 READNV21
1889 YUVTORGB(yuvconstants) 1889 YUVTORGB(yuvconstants)
1890 STOREARGB 1890 STOREARGB
1891 "sub $0x8,%[width] \n" 1891 "sub $0x8,%[width] \n"
1892 "jg 1b \n" 1892 "jg 1b \n"
1893 : [y_buf]"+r"(y_buf), // %[y_buf] 1893 : [y_buf]"+r"(y_buf), // %[y_buf]
1894 [vu_buf]"+r"(vu_buf), // %[vu_buf] 1894 [vu_buf]"+r"(vu_buf), // %[vu_buf]
1895 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1895 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1896 [width]"+rm"(width) // %[width] 1896 [width]"+rm"(width) // %[width]
1897 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] 1897 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1898 [kShuffleNV21]"m"(kShuffleNV21) 1898 [kShuffleNV21]"m"(kShuffleNV21)
1899 // Does not use r14. 1899 // Does not use r14.
1900 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1900 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1901 ); 1901 );
1902 } 1902 }
1903 1903
1904 void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf, 1904 void OMITFP YUY2ToARGBRow_SSSE3(const uint8* yuy2_buf,
1905 uint8* dst_argb, 1905 uint8* dst_argb,
1906 struct YuvConstants* yuvconstants, 1906 const struct YuvConstants* yuvconstants,
1907 int width) { 1907 int width) {
1908 asm volatile ( 1908 asm volatile (
1909 "pcmpeqb %%xmm5,%%xmm5 \n" 1909 "pcmpeqb %%xmm5,%%xmm5 \n"
1910 LABELALIGN 1910 LABELALIGN
1911 "1: \n" 1911 "1: \n"
1912 READYUY2 1912 READYUY2
1913 YUVTORGB(yuvconstants) 1913 YUVTORGB(yuvconstants)
1914 STOREARGB 1914 STOREARGB
1915 "sub $0x8,%[width] \n" 1915 "sub $0x8,%[width] \n"
1916 "jg 1b \n" 1916 "jg 1b \n"
1917 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf] 1917 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf]
1918 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1918 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1919 [width]"+rm"(width) // %[width] 1919 [width]"+rm"(width) // %[width]
1920 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] 1920 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1921 [kShuffleYUY2Y]"m"(kShuffleYUY2Y), 1921 [kShuffleYUY2Y]"m"(kShuffleYUY2Y),
1922 [kShuffleYUY2UV]"m"(kShuffleYUY2UV) 1922 [kShuffleYUY2UV]"m"(kShuffleYUY2UV)
1923 // Does not use r14. 1923 // Does not use r14.
1924 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1924 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1925 ); 1925 );
1926 } 1926 }
1927 1927
1928 void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf, 1928 void OMITFP UYVYToARGBRow_SSSE3(const uint8* uyvy_buf,
1929 uint8* dst_argb, 1929 uint8* dst_argb,
1930 struct YuvConstants* yuvconstants, 1930 const struct YuvConstants* yuvconstants,
1931 int width) { 1931 int width) {
1932 asm volatile ( 1932 asm volatile (
1933 "pcmpeqb %%xmm5,%%xmm5 \n" 1933 "pcmpeqb %%xmm5,%%xmm5 \n"
1934 LABELALIGN 1934 LABELALIGN
1935 "1: \n" 1935 "1: \n"
1936 READUYVY 1936 READUYVY
1937 YUVTORGB(yuvconstants) 1937 YUVTORGB(yuvconstants)
1938 STOREARGB 1938 STOREARGB
1939 "sub $0x8,%[width] \n" 1939 "sub $0x8,%[width] \n"
1940 "jg 1b \n" 1940 "jg 1b \n"
1941 : [uyvy_buf]"+r"(uyvy_buf), // %[uyvy_buf] 1941 : [uyvy_buf]"+r"(uyvy_buf), // %[uyvy_buf]
1942 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1942 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1943 [width]"+rm"(width) // %[width] 1943 [width]"+rm"(width) // %[width]
1944 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] 1944 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1945 [kShuffleUYVYY]"m"(kShuffleUYVYY), 1945 [kShuffleUYVYY]"m"(kShuffleUYVYY),
1946 [kShuffleUYVYUV]"m"(kShuffleUYVYUV) 1946 [kShuffleUYVYUV]"m"(kShuffleUYVYUV)
1947 // Does not use r14. 1947 // Does not use r14.
1948 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1948 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1949 ); 1949 );
1950 } 1950 }
1951 1951
1952 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, 1952 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
1953 const uint8* u_buf, 1953 const uint8* u_buf,
1954 const uint8* v_buf, 1954 const uint8* v_buf,
1955 uint8* dst_bgra, 1955 uint8* dst_bgra,
1956 struct YuvConstants* yuvconstants, 1956 const struct YuvConstants* yuvconstants,
1957 int width) { 1957 int width) {
1958 asm volatile ( 1958 asm volatile (
1959 "sub %[u_buf],%[v_buf] \n" 1959 "sub %[u_buf],%[v_buf] \n"
1960 "pcmpeqb %%xmm5,%%xmm5 \n" 1960 "pcmpeqb %%xmm5,%%xmm5 \n"
1961 LABELALIGN 1961 LABELALIGN
1962 "1: \n" 1962 "1: \n"
1963 READYUV422 1963 READYUV422
1964 YUVTORGB(yuvconstants) 1964 YUVTORGB(yuvconstants)
1965 STOREBGRA 1965 STOREBGRA
1966 "sub $0x8,%[width] \n" 1966 "sub $0x8,%[width] \n"
1967 "jg 1b \n" 1967 "jg 1b \n"
1968 : [y_buf]"+r"(y_buf), // %[y_buf] 1968 : [y_buf]"+r"(y_buf), // %[y_buf]
1969 [u_buf]"+r"(u_buf), // %[u_buf] 1969 [u_buf]"+r"(u_buf), // %[u_buf]
1970 [v_buf]"+r"(v_buf), // %[v_buf] 1970 [v_buf]"+r"(v_buf), // %[v_buf]
1971 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] 1971 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
1972 [width]"+rm"(width) // %[width] 1972 [width]"+rm"(width) // %[width]
1973 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1973 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1974 : "memory", "cc", NACL_R14 1974 : "memory", "cc", NACL_R14
1975 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1975 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1976 ); 1976 );
1977 } 1977 }
1978 1978
1979 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, 1979 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
1980 const uint8* u_buf, 1980 const uint8* u_buf,
1981 const uint8* v_buf, 1981 const uint8* v_buf,
1982 uint8* dst_abgr, 1982 uint8* dst_abgr,
1983 struct YuvConstants* yuvconstants, 1983 const struct YuvConstants* yuvconstants,
1984 int width) { 1984 int width) {
1985 asm volatile ( 1985 asm volatile (
1986 "sub %[u_buf],%[v_buf] \n" 1986 "sub %[u_buf],%[v_buf] \n"
1987 "pcmpeqb %%xmm5,%%xmm5 \n" 1987 "pcmpeqb %%xmm5,%%xmm5 \n"
1988 LABELALIGN 1988 LABELALIGN
1989 "1: \n" 1989 "1: \n"
1990 READYUV422 1990 READYUV422
1991 YUVTORGB(yuvconstants) 1991 YUVTORGB(yuvconstants)
1992 STOREABGR 1992 STOREABGR
1993 "sub $0x8,%[width] \n" 1993 "sub $0x8,%[width] \n"
1994 "jg 1b \n" 1994 "jg 1b \n"
1995 : [y_buf]"+r"(y_buf), // %[y_buf] 1995 : [y_buf]"+r"(y_buf), // %[y_buf]
1996 [u_buf]"+r"(u_buf), // %[u_buf] 1996 [u_buf]"+r"(u_buf), // %[u_buf]
1997 [v_buf]"+r"(v_buf), // %[v_buf] 1997 [v_buf]"+r"(v_buf), // %[v_buf]
1998 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] 1998 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
1999 [width]"+rm"(width) // %[width] 1999 [width]"+rm"(width) // %[width]
2000 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 2000 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
2001 : "memory", "cc", NACL_R14 2001 : "memory", "cc", NACL_R14
2002 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 2002 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2003 ); 2003 );
2004 } 2004 }
2005 2005
2006 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, 2006 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
2007 const uint8* u_buf, 2007 const uint8* u_buf,
2008 const uint8* v_buf, 2008 const uint8* v_buf,
2009 uint8* dst_rgba, 2009 uint8* dst_rgba,
2010 struct YuvConstants* yuvconstants, 2010 const struct YuvConstants* yuvconstants,
2011 int width) { 2011 int width) {
2012 asm volatile ( 2012 asm volatile (
2013 "sub %[u_buf],%[v_buf] \n" 2013 "sub %[u_buf],%[v_buf] \n"
2014 "pcmpeqb %%xmm5,%%xmm5 \n" 2014 "pcmpeqb %%xmm5,%%xmm5 \n"
2015 LABELALIGN 2015 LABELALIGN
2016 "1: \n" 2016 "1: \n"
2017 READYUV422 2017 READYUV422
2018 YUVTORGB(yuvconstants) 2018 YUVTORGB(yuvconstants)
2019 STORERGBA 2019 STORERGBA
2020 "sub $0x8,%[width] \n" 2020 "sub $0x8,%[width] \n"
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
2145 "vmovdqu %%ymm1," MEMACCESS2(0x20, [dst_abgr]) " \n" \ 2145 "vmovdqu %%ymm1," MEMACCESS2(0x20, [dst_abgr]) " \n" \
2146 "lea " MEMLEA(0x40, [dst_abgr]) ", %[dst_abgr] \n" 2146 "lea " MEMLEA(0x40, [dst_abgr]) ", %[dst_abgr] \n"
2147 2147
2148 #if defined(HAS_I422TOBGRAROW_AVX2) 2148 #if defined(HAS_I422TOBGRAROW_AVX2)
2149 // 16 pixels 2149 // 16 pixels
2150 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). 2150 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
2151 void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, 2151 void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
2152 const uint8* u_buf, 2152 const uint8* u_buf,
2153 const uint8* v_buf, 2153 const uint8* v_buf,
2154 uint8* dst_bgra, 2154 uint8* dst_bgra,
2155 struct YuvConstants* yuvconstants, 2155 const struct YuvConstants* yuvconstants,
2156 int width) { 2156 int width) {
2157 asm volatile ( 2157 asm volatile (
2158 "sub %[u_buf],%[v_buf] \n" 2158 "sub %[u_buf],%[v_buf] \n"
2159 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 2159 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2160 LABELALIGN 2160 LABELALIGN
2161 "1: \n" 2161 "1: \n"
2162 READYUV422_AVX2 2162 READYUV422_AVX2
2163 YUVTORGB_AVX2(yuvconstants) 2163 YUVTORGB_AVX2(yuvconstants)
2164 2164
2165 // Step 3: Weave into BGRA 2165 // Step 3: Weave into BGRA
(...skipping 21 matching lines...) Expand all
2187 } 2187 }
2188 #endif // HAS_I422TOBGRAROW_AVX2 2188 #endif // HAS_I422TOBGRAROW_AVX2
2189 2189
2190 #if defined(HAS_I422TOARGBROW_AVX2) 2190 #if defined(HAS_I422TOARGBROW_AVX2)
2191 // 16 pixels 2191 // 16 pixels
2192 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2192 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2193 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, 2193 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
2194 const uint8* u_buf, 2194 const uint8* u_buf,
2195 const uint8* v_buf, 2195 const uint8* v_buf,
2196 uint8* dst_argb, 2196 uint8* dst_argb,
2197 struct YuvConstants* yuvconstants, 2197 const struct YuvConstants* yuvconstants,
2198 int width) { 2198 int width) {
2199 asm volatile ( 2199 asm volatile (
2200 "sub %[u_buf],%[v_buf] \n" 2200 "sub %[u_buf],%[v_buf] \n"
2201 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 2201 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2202 LABELALIGN 2202 LABELALIGN
2203 "1: \n" 2203 "1: \n"
2204 READYUV422_AVX2 2204 READYUV422_AVX2
2205 YUVTORGB_AVX2(yuvconstants) 2205 YUVTORGB_AVX2(yuvconstants)
2206 STOREARGB_AVX2 2206 STOREARGB_AVX2
2207 "sub $0x10,%[width] \n" 2207 "sub $0x10,%[width] \n"
(...skipping 12 matching lines...) Expand all
2220 #endif // HAS_I422TOARGBROW_AVX2 2220 #endif // HAS_I422TOARGBROW_AVX2
2221 2221
2222 #if defined(HAS_I422ALPHATOARGBROW_AVX2) 2222 #if defined(HAS_I422ALPHATOARGBROW_AVX2)
2223 // 16 pixels 2223 // 16 pixels
2224 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. 2224 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB.
2225 void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf, 2225 void OMITFP I422AlphaToARGBRow_AVX2(const uint8* y_buf,
2226 const uint8* u_buf, 2226 const uint8* u_buf,
2227 const uint8* v_buf, 2227 const uint8* v_buf,
2228 const uint8* a_buf, 2228 const uint8* a_buf,
2229 uint8* dst_argb, 2229 uint8* dst_argb,
2230 struct YuvConstants* yuvconstants, 2230 const struct YuvConstants* yuvconstants,
2231 int width) { 2231 int width) {
2232 asm volatile ( 2232 asm volatile (
2233 "sub %[u_buf],%[v_buf] \n" 2233 "sub %[u_buf],%[v_buf] \n"
2234 LABELALIGN 2234 LABELALIGN
2235 "1: \n" 2235 "1: \n"
2236 READYUVA422_AVX2 2236 READYUVA422_AVX2
2237 YUVTORGB_AVX2(yuvconstants) 2237 YUVTORGB_AVX2(yuvconstants)
2238 STOREARGB_AVX2 2238 STOREARGB_AVX2
2239 "subl $0x10,%[width] \n" 2239 "subl $0x10,%[width] \n"
2240 "jg 1b \n" 2240 "jg 1b \n"
(...skipping 16 matching lines...) Expand all
2257 #endif // HAS_I422ALPHATOARGBROW_AVX2 2257 #endif // HAS_I422ALPHATOARGBROW_AVX2
2258 2258
2259 #if defined(HAS_I422ALPHATOABGRROW_AVX2) 2259 #if defined(HAS_I422ALPHATOABGRROW_AVX2)
2260 // 16 pixels 2260 // 16 pixels
2261 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR. 2261 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR.
2262 void OMITFP I422AlphaToABGRRow_AVX2(const uint8* y_buf, 2262 void OMITFP I422AlphaToABGRRow_AVX2(const uint8* y_buf,
2263 const uint8* u_buf, 2263 const uint8* u_buf,
2264 const uint8* v_buf, 2264 const uint8* v_buf,
2265 const uint8* a_buf, 2265 const uint8* a_buf,
2266 uint8* dst_abgr, 2266 uint8* dst_abgr,
2267 struct YuvConstants* yuvconstants, 2267 const struct YuvConstants* yuvconstants,
2268 int width) { 2268 int width) {
2269 asm volatile ( 2269 asm volatile (
2270 "sub %[u_buf],%[v_buf] \n" 2270 "sub %[u_buf],%[v_buf] \n"
2271 LABELALIGN 2271 LABELALIGN
2272 "1: \n" 2272 "1: \n"
2273 READYUVA422_AVX2 2273 READYUVA422_AVX2
2274 YUVTORGB_AVX2(yuvconstants) 2274 YUVTORGB_AVX2(yuvconstants)
2275 STOREABGR_AVX2 2275 STOREABGR_AVX2
2276 "subl $0x10,%[width] \n" 2276 "subl $0x10,%[width] \n"
2277 "jg 1b \n" 2277 "jg 1b \n"
(...skipping 15 matching lines...) Expand all
2293 } 2293 }
2294 #endif // HAS_I422ALPHATOABGRROW_AVX2 2294 #endif // HAS_I422ALPHATOABGRROW_AVX2
2295 2295
2296 #if defined(HAS_I422TOABGRROW_AVX2) 2296 #if defined(HAS_I422TOABGRROW_AVX2)
2297 // 16 pixels 2297 // 16 pixels
2298 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). 2298 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
2299 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf, 2299 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf,
2300 const uint8* u_buf, 2300 const uint8* u_buf,
2301 const uint8* v_buf, 2301 const uint8* v_buf,
2302 uint8* dst_abgr, 2302 uint8* dst_abgr,
2303 struct YuvConstants* yuvconstants, 2303 const struct YuvConstants* yuvconstants,
2304 int width) { 2304 int width) {
2305 asm volatile ( 2305 asm volatile (
2306 "sub %[u_buf],%[v_buf] \n" 2306 "sub %[u_buf],%[v_buf] \n"
2307 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 2307 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2308 LABELALIGN 2308 LABELALIGN
2309 "1: \n" 2309 "1: \n"
2310 READYUV422_AVX2 2310 READYUV422_AVX2
2311 YUVTORGB_AVX2(yuvconstants) 2311 YUVTORGB_AVX2(yuvconstants)
2312 STOREABGR_AVX2 2312 STOREABGR_AVX2
2313 "sub $0x10,%[width] \n" 2313 "sub $0x10,%[width] \n"
(...skipping 11 matching lines...) Expand all
2325 } 2325 }
2326 #endif // HAS_I422TOABGRROW_AVX2 2326 #endif // HAS_I422TOABGRROW_AVX2
2327 2327
2328 #if defined(HAS_I422TORGBAROW_AVX2) 2328 #if defined(HAS_I422TORGBAROW_AVX2)
2329 // 16 pixels 2329 // 16 pixels
2330 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). 2330 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
2331 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, 2331 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
2332 const uint8* u_buf, 2332 const uint8* u_buf,
2333 const uint8* v_buf, 2333 const uint8* v_buf,
2334 uint8* dst_argb, 2334 uint8* dst_argb,
2335 struct YuvConstants* yuvconstants, 2335 const struct YuvConstants* yuvconstants,
2336 int width) { 2336 int width) {
2337 asm volatile ( 2337 asm volatile (
2338 "sub %[u_buf],%[v_buf] \n" 2338 "sub %[u_buf],%[v_buf] \n"
2339 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 2339 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2340 LABELALIGN 2340 LABELALIGN
2341 "1: \n" 2341 "1: \n"
2342 READYUV422_AVX2 2342 READYUV422_AVX2
2343 YUVTORGB_AVX2(yuvconstants) 2343 YUVTORGB_AVX2(yuvconstants)
2344 2344
2345 // Step 3: Weave into RGBA 2345 // Step 3: Weave into RGBA
(...skipping 20 matching lines...) Expand all
2366 ); 2366 );
2367 } 2367 }
2368 #endif // HAS_I422TORGBAROW_AVX2 2368 #endif // HAS_I422TORGBAROW_AVX2
2369 2369
2370 #if defined(HAS_NV12TOARGBROW_AVX2) 2370 #if defined(HAS_NV12TOARGBROW_AVX2)
2371 // 16 pixels. 2371 // 16 pixels.
2372 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2372 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2373 void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf, 2373 void OMITFP NV12ToARGBRow_AVX2(const uint8* y_buf,
2374 const uint8* uv_buf, 2374 const uint8* uv_buf,
2375 uint8* dst_argb, 2375 uint8* dst_argb,
2376 struct YuvConstants* yuvconstants, 2376 const struct YuvConstants* yuvconstants,
2377 int width) { 2377 int width) {
2378 asm volatile ( 2378 asm volatile (
2379 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 2379 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2380 LABELALIGN 2380 LABELALIGN
2381 "1: \n" 2381 "1: \n"
2382 READNV12_AVX2 2382 READNV12_AVX2
2383 YUVTORGB_AVX2(yuvconstants) 2383 YUVTORGB_AVX2(yuvconstants)
2384 STOREARGB_AVX2 2384 STOREARGB_AVX2
2385 "sub $0x10,%[width] \n" 2385 "sub $0x10,%[width] \n"
2386 "jg 1b \n" 2386 "jg 1b \n"
2387 "vzeroupper \n" 2387 "vzeroupper \n"
2388 : [y_buf]"+r"(y_buf), // %[y_buf] 2388 : [y_buf]"+r"(y_buf), // %[y_buf]
2389 [uv_buf]"+r"(uv_buf), // %[uv_buf] 2389 [uv_buf]"+r"(uv_buf), // %[uv_buf]
2390 [dst_argb]"+r"(dst_argb), // %[dst_argb] 2390 [dst_argb]"+r"(dst_argb), // %[dst_argb]
2391 [width]"+rm"(width) // %[width] 2391 [width]"+rm"(width) // %[width]
2392 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 2392 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
2393 // Does not use r14. 2393 // Does not use r14.
2394 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 2394 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2395 ); 2395 );
2396 } 2396 }
2397 #endif // HAS_NV12TOARGBROW_AVX2 2397 #endif // HAS_NV12TOARGBROW_AVX2
2398 2398
2399 #if defined(HAS_NV21TOARGBROW_AVX2) 2399 #if defined(HAS_NV21TOARGBROW_AVX2)
2400 // 16 pixels. 2400 // 16 pixels.
2401 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2401 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2402 void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf, 2402 void OMITFP NV21ToARGBRow_AVX2(const uint8* y_buf,
2403 const uint8* vu_buf, 2403 const uint8* vu_buf,
2404 uint8* dst_argb, 2404 uint8* dst_argb,
2405 struct YuvConstants* yuvconstants, 2405 const struct YuvConstants* yuvconstants,
2406 int width) { 2406 int width) {
2407 asm volatile ( 2407 asm volatile (
2408 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 2408 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2409 LABELALIGN 2409 LABELALIGN
2410 "1: \n" 2410 "1: \n"
2411 READNV21_AVX2 2411 READNV21_AVX2
2412 YUVTORGB_AVX2(yuvconstants) 2412 YUVTORGB_AVX2(yuvconstants)
2413 STOREARGB_AVX2 2413 STOREARGB_AVX2
2414 "sub $0x10,%[width] \n" 2414 "sub $0x10,%[width] \n"
2415 "jg 1b \n" 2415 "jg 1b \n"
2416 "vzeroupper \n" 2416 "vzeroupper \n"
2417 : [y_buf]"+r"(y_buf), // %[y_buf] 2417 : [y_buf]"+r"(y_buf), // %[y_buf]
2418 [vu_buf]"+r"(vu_buf), // %[vu_buf] 2418 [vu_buf]"+r"(vu_buf), // %[vu_buf]
2419 [dst_argb]"+r"(dst_argb), // %[dst_argb] 2419 [dst_argb]"+r"(dst_argb), // %[dst_argb]
2420 [width]"+rm"(width) // %[width] 2420 [width]"+rm"(width) // %[width]
2421 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] 2421 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
2422 [kShuffleNV21]"m"(kShuffleNV21) 2422 [kShuffleNV21]"m"(kShuffleNV21)
2423 // Does not use r14. 2423 // Does not use r14.
2424 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 2424 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2425 ); 2425 );
2426 } 2426 }
2427 #endif // HAS_NV21TOARGBROW_AVX2 2427 #endif // HAS_NV21TOARGBROW_AVX2
2428 2428
2429 #if defined(HAS_YUY2TOARGBROW_AVX2) 2429 #if defined(HAS_YUY2TOARGBROW_AVX2)
2430 // 16 pixels. 2430 // 16 pixels.
2431 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). 2431 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
2432 void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf, 2432 void OMITFP YUY2ToARGBRow_AVX2(const uint8* yuy2_buf,
2433 uint8* dst_argb, 2433 uint8* dst_argb,
2434 struct YuvConstants* yuvconstants, 2434 const struct YuvConstants* yuvconstants,
2435 int width) { 2435 int width) {
2436 asm volatile ( 2436 asm volatile (
2437 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 2437 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2438 LABELALIGN 2438 LABELALIGN
2439 "1: \n" 2439 "1: \n"
2440 READYUY2_AVX2 2440 READYUY2_AVX2
2441 YUVTORGB_AVX2(yuvconstants) 2441 YUVTORGB_AVX2(yuvconstants)
2442 STOREARGB_AVX2 2442 STOREARGB_AVX2
2443 "sub $0x10,%[width] \n" 2443 "sub $0x10,%[width] \n"
2444 "jg 1b \n" 2444 "jg 1b \n"
2445 "vzeroupper \n" 2445 "vzeroupper \n"
2446 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf] 2446 : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf]
2447 [dst_argb]"+r"(dst_argb), // %[dst_argb] 2447 [dst_argb]"+r"(dst_argb), // %[dst_argb]
2448 [width]"+rm"(width) // %[width] 2448 [width]"+rm"(width) // %[width]
2449 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] 2449 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
2450 [kShuffleYUY2Y]"m"(kShuffleYUY2Y), 2450 [kShuffleYUY2Y]"m"(kShuffleYUY2Y),
2451 [kShuffleYUY2UV]"m"(kShuffleYUY2UV) 2451 [kShuffleYUY2UV]"m"(kShuffleYUY2UV)
2452 // Does not use r14. 2452 // Does not use r14.
2453 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 2453 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2454 ); 2454 );
2455 } 2455 }
2456 #endif // HAS_YUY2TOARGBROW_AVX2 2456 #endif // HAS_YUY2TOARGBROW_AVX2
2457 2457
2458 #if defined(HAS_UYVYTOARGBROW_AVX2) 2458 #if defined(HAS_UYVYTOARGBROW_AVX2)
2459 // 16 pixels. 2459 // 16 pixels.
2460 // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes). 2460 // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
2461 void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf, 2461 void OMITFP UYVYToARGBRow_AVX2(const uint8* uyvy_buf,
2462 uint8* dst_argb, 2462 uint8* dst_argb,
2463 struct YuvConstants* yuvconstants, 2463 const struct YuvConstants* yuvconstants,
2464 int width) { 2464 int width) {
2465 asm volatile ( 2465 asm volatile (
2466 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 2466 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2467 LABELALIGN 2467 LABELALIGN
2468 "1: \n" 2468 "1: \n"
2469 READUYVY_AVX2 2469 READUYVY_AVX2
2470 YUVTORGB_AVX2(yuvconstants) 2470 YUVTORGB_AVX2(yuvconstants)
2471 STOREARGB_AVX2 2471 STOREARGB_AVX2
2472 "sub $0x10,%[width] \n" 2472 "sub $0x10,%[width] \n"
2473 "jg 1b \n" 2473 "jg 1b \n"
(...skipping 3110 matching lines...) Expand 10 before | Expand all | Expand 10 after
5584 ); 5584 );
5585 } 5585 }
5586 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 5586 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
5587 5587
5588 #endif // defined(__x86_64__) || defined(__i386__) 5588 #endif // defined(__x86_64__) || defined(__i386__)
5589 5589
5590 #ifdef __cplusplus 5590 #ifdef __cplusplus
5591 } // extern "C" 5591 } // extern "C"
5592 } // namespace libyuv 5592 } // namespace libyuv
5593 #endif 5593 #endif
OLDNEW
« no previous file with comments | « source/row_common.cc ('k') | source/row_mips.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698