OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 1552 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1563 "palignr $0xc,%%xmm0,%%xmm1 \n" | 1563 "palignr $0xc,%%xmm0,%%xmm1 \n" |
1564 "movq %%xmm0," MEMACCESS([dst_rgb24]) "\n" | 1564 "movq %%xmm0," MEMACCESS([dst_rgb24]) "\n" |
1565 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n" | 1565 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n" |
1566 "lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n" | 1566 "lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n" |
1567 "subl $0x8,%[width] \n" | 1567 "subl $0x8,%[width] \n" |
1568 "jg 1b \n" | 1568 "jg 1b \n" |
1569 : [y_buf]"+r"(y_buf), // %[y_buf] | 1569 : [y_buf]"+r"(y_buf), // %[y_buf] |
1570 [u_buf]"+r"(u_buf), // %[u_buf] | 1570 [u_buf]"+r"(u_buf), // %[u_buf] |
1571 [v_buf]"+r"(v_buf), // %[v_buf] | 1571 [v_buf]"+r"(v_buf), // %[v_buf] |
1572 [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24] | 1572 [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24] |
1573 // TODO(fbarchard): Make width a register for 32 bit. | |
1574 #if defined(__i386__) && defined(__pic__) | 1573 #if defined(__i386__) && defined(__pic__) |
1575 [width]"+m"(width) // %[width] | 1574 [width]"+m"(width) // %[width] |
1576 #else | 1575 #else |
1577 [width]"+rm"(width) // %[width] | 1576 [width]"+rm"(width) // %[width] |
1578 #endif | 1577 #endif |
1579 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1578 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
1580 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), | 1579 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), |
1581 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) | 1580 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) |
1582 : "memory", "cc", NACL_R14 | 1581 : "memory", "cc", NACL_R14 |
1583 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" | 1582 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
(...skipping 24 matching lines...) Expand all Loading... |
1608 "palignr $0xc,%%xmm0,%%xmm1 \n" | 1607 "palignr $0xc,%%xmm0,%%xmm1 \n" |
1609 "movq %%xmm0," MEMACCESS([dst_raw]) " \n" | 1608 "movq %%xmm0," MEMACCESS([dst_raw]) " \n" |
1610 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n" | 1609 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n" |
1611 "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n" | 1610 "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n" |
1612 "subl $0x8,%[width] \n" | 1611 "subl $0x8,%[width] \n" |
1613 "jg 1b \n" | 1612 "jg 1b \n" |
1614 : [y_buf]"+r"(y_buf), // %[y_buf] | 1613 : [y_buf]"+r"(y_buf), // %[y_buf] |
1615 [u_buf]"+r"(u_buf), // %[u_buf] | 1614 [u_buf]"+r"(u_buf), // %[u_buf] |
1616 [v_buf]"+r"(v_buf), // %[v_buf] | 1615 [v_buf]"+r"(v_buf), // %[v_buf] |
1617 [dst_raw]"+r"(dst_raw), // %[dst_raw] | 1616 [dst_raw]"+r"(dst_raw), // %[dst_raw] |
1618 // TODO(fbarchard): Make width a register for 32 bit. | |
1619 #if defined(__i386__) && defined(__pic__) | 1617 #if defined(__i386__) && defined(__pic__) |
1620 [width]"+m"(width) // %[width] | 1618 [width]"+m"(width) // %[width] |
1621 #else | 1619 #else |
1622 [width]"+rm"(width) // %[width] | 1620 [width]"+rm"(width) // %[width] |
1623 #endif | 1621 #endif |
1624 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1622 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
1625 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), | 1623 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), |
1626 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) | 1624 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) |
1627 : "memory", "cc", NACL_R14 | 1625 : "memory", "cc", NACL_R14 |
1628 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" | 1626 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1663 uint8* dst_argb, | 1661 uint8* dst_argb, |
1664 struct YuvConstants* yuvconstants, | 1662 struct YuvConstants* yuvconstants, |
1665 int width) { | 1663 int width) { |
1666 asm volatile ( | 1664 asm volatile ( |
1667 "sub %[u_buf],%[v_buf] \n" | 1665 "sub %[u_buf],%[v_buf] \n" |
1668 LABELALIGN | 1666 LABELALIGN |
1669 "1: \n" | 1667 "1: \n" |
1670 READYUVA422 | 1668 READYUVA422 |
1671 YUVTORGB(yuvconstants) | 1669 YUVTORGB(yuvconstants) |
1672 STOREARGB | 1670 STOREARGB |
1673 "sub $0x8,%[width] \n" | 1671 "subl $0x8,%[width] \n" |
1674 "jg 1b \n" | 1672 "jg 1b \n" |
1675 : [y_buf]"+r"(y_buf), // %[y_buf] | 1673 : [y_buf]"+r"(y_buf), // %[y_buf] |
1676 [u_buf]"+r"(u_buf), // %[u_buf] | 1674 [u_buf]"+r"(u_buf), // %[u_buf] |
1677 [v_buf]"+r"(v_buf), // %[v_buf] | 1675 [v_buf]"+r"(v_buf), // %[v_buf] |
1678 [a_buf]"+r"(a_buf), // %[a_buf] | 1676 [a_buf]"+r"(a_buf), // %[a_buf] |
1679 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1677 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 1678 #if defined(__i386__) && defined(__pic__) |
| 1679 [width]"+m"(width) // %[width] |
| 1680 #else |
1680 [width]"+rm"(width) // %[width] | 1681 [width]"+rm"(width) // %[width] |
| 1682 #endif |
1681 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1683 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1682 : "memory", "cc", NACL_R14 | 1684 : "memory", "cc", NACL_R14 |
1683 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1685 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1684 ); | 1686 ); |
1685 } | 1687 } |
1686 | 1688 |
1687 void OMITFP I422AlphaToABGRRow_SSSE3(const uint8* y_buf, | 1689 void OMITFP I422AlphaToABGRRow_SSSE3(const uint8* y_buf, |
1688 const uint8* u_buf, | 1690 const uint8* u_buf, |
1689 const uint8* v_buf, | 1691 const uint8* v_buf, |
1690 const uint8* a_buf, | 1692 const uint8* a_buf, |
1691 uint8* dst_abgr, | 1693 uint8* dst_abgr, |
1692 struct YuvConstants* yuvconstants, | 1694 struct YuvConstants* yuvconstants, |
1693 int width) { | 1695 int width) { |
1694 asm volatile ( | 1696 asm volatile ( |
1695 "sub %[u_buf],%[v_buf] \n" | 1697 "sub %[u_buf],%[v_buf] \n" |
1696 LABELALIGN | 1698 LABELALIGN |
1697 "1: \n" | 1699 "1: \n" |
1698 READYUVA422 | 1700 READYUVA422 |
1699 YUVTORGB(yuvconstants) | 1701 YUVTORGB(yuvconstants) |
1700 STOREABGR | 1702 STOREABGR |
1701 "sub $0x8,%[width] \n" | 1703 "subl $0x8,%[width] \n" |
1702 "jg 1b \n" | 1704 "jg 1b \n" |
1703 : [y_buf]"+r"(y_buf), // %[y_buf] | 1705 : [y_buf]"+r"(y_buf), // %[y_buf] |
1704 [u_buf]"+r"(u_buf), // %[u_buf] | 1706 [u_buf]"+r"(u_buf), // %[u_buf] |
1705 [v_buf]"+r"(v_buf), // %[v_buf] | 1707 [v_buf]"+r"(v_buf), // %[v_buf] |
1706 [a_buf]"+r"(a_buf), // %[a_buf] | 1708 [a_buf]"+r"(a_buf), // %[a_buf] |
1707 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] | 1709 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] |
| 1710 #if defined(__i386__) && defined(__pic__) |
| 1711 [width]"+m"(width) // %[width] |
| 1712 #else |
1708 [width]"+rm"(width) // %[width] | 1713 [width]"+rm"(width) // %[width] |
| 1714 #endif |
1709 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1715 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
1710 : "memory", "cc", NACL_R14 | 1716 : "memory", "cc", NACL_R14 |
1711 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1717 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1712 ); | 1718 ); |
1713 } | 1719 } |
1714 | 1720 |
1715 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, | 1721 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, |
1716 const uint8* u_buf, | 1722 const uint8* u_buf, |
1717 const uint8* v_buf, | 1723 const uint8* v_buf, |
1718 uint8* dst_argb, | 1724 uint8* dst_argb, |
(...skipping 362 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2081 uint8* dst_argb, | 2087 uint8* dst_argb, |
2082 struct YuvConstants* yuvconstants, | 2088 struct YuvConstants* yuvconstants, |
2083 int width) { | 2089 int width) { |
2084 asm volatile ( | 2090 asm volatile ( |
2085 "sub %[u_buf],%[v_buf] \n" | 2091 "sub %[u_buf],%[v_buf] \n" |
2086 LABELALIGN | 2092 LABELALIGN |
2087 "1: \n" | 2093 "1: \n" |
2088 READYUVA422_AVX2 | 2094 READYUVA422_AVX2 |
2089 YUVTORGB_AVX2(yuvconstants) | 2095 YUVTORGB_AVX2(yuvconstants) |
2090 STOREARGB_AVX2 | 2096 STOREARGB_AVX2 |
2091 "sub $0x10,%[width] \n" | 2097 "subl $0x10,%[width] \n" |
2092 "jg 1b \n" | 2098 "jg 1b \n" |
2093 "vzeroupper \n" | 2099 "vzeroupper \n" |
2094 : [y_buf]"+r"(y_buf), // %[y_buf] | 2100 : [y_buf]"+r"(y_buf), // %[y_buf] |
2095 [u_buf]"+r"(u_buf), // %[u_buf] | 2101 [u_buf]"+r"(u_buf), // %[u_buf] |
2096 [v_buf]"+r"(v_buf), // %[v_buf] | 2102 [v_buf]"+r"(v_buf), // %[v_buf] |
2097 [a_buf]"+r"(a_buf), // %[a_buf] | 2103 [a_buf]"+r"(a_buf), // %[a_buf] |
2098 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 2104 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| 2105 #if defined(__i386__) && defined(__pic__) |
| 2106 [width]"+m"(width) // %[width] |
| 2107 #else |
2099 [width]"+rm"(width) // %[width] | 2108 [width]"+rm"(width) // %[width] |
| 2109 #endif |
2100 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 2110 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
2101 : "memory", "cc", NACL_R14 | 2111 : "memory", "cc", NACL_R14 |
2102 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2112 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
2103 ); | 2113 ); |
2104 } | 2114 } |
2105 #endif // HAS_I422ALPHATOARGBROW_AVX2 | 2115 #endif // HAS_I422ALPHATOARGBROW_AVX2 |
2106 | 2116 |
2107 #if defined(HAS_I422ALPHATOABGRROW_AVX2) | 2117 #if defined(HAS_I422ALPHATOABGRROW_AVX2) |
2108 // 16 pixels | 2118 // 16 pixels |
2109 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR. | 2119 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR. |
2110 void OMITFP I422AlphaToABGRRow_AVX2(const uint8* y_buf, | 2120 void OMITFP I422AlphaToABGRRow_AVX2(const uint8* y_buf, |
2111 const uint8* u_buf, | 2121 const uint8* u_buf, |
2112 const uint8* v_buf, | 2122 const uint8* v_buf, |
2113 const uint8* a_buf, | 2123 const uint8* a_buf, |
2114 uint8* dst_abgr, | 2124 uint8* dst_abgr, |
2115 struct YuvConstants* yuvconstants, | 2125 struct YuvConstants* yuvconstants, |
2116 int width) { | 2126 int width) { |
2117 asm volatile ( | 2127 asm volatile ( |
2118 "sub %[u_buf],%[v_buf] \n" | 2128 "sub %[u_buf],%[v_buf] \n" |
2119 LABELALIGN | 2129 LABELALIGN |
2120 "1: \n" | 2130 "1: \n" |
2121 READYUVA422_AVX2 | 2131 READYUVA422_AVX2 |
2122 YUVTORGB_AVX2(yuvconstants) | 2132 YUVTORGB_AVX2(yuvconstants) |
2123 STOREABGR_AVX2 | 2133 STOREABGR_AVX2 |
2124 "sub $0x10,%[width] \n" | 2134 "subl $0x10,%[width] \n" |
2125 "jg 1b \n" | 2135 "jg 1b \n" |
2126 "vzeroupper \n" | 2136 "vzeroupper \n" |
2127 : [y_buf]"+r"(y_buf), // %[y_buf] | 2137 : [y_buf]"+r"(y_buf), // %[y_buf] |
2128 [u_buf]"+r"(u_buf), // %[u_buf] | 2138 [u_buf]"+r"(u_buf), // %[u_buf] |
2129 [v_buf]"+r"(v_buf), // %[v_buf] | 2139 [v_buf]"+r"(v_buf), // %[v_buf] |
2130 [a_buf]"+r"(a_buf), // %[a_buf] | 2140 [a_buf]"+r"(a_buf), // %[a_buf] |
2131 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] | 2141 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] |
| 2142 #if defined(__i386__) && defined(__pic__) |
| 2143 [width]"+m"(width) // %[width] |
| 2144 #else |
2132 [width]"+rm"(width) // %[width] | 2145 [width]"+rm"(width) // %[width] |
| 2146 #endif |
2133 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 2147 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
2134 : "memory", "cc", NACL_R14 | 2148 : "memory", "cc", NACL_R14 |
2135 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2149 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
2136 ); | 2150 ); |
2137 } | 2151 } |
2138 #endif // HAS_I422ALPHATOABGRROW_AVX2 | 2152 #endif // HAS_I422ALPHATOABGRROW_AVX2 |
2139 | 2153 |
2140 #if defined(HAS_I422TOABGRROW_AVX2) | 2154 #if defined(HAS_I422TOABGRROW_AVX2) |
2141 // 16 pixels | 2155 // 16 pixels |
2142 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). | 2156 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). |
(...skipping 3426 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5569 ); | 5583 ); |
5570 } | 5584 } |
5571 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5585 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5572 | 5586 |
5573 #endif // defined(__x86_64__) || defined(__i386__) | 5587 #endif // defined(__x86_64__) || defined(__i386__) |
5574 | 5588 |
5575 #ifdef __cplusplus | 5589 #ifdef __cplusplus |
5576 } // extern "C" | 5590 } // extern "C" |
5577 } // namespace libyuv | 5591 } // namespace libyuv |
5578 #endif | 5592 #endif |
OLD | NEW |