| OLD | NEW |
| 1 // VERSION 2 | 1 // VERSION 2 |
| 2 /* | 2 /* |
| 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
| 6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
| 7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
| 8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
| 9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
| 10 */ | 10 */ |
| (...skipping 1552 matching lines...) |
| 1563 "palignr $0xc,%%xmm0,%%xmm1 \n" | 1563 "palignr $0xc,%%xmm0,%%xmm1 \n" |
| 1564 "movq %%xmm0," MEMACCESS([dst_rgb24]) "\n" | 1564 "movq %%xmm0," MEMACCESS([dst_rgb24]) "\n" |
| 1565 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n" | 1565 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n" |
| 1566 "lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n" | 1566 "lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n" |
| 1567 "subl $0x8,%[width] \n" | 1567 "subl $0x8,%[width] \n" |
| 1568 "jg 1b \n" | 1568 "jg 1b \n" |
| 1569 : [y_buf]"+r"(y_buf), // %[y_buf] | 1569 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1570 [u_buf]"+r"(u_buf), // %[u_buf] | 1570 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1571 [v_buf]"+r"(v_buf), // %[v_buf] | 1571 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1572 [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24] | 1572 [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24] |
| 1573 // TODO(fbarchard): Make width a register for 32 bit. | |
| 1574 #if defined(__i386__) && defined(__pic__) | 1573 #if defined(__i386__) && defined(__pic__) |
| 1575 [width]"+m"(width) // %[width] | 1574 [width]"+m"(width) // %[width] |
| 1576 #else | 1575 #else |
| 1577 [width]"+rm"(width) // %[width] | 1576 [width]"+rm"(width) // %[width] |
| 1578 #endif | 1577 #endif |
| 1579 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1578 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
| 1580 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), | 1579 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), |
| 1581 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) | 1580 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) |
| 1582 : "memory", "cc", NACL_R14 | 1581 : "memory", "cc", NACL_R14 |
| 1583 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" | 1582 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
| (...skipping 24 matching lines...) |
| 1608 "palignr $0xc,%%xmm0,%%xmm1 \n" | 1607 "palignr $0xc,%%xmm0,%%xmm1 \n" |
| 1609 "movq %%xmm0," MEMACCESS([dst_raw]) " \n" | 1608 "movq %%xmm0," MEMACCESS([dst_raw]) " \n" |
| 1610 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n" | 1609 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n" |
| 1611 "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n" | 1610 "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n" |
| 1612 "subl $0x8,%[width] \n" | 1611 "subl $0x8,%[width] \n" |
| 1613 "jg 1b \n" | 1612 "jg 1b \n" |
| 1614 : [y_buf]"+r"(y_buf), // %[y_buf] | 1613 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1615 [u_buf]"+r"(u_buf), // %[u_buf] | 1614 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1616 [v_buf]"+r"(v_buf), // %[v_buf] | 1615 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1617 [dst_raw]"+r"(dst_raw), // %[dst_raw] | 1616 [dst_raw]"+r"(dst_raw), // %[dst_raw] |
| 1618 // TODO(fbarchard): Make width a register for 32 bit. | |
| 1619 #if defined(__i386__) && defined(__pic__) | 1617 #if defined(__i386__) && defined(__pic__) |
| 1620 [width]"+m"(width) // %[width] | 1618 [width]"+m"(width) // %[width] |
| 1621 #else | 1619 #else |
| 1622 [width]"+rm"(width) // %[width] | 1620 [width]"+rm"(width) // %[width] |
| 1623 #endif | 1621 #endif |
| 1624 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1622 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
| 1625 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), | 1623 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), |
| 1626 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) | 1624 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) |
| 1627 : "memory", "cc", NACL_R14 | 1625 : "memory", "cc", NACL_R14 |
| 1628 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" | 1626 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
| (...skipping 34 matching lines...) |
| 1663 uint8* dst_argb, | 1661 uint8* dst_argb, |
| 1664 struct YuvConstants* yuvconstants, | 1662 struct YuvConstants* yuvconstants, |
| 1665 int width) { | 1663 int width) { |
| 1666 asm volatile ( | 1664 asm volatile ( |
| 1667 "sub %[u_buf],%[v_buf] \n" | 1665 "sub %[u_buf],%[v_buf] \n" |
| 1668 LABELALIGN | 1666 LABELALIGN |
| 1669 "1: \n" | 1667 "1: \n" |
| 1670 READYUVA422 | 1668 READYUVA422 |
| 1671 YUVTORGB(yuvconstants) | 1669 YUVTORGB(yuvconstants) |
| 1672 STOREARGB | 1670 STOREARGB |
| 1673 "sub $0x8,%[width] \n" | 1671 "subl $0x8,%[width] \n" |
| 1674 "jg 1b \n" | 1672 "jg 1b \n" |
| 1675 : [y_buf]"+r"(y_buf), // %[y_buf] | 1673 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1676 [u_buf]"+r"(u_buf), // %[u_buf] | 1674 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1677 [v_buf]"+r"(v_buf), // %[v_buf] | 1675 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1678 [a_buf]"+r"(a_buf), // %[a_buf] | 1676 [a_buf]"+r"(a_buf), // %[a_buf] |
| 1679 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1677 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| | 1678 #if defined(__i386__) && defined(__pic__) |
| | 1679 [width]"+m"(width) // %[width] |
| | 1680 #else |
| 1680 [width]"+rm"(width) // %[width] | 1681 [width]"+rm"(width) // %[width] |
| | 1682 #endif |
| 1681 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1683 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1682 : "memory", "cc", NACL_R14 | 1684 : "memory", "cc", NACL_R14 |
| 1683 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1685 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1684 ); | 1686 ); |
| 1685 } | 1687 } |
| 1686 | 1688 |
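
(Note, not part of the patch: the recurring #if defined(__i386__) && defined(__pic__) / "+m" change above appears to exist because 32-bit PIC builds lose a general-purpose register to the GOT pointer and these loops already tie up every register they can get, so width is kept in memory there; subl then spells out the operand size, which a bare sub cannot infer from a memory operand. A minimal standalone sketch of the same constraint pattern follows; the function, loop body, and names are illustrative, not libyuv code.)

    // Sketch only: shows the "+m" vs "+rm" constraint split and the explicit
    // 'subl' size suffix; the byte-copy loop itself is just a placeholder.
    static void CopyRow_Sketch(const unsigned char* src, unsigned char* dst,
                               int width) {
      asm volatile (
        "1:                                        \n"
        "movzbl (%[src]),%%eax                     \n"
        "movb   %%al,(%[dst])                      \n"
        "lea    0x1(%[src]),%[src]                 \n"
        "lea    0x1(%[dst]),%[dst]                 \n"
        // Explicit 'l' suffix: %[width] may be a memory operand below, so the
        // assembler cannot deduce the operand size from a register name.
        "subl   $0x1,%[width]                      \n"
        "jg     1b                                 \n"
      : [src]"+r"(src),
        [dst]"+r"(dst),
    #if defined(__i386__) && defined(__pic__)
        [width]"+m"(width)   // 32-bit PIC: too few free registers, keep width in memory.
    #else
        [width]"+rm"(width)  // Elsewhere: let the compiler pick a register or memory.
    #endif
      :
      : "memory", "cc", "eax");
    }
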
| 1687 void OMITFP I422AlphaToABGRRow_SSSE3(const uint8* y_buf, | 1689 void OMITFP I422AlphaToABGRRow_SSSE3(const uint8* y_buf, |
| 1688 const uint8* u_buf, | 1690 const uint8* u_buf, |
| 1689 const uint8* v_buf, | 1691 const uint8* v_buf, |
| 1690 const uint8* a_buf, | 1692 const uint8* a_buf, |
| 1691 uint8* dst_abgr, | 1693 uint8* dst_abgr, |
| 1692 struct YuvConstants* yuvconstants, | 1694 struct YuvConstants* yuvconstants, |
| 1693 int width) { | 1695 int width) { |
| 1694 asm volatile ( | 1696 asm volatile ( |
| 1695 "sub %[u_buf],%[v_buf] \n" | 1697 "sub %[u_buf],%[v_buf] \n" |
| 1696 LABELALIGN | 1698 LABELALIGN |
| 1697 "1: \n" | 1699 "1: \n" |
| 1698 READYUVA422 | 1700 READYUVA422 |
| 1699 YUVTORGB(yuvconstants) | 1701 YUVTORGB(yuvconstants) |
| 1700 STOREABGR | 1702 STOREABGR |
| 1701 "sub $0x8,%[width] \n" | 1703 "subl $0x8,%[width] \n" |
| 1702 "jg 1b \n" | 1704 "jg 1b \n" |
| 1703 : [y_buf]"+r"(y_buf), // %[y_buf] | 1705 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 1704 [u_buf]"+r"(u_buf), // %[u_buf] | 1706 [u_buf]"+r"(u_buf), // %[u_buf] |
| 1705 [v_buf]"+r"(v_buf), // %[v_buf] | 1707 [v_buf]"+r"(v_buf), // %[v_buf] |
| 1706 [a_buf]"+r"(a_buf), // %[a_buf] | 1708 [a_buf]"+r"(a_buf), // %[a_buf] |
| 1707 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] | 1709 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] |
| | 1710 #if defined(__i386__) && defined(__pic__) |
| | 1711 [width]"+m"(width) // %[width] |
| | 1712 #else |
| 1708 [width]"+rm"(width) // %[width] | 1713 [width]"+rm"(width) // %[width] |
| | 1714 #endif |
| 1709 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 1715 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 1710 : "memory", "cc", NACL_R14 | 1716 : "memory", "cc", NACL_R14 |
| 1711 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1717 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 1712 ); | 1718 ); |
| 1713 } | 1719 } |
| 1714 | 1720 |
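
(Note, not part of the patch: every row function here begins with "sub %[u_buf],%[v_buf]", which turns v_buf into a byte offset from u_buf so that the READYUV/READYUVA macros can reach both chroma planes through a single base register plus that offset. Roughly the same idea in scalar C, with illustrative names; strictly speaking the pointer subtraction is only well-defined in C when both planes sit in one allocation, a detail the asm does not care about.)

    #include <stddef.h>  // ptrdiff_t

    // Sketch only: read U and V through one base pointer plus a precomputed
    // delta, mirroring "sub %[u_buf],%[v_buf]" in the asm above.
    static void InterleaveUV_Sketch(const unsigned char* u_buf,
                                    const unsigned char* v_buf,
                                    unsigned char* dst_uv, int pairs) {
      ptrdiff_t v_from_u = v_buf - u_buf;  // computed once, like the 'sub'
      for (int i = 0; i < pairs; ++i) {
        dst_uv[2 * i + 0] = u_buf[i];             // U: base pointer
        dst_uv[2 * i + 1] = u_buf[i + v_from_u];  // V: base + delta
      }
    }
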
| 1715 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, | 1721 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, |
| 1716 const uint8* u_buf, | 1722 const uint8* u_buf, |
| 1717 const uint8* v_buf, | 1723 const uint8* v_buf, |
| 1718 uint8* dst_argb, | 1724 uint8* dst_argb, |
| (...skipping 362 matching lines...) |
| 2081 uint8* dst_argb, | 2087 uint8* dst_argb, |
| 2082 struct YuvConstants* yuvconstants, | 2088 struct YuvConstants* yuvconstants, |
| 2083 int width) { | 2089 int width) { |
| 2084 asm volatile ( | 2090 asm volatile ( |
| 2085 "sub %[u_buf],%[v_buf] \n" | 2091 "sub %[u_buf],%[v_buf] \n" |
| 2086 LABELALIGN | 2092 LABELALIGN |
| 2087 "1: \n" | 2093 "1: \n" |
| 2088 READYUVA422_AVX2 | 2094 READYUVA422_AVX2 |
| 2089 YUVTORGB_AVX2(yuvconstants) | 2095 YUVTORGB_AVX2(yuvconstants) |
| 2090 STOREARGB_AVX2 | 2096 STOREARGB_AVX2 |
| 2091 "sub $0x10,%[width] \n" | 2097 "subl $0x10,%[width] \n" |
| 2092 "jg 1b \n" | 2098 "jg 1b \n" |
| 2093 "vzeroupper \n" | 2099 "vzeroupper \n" |
| 2094 : [y_buf]"+r"(y_buf), // %[y_buf] | 2100 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 2095 [u_buf]"+r"(u_buf), // %[u_buf] | 2101 [u_buf]"+r"(u_buf), // %[u_buf] |
| 2096 [v_buf]"+r"(v_buf), // %[v_buf] | 2102 [v_buf]"+r"(v_buf), // %[v_buf] |
| 2097 [a_buf]"+r"(a_buf), // %[a_buf] | 2103 [a_buf]"+r"(a_buf), // %[a_buf] |
| 2098 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 2104 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
| | 2105 #if defined(__i386__) && defined(__pic__) |
| | 2106 [width]"+m"(width) // %[width] |
| | 2107 #else |
| 2099 [width]"+rm"(width) // %[width] | 2108 [width]"+rm"(width) // %[width] |
| | 2109 #endif |
| 2100 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 2110 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 2101 : "memory", "cc", NACL_R14 | 2111 : "memory", "cc", NACL_R14 |
| 2102 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2112 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 2103 ); | 2113 ); |
| 2104 } | 2114 } |
| 2105 #endif // HAS_I422ALPHATOARGBROW_AVX2 | 2115 #endif // HAS_I422ALPHATOARGBROW_AVX2 |
| 2106 | 2116 |
| 2107 #if defined(HAS_I422ALPHATOABGRROW_AVX2) | 2117 #if defined(HAS_I422ALPHATOABGRROW_AVX2) |
| 2108 // 16 pixels | 2118 // 16 pixels |
| 2109 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR. | 2119 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR. |
| 2110 void OMITFP I422AlphaToABGRRow_AVX2(const uint8* y_buf, | 2120 void OMITFP I422AlphaToABGRRow_AVX2(const uint8* y_buf, |
| 2111 const uint8* u_buf, | 2121 const uint8* u_buf, |
| 2112 const uint8* v_buf, | 2122 const uint8* v_buf, |
| 2113 const uint8* a_buf, | 2123 const uint8* a_buf, |
| 2114 uint8* dst_abgr, | 2124 uint8* dst_abgr, |
| 2115 struct YuvConstants* yuvconstants, | 2125 struct YuvConstants* yuvconstants, |
| 2116 int width) { | 2126 int width) { |
| 2117 asm volatile ( | 2127 asm volatile ( |
| 2118 "sub %[u_buf],%[v_buf] \n" | 2128 "sub %[u_buf],%[v_buf] \n" |
| 2119 LABELALIGN | 2129 LABELALIGN |
| 2120 "1: \n" | 2130 "1: \n" |
| 2121 READYUVA422_AVX2 | 2131 READYUVA422_AVX2 |
| 2122 YUVTORGB_AVX2(yuvconstants) | 2132 YUVTORGB_AVX2(yuvconstants) |
| 2123 STOREABGR_AVX2 | 2133 STOREABGR_AVX2 |
| 2124 "sub $0x10,%[width] \n" | 2134 "subl $0x10,%[width] \n" |
| 2125 "jg 1b \n" | 2135 "jg 1b \n" |
| 2126 "vzeroupper \n" | 2136 "vzeroupper \n" |
| 2127 : [y_buf]"+r"(y_buf), // %[y_buf] | 2137 : [y_buf]"+r"(y_buf), // %[y_buf] |
| 2128 [u_buf]"+r"(u_buf), // %[u_buf] | 2138 [u_buf]"+r"(u_buf), // %[u_buf] |
| 2129 [v_buf]"+r"(v_buf), // %[v_buf] | 2139 [v_buf]"+r"(v_buf), // %[v_buf] |
| 2130 [a_buf]"+r"(a_buf), // %[a_buf] | 2140 [a_buf]"+r"(a_buf), // %[a_buf] |
| 2131 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] | 2141 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] |
| | 2142 #if defined(__i386__) && defined(__pic__) |
| | 2143 [width]"+m"(width) // %[width] |
| | 2144 #else |
| 2132 [width]"+rm"(width) // %[width] | 2145 [width]"+rm"(width) // %[width] |
| | 2146 #endif |
| 2133 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 2147 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
| 2134 : "memory", "cc", NACL_R14 | 2148 : "memory", "cc", NACL_R14 |
| 2135 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2149 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 2136 ); | 2150 ); |
| 2137 } | 2151 } |
| 2138 #endif // HAS_I422ALPHATOABGRROW_AVX2 | 2152 #endif // HAS_I422ALPHATOABGRROW_AVX2 |
| 2139 | 2153 |
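
(Note, not part of the patch: the AVX2 alpha rows above do the same work as the SSSE3 ones, just 16 pixels per iteration: 8 U/V samples each cover two Y samples, are converted through the coefficients in yuvconstants, and are interleaved with the alpha plane. As a rough scalar reference for a single output pixel, using generic BT.601 studio-swing coefficients purely as a stand-in; the real coefficients and biases come from yuvconstants.)

    // Scalar sketch of one I422-with-alpha pixel to a libyuv ARGB byte quad
    // (bytes stored B, G, R, A). BT.601 studio-swing coefficients are assumed
    // here for illustration only.
    static unsigned char Clamp255(int v) {
      return (unsigned char)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }

    static void YuvaToARGB_Sketch(unsigned char y, unsigned char u,
                                  unsigned char v, unsigned char a,
                                  unsigned char argb[4]) {
      int c = ((int)y - 16) * 298;
      int d = (int)u - 128;
      int e = (int)v - 128;
      argb[0] = Clamp255((c + 516 * d + 128) >> 8);            // B
      argb[1] = Clamp255((c - 100 * d - 208 * e + 128) >> 8);  // G
      argb[2] = Clamp255((c + 409 * e + 128) >> 8);            // R
      argb[3] = a;  // alpha is copied through, as in the READYUVA/STORE macros
    }
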
| 2140 #if defined(HAS_I422TOABGRROW_AVX2) | 2154 #if defined(HAS_I422TOABGRROW_AVX2) |
| 2141 // 16 pixels | 2155 // 16 pixels |
| 2142 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). | 2156 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). |
| (...skipping 3426 matching lines...) |
| 5569 ); | 5583 ); |
| 5570 } | 5584 } |
| 5571 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5585 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
| 5572 | 5586 |
| 5573 #endif // defined(__x86_64__) || defined(__i386__) | 5587 #endif // defined(__x86_64__) || defined(__i386__) |
| 5574 | 5588 |
| 5575 #ifdef __cplusplus | 5589 #ifdef __cplusplus |
| 5576 } // extern "C" | 5590 } // extern "C" |
| 5577 } // namespace libyuv | 5591 } // namespace libyuv |
| 5578 #endif | 5592 #endif |