Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1254)

Side by Side Diff: source/row_gcc.cc

Issue 1369053002: Keep width in memory instead of a register for 32-bit -fPIC builds. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: apply fix to SSSE3 as well as AVX2 (Created 5 years, 2 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // VERSION 2 1 // VERSION 2
2 /* 2 /*
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * 4 *
5 * Use of this source code is governed by a BSD-style license 5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source 6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found 7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may 8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree. 9 * be found in the AUTHORS file in the root of the source tree.
10 */ 10 */
(...skipping 1552 matching lines...) Expand 10 before | Expand all | Expand 10 after
1563 "palignr $0xc,%%xmm0,%%xmm1 \n" 1563 "palignr $0xc,%%xmm0,%%xmm1 \n"
1564 "movq %%xmm0," MEMACCESS([dst_rgb24]) "\n" 1564 "movq %%xmm0," MEMACCESS([dst_rgb24]) "\n"
1565 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n" 1565 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_rgb24]) "\n"
1566 "lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n" 1566 "lea " MEMLEA(0x18,[dst_rgb24]) ",%[dst_rgb24] \n"
1567 "subl $0x8,%[width] \n" 1567 "subl $0x8,%[width] \n"
1568 "jg 1b \n" 1568 "jg 1b \n"
1569 : [y_buf]"+r"(y_buf), // %[y_buf] 1569 : [y_buf]"+r"(y_buf), // %[y_buf]
1570 [u_buf]"+r"(u_buf), // %[u_buf] 1570 [u_buf]"+r"(u_buf), // %[u_buf]
1571 [v_buf]"+r"(v_buf), // %[v_buf] 1571 [v_buf]"+r"(v_buf), // %[v_buf]
1572 [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24] 1572 [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24]
1573 // TODO(fbarchard): Make width a register for 32 bit.
1574 #if defined(__i386__) && defined(__pic__) 1573 #if defined(__i386__) && defined(__pic__)
1575 [width]"+m"(width) // %[width] 1574 [width]"+m"(width) // %[width]
1576 #else 1575 #else
1577 [width]"+rm"(width) // %[width] 1576 [width]"+rm"(width) // %[width]
1578 #endif 1577 #endif
1579 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] 1578 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1580 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), 1579 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
1581 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) 1580 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
1582 : "memory", "cc", NACL_R14 1581 : "memory", "cc", NACL_R14
1583 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" 1582 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
(...skipping 24 matching lines...) Expand all
1608 "palignr $0xc,%%xmm0,%%xmm1 \n" 1607 "palignr $0xc,%%xmm0,%%xmm1 \n"
1609 "movq %%xmm0," MEMACCESS([dst_raw]) " \n" 1608 "movq %%xmm0," MEMACCESS([dst_raw]) " \n"
1610 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n" 1609 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n"
1611 "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n" 1610 "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n"
1612 "subl $0x8,%[width] \n" 1611 "subl $0x8,%[width] \n"
1613 "jg 1b \n" 1612 "jg 1b \n"
1614 : [y_buf]"+r"(y_buf), // %[y_buf] 1613 : [y_buf]"+r"(y_buf), // %[y_buf]
1615 [u_buf]"+r"(u_buf), // %[u_buf] 1614 [u_buf]"+r"(u_buf), // %[u_buf]
1616 [v_buf]"+r"(v_buf), // %[v_buf] 1615 [v_buf]"+r"(v_buf), // %[v_buf]
1617 [dst_raw]"+r"(dst_raw), // %[dst_raw] 1616 [dst_raw]"+r"(dst_raw), // %[dst_raw]
1618 // TODO(fbarchard): Make width a register for 32 bit.
1619 #if defined(__i386__) && defined(__pic__) 1617 #if defined(__i386__) && defined(__pic__)
1620 [width]"+m"(width) // %[width] 1618 [width]"+m"(width) // %[width]
1621 #else 1619 #else
1622 [width]"+rm"(width) // %[width] 1620 [width]"+rm"(width) // %[width]
1623 #endif 1621 #endif
1624 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] 1622 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1625 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), 1623 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
1626 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) 1624 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
1627 : "memory", "cc", NACL_R14 1625 : "memory", "cc", NACL_R14
1628 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" 1626 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
1663 uint8* dst_argb, 1661 uint8* dst_argb,
1664 struct YuvConstants* yuvconstants, 1662 struct YuvConstants* yuvconstants,
1665 int width) { 1663 int width) {
1666 asm volatile ( 1664 asm volatile (
1667 "sub %[u_buf],%[v_buf] \n" 1665 "sub %[u_buf],%[v_buf] \n"
1668 LABELALIGN 1666 LABELALIGN
1669 "1: \n" 1667 "1: \n"
1670 READYUVA422 1668 READYUVA422
1671 YUVTORGB(yuvconstants) 1669 YUVTORGB(yuvconstants)
1672 STOREARGB 1670 STOREARGB
1673 "sub $0x8,%[width] \n" 1671 "subl $0x8,%[width] \n"
1674 "jg 1b \n" 1672 "jg 1b \n"
1675 : [y_buf]"+r"(y_buf), // %[y_buf] 1673 : [y_buf]"+r"(y_buf), // %[y_buf]
1676 [u_buf]"+r"(u_buf), // %[u_buf] 1674 [u_buf]"+r"(u_buf), // %[u_buf]
1677 [v_buf]"+r"(v_buf), // %[v_buf] 1675 [v_buf]"+r"(v_buf), // %[v_buf]
1678 [a_buf]"+r"(a_buf), // %[a_buf] 1676 [a_buf]"+r"(a_buf), // %[a_buf]
1679 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1677 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1678 #if defined(__i386__) && defined(__pic__)
1679 [width]"+m"(width) // %[width]
1680 #else
1680 [width]"+rm"(width) // %[width] 1681 [width]"+rm"(width) // %[width]
1682 #endif
1681 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1683 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1682 : "memory", "cc", NACL_R14 1684 : "memory", "cc", NACL_R14
1683 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1685 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1684 ); 1686 );
1685 } 1687 }
1686 1688
1687 void OMITFP I422AlphaToABGRRow_SSSE3(const uint8* y_buf, 1689 void OMITFP I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
1688 const uint8* u_buf, 1690 const uint8* u_buf,
1689 const uint8* v_buf, 1691 const uint8* v_buf,
1690 const uint8* a_buf, 1692 const uint8* a_buf,
1691 uint8* dst_abgr, 1693 uint8* dst_abgr,
1692 struct YuvConstants* yuvconstants, 1694 struct YuvConstants* yuvconstants,
1693 int width) { 1695 int width) {
1694 asm volatile ( 1696 asm volatile (
1695 "sub %[u_buf],%[v_buf] \n" 1697 "sub %[u_buf],%[v_buf] \n"
1696 LABELALIGN 1698 LABELALIGN
1697 "1: \n" 1699 "1: \n"
1698 READYUVA422 1700 READYUVA422
1699 YUVTORGB(yuvconstants) 1701 YUVTORGB(yuvconstants)
1700 STOREABGR 1702 STOREABGR
1701 "sub $0x8,%[width] \n" 1703 "subl $0x8,%[width] \n"
1702 "jg 1b \n" 1704 "jg 1b \n"
1703 : [y_buf]"+r"(y_buf), // %[y_buf] 1705 : [y_buf]"+r"(y_buf), // %[y_buf]
1704 [u_buf]"+r"(u_buf), // %[u_buf] 1706 [u_buf]"+r"(u_buf), // %[u_buf]
1705 [v_buf]"+r"(v_buf), // %[v_buf] 1707 [v_buf]"+r"(v_buf), // %[v_buf]
1706 [a_buf]"+r"(a_buf), // %[a_buf] 1708 [a_buf]"+r"(a_buf), // %[a_buf]
1707 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] 1709 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
1710 #if defined(__i386__) && defined(__pic__)
1711 [width]"+m"(width) // %[width]
1712 #else
1708 [width]"+rm"(width) // %[width] 1713 [width]"+rm"(width) // %[width]
1714 #endif
1709 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1715 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1710 : "memory", "cc", NACL_R14 1716 : "memory", "cc", NACL_R14
1711 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1717 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1712 ); 1718 );
1713 } 1719 }
1714 1720
1715 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, 1721 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
1716 const uint8* u_buf, 1722 const uint8* u_buf,
1717 const uint8* v_buf, 1723 const uint8* v_buf,
1718 uint8* dst_argb, 1724 uint8* dst_argb,
(...skipping 362 matching lines...) Expand 10 before | Expand all | Expand 10 after
2081 uint8* dst_argb, 2087 uint8* dst_argb,
2082 struct YuvConstants* yuvconstants, 2088 struct YuvConstants* yuvconstants,
2083 int width) { 2089 int width) {
2084 asm volatile ( 2090 asm volatile (
2085 "sub %[u_buf],%[v_buf] \n" 2091 "sub %[u_buf],%[v_buf] \n"
2086 LABELALIGN 2092 LABELALIGN
2087 "1: \n" 2093 "1: \n"
2088 READYUVA422_AVX2 2094 READYUVA422_AVX2
2089 YUVTORGB_AVX2(yuvconstants) 2095 YUVTORGB_AVX2(yuvconstants)
2090 STOREARGB_AVX2 2096 STOREARGB_AVX2
2091 "sub $0x10,%[width] \n" 2097 "subl $0x10,%[width] \n"
2092 "jg 1b \n" 2098 "jg 1b \n"
2093 "vzeroupper \n" 2099 "vzeroupper \n"
2094 : [y_buf]"+r"(y_buf), // %[y_buf] 2100 : [y_buf]"+r"(y_buf), // %[y_buf]
2095 [u_buf]"+r"(u_buf), // %[u_buf] 2101 [u_buf]"+r"(u_buf), // %[u_buf]
2096 [v_buf]"+r"(v_buf), // %[v_buf] 2102 [v_buf]"+r"(v_buf), // %[v_buf]
2097 [a_buf]"+r"(a_buf), // %[a_buf] 2103 [a_buf]"+r"(a_buf), // %[a_buf]
2098 [dst_argb]"+r"(dst_argb), // %[dst_argb] 2104 [dst_argb]"+r"(dst_argb), // %[dst_argb]
2105 #if defined(__i386__) && defined(__pic__)
2106 [width]"+m"(width) // %[width]
2107 #else
2099 [width]"+rm"(width) // %[width] 2108 [width]"+rm"(width) // %[width]
2109 #endif
2100 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 2110 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
2101 : "memory", "cc", NACL_R14 2111 : "memory", "cc", NACL_R14
2102 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 2112 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2103 ); 2113 );
2104 } 2114 }
2105 #endif // HAS_I422ALPHATOARGBROW_AVX2 2115 #endif // HAS_I422ALPHATOARGBROW_AVX2
2106 2116
2107 #if defined(HAS_I422ALPHATOABGRROW_AVX2) 2117 #if defined(HAS_I422ALPHATOABGRROW_AVX2)
2108 // 16 pixels 2118 // 16 pixels
2109 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR. 2119 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR.
2110 void OMITFP I422AlphaToABGRRow_AVX2(const uint8* y_buf, 2120 void OMITFP I422AlphaToABGRRow_AVX2(const uint8* y_buf,
2111 const uint8* u_buf, 2121 const uint8* u_buf,
2112 const uint8* v_buf, 2122 const uint8* v_buf,
2113 const uint8* a_buf, 2123 const uint8* a_buf,
2114 uint8* dst_abgr, 2124 uint8* dst_abgr,
2115 struct YuvConstants* yuvconstants, 2125 struct YuvConstants* yuvconstants,
2116 int width) { 2126 int width) {
2117 asm volatile ( 2127 asm volatile (
2118 "sub %[u_buf],%[v_buf] \n" 2128 "sub %[u_buf],%[v_buf] \n"
2119 LABELALIGN 2129 LABELALIGN
2120 "1: \n" 2130 "1: \n"
2121 READYUVA422_AVX2 2131 READYUVA422_AVX2
2122 YUVTORGB_AVX2(yuvconstants) 2132 YUVTORGB_AVX2(yuvconstants)
2123 STOREABGR_AVX2 2133 STOREABGR_AVX2
2124 "sub $0x10,%[width] \n" 2134 "subl $0x10,%[width] \n"
2125 "jg 1b \n" 2135 "jg 1b \n"
2126 "vzeroupper \n" 2136 "vzeroupper \n"
2127 : [y_buf]"+r"(y_buf), // %[y_buf] 2137 : [y_buf]"+r"(y_buf), // %[y_buf]
2128 [u_buf]"+r"(u_buf), // %[u_buf] 2138 [u_buf]"+r"(u_buf), // %[u_buf]
2129 [v_buf]"+r"(v_buf), // %[v_buf] 2139 [v_buf]"+r"(v_buf), // %[v_buf]
2130 [a_buf]"+r"(a_buf), // %[a_buf] 2140 [a_buf]"+r"(a_buf), // %[a_buf]
2131 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] 2141 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
2142 #if defined(__i386__) && defined(__pic__)
2143 [width]"+m"(width) // %[width]
2144 #else
2132 [width]"+rm"(width) // %[width] 2145 [width]"+rm"(width) // %[width]
2146 #endif
2133 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 2147 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
2134 : "memory", "cc", NACL_R14 2148 : "memory", "cc", NACL_R14
2135 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 2149 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2136 ); 2150 );
2137 } 2151 }
2138 #endif // HAS_I422ALPHATOABGRROW_AVX2 2152 #endif // HAS_I422ALPHATOABGRROW_AVX2
2139 2153
2140 #if defined(HAS_I422TOABGRROW_AVX2) 2154 #if defined(HAS_I422TOABGRROW_AVX2)
2141 // 16 pixels 2155 // 16 pixels
2142 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). 2156 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
(...skipping 3426 matching lines...) Expand 10 before | Expand all | Expand 10 after
5569 ); 5583 );
5570 } 5584 }
5571 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 5585 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
5572 5586
5573 #endif // defined(__x86_64__) || defined(__i386__) 5587 #endif // defined(__x86_64__) || defined(__i386__)
5574 5588
5575 #ifdef __cplusplus 5589 #ifdef __cplusplus
5576 } // extern "C" 5590 } // extern "C"
5577 } // namespace libyuv 5591 } // namespace libyuv
5578 #endif 5592 #endif
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698