Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(334)

Side by Side Diff: source/row_gcc.cc

Issue 1427993004: remove I422ToBGRA and use I422ToRGBA internally (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: lint fixes for warnings about comments in test Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_common.cc ('k') | source/row_mips.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // VERSION 2 1 // VERSION 2
2 /* 2 /*
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * 4 *
5 * Use of this source code is governed by a BSD-style license 5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source 6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found 7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may 8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree. 9 * be found in the AUTHORS file in the root of the source tree.
10 */ 10 */
(...skipping 1645 matching lines...) Expand 10 before | Expand all | Expand 10 after
1656 [width]"+rm"(width) // %[width] 1656 [width]"+rm"(width) // %[width]
1657 #endif 1657 #endif
1658 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] 1658 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1659 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), 1659 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0),
1660 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) 1660 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24)
1661 : "memory", "cc", NACL_R14 1661 : "memory", "cc", NACL_R14
1662 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" 1662 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
1663 ); 1663 );
1664 } 1664 }
1665 1665
// I422ToRAWRow_SSSE3: inline-asm row routine that reads from y_buf, u_buf and
// v_buf and writes packed output to dst_raw.
// Each loop pass stores 0x18 (24) bytes and subtracts 8 from width, i.e. three
// output bytes per unit of width, handled in groups of 8.
// The width test sits at the bottom of the loop, so the body always runs at
// least once and a width that is not a multiple of 8 is rounded up to the next
// group. NOTE(review): presumably callers guarantee width > 0 and deal with
// the remainder elsewhere -- confirm.
// READYUV422 and YUVTORGB are macros defined outside this view; presumably they
// load the Y/U/V samples and leave the colour channels in xmm0..xmm2 -- confirm.
1666 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf,
1667 const uint8* u_buf,
1668 const uint8* v_buf,
1669 uint8* dst_raw,
1670 const struct YuvConstants* yuvconstants,
1671 int width) {
1672 asm volatile (
// Load both shuffle masks once, before the loop; xmm5/xmm6 stay live throughout.
1673 "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n"
1674 "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n"
// v_buf becomes the distance from u_buf to v_buf (v_buf - u_buf).
1675 "sub %[u_buf],%[v_buf] \n"
1676 LABELALIGN
1677 "1: \n"
1678 READYUV422
1679 YUVTORGB(yuvconstants)
// Interleave xmm0 with xmm1 bytewise, double up the xmm2 bytes, then interleave
// the resulting words so xmm0/xmm1 hold the low/high halves as 4-byte groups.
1680 "punpcklbw %%xmm1,%%xmm0 \n"
1681 "punpcklbw %%xmm2,%%xmm2 \n"
1682 "movdqa %%xmm0,%%xmm1 \n"
1683 "punpcklwd %%xmm2,%%xmm0 \n"
1684 "punpckhwd %%xmm2,%%xmm1 \n"
// Apply the two byte shuffles, then shift the xmm1:xmm0 pair right by 12 bytes
// so that xmm1 carries the 16 bytes that follow the first 8 output bytes.
1685 "pshufb %%xmm5,%%xmm0 \n"
1686 "pshufb %%xmm6,%%xmm1 \n"
1687 "palignr $0xc,%%xmm0,%%xmm1 \n"
// Store 8 bytes at dst_raw and 16 bytes at dst_raw + 8, then advance by 24.
1688 "movq %%xmm0," MEMACCESS([dst_raw]) " \n"
1689 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n"
1690 "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n"
// Explicit 32-bit subtract: width may be a memory operand (see constraints below).
1691 "subl $0x8,%[width] \n"
1692 "jg 1b \n"
1693 : [y_buf]"+r"(y_buf), // %[y_buf]
1694 [u_buf]"+r"(u_buf), // %[u_buf]
1695 [v_buf]"+r"(v_buf), // %[v_buf]
1696 [dst_raw]"+r"(dst_raw), // %[dst_raw]
// On 32-bit PIC builds width is restricted to memory ("+m"); otherwise the
// compiler may choose a register or memory ("+rm").
// NOTE(review): presumably this works around register pressure on i386 PIC -- confirm.
1697 #if defined(__i386__) && defined(__pic__)
1698 [width]"+m"(width) // %[width]
1699 #else
1700 [width]"+rm"(width) // %[width]
1701 #endif
1702 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1703 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0),
1704 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW)
// Clobbers: memory, flags, whatever NACL_R14 expands to, and xmm0..xmm6.
1705 : "memory", "cc", NACL_R14
1706 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
1707 );
1708 }
1709
1710 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, 1666 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf,
1711 const uint8* u_buf, 1667 const uint8* u_buf,
1712 const uint8* v_buf, 1668 const uint8* v_buf,
1713 uint8* dst_argb, 1669 uint8* dst_argb,
1714 const struct YuvConstants* yuvconstants, 1670 const struct YuvConstants* yuvconstants,
1715 int width) { 1671 int width) {
1716 asm volatile ( 1672 asm volatile (
1717 "sub %[u_buf],%[v_buf] \n" 1673 "sub %[u_buf],%[v_buf] \n"
1718 "pcmpeqb %%xmm5,%%xmm5 \n" 1674 "pcmpeqb %%xmm5,%%xmm5 \n"
1719 LABELALIGN 1675 LABELALIGN
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
1885 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1841 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1886 [width]"+rm"(width) // %[width] 1842 [width]"+rm"(width) // %[width]
1887 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] 1843 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants]
1888 [kShuffleUYVYY]"m"(kShuffleUYVYY), 1844 [kShuffleUYVYY]"m"(kShuffleUYVYY),
1889 [kShuffleUYVYUV]"m"(kShuffleUYVYUV) 1845 [kShuffleUYVYUV]"m"(kShuffleUYVYUV)
1890 // Does not use r14. 1846 // Does not use r14.
1891 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1847 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1892 ); 1848 );
1893 } 1849 }
1894 1850
// I422ToBGRARow_SSSE3: inline-asm row routine that reads from y_buf, u_buf and
// v_buf and writes to dst_bgra, consuming 8 units of width per loop pass.
// The width test is at the bottom of the loop, so the body always executes at
// least once. NOTE(review): presumably callers pass width > 0 -- confirm.
// READYUV422, YUVTORGB and STOREBGRA are macros defined outside this view;
// presumably STOREBGRA performs the stores and advances dst_bgra -- confirm.
1895 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf,
1896 const uint8* u_buf,
1897 const uint8* v_buf,
1898 uint8* dst_bgra,
1899 const struct YuvConstants* yuvconstants,
1900 int width) {
1901 asm volatile (
// v_buf becomes the distance from u_buf to v_buf (v_buf - u_buf).
1902 "sub %[u_buf],%[v_buf] \n"
// Comparing xmm5 with itself sets every bit: xmm5 = sixteen 0xff bytes.
// NOTE(review): presumably consumed by STOREBGRA as the alpha channel -- confirm.
1903 "pcmpeqb %%xmm5,%%xmm5 \n"
1904 LABELALIGN
1905 "1: \n"
1906 READYUV422
1907 YUVTORGB(yuvconstants)
1908 STOREBGRA
// NOTE(review): `sub` carries no size suffix although width may be a memory
// operand ("+rm"); an explicit `subl` would be unambiguous -- confirm how the
// assemblers in use size this instruction.
1909 "sub $0x8,%[width] \n"
1910 "jg 1b \n"
1911 : [y_buf]"+r"(y_buf), // %[y_buf]
1912 [u_buf]"+r"(u_buf), // %[u_buf]
1913 [v_buf]"+r"(v_buf), // %[v_buf]
1914 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
1915 [width]"+rm"(width) // %[width]
1916 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
// Clobbers: memory, flags, whatever NACL_R14 expands to, and xmm0..xmm5.
1917 : "memory", "cc", NACL_R14
1918 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1919 );
1920 }
1921
// I422ToABGRRow_SSSE3: inline-asm row routine that reads from y_buf, u_buf and
// v_buf and writes to dst_abgr, consuming 8 units of width per loop pass.
// The width test is at the bottom of the loop, so the body always executes at
// least once. NOTE(review): presumably callers pass width > 0 -- confirm.
// READYUV422, YUVTORGB and STOREABGR are macros defined outside this view;
// presumably STOREABGR performs the stores and advances dst_abgr -- confirm.
1922 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf,
1923 const uint8* u_buf,
1924 const uint8* v_buf,
1925 uint8* dst_abgr,
1926 const struct YuvConstants* yuvconstants,
1927 int width) {
1928 asm volatile (
// v_buf becomes the distance from u_buf to v_buf (v_buf - u_buf).
1929 "sub %[u_buf],%[v_buf] \n"
// Comparing xmm5 with itself sets every bit: xmm5 = sixteen 0xff bytes.
// NOTE(review): presumably consumed by STOREABGR as the alpha channel -- confirm.
1930 "pcmpeqb %%xmm5,%%xmm5 \n"
1931 LABELALIGN
1932 "1: \n"
1933 READYUV422
1934 YUVTORGB(yuvconstants)
1935 STOREABGR
// NOTE(review): `sub` carries no size suffix although width may be a memory
// operand ("+rm"); an explicit `subl` would be unambiguous -- confirm how the
// assemblers in use size this instruction.
1936 "sub $0x8,%[width] \n"
1937 "jg 1b \n"
1938 : [y_buf]"+r"(y_buf), // %[y_buf]
1939 [u_buf]"+r"(u_buf), // %[u_buf]
1940 [v_buf]"+r"(v_buf), // %[v_buf]
1941 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
1942 [width]"+rm"(width) // %[width]
1943 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
// Clobbers: memory, flags, whatever NACL_R14 expands to, and xmm0..xmm5.
1944 : "memory", "cc", NACL_R14
1945 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1946 );
1947 }
1948
1949 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, 1851 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
1950 const uint8* u_buf, 1852 const uint8* u_buf,
1951 const uint8* v_buf, 1853 const uint8* v_buf,
1952 uint8* dst_rgba, 1854 uint8* dst_rgba,
1953 const struct YuvConstants* yuvconstants, 1855 const struct YuvConstants* yuvconstants,
1954 int width) { 1856 int width) {
1955 asm volatile ( 1857 asm volatile (
1956 "sub %[u_buf],%[v_buf] \n" 1858 "sub %[u_buf],%[v_buf] \n"
1957 "pcmpeqb %%xmm5,%%xmm5 \n" 1859 "pcmpeqb %%xmm5,%%xmm5 \n"
1958 LABELALIGN 1860 LABELALIGN
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after
2081 "vpunpcklbw %%ymm1,%%ymm2,%%ymm1 \n" \ 1983 "vpunpcklbw %%ymm1,%%ymm2,%%ymm1 \n" \
2082 "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ 1984 "vpermq $0xd8,%%ymm1,%%ymm1 \n" \
2083 "vpunpcklbw %%ymm5,%%ymm0,%%ymm2 \n" \ 1985 "vpunpcklbw %%ymm5,%%ymm0,%%ymm2 \n" \
2084 "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ 1986 "vpermq $0xd8,%%ymm2,%%ymm2 \n" \
2085 "vpunpcklwd %%ymm2,%%ymm1,%%ymm0 \n" \ 1987 "vpunpcklwd %%ymm2,%%ymm1,%%ymm0 \n" \
2086 "vpunpckhwd %%ymm2,%%ymm1,%%ymm1 \n" \ 1988 "vpunpckhwd %%ymm2,%%ymm1,%%ymm1 \n" \
2087 "vmovdqu %%ymm0," MEMACCESS([dst_abgr]) " \n" \ 1989 "vmovdqu %%ymm0," MEMACCESS([dst_abgr]) " \n" \
2088 "vmovdqu %%ymm1," MEMACCESS2(0x20, [dst_abgr]) " \n" \ 1990 "vmovdqu %%ymm1," MEMACCESS2(0x20, [dst_abgr]) " \n" \
2089 "lea " MEMLEA(0x40, [dst_abgr]) ", %[dst_abgr] \n" 1991 "lea " MEMLEA(0x40, [dst_abgr]) ", %[dst_abgr] \n"
2090 1992
// I422ToBGRARow_AVX2: AVX2 inline-asm row routine, compiled only when
// HAS_I422TOBGRAROW_AVX2 is defined. Each loop pass stores two 32-byte vectors
// (0x40 = 64 bytes) to dst_bgra and subtracts 0x10 (16) from width.
// The width test is at the bottom of the loop, so the body always executes at
// least once. NOTE(review): presumably callers pass width > 0 -- confirm.
// READYUV422_AVX2 and YUVTORGB_AVX2 are macros defined outside this view.
2091 #if defined(HAS_I422TOBGRAROW_AVX2)
2092 // 16 pixels
2093 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
2094 void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf,
2095 const uint8* u_buf,
2096 const uint8* v_buf,
2097 uint8* dst_bgra,
2098 const struct YuvConstants* yuvconstants,
2099 int width) {
2100 asm volatile (
// v_buf becomes the distance from u_buf to v_buf (v_buf - u_buf).
2101 "sub %[u_buf],%[v_buf] \n"
// Comparing ymm5 with itself sets every bit: ymm5 = thirty-two 0xff bytes,
// used below as the "A" half of the AR byte pairs.
2102 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2103 LABELALIGN
2104 "1: \n"
2105 READYUV422_AVX2
2106 YUVTORGB_AVX2(yuvconstants)
2107
// The unpack instructions interleave within each 128-bit half; every
// vpermq $0xd8 reorders the four 64-bit quarters as 0,2,1,3 around them.
// Following the lane comments below, the bytes of each stored pixel are
// ordered A,R,G,B in memory.
2108 // Step 3: Weave into BGRA
2109 "vpunpcklbw %%ymm0,%%ymm1,%%ymm1 \n" // GB
2110 "vpermq $0xd8,%%ymm1,%%ymm1 \n"
2111 "vpunpcklbw %%ymm2,%%ymm5,%%ymm2 \n" // AR
2112 "vpermq $0xd8,%%ymm2,%%ymm2 \n"
2113 "vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n" // ARGB first 8 pixels
2114 "vpunpckhwd %%ymm1,%%ymm2,%%ymm2 \n" // ARGB next 8 pixels
// Store 32 bytes at dst_bgra and 32 bytes at dst_bgra + 0x20, then advance by 64.
2115 "vmovdqu %%ymm0," MEMACCESS([dst_bgra]) "\n"
2116 "vmovdqu %%ymm2," MEMACCESS2(0x20,[dst_bgra]) "\n"
2117 "lea " MEMLEA(0x40,[dst_bgra]) ",%[dst_bgra] \n"
// NOTE(review): `sub` carries no size suffix although width may be a memory
// operand ("+rm"); an explicit `subl` would be unambiguous -- confirm.
2118 "sub $0x10,%[width] \n"
2119 "jg 1b \n"
// Zero the upper halves of the ymm registers once the loop has finished.
2120 "vzeroupper \n"
2121 : [y_buf]"+r"(y_buf), // %[y_buf]
2122 [u_buf]"+r"(u_buf), // %[u_buf]
2123 [v_buf]"+r"(v_buf), // %[v_buf]
2124 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra]
2125 [width]"+rm"(width) // %[width]
2126 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
// Clobbers: memory, flags, whatever NACL_R14 expands to, and xmm0..xmm5.
2127 : "memory", "cc", NACL_R14
2128 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2129 );
2130 }
2131 #endif // HAS_I422TOBGRAROW_AVX2
2132
2133 #if defined(HAS_I422TOARGBROW_AVX2) 1993 #if defined(HAS_I422TOARGBROW_AVX2)
2134 // 16 pixels 1994 // 16 pixels
2135 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 1995 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2136 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, 1996 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
2137 const uint8* u_buf, 1997 const uint8* u_buf,
2138 const uint8* v_buf, 1998 const uint8* v_buf,
2139 uint8* dst_argb, 1999 uint8* dst_argb,
2140 const struct YuvConstants* yuvconstants, 2000 const struct YuvConstants* yuvconstants,
2141 int width) { 2001 int width) {
2142 asm volatile ( 2002 asm volatile (
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
2192 #else 2052 #else
2193 [width]"+rm"(width) // %[width] 2053 [width]"+rm"(width) // %[width]
2194 #endif 2054 #endif
2195 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 2055 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
2196 : "memory", "cc", NACL_R14 2056 : "memory", "cc", NACL_R14
2197 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 2057 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2198 ); 2058 );
2199 } 2059 }
2200 #endif // HAS_I422ALPHATOARGBROW_AVX2 2060 #endif // HAS_I422ALPHATOARGBROW_AVX2
2201 2061
// I422ToABGRRow_AVX2: AVX2 inline-asm row routine, compiled only when
// HAS_I422TOABGRROW_AVX2 is defined. Each loop pass subtracts 0x10 (16) from
// width; the width test is at the bottom of the loop, so the body always
// executes at least once. NOTE(review): presumably callers pass width > 0 -- confirm.
// READYUV422_AVX2, YUVTORGB_AVX2 and STOREABGR_AVX2 are macros defined outside
// this block; presumably STOREABGR_AVX2 performs the stores and advances
// dst_abgr -- confirm.
2202 #if defined(HAS_I422TOABGRROW_AVX2)
2203 // 16 pixels
2204 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
2205 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf,
2206 const uint8* u_buf,
2207 const uint8* v_buf,
2208 uint8* dst_abgr,
2209 const struct YuvConstants* yuvconstants,
2210 int width) {
2211 asm volatile (
// v_buf becomes the distance from u_buf to v_buf (v_buf - u_buf).
2212 "sub %[u_buf],%[v_buf] \n"
// Comparing ymm5 with itself sets every bit: ymm5 = thirty-two 0xff bytes.
// NOTE(review): presumably consumed by STOREABGR_AVX2 as the alpha channel -- confirm.
2213 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2214 LABELALIGN
2215 "1: \n"
2216 READYUV422_AVX2
2217 YUVTORGB_AVX2(yuvconstants)
2218 STOREABGR_AVX2
// NOTE(review): `sub` carries no size suffix although width may be a memory
// operand ("+rm"); an explicit `subl` would be unambiguous -- confirm.
2219 "sub $0x10,%[width] \n"
2220 "jg 1b \n"
// Zero the upper halves of the ymm registers once the loop has finished.
2221 "vzeroupper \n"
2222 : [y_buf]"+r"(y_buf), // %[y_buf]
2223 [u_buf]"+r"(u_buf), // %[u_buf]
2224 [v_buf]"+r"(v_buf), // %[v_buf]
2225 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr]
2226 [width]"+rm"(width) // %[width]
2227 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
// Clobbers: memory, flags, whatever NACL_R14 expands to, and xmm0..xmm5.
2228 : "memory", "cc", NACL_R14
2229 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2230 );
2231 }
2232 #endif // HAS_I422TOABGRROW_AVX2
2233
2234 #if defined(HAS_I422TORGBAROW_AVX2) 2062 #if defined(HAS_I422TORGBAROW_AVX2)
2235 // 16 pixels 2063 // 16 pixels
2236 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). 2064 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
2237 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, 2065 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf,
2238 const uint8* u_buf, 2066 const uint8* u_buf,
2239 const uint8* v_buf, 2067 const uint8* v_buf,
2240 uint8* dst_argb, 2068 uint8* dst_argb,
2241 const struct YuvConstants* yuvconstants, 2069 const struct YuvConstants* yuvconstants,
2242 int width) { 2070 int width) {
2243 asm volatile ( 2071 asm volatile (
(...skipping 3248 matching lines...) Expand 10 before | Expand all | Expand 10 after
5492 ); 5320 );
5493 } 5321 }
5494 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 5322 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
5495 5323
5496 #endif // defined(__x86_64__) || defined(__i386__) 5324 #endif // defined(__x86_64__) || defined(__i386__)
5497 5325
5498 #ifdef __cplusplus 5326 #ifdef __cplusplus
5499 } // extern "C" 5327 } // extern "C"
5500 } // namespace libyuv 5328 } // namespace libyuv
5501 #endif 5329 #endif
OLDNEW
« no previous file with comments | « source/row_common.cc ('k') | source/row_mips.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698