OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 1645 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1656 [width]"+rm"(width) // %[width] | 1656 [width]"+rm"(width) // %[width] |
1657 #endif | 1657 #endif |
1658 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1658 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
1659 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), | 1659 [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), |
1660 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) | 1660 [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) |
1661 : "memory", "cc", NACL_R14 | 1661 : "memory", "cc", NACL_R14 |
1662 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" | 1662 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
1663 ); | 1663 ); |
1664 } | 1664 } |
1665 | 1665 |
1666 void OMITFP I422ToRAWRow_SSSE3(const uint8* y_buf, | |
1667 const uint8* u_buf, | |
1668 const uint8* v_buf, | |
1669 uint8* dst_raw, | |
1670 const struct YuvConstants* yuvconstants, | |
1671 int width) { | |
1672 asm volatile ( | |
1673 "movdqa %[kShuffleMaskARGBToRAW_0],%%xmm5 \n" | |
1674 "movdqa %[kShuffleMaskARGBToRAW],%%xmm6 \n" | |
1675 "sub %[u_buf],%[v_buf] \n" | |
1676 LABELALIGN | |
1677 "1: \n" | |
1678 READYUV422 | |
1679 YUVTORGB(yuvconstants) | |
1680 "punpcklbw %%xmm1,%%xmm0 \n" | |
1681 "punpcklbw %%xmm2,%%xmm2 \n" | |
1682 "movdqa %%xmm0,%%xmm1 \n" | |
1683 "punpcklwd %%xmm2,%%xmm0 \n" | |
1684 "punpckhwd %%xmm2,%%xmm1 \n" | |
1685 "pshufb %%xmm5,%%xmm0 \n" | |
1686 "pshufb %%xmm6,%%xmm1 \n" | |
1687 "palignr $0xc,%%xmm0,%%xmm1 \n" | |
1688 "movq %%xmm0," MEMACCESS([dst_raw]) " \n" | |
1689 "movdqu %%xmm1," MEMACCESS2(0x8,[dst_raw]) "\n" | |
1690 "lea " MEMLEA(0x18,[dst_raw]) ",%[dst_raw] \n" | |
1691 "subl $0x8,%[width] \n" | |
1692 "jg 1b \n" | |
1693 : [y_buf]"+r"(y_buf), // %[y_buf] | |
1694 [u_buf]"+r"(u_buf), // %[u_buf] | |
1695 [v_buf]"+r"(v_buf), // %[v_buf] | |
1696 [dst_raw]"+r"(dst_raw), // %[dst_raw] | |
1697 #if defined(__i386__) && defined(__pic__) | |
1698 [width]"+m"(width) // %[width] | |
1699 #else | |
1700 [width]"+rm"(width) // %[width] | |
1701 #endif | |
1702 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | |
1703 [kShuffleMaskARGBToRAW_0]"m"(kShuffleMaskARGBToRAW_0), | |
1704 [kShuffleMaskARGBToRAW]"m"(kShuffleMaskARGBToRAW) | |
1705 : "memory", "cc", NACL_R14 | |
1706 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" | |
1707 ); | |
1708 } | |
1709 | |
1710 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, | 1666 void OMITFP I422ToARGBRow_SSSE3(const uint8* y_buf, |
1711 const uint8* u_buf, | 1667 const uint8* u_buf, |
1712 const uint8* v_buf, | 1668 const uint8* v_buf, |
1713 uint8* dst_argb, | 1669 uint8* dst_argb, |
1714 const struct YuvConstants* yuvconstants, | 1670 const struct YuvConstants* yuvconstants, |
1715 int width) { | 1671 int width) { |
1716 asm volatile ( | 1672 asm volatile ( |
1717 "sub %[u_buf],%[v_buf] \n" | 1673 "sub %[u_buf],%[v_buf] \n" |
1718 "pcmpeqb %%xmm5,%%xmm5 \n" | 1674 "pcmpeqb %%xmm5,%%xmm5 \n" |
1719 LABELALIGN | 1675 LABELALIGN |
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1885 [dst_argb]"+r"(dst_argb), // %[dst_argb] | 1841 [dst_argb]"+r"(dst_argb), // %[dst_argb] |
1886 [width]"+rm"(width) // %[width] | 1842 [width]"+rm"(width) // %[width] |
1887 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] | 1843 : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] |
1888 [kShuffleUYVYY]"m"(kShuffleUYVYY), | 1844 [kShuffleUYVYY]"m"(kShuffleUYVYY), |
1889 [kShuffleUYVYUV]"m"(kShuffleUYVYUV) | 1845 [kShuffleUYVYUV]"m"(kShuffleUYVYUV) |
1890 // Does not use r14. | 1846 // Does not use r14. |
1891 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 1847 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
1892 ); | 1848 ); |
1893 } | 1849 } |
1894 | 1850 |
1895 void OMITFP I422ToBGRARow_SSSE3(const uint8* y_buf, | |
1896 const uint8* u_buf, | |
1897 const uint8* v_buf, | |
1898 uint8* dst_bgra, | |
1899 const struct YuvConstants* yuvconstants, | |
1900 int width) { | |
1901 asm volatile ( | |
1902 "sub %[u_buf],%[v_buf] \n" | |
1903 "pcmpeqb %%xmm5,%%xmm5 \n" | |
1904 LABELALIGN | |
1905 "1: \n" | |
1906 READYUV422 | |
1907 YUVTORGB(yuvconstants) | |
1908 STOREBGRA | |
1909 "sub $0x8,%[width] \n" | |
1910 "jg 1b \n" | |
1911 : [y_buf]"+r"(y_buf), // %[y_buf] | |
1912 [u_buf]"+r"(u_buf), // %[u_buf] | |
1913 [v_buf]"+r"(v_buf), // %[v_buf] | |
1914 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] | |
1915 [width]"+rm"(width) // %[width] | |
1916 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | |
1917 : "memory", "cc", NACL_R14 | |
1918 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | |
1919 ); | |
1920 } | |
1921 | |
1922 void OMITFP I422ToABGRRow_SSSE3(const uint8* y_buf, | |
1923 const uint8* u_buf, | |
1924 const uint8* v_buf, | |
1925 uint8* dst_abgr, | |
1926 const struct YuvConstants* yuvconstants, | |
1927 int width) { | |
1928 asm volatile ( | |
1929 "sub %[u_buf],%[v_buf] \n" | |
1930 "pcmpeqb %%xmm5,%%xmm5 \n" | |
1931 LABELALIGN | |
1932 "1: \n" | |
1933 READYUV422 | |
1934 YUVTORGB(yuvconstants) | |
1935 STOREABGR | |
1936 "sub $0x8,%[width] \n" | |
1937 "jg 1b \n" | |
1938 : [y_buf]"+r"(y_buf), // %[y_buf] | |
1939 [u_buf]"+r"(u_buf), // %[u_buf] | |
1940 [v_buf]"+r"(v_buf), // %[v_buf] | |
1941 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] | |
1942 [width]"+rm"(width) // %[width] | |
1943 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | |
1944 : "memory", "cc", NACL_R14 | |
1945 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | |
1946 ); | |
1947 } | |
1948 | |
1949 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, | 1851 void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf, |
1950 const uint8* u_buf, | 1852 const uint8* u_buf, |
1951 const uint8* v_buf, | 1853 const uint8* v_buf, |
1952 uint8* dst_rgba, | 1854 uint8* dst_rgba, |
1953 const struct YuvConstants* yuvconstants, | 1855 const struct YuvConstants* yuvconstants, |
1954 int width) { | 1856 int width) { |
1955 asm volatile ( | 1857 asm volatile ( |
1956 "sub %[u_buf],%[v_buf] \n" | 1858 "sub %[u_buf],%[v_buf] \n" |
1957 "pcmpeqb %%xmm5,%%xmm5 \n" | 1859 "pcmpeqb %%xmm5,%%xmm5 \n" |
1958 LABELALIGN | 1860 LABELALIGN |
(...skipping 122 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2081 "vpunpcklbw %%ymm1,%%ymm2,%%ymm1 \n" \ | 1983 "vpunpcklbw %%ymm1,%%ymm2,%%ymm1 \n" \ |
2082 "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ | 1984 "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ |
2083 "vpunpcklbw %%ymm5,%%ymm0,%%ymm2 \n" \ | 1985 "vpunpcklbw %%ymm5,%%ymm0,%%ymm2 \n" \ |
2084 "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ | 1986 "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ |
2085 "vpunpcklwd %%ymm2,%%ymm1,%%ymm0 \n" \ | 1987 "vpunpcklwd %%ymm2,%%ymm1,%%ymm0 \n" \ |
2086 "vpunpckhwd %%ymm2,%%ymm1,%%ymm1 \n" \ | 1988 "vpunpckhwd %%ymm2,%%ymm1,%%ymm1 \n" \ |
2087 "vmovdqu %%ymm0," MEMACCESS([dst_abgr]) " \n" \ | 1989 "vmovdqu %%ymm0," MEMACCESS([dst_abgr]) " \n" \ |
2088 "vmovdqu %%ymm1," MEMACCESS2(0x20, [dst_abgr]) " \n" \ | 1990 "vmovdqu %%ymm1," MEMACCESS2(0x20, [dst_abgr]) " \n" \ |
2089 "lea " MEMLEA(0x40, [dst_abgr]) ", %[dst_abgr] \n" | 1991 "lea " MEMLEA(0x40, [dst_abgr]) ", %[dst_abgr] \n" |
2090 | 1992 |
2091 #if defined(HAS_I422TOBGRAROW_AVX2) | |
2092 // 16 pixels | |
2093 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). | |
2094 void OMITFP I422ToBGRARow_AVX2(const uint8* y_buf, | |
2095 const uint8* u_buf, | |
2096 const uint8* v_buf, | |
2097 uint8* dst_bgra, | |
2098 const struct YuvConstants* yuvconstants, | |
2099 int width) { | |
2100 asm volatile ( | |
2101 "sub %[u_buf],%[v_buf] \n" | |
2102 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | |
2103 LABELALIGN | |
2104 "1: \n" | |
2105 READYUV422_AVX2 | |
2106 YUVTORGB_AVX2(yuvconstants) | |
2107 | |
2108 // Step 3: Weave into BGRA | |
2109 "vpunpcklbw %%ymm0,%%ymm1,%%ymm1 \n" // GB | |
2110 "vpermq $0xd8,%%ymm1,%%ymm1 \n" | |
2111 "vpunpcklbw %%ymm2,%%ymm5,%%ymm2 \n" // AR | |
2112 "vpermq $0xd8,%%ymm2,%%ymm2 \n" | |
2113 "vpunpcklwd %%ymm1,%%ymm2,%%ymm0 \n" // ARGB first 8 pixels | |
2114 "vpunpckhwd %%ymm1,%%ymm2,%%ymm2 \n" // ARGB next 8 pixels | |
2115 "vmovdqu %%ymm0," MEMACCESS([dst_bgra]) "\n" | |
2116 "vmovdqu %%ymm2," MEMACCESS2(0x20,[dst_bgra]) "\n" | |
2117 "lea " MEMLEA(0x40,[dst_bgra]) ",%[dst_bgra] \n" | |
2118 "sub $0x10,%[width] \n" | |
2119 "jg 1b \n" | |
2120 "vzeroupper \n" | |
2121 : [y_buf]"+r"(y_buf), // %[y_buf] | |
2122 [u_buf]"+r"(u_buf), // %[u_buf] | |
2123 [v_buf]"+r"(v_buf), // %[v_buf] | |
2124 [dst_bgra]"+r"(dst_bgra), // %[dst_bgra] | |
2125 [width]"+rm"(width) // %[width] | |
2126 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | |
2127 : "memory", "cc", NACL_R14 | |
2128 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | |
2129 ); | |
2130 } | |
2131 #endif // HAS_I422TOBGRAROW_AVX2 | |
2132 | |
2133 #if defined(HAS_I422TOARGBROW_AVX2) | 1993 #if defined(HAS_I422TOARGBROW_AVX2) |
2134 // 16 pixels | 1994 // 16 pixels |
2135 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 1995 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
2136 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, | 1996 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, |
2137 const uint8* u_buf, | 1997 const uint8* u_buf, |
2138 const uint8* v_buf, | 1998 const uint8* v_buf, |
2139 uint8* dst_argb, | 1999 uint8* dst_argb, |
2140 const struct YuvConstants* yuvconstants, | 2000 const struct YuvConstants* yuvconstants, |
2141 int width) { | 2001 int width) { |
2142 asm volatile ( | 2002 asm volatile ( |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2192 #else | 2052 #else |
2193 [width]"+rm"(width) // %[width] | 2053 [width]"+rm"(width) // %[width] |
2194 #endif | 2054 #endif |
2195 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | 2055 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] |
2196 : "memory", "cc", NACL_R14 | 2056 : "memory", "cc", NACL_R14 |
2197 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 2057 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
2198 ); | 2058 ); |
2199 } | 2059 } |
2200 #endif // HAS_I422ALPHATOARGBROW_AVX2 | 2060 #endif // HAS_I422ALPHATOARGBROW_AVX2 |
2201 | 2061 |
2202 #if defined(HAS_I422TOABGRROW_AVX2) | |
2203 // 16 pixels | |
2204 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). | |
2205 void OMITFP I422ToABGRRow_AVX2(const uint8* y_buf, | |
2206 const uint8* u_buf, | |
2207 const uint8* v_buf, | |
2208 uint8* dst_abgr, | |
2209 const struct YuvConstants* yuvconstants, | |
2210 int width) { | |
2211 asm volatile ( | |
2212 "sub %[u_buf],%[v_buf] \n" | |
2213 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" | |
2214 LABELALIGN | |
2215 "1: \n" | |
2216 READYUV422_AVX2 | |
2217 YUVTORGB_AVX2(yuvconstants) | |
2218 STOREABGR_AVX2 | |
2219 "sub $0x10,%[width] \n" | |
2220 "jg 1b \n" | |
2221 "vzeroupper \n" | |
2222 : [y_buf]"+r"(y_buf), // %[y_buf] | |
2223 [u_buf]"+r"(u_buf), // %[u_buf] | |
2224 [v_buf]"+r"(v_buf), // %[v_buf] | |
2225 [dst_abgr]"+r"(dst_abgr), // %[dst_abgr] | |
2226 [width]"+rm"(width) // %[width] | |
2227 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] | |
2228 : "memory", "cc", NACL_R14 | |
2229 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | |
2230 ); | |
2231 } | |
2232 #endif // HAS_I422TOABGRROW_AVX2 | |
2233 | |
2234 #if defined(HAS_I422TORGBAROW_AVX2) | 2062 #if defined(HAS_I422TORGBAROW_AVX2) |
2235 // 16 pixels | 2063 // 16 pixels |
2236 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). | 2064 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). |
2237 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, | 2065 void OMITFP I422ToRGBARow_AVX2(const uint8* y_buf, |
2238 const uint8* u_buf, | 2066 const uint8* u_buf, |
2239 const uint8* v_buf, | 2067 const uint8* v_buf, |
2240 uint8* dst_argb, | 2068 uint8* dst_argb, |
2241 const struct YuvConstants* yuvconstants, | 2069 const struct YuvConstants* yuvconstants, |
2242 int width) { | 2070 int width) { |
2243 asm volatile ( | 2071 asm volatile ( |
(...skipping 3248 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5492 ); | 5320 ); |
5493 } | 5321 } |
5494 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5322 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5495 | 5323 |
5496 #endif // defined(__x86_64__) || defined(__i386__) | 5324 #endif // defined(__x86_64__) || defined(__i386__) |
5497 | 5325 |
5498 #ifdef __cplusplus | 5326 #ifdef __cplusplus |
5499 } // extern "C" | 5327 } // extern "C" |
5500 } // namespace libyuv | 5328 } // namespace libyuv |
5501 #endif | 5329 #endif |
OLD | NEW |