Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(455)

Side by Side Diff: source/row_gcc.cc

Issue 2406123002: Remove I411 support, update doc and switch to side by side test (Closed)
Patch Set: bump version, disable a few lint warnings Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_common.cc ('k') | source/row_neon.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // VERSION 2 1 // VERSION 2
2 /* 2 /*
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * 4 *
5 * Use of this source code is governed by a BSD-style license 5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source 6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found 7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may 8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree. 9 * be found in the AUTHORS file in the root of the source tree.
10 */ 10 */
(...skipping 1508 matching lines...) Expand 10 before | Expand all | Expand 10 after
1519 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \ 1519 MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
1520 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \ 1520 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
1521 "punpcklbw %%xmm1,%%xmm0 \n" \ 1521 "punpcklbw %%xmm1,%%xmm0 \n" \
1522 "punpcklwd %%xmm0,%%xmm0 \n" \ 1522 "punpcklwd %%xmm0,%%xmm0 \n" \
1523 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ 1523 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1524 "punpcklbw %%xmm4,%%xmm4 \n" \ 1524 "punpcklbw %%xmm4,%%xmm4 \n" \
1525 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \ 1525 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
1526 "movq " MEMACCESS([a_buf]) ",%%xmm5 \n" \ 1526 "movq " MEMACCESS([a_buf]) ",%%xmm5 \n" \
1527 "lea " MEMLEA(0x8, [a_buf]) ",%[a_buf] \n" 1527 "lea " MEMLEA(0x8, [a_buf]) ",%[a_buf] \n"
1528 1528
1529 // Read 2 UV from 411, upsample to 8 UV.
1530 // reading 4 bytes is an msan violation.
1531 // "movd " MEMACCESS([u_buf]) ",%%xmm0 \n"
1532 // MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1)
1533 // pinsrw fails with drmemory
1534 // __asm pinsrw xmm0, [esi], 0 /* U */
1535 // __asm pinsrw xmm1, [esi + edi], 0 /* V */
1536 #define READYUV411_TEMP \
1537 "movzwl " MEMACCESS([u_buf]) ",%[temp] \n" \
1538 "movd %[temp],%%xmm0 \n" \
1539 MEMOPARG(movzwl, 0x00, [u_buf], [v_buf], 1, [temp]) " \n" \
1540 "movd %[temp],%%xmm1 \n" \
1541 "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \
1542 "punpcklbw %%xmm1,%%xmm0 \n" \
1543 "punpcklwd %%xmm0,%%xmm0 \n" \
1544 "punpckldq %%xmm0,%%xmm0 \n" \
1545 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1546 "punpcklbw %%xmm4,%%xmm4 \n" \
1547 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
1548
1549 // Read 4 UV from NV12, upsample to 8 UV 1529 // Read 4 UV from NV12, upsample to 8 UV
1550 #define READNV12 \ 1530 #define READNV12 \
1551 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ 1531 "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
1552 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \ 1532 "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \
1553 "punpcklwd %%xmm0,%%xmm0 \n" \ 1533 "punpcklwd %%xmm0,%%xmm0 \n" \
1554 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ 1534 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1555 "punpcklbw %%xmm4,%%xmm4 \n" \ 1535 "punpcklbw %%xmm4,%%xmm4 \n" \
1556 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" 1536 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
1557 1537
1558 // Read 4 VU from NV21, upsample to 8 UV 1538 // Read 4 VU from NV21, upsample to 8 UV
(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after
1797 #else 1777 #else
1798 [width]"+rm"(width) // %[width] 1778 [width]"+rm"(width) // %[width]
1799 #endif 1779 #endif
1800 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1780 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1801 : "memory", "cc", NACL_R14 YUVTORGB_REGS 1781 : "memory", "cc", NACL_R14 YUVTORGB_REGS
1802 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1782 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1803 ); 1783 );
1804 } 1784 }
1805 #endif // HAS_I422ALPHATOARGBROW_SSSE3 1785 #endif // HAS_I422ALPHATOARGBROW_SSSE3
1806 1786
1807 #ifdef HAS_I411TOARGBROW_SSSE3
1808 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
1809 const uint8* u_buf,
1810 const uint8* v_buf,
1811 uint8* dst_argb,
1812 const struct YuvConstants* yuvconstants,
1813 int width) {
1814 int temp;
1815 asm volatile (
1816 YUVTORGB_SETUP(yuvconstants)
1817 "sub %[u_buf],%[v_buf] \n"
1818 "pcmpeqb %%xmm5,%%xmm5 \n"
1819 LABELALIGN
1820 "1: \n"
1821 READYUV411_TEMP
1822 YUVTORGB(yuvconstants)
1823 STOREARGB
1824 "subl $0x8,%[width] \n"
1825 "jg 1b \n"
1826 : [y_buf]"+r"(y_buf), // %[y_buf]
1827 [u_buf]"+r"(u_buf), // %[u_buf]
1828 [v_buf]"+r"(v_buf), // %[v_buf]
1829 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1830 [temp]"=&r"(temp), // %[temp]
1831 #if defined(__i386__) && defined(__pic__)
1832 [width]"+m"(width) // %[width]
1833 #else
1834 [width]"+rm"(width) // %[width]
1835 #endif
1836 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1837 : "memory", "cc", NACL_R14 YUVTORGB_REGS
1838 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1839 );
1840 }
1841 #endif
1842
1843 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, 1787 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
1844 const uint8* uv_buf, 1788 const uint8* uv_buf,
1845 uint8* dst_argb, 1789 uint8* dst_argb,
1846 const struct YuvConstants* yuvconstants, 1790 const struct YuvConstants* yuvconstants,
1847 int width) { 1791 int width) {
1848 asm volatile ( 1792 asm volatile (
1849 YUVTORGB_SETUP(yuvconstants) 1793 YUVTORGB_SETUP(yuvconstants)
1850 "pcmpeqb %%xmm5,%%xmm5 \n" 1794 "pcmpeqb %%xmm5,%%xmm5 \n"
1851 LABELALIGN 1795 LABELALIGN
1852 "1: \n" 1796 "1: \n"
(...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after
2006 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ 1950 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
2007 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ 1951 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
2008 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ 1952 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
2009 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ 1953 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
2010 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ 1954 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
2011 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \ 1955 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \
2012 "vmovdqu " MEMACCESS([a_buf]) ",%%xmm5 \n" \ 1956 "vmovdqu " MEMACCESS([a_buf]) ",%%xmm5 \n" \
2013 "vpermq $0xd8,%%ymm5,%%ymm5 \n" \ 1957 "vpermq $0xd8,%%ymm5,%%ymm5 \n" \
2014 "lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n" 1958 "lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n"
2015 1959
2016 // Read 4 UV from 411, upsample to 16 UV.
2017 #define READYUV411_AVX2 \
2018 "vmovd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
2019 MEMOPREG(vmovd, 0x00, [u_buf], [v_buf], 1, xmm1) \
2020 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
2021 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
2022 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
2023 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
2024 "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" \
2025 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
2026 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
2027 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
2028 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
2029
2030 // Read 8 UV from NV12, upsample to 16 UV. 1960 // Read 8 UV from NV12, upsample to 16 UV.
2031 #define READNV12_AVX2 \ 1961 #define READNV12_AVX2 \
2032 "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ 1962 "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
2033 "lea " MEMLEA(0x10, [uv_buf]) ",%[uv_buf] \n" \ 1963 "lea " MEMLEA(0x10, [uv_buf]) ",%[uv_buf] \n" \
2034 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ 1964 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
2035 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ 1965 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
2036 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ 1966 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
2037 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ 1967 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
2038 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ 1968 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
2039 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" 1969 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
2156 [v_buf]"+r"(v_buf), // %[v_buf] 2086 [v_buf]"+r"(v_buf), // %[v_buf]
2157 [dst_argb]"+r"(dst_argb), // %[dst_argb] 2087 [dst_argb]"+r"(dst_argb), // %[dst_argb]
2158 [width]"+rm"(width) // %[width] 2088 [width]"+rm"(width) // %[width]
2159 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 2089 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
2160 : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 2090 : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
2161 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 2091 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2162 ); 2092 );
2163 } 2093 }
2164 #endif // HAS_I444TOARGBROW_AVX2 2094 #endif // HAS_I444TOARGBROW_AVX2
2165 2095
2166 #ifdef HAS_I411TOARGBROW_AVX2
2167 // 16 pixels
2168 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2169 void OMITFP I411ToARGBRow_AVX2(const uint8* y_buf,
2170 const uint8* u_buf,
2171 const uint8* v_buf,
2172 uint8* dst_argb,
2173 const struct YuvConstants* yuvconstants,
2174 int width) {
2175 asm volatile (
2176 YUVTORGB_SETUP_AVX2(yuvconstants)
2177 "sub %[u_buf],%[v_buf] \n"
2178 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2179 LABELALIGN
2180 "1: \n"
2181 READYUV411_AVX2
2182 YUVTORGB_AVX2(yuvconstants)
2183 STOREARGB_AVX2
2184 "sub $0x10,%[width] \n"
2185 "jg 1b \n"
2186 "vzeroupper \n"
2187 : [y_buf]"+r"(y_buf), // %[y_buf]
2188 [u_buf]"+r"(u_buf), // %[u_buf]
2189 [v_buf]"+r"(v_buf), // %[v_buf]
2190 [dst_argb]"+r"(dst_argb), // %[dst_argb]
2191 [width]"+rm"(width) // %[width]
2192 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
2193 : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
2194 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2195 );
2196 }
2197 #endif // HAS_I411TOARGBROW_AVX2
2198
2199 #if defined(HAS_I422TOARGBROW_AVX2) 2096 #if defined(HAS_I422TOARGBROW_AVX2)
2200 // 16 pixels 2097 // 16 pixels
2201 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2098 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2202 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, 2099 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
2203 const uint8* u_buf, 2100 const uint8* u_buf,
2204 const uint8* v_buf, 2101 const uint8* v_buf,
2205 uint8* dst_argb, 2102 uint8* dst_argb,
2206 const struct YuvConstants* yuvconstants, 2103 const struct YuvConstants* yuvconstants,
2207 int width) { 2104 int width) {
2208 asm volatile ( 2105 asm volatile (
(...skipping 3385 matching lines...) Expand 10 before | Expand all | Expand 10 after
5594 ); 5491 );
5595 } 5492 }
5596 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 5493 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
5597 5494
5598 #endif // defined(__x86_64__) || defined(__i386__) 5495 #endif // defined(__x86_64__) || defined(__i386__)
5599 5496
5600 #ifdef __cplusplus 5497 #ifdef __cplusplus
5601 } // extern "C" 5498 } // extern "C"
5602 } // namespace libyuv 5499 } // namespace libyuv
5603 #endif 5500 #endif
OLD | NEW
« no previous file with comments | « source/row_common.cc ('k') | source/row_neon.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698