Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(588)

Side by Side Diff: source/row_gcc.cc

Issue 1687253002: Port I411ToARGBRow to AVX2. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « include/libyuv/version.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // VERSION 2 1 // VERSION 2
2 /* 2 /*
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * 4 *
5 * Use of this source code is governed by a BSD-style license 5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source 6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found 7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may 8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree. 9 * be found in the AUTHORS file in the root of the source tree.
10 */ 10 */
(...skipping 1413 matching lines...) Expand 10 before | Expand all | Expand 10 after
1424 "m"(kRGBAToV), // %5 1424 "m"(kRGBAToV), // %5
1425 "m"(kRGBAToU), // %6 1425 "m"(kRGBAToU), // %6
1426 "m"(kAddUV128) // %7 1426 "m"(kAddUV128) // %7
1427 : "memory", "cc", NACL_R14 1427 : "memory", "cc", NACL_R14
1428 "xmm0", "xmm1", "xmm2", "xmm6", "xmm7" 1428 "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"
1429 ); 1429 );
1430 } 1430 }
1431 1431
1432 #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) 1432 #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2)
1433 1433
1434 // Read 8 UV from 411 1434 // Read 8 UV from 444
// READYUV444: inline-asm fragment (SSE) that fetches one group of 8 pixels.
//  - movq loads 8 bytes from u_buf into xmm0 (U samples).
//  - The MEMOPREG line loads 8 bytes into xmm1; presumably these are the V
//    samples addressed as u_buf + v_buf * 1 (MEMOPREG is defined elsewhere --
//    confirm). That addressing only works if v_buf already holds the distance
//    from u_buf, which the row functions in this file set up with a `sub`.
//  - u_buf advances by 8, then punpcklbw interleaves the bytes of xmm0 and
//    xmm1 so xmm0 holds 8 U,V byte pairs.
//  - movq loads 8 Y bytes into xmm4; punpcklbw of xmm4 with itself repeats
//    each Y byte twice, giving eight 16-bit lanes; y_buf advances by 8.
1435 #define READYUV444 \ 1435 #define READYUV444 \
1436 "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ 1436 "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
1437 MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \ 1437 MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
1438 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ 1438 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
1439 "punpcklbw %%xmm1,%%xmm0 \n" \ 1439 "punpcklbw %%xmm1,%%xmm0 \n" \
1440 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \ 1440 "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1441 "punpcklbw %%xmm4,%%xmm4 \n" \ 1441 "punpcklbw %%xmm4,%%xmm4 \n" \
1442 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" 1442 "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
1443 1443
1444 // Read 4 UV from 422, upsample to 8 UV 1444 // Read 4 UV from 422, upsample to 8 UV
(...skipping 500 matching lines...) Expand 10 before | Expand all | Expand 10 after
1945 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ 1945 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
1946 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ 1946 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
1947 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ 1947 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1948 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ 1948 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
1949 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ 1949 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
1950 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \ 1950 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" \
1951 "vmovdqu " MEMACCESS([a_buf]) ",%%xmm5 \n" \ 1951 "vmovdqu " MEMACCESS([a_buf]) ",%%xmm5 \n" \
1952 "vpermq $0xd8,%%ymm5,%%ymm5 \n" \ 1952 "vpermq $0xd8,%%ymm5,%%ymm5 \n" \
1953 "lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n" 1953 "lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n"
1954 1954
1955 // Read 4 UV from 411, upsample to 16 UV.
// READYUV411_AVX2: inline-asm fragment that fetches one group of 16 pixels.
// Chroma path (4 U + 4 V bytes in, 16 U,V pairs out in ymm0):
//  - vmovd loads 4 bytes from u_buf into xmm0; the MEMOPREG line loads 4 bytes
//    into xmm1, presumably V addressed as u_buf + v_buf * 1 (MEMOPREG is
//    defined elsewhere -- confirm). u_buf then advances by 4.
//  - vpunpcklbw interleaves U and V bytes:      UV0 UV1 UV2 UV3.
//  - vpunpcklwd repeats each 16-bit UV pair:    UV0 UV0 UV1 UV1 UV2 UV2 UV3 UV3.
//  - vpermq $0xd8 reorders 64-bit quarters as 0,2,1,3, moving the UV2/UV3
//    quarter to the bottom of the upper 128-bit lane, because the unpack
//    instructions work within each 128-bit lane separately.
//  - vpunpckldq repeats each 32-bit group, so every UV pair appears 4 times.
// Luma path (16 Y bytes in, 16 doubled Y values out in ymm4):
//  - vmovdqu loads 16 Y bytes into xmm4, vpermq $0xd8 places bytes 8-15 in the
//    upper lane, and vpunpcklbw of ymm4 with itself repeats each Y byte twice.
//  - y_buf advances by 16.
1956 #define READYUV411_AVX2 \
1957 "vmovd " MEMACCESS([u_buf]) ",%%xmm0 \n" \
1958 MEMOPREG(vmovd, 0x00, [u_buf], [v_buf], 1, xmm1) \
1959 "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
1960 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
1961 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
1962 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
1963 "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" \
1964 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1965 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
1966 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
1967 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
1968
1955 // Read 8 UV from NV12, upsample to 16 UV. 1969 // Read 8 UV from NV12, upsample to 16 UV.
// READNV12_AVX2: inline-asm fragment that fetches one group of 16 pixels from
// a single interleaved chroma pointer (uv_buf) plus y_buf.
//  - vmovdqu loads 16 bytes from uv_buf into xmm0, treated below as eight
//    16-bit UV pairs; uv_buf advances by 16.
//  - vpermq $0xd8 reorders 64-bit quarters as 0,2,1,3 so pairs 4-7 sit at the
//    bottom of the upper 128-bit lane (the unpack works per lane).
//  - vpunpcklwd of ymm0 with itself repeats each 16-bit pair twice, leaving
//    16 UV pairs in ymm0.
//  - vmovdqu loads 16 Y bytes into xmm4; vpermq and vpunpcklbw repeat each Y
//    byte twice across ymm4; y_buf advances by 16.
1956 #define READNV12_AVX2 \ 1970 #define READNV12_AVX2 \
1957 "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \ 1971 "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
1958 "lea " MEMLEA(0x10, [uv_buf]) ",%[uv_buf] \n" \ 1972 "lea " MEMLEA(0x10, [uv_buf]) ",%[uv_buf] \n" \
1959 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ 1973 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
1960 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ 1974 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
1961 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ 1975 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1962 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ 1976 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
1963 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ 1977 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
1964 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" 1978 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
(...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after
2060 // 16 UV values with 16 Y producing 16 ARGB (64 bytes). 2074 // 16 UV values with 16 Y producing 16 ARGB (64 bytes).
// Converts one row from separate Y, U and V byte planes to ARGB using AVX2.
//   y_buf, u_buf, v_buf: source plane pointers; advanced by the asm.
//   dst_argb:            destination pointer; advanced by the asm.
//   yuvconstants:        conversion constants, read-only input operand.
//   width:               pixel count, consumed 16 at a time.
// The loop body runs before the count is tested, so at least one group is
// always processed and a width that is not a multiple of 16 is effectively
// rounded up. NOTE(review): callers presumably guarantee a multiple of 16 or
// padded buffers -- confirm.
// YUVTORGB_SETUP_AVX2, READYUV444_AVX2, YUVTORGB_AVX2 and STOREARGB_AVX2 are
// asm-fragment macros defined elsewhere in the file.
2061 void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf, 2075 void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
2062 const uint8* u_buf, 2076 const uint8* u_buf,
2063 const uint8* v_buf, 2077 const uint8* v_buf,
2064 uint8* dst_argb, 2078 uint8* dst_argb,
2065 const struct YuvConstants* yuvconstants, 2079 const struct YuvConstants* yuvconstants,
2066 int width) { 2080 int width) {
2067 asm volatile ( 2081 asm volatile (
2068 YUVTORGB_SETUP_AVX2(yuvconstants) 2082 YUVTORGB_SETUP_AVX2(yuvconstants)
// v_buf becomes (v_buf - u_buf): V is then addressed relative to u_buf, so
// only u_buf has to be advanced inside the loop.
2069 "sub %[u_buf],%[v_buf] \n" 2083 "sub %[u_buf],%[v_buf] \n"
// Comparing ymm5 with itself sets every byte to 0xff; presumably the opaque
// alpha consumed by STOREARGB_AVX2 -- confirm against that macro.
2070 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 2084 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2071 LABELALIGN 2085 LABELALIGN
2072 "1: \n" 2086 "1: \n"
2073 READYUV444_AVX2 2087 READYUV444_AVX2
2074 YUVTORGB_AVX2(yuvconstants) 2088 YUVTORGB_AVX2(yuvconstants)
2075 STOREARGB_AVX2 2089 STOREARGB_AVX2
// 16 pixels done; loop while the remaining count is still positive.
2076 "sub $0x10,%[width] \n" 2090 "sub $0x10,%[width] \n"
2077 "jg 1b \n" 2091 "jg 1b \n"
// Zero the upper halves of the ymm registers before returning to non-AVX code.
2078 "vzeroupper \n" 2092 "vzeroupper \n"
2079 : [y_buf]"+r"(y_buf), // %[y_buf] 2093 : [y_buf]"+r"(y_buf), // %[y_buf]
2080 [u_buf]"+r"(u_buf), // %[u_buf] 2094 [u_buf]"+r"(u_buf), // %[u_buf]
2081 [v_buf]"+r"(v_buf), // %[v_buf] 2095 [v_buf]"+r"(v_buf), // %[v_buf]
2082 [dst_argb]"+r"(dst_argb), // %[dst_argb] 2096 [dst_argb]"+r"(dst_argb), // %[dst_argb]
2083 [width]"+rm"(width) // %[width] 2097 [width]"+rm"(width) // %[width]
2084 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 2098 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
2085 : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2 2099 : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
2086 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 2100 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2087 ); 2101 );
2088 } 2102 }
2089 #endif // HAS_I444TOARGBROW_AVX2 2103 #endif // HAS_I444TOARGBROW_AVX2
2090 2104
2105 #ifdef HAS_I411TOARGBROW_AVX2
2106 // 16 pixels
2107 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
// Converts one row of 4:1:1 planar YUV to ARGB using AVX2.
//   y_buf:        luma plane; 16 bytes are read per loop iteration.
//   u_buf, v_buf: chroma planes; 4 bytes of each are read per iteration, and
//                 each sample is shared by 4 horizontally adjacent pixels.
//   dst_argb:     destination pointer; advanced by the asm.
//   yuvconstants: conversion constants, read-only input operand.
//   width:        pixel count, consumed 16 at a time.
// The loop body runs before the count is tested, so at least one 16-pixel
// group is always processed and a width that is not a multiple of 16 is
// effectively rounded up. NOTE(review): callers presumably guarantee a
// multiple of 16 or padded buffers -- confirm.
// YUVTORGB_SETUP_AVX2, YUVTORGB_AVX2 and STOREARGB_AVX2 are asm-fragment
// macros defined elsewhere in the file.
2108 void OMITFP I411ToARGBRow_AVX2(const uint8* y_buf,
2109 const uint8* u_buf,
2110 const uint8* v_buf,
2111 uint8* dst_argb,
2112 const struct YuvConstants* yuvconstants,
2113 int width) {
2114 asm volatile (
2115 YUVTORGB_SETUP_AVX2(yuvconstants)
// v_buf becomes (v_buf - u_buf): READYUV411_AVX2 passes both operands to
// MEMOPREG to fetch V, so only u_buf has to be advanced inside the loop.
2116 "sub %[u_buf],%[v_buf] \n"
// Comparing ymm5 with itself sets every byte to 0xff; presumably the opaque
// alpha consumed by STOREARGB_AVX2 -- confirm against that macro.
2117 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2118 LABELALIGN
2119 "1: \n"
2120 READYUV411_AVX2
2121 YUVTORGB_AVX2(yuvconstants)
2122 STOREARGB_AVX2
// 16 pixels done; loop while the remaining count is still positive.
2123 "sub $0x10,%[width] \n"
2124 "jg 1b \n"
// Zero the upper halves of the ymm registers before returning to non-AVX code.
2125 "vzeroupper \n"
2126 : [y_buf]"+r"(y_buf), // %[y_buf]
2127 [u_buf]"+r"(u_buf), // %[u_buf]
2128 [v_buf]"+r"(v_buf), // %[v_buf]
2129 [dst_argb]"+r"(dst_argb), // %[dst_argb]
2130 [width]"+rm"(width) // %[width]
2131 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
2132 : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
2133 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2134 );
2135 }
2136 #endif // HAS_I411TOARGBROW_AVX2
2137
2091 #if defined(HAS_I422TOARGBROW_AVX2) 2138 #if defined(HAS_I422TOARGBROW_AVX2)
2092 // 16 pixels 2139 // 16 pixels
2093 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2140 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2094 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, 2141 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
2095 const uint8* u_buf, 2142 const uint8* u_buf,
2096 const uint8* v_buf, 2143 const uint8* v_buf,
2097 uint8* dst_argb, 2144 uint8* dst_argb,
2098 const struct YuvConstants* yuvconstants, 2145 const struct YuvConstants* yuvconstants,
2099 int width) { 2146 int width) {
2100 asm volatile ( 2147 asm volatile (
2101 YUVTORGB_SETUP_AVX2(yuvconstants) 2148 YUVTORGB_SETUP_AVX2(yuvconstants)
2102 "sub %[u_buf],%[v_buf] \n" 2149 "sub %[u_buf],%[v_buf] \n"
2103 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 2150 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2104 LABELALIGN 2151 LABELALIGN
2105 "1: \n" 2152 "1: \n"
2106 READYUV422_AVX2 2153 READYUV422_AVX2
2107 YUVTORGB_AVX2(yuvconstants) 2154 YUVTORGB_AVX2(yuvconstants)
2108 STOREARGB_AVX2 2155 STOREARGB_AVX2
2109 "sub $0x10,%[width] \n" 2156 "sub $0x10,%[width] \n"
2110 "jg 1b \n" 2157 "jg 1b \n"
2111 "vzeroupper \n" 2158 "vzeroupper \n"
2112 : [y_buf]"+r"(y_buf), // %[y_buf] 2159 : [y_buf]"+r"(y_buf), // %[y_buf]
2113 [u_buf]"+r"(u_buf), // %[u_buf] 2160 [u_buf]"+r"(u_buf), // %[u_buf]
(...skipping 3276 matching lines...) Expand 10 before | Expand all | Expand 10 after
5390 ); 5437 );
5391 } 5438 }
5392 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 5439 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
5393 5440
5394 #endif // defined(__x86_64__) || defined(__i386__) 5441 #endif // defined(__x86_64__) || defined(__i386__)
5395 5442
5396 #ifdef __cplusplus 5443 #ifdef __cplusplus
5397 } // extern "C" 5444 } // extern "C"
5398 } // namespace libyuv 5445 } // namespace libyuv
5399 #endif 5446 #endif
OLD | NEW
« no previous file with comments | « include/libyuv/version.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698