Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(730)

Side by Side Diff: source/row_gcc.cc

Issue 1445893002: initial I444ToARGB avx2 code (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: enable HAS_I444TOARGBROW_AVX2 Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « include/libyuv/row.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // VERSION 2 1 // VERSION 2
2 /* 2 /*
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * 4 *
5 * Use of this source code is governed by a BSD-style license 5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source 6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found 7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may 8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree. 9 * be found in the AUTHORS file in the root of the source tree.
10 */ 10 */
(...skipping 1929 matching lines...) Expand 10 before | Expand all | Expand 10 after
1940 [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] 1940 [dst_rgba]"+r"(dst_rgba), // %[dst_rgba]
1941 [width]"+rm"(width) // %[width] 1941 [width]"+rm"(width) // %[width]
1942 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1942 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1943 : "memory", "cc", NACL_R14 YUVTORGB_REGS 1943 : "memory", "cc", NACL_R14 YUVTORGB_REGS
1944 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1944 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1945 ); 1945 );
1946 } 1946 }
1947 1947
1948 #endif // HAS_I422TOARGBROW_SSSE3 1948 #endif // HAS_I422TOARGBROW_SSSE3
1949 1949
1950 // Read 16 UV from 444 (U and V bytes interleaved into UV pairs) and 16 Y (each Y byte duplicated); advances u_buf and y_buf by 16.
1951 #define READYUV444_AVX2 \
1952 "vmovdqu " MEMACCESS([u_buf]) ",%%xmm0 \n" \
1953 MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1) \
1954 "lea " MEMLEA(0x10, [u_buf]) ",%[u_buf] \n" \
1955 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
1956 "vpermq $0xd8,%%ymm1,%%ymm1 \n" \
1957 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
1958 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1959 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
1960 "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \
1961 "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n"
1962
1950 // Read 8 UV from 422, upsample to 16 UV. 1963 // Read 8 UV from 422, upsample to 16 UV.
1951 #define READYUV422_AVX2 \ 1964 #define READYUV422_AVX2 \
1952 "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \ 1965 "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
1953 MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \ 1966 MEMOPREG(vmovq, 0x00, [u_buf], [v_buf], 1, xmm1) \
1954 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \ 1967 "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
1955 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ 1968 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
1956 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ 1969 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
1957 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ 1970 "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \
1958 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \ 1971 "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" \
1959 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ 1972 "vpermq $0xd8,%%ymm4,%%ymm4 \n" \
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
2072 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ 2085 "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \
2073 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ 2086 "vpermq $0xd8,%%ymm0,%%ymm0 \n" \
2074 "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \ 2087 "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \
2075 "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ 2088 "vpermq $0xd8,%%ymm2,%%ymm2 \n" \
2076 "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \ 2089 "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \
2077 "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \ 2090 "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \
2078 "vmovdqu %%ymm1," MEMACCESS([dst_argb]) " \n" \ 2091 "vmovdqu %%ymm1," MEMACCESS([dst_argb]) " \n" \
2079 "vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \ 2092 "vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \
2080 "lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n" 2093 "lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n"
2081 2094
2095 #ifdef HAS_I444TOARGBROW_AVX2
2096 // 16 pixels per loop iteration (width is reduced by 16 each pass; loops while the result is > 0).
2097 // 16 UV values (one per pixel, no upsampling) with 16 Y producing 16 ARGB (64 bytes).
2098 void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
2099 const uint8* u_buf,
2100 const uint8* v_buf,
2101 uint8* dst_argb,
2102 const struct YuvConstants* yuvconstants,
2103 int width) {
2104 asm volatile (
2105 YUVTORGB_SETUP_AVX2(yuvconstants)
2106 "sub %[u_buf],%[v_buf] \n"
2107 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2108 LABELALIGN
2109 "1: \n"
2110 READYUV444_AVX2
2111 YUVTORGB_AVX2(yuvconstants)
2112 STOREARGB_AVX2
2113 "sub $0x10,%[width] \n"
2114 "jg 1b \n"
2115 "vzeroupper \n"
2116 : [y_buf]"+r"(y_buf), // %[y_buf]
2117 [u_buf]"+r"(u_buf), // %[u_buf]
2118 [v_buf]"+r"(v_buf), // %[v_buf]
2119 [dst_argb]"+r"(dst_argb), // %[dst_argb]
2120 [width]"+rm"(width) // %[width]
2121 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
2122 : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
2123 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2124 );
2125 }
2126 #endif // HAS_I444TOARGBROW_AVX2
2127
2082 #if defined(HAS_I422TOARGBROW_AVX2) 2128 #if defined(HAS_I422TOARGBROW_AVX2)
2083 // 16 pixels 2129 // 16 pixels
2084 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2130 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2085 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf, 2131 void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
2086 const uint8* u_buf, 2132 const uint8* u_buf,
2087 const uint8* v_buf, 2133 const uint8* v_buf,
2088 uint8* dst_argb, 2134 uint8* dst_argb,
2089 const struct YuvConstants* yuvconstants, 2135 const struct YuvConstants* yuvconstants,
2090 int width) { 2136 int width) {
2091 asm volatile ( 2137 asm volatile (
2092 YUVTORGB_SETUP_AVX2(yuvconstants) 2138 YUVTORGB_SETUP_AVX2(yuvconstants)
2093 "sub %[u_buf],%[v_buf] \n" 2139 "sub %[u_buf],%[v_buf] \n"
2094 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" 2140 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
2095 LABELALIGN 2141 LABELALIGN
2096 "1: \n" 2142 "1: \n"
2097 READYUV422_AVX2 2143 READYUV422_AVX2
2098 YUVTORGB_AVX2(yuvconstants) 2144 YUVTORGB_AVX2(yuvconstants)
2099 STOREARGB_AVX2 2145 STOREARGB_AVX2
2100 "sub $0x10,%[width] \n" 2146 "sub $0x10,%[width] \n"
2101 "jg 1b \n" 2147 "jg 1b \n"
2102 "vzeroupper \n" 2148 "vzeroupper \n"
2103 : [y_buf]"+r"(y_buf), // %[y_buf] 2149 : [y_buf]"+r"(y_buf), // %[y_buf]
2104 [u_buf]"+r"(u_buf), // %[u_buf] 2150 [u_buf]"+r"(u_buf), // %[u_buf]
(...skipping 3311 matching lines...) Expand 10 before | Expand all | Expand 10 after
5416 ); 5462 );
5417 } 5463 }
5418 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 5464 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
5419 5465
5420 #endif // defined(__x86_64__) || defined(__i386__) 5466 #endif // defined(__x86_64__) || defined(__i386__)
5421 5467
5422 #ifdef __cplusplus 5468 #ifdef __cplusplus
5423 } // extern "C" 5469 } // extern "C"
5424 } // namespace libyuv 5470 } // namespace libyuv
5425 #endif 5471 #endif
OLD | NEW
« no previous file with comments | « include/libyuv/row.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698