Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(36)

Side by Side Diff: src/compiler/arm/code-generator-arm.cc

Issue 2801183002: [WASM SIMD] Implement primitive shuffles. (Closed)
Patch Set: Add comments, remove S64x2Reverse, as it's redundant. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/macro-assembler-arm.cc ('k') | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/compiler/code-generator.h" 5 #include "src/compiler/code-generator.h"
6 6
7 #include "src/arm/macro-assembler-arm.h" 7 #include "src/arm/macro-assembler-arm.h"
8 #include "src/assembler-inl.h" 8 #include "src/assembler-inl.h"
9 #include "src/compilation-info.h" 9 #include "src/compilation-info.h"
10 #include "src/compiler/code-generator-impl.h" 10 #include "src/compiler/code-generator-impl.h"
(...skipping 2010 matching lines...) Expand 10 before | Expand all | Expand 10 after
2021 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0)); 2021 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0));
2022 break; 2022 break;
2023 } 2023 }
2024 case kArmS128Select: { 2024 case kArmS128Select: {
2025 // vbsl clobbers the mask input so make sure it was DefineSameAsFirst. 2025 // vbsl clobbers the mask input so make sure it was DefineSameAsFirst.
2026 DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0))); 2026 DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0)));
2027 __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1), 2027 __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1),
2028 i.InputSimd128Register(2)); 2028 i.InputSimd128Register(2));
2029 break; 2029 break;
2030 } 2030 }
2031 case kArmS32x4ZipLeft: {
2032 Simd128Register dst = i.OutputSimd128Register(),
2033 src1 = i.InputSimd128Register(1);
2034 DCHECK(dst.is(i.InputSimd128Register(0)));
2035 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2036 __ vmov(dst.high(), src1.low()); // dst = [0, 1, 4, 5]
2037 __ vtrn(Neon32, dst.low(), dst.high()); // dst = [0, 4, 1, 5]
2038 break;
2039 }
2040 case kArmS32x4ZipRight: {
2041 Simd128Register dst = i.OutputSimd128Register(),
2042 src1 = i.InputSimd128Register(1);
2043 DCHECK(dst.is(i.InputSimd128Register(0)));
2044 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from ZipLeft).
2045 __ vmov(dst.low(), src1.high()); // dst = [2, 3, 6, 7]
2046 __ vtrn(Neon32, dst.low(), dst.high()); // dst = [2, 6, 3, 7]
2047 break;
2048 }
2049 case kArmS32x4UnzipLeft: {
martyn.capewell 2017/04/13 17:07:19 I think the unzip cases can still be replaced with
2050 Simd128Register dst = i.OutputSimd128Register(),
2051 src1 = i.InputSimd128Register(1);
2052 DCHECK(dst.is(i.InputSimd128Register(0)));
2053 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2054 __ vmov(kScratchQuadReg, src1);
2055 __ vuzp(Neon32, dst, kScratchQuadReg); // dst = [0, 2, 4, 6]
2056 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2057 break;
2058 }
2059 case kArmS32x4UnzipRight: {
martyn.capewell 2017/04/13 17:07:19 if (dst.is(src0) && dst.is(src1)) { __ vshrn(dst
2060 Simd128Register dst = i.OutputSimd128Register(),
2061 src1 = i.InputSimd128Register(1);
2062 DCHECK(dst.is(i.InputSimd128Register(0)));
2063 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft).
2064 __ vmov(kScratchQuadReg, src1);
2065 __ vuzp(Neon32, kScratchQuadReg, dst); // dst = [1, 3, 5, 7]
2066 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2067 break;
2068 }
2069 case kArmS32x4TransposeLeft: {
2070 Simd128Register dst = i.OutputSimd128Register(),
2071 src1 = i.InputSimd128Register(1);
2072 DCHECK(dst.is(i.InputSimd128Register(0)));
2073 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2074 __ vmov(kScratchQuadReg, src1);
2075 __ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6]
2076 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2077 break;
2078 }
2079 case kArmS32x4TransposeRight: {
2080 Simd128Register dst = i.OutputSimd128Register(),
2081 src1 = i.InputSimd128Register(1);
2082 DCHECK(dst.is(i.InputSimd128Register(0)));
2083 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
2084 __ vmov(kScratchQuadReg, src1);
2085 __ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7]
2086 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2087 break;
2088 }
2089 case kArmS16x8ZipLeft: {
2090 Simd128Register dst = i.OutputSimd128Register(),
2091 src1 = i.InputSimd128Register(1);
2092 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
2093 DCHECK(dst.is(i.InputSimd128Register(0)));
2094 __ vmov(dst.high(), src1.low()); // dst = [0, 1, 2, 3, 8, ... 11]
2095 __ vzip(Neon16, dst.low(), dst.high()); // dst = [0, 8, 1, 9, ... 11]
2096 break;
2097 }
2098 case kArmS16x8ZipRight: {
2099 Simd128Register dst = i.OutputSimd128Register(),
2100 src1 = i.InputSimd128Register(1);
2101 DCHECK(dst.is(i.InputSimd128Register(0)));
2102 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
2103 __ vmov(dst.low(), src1.high());
2104 __ vzip(Neon16, dst.low(), dst.high()); // dst = [4, 12, 5, 13, ... 15]
2105 break;
2106 }
2107 case kArmS16x8UnzipLeft: {
2108 Simd128Register dst = i.OutputSimd128Register(),
2109 src1 = i.InputSimd128Register(1);
2110 DCHECK(dst.is(i.InputSimd128Register(0)));
2111 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
2112 __ vmov(kScratchQuadReg, src1);
2113 __ vuzp(Neon16, dst, kScratchQuadReg); // dst = [0, 2, 4, 6, ... 14]
2114 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2115 break;
2116 }
2117 case kArmS16x8UnzipRight: {
2118 Simd128Register dst = i.OutputSimd128Register(),
2119 src1 = i.InputSimd128Register(1);
2120 DCHECK(dst.is(i.InputSimd128Register(0)));
2121 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
2122 __ vmov(kScratchQuadReg, src1);
2123 __ vuzp(Neon16, kScratchQuadReg, dst); // dst = [1, 3, 5, 7, ... 15]
2124 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2125 break;
2126 }
2127 case kArmS16x8TransposeLeft: {
2128 Simd128Register dst = i.OutputSimd128Register(),
2129 src1 = i.InputSimd128Register(1);
2130 DCHECK(dst.is(i.InputSimd128Register(0)));
2131 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
2132 __ vmov(kScratchQuadReg, src1);
2133 __ vtrn(Neon16, dst, kScratchQuadReg); // dst = [0, 8, 2, 10, ... 14]
2134 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2135 break;
2136 }
2137 case kArmS16x8TransposeRight: {
2138 Simd128Register dst = i.OutputSimd128Register(),
2139 src1 = i.InputSimd128Register(1);
2140 DCHECK(dst.is(i.InputSimd128Register(0)));
2141 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
2142 __ vmov(kScratchQuadReg, src1);
2143 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15]
2144 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2145 break;
2146 }
2147 case kArmS8x16ZipLeft: {
2148 Simd128Register dst = i.OutputSimd128Register(),
2149 src1 = i.InputSimd128Register(1);
2150 DCHECK(dst.is(i.InputSimd128Register(0)));
2151 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
2152 __ vmov(dst.high(), src1.low());
2153 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23]
2154 break;
2155 }
2156 case kArmS8x16ZipRight: {
2157 Simd128Register dst = i.OutputSimd128Register(),
2158 src1 = i.InputSimd128Register(1);
2159 DCHECK(dst.is(i.InputSimd128Register(0)));
2160 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
2161 __ vmov(dst.low(), src1.high());
2162 __ vzip(Neon8, dst.low(), dst.high()); // dst = [8, 24, 9, 25, ... 31]
2163 break;
2164 }
2165 case kArmS8x16UnzipLeft: {
2166 Simd128Register dst = i.OutputSimd128Register(),
2167 src1 = i.InputSimd128Register(1);
2168 DCHECK(dst.is(i.InputSimd128Register(0)));
2169 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
2170 __ vmov(kScratchQuadReg, src1);
2171 __ vuzp(Neon8, dst, kScratchQuadReg); // dst = [0, 2, 4, 6, ... 30]
2172 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2173 break;
2174 }
2175 case kArmS8x16UnzipRight: {
2176 Simd128Register dst = i.OutputSimd128Register(),
2177 src1 = i.InputSimd128Register(1);
2178 DCHECK(dst.is(i.InputSimd128Register(0)));
2179 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
2180 __ vmov(kScratchQuadReg, src1);
2181 __ vuzp(Neon8, kScratchQuadReg, dst); // dst = [1, 3, 5, 7, ... 31]
2182 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2183 break;
2184 }
2185 case kArmS8x16TransposeLeft: {
2186 Simd128Register dst = i.OutputSimd128Register(),
2187 src1 = i.InputSimd128Register(1);
2188 DCHECK(dst.is(i.InputSimd128Register(0)));
2189 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
2190 __ vmov(kScratchQuadReg, src1);
2191 __ vtrn(Neon8, dst, kScratchQuadReg); // dst = [0, 16, 2, 18, ... 30]
2192 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2193 break;
2194 }
2195 case kArmS8x16TransposeRight: {
2196 Simd128Register dst = i.OutputSimd128Register(),
2197 src1 = i.InputSimd128Register(1);
2198 DCHECK(dst.is(i.InputSimd128Register(0)));
2199 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
2200 __ vmov(kScratchQuadReg, src1);
2201 __ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31]
2202 __ veor(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero);
2203 break;
2204 }
2205 case kArmS8x16Concat: {
2206 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
2207 i.InputSimd128Register(1), i.InputInt4(2));
2208 break;
2209 }
2210 case kArmS32x2Reverse: {
2211 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
2212 break;
2213 }
2214 case kArmS16x4Reverse: {
2215 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2216 break;
2217 }
2218 case kArmS16x2Reverse: {
2219 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2220 break;
2221 }
2222 case kArmS8x8Reverse: {
2223 __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2224 break;
2225 }
2226 case kArmS8x4Reverse: {
2227 __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2228 break;
2229 }
2230 case kArmS8x2Reverse: {
2231 __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2232 break;
2233 }
2031 case kArmS1x4AnyTrue: { 2234 case kArmS1x4AnyTrue: {
2032 const QwNeonRegister& src = i.InputSimd128Register(0); 2235 const QwNeonRegister& src = i.InputSimd128Register(0);
2033 __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high()); 2236 __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high());
2034 __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg, 2237 __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg,
2035 kScratchDoubleReg); 2238 kScratchDoubleReg);
2036 __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); 2239 __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0);
2037 break; 2240 break;
2038 } 2241 }
2039 case kArmS1x4AllTrue: { 2242 case kArmS1x4AllTrue: {
2040 const QwNeonRegister& src = i.InputSimd128Register(0); 2243 const QwNeonRegister& src = i.InputSimd128Register(0);
(...skipping 790 matching lines...) Expand 10 before | Expand all | Expand 10 after
2831 padding_size -= v8::internal::Assembler::kInstrSize; 3034 padding_size -= v8::internal::Assembler::kInstrSize;
2832 } 3035 }
2833 } 3036 }
2834 } 3037 }
2835 3038
2836 #undef __ 3039 #undef __
2837 3040
2838 } // namespace compiler 3041 } // namespace compiler
2839 } // namespace internal 3042 } // namespace internal
2840 } // namespace v8 3043 } // namespace v8
OLDNEW
« no previous file with comments | « src/arm/macro-assembler-arm.cc ('k') | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698