Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(862)

Side by Side Diff: src/compiler/arm/code-generator-arm.cc

Issue 2801183002: [WASM SIMD] Implement primitive shuffles. (Closed)
Patch Set: Fix non-ARM build. Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/arm/macro-assembler-arm.cc ('k') | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/compiler/code-generator.h" 5 #include "src/compiler/code-generator.h"
6 6
7 #include "src/arm/macro-assembler-arm.h" 7 #include "src/arm/macro-assembler-arm.h"
8 #include "src/assembler-inl.h" 8 #include "src/assembler-inl.h"
9 #include "src/compilation-info.h" 9 #include "src/compilation-info.h"
10 #include "src/compiler/code-generator-impl.h" 10 #include "src/compiler/code-generator-impl.h"
(...skipping 2138 matching lines...) Expand 10 before | Expand all | Expand 10 after
2149 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0)); 2149 __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0));
2150 break; 2150 break;
2151 } 2151 }
2152 case kArmS128Select: { 2152 case kArmS128Select: {
2153 // vbsl clobbers the mask input so make sure it was DefineSameAsFirst. 2153 // vbsl clobbers the mask input so make sure it was DefineSameAsFirst.
2154 DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0))); 2154 DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0)));
2155 __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1), 2155 __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1),
2156 i.InputSimd128Register(2)); 2156 i.InputSimd128Register(2));
2157 break; 2157 break;
2158 } 2158 }
2159 case kArmS32x4ZipLeft: {
2160 Simd128Register dst = i.OutputSimd128Register(),
2161 src1 = i.InputSimd128Register(1);
2162 DCHECK(dst.is(i.InputSimd128Register(0)));
2163 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2164 __ vmov(dst.high(), src1.low()); // dst = [0, 1, 4, 5]
2165 __ vtrn(Neon32, dst.low(), dst.high()); // dst = [0, 4, 1, 5]
2166 break;
2167 }
2168 case kArmS32x4ZipRight: {
2169 Simd128Register dst = i.OutputSimd128Register(),
2170 src1 = i.InputSimd128Register(1);
2171 DCHECK(dst.is(i.InputSimd128Register(0)));
2172 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from ZipLeft).
2173 __ vmov(dst.low(), src1.high()); // dst = [2, 3, 6, 7]
2174 __ vtrn(Neon32, dst.low(), dst.high()); // dst = [2, 6, 3, 7]
2175 break;
2176 }
2177 case kArmS32x4UnzipLeft: {
2178 Simd128Register dst = i.OutputSimd128Register(),
2179 src1 = i.InputSimd128Register(1);
2180 DCHECK(dst.is(i.InputSimd128Register(0)));
2181 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2182 __ vmov(kScratchQuadReg, src1);
2183 __ vuzp(Neon32, dst, kScratchQuadReg); // dst = [0, 2, 4, 6]
2184 break;
2185 }
2186 case kArmS32x4UnzipRight: {
2187 Simd128Register dst = i.OutputSimd128Register(),
2188 src1 = i.InputSimd128Register(1);
2189 DCHECK(dst.is(i.InputSimd128Register(0)));
2190 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft).
2191 __ vmov(kScratchQuadReg, src1);
2192 __ vuzp(Neon32, kScratchQuadReg, dst); // dst = [1, 3, 5, 7]
2193 break;
2194 }
2195 case kArmS32x4TransposeLeft: {
2196 Simd128Register dst = i.OutputSimd128Register(),
2197 src1 = i.InputSimd128Register(1);
2198 DCHECK(dst.is(i.InputSimd128Register(0)));
2199 // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2200 __ vmov(kScratchQuadReg, src1);
2201 __ vtrn(Neon32, dst, kScratchQuadReg); // dst = [0, 4, 2, 6]
2202 break;
2203 }
2204 case kArmS32x4TransposeRight: {
2205 Simd128Register dst = i.OutputSimd128Register(),
2206 src1 = i.InputSimd128Register(1);
2207 DCHECK(dst.is(i.InputSimd128Register(0)));
2208 // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
2209 __ vmov(kScratchQuadReg, src1);
2210 __ vtrn(Neon32, kScratchQuadReg, dst); // dst = [1, 5, 3, 7]
2211 break;
2212 }
2213 case kArmS16x8ZipLeft: {
2214 Simd128Register dst = i.OutputSimd128Register(),
2215 src1 = i.InputSimd128Register(1);
2216 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
2217 DCHECK(dst.is(i.InputSimd128Register(0)));
2218 __ vmov(dst.high(), src1.low()); // dst = [0, 1, 2, 3, 8, ... 11]
2219 __ vzip(Neon16, dst.low(), dst.high()); // dst = [0, 8, 1, 9, ... 11]
2220 break;
2221 }
2222 case kArmS16x8ZipRight: {
2223 Simd128Register dst = i.OutputSimd128Register(),
2224 src1 = i.InputSimd128Register(1);
2225 DCHECK(dst.is(i.InputSimd128Register(0)));
2226 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
2227 __ vmov(dst.low(), src1.high());
2228 __ vzip(Neon16, dst.low(), dst.high()); // dst = [4, 12, 5, 13, ... 15]
2229 break;
2230 }
2231 case kArmS16x8UnzipLeft: {
2232 Simd128Register dst = i.OutputSimd128Register(),
2233 src1 = i.InputSimd128Register(1);
2234 DCHECK(dst.is(i.InputSimd128Register(0)));
2235 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
2236 __ vmov(kScratchQuadReg, src1);
2237 __ vuzp(Neon16, dst, kScratchQuadReg); // dst = [0, 2, 4, 6, ... 14]
2238 break;
2239 }
2240 case kArmS16x8UnzipRight: {
2241 Simd128Register dst = i.OutputSimd128Register(),
2242 src1 = i.InputSimd128Register(1);
2243 DCHECK(dst.is(i.InputSimd128Register(0)));
2244 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
2245 __ vmov(kScratchQuadReg, src1);
2246 __ vuzp(Neon16, kScratchQuadReg, dst); // dst = [1, 3, 5, 7, ... 15]
2247 break;
2248 }
2249 case kArmS16x8TransposeLeft: {
2250 Simd128Register dst = i.OutputSimd128Register(),
2251 src1 = i.InputSimd128Register(1);
2252 DCHECK(dst.is(i.InputSimd128Register(0)));
2253 // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
2254 __ vmov(kScratchQuadReg, src1);
2255 __ vtrn(Neon16, dst, kScratchQuadReg); // dst = [0, 8, 2, 10, ... 14]
2256 break;
2257 }
2258 case kArmS16x8TransposeRight: {
2259 Simd128Register dst = i.OutputSimd128Register(),
2260 src1 = i.InputSimd128Register(1);
2261 DCHECK(dst.is(i.InputSimd128Register(0)));
2262 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
2263 __ vmov(kScratchQuadReg, src1);
2264 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15]
2265 break;
2266 }
2267 case kArmS8x16ZipLeft: {
2268 Simd128Register dst = i.OutputSimd128Register(),
2269 src1 = i.InputSimd128Register(1);
2270 DCHECK(dst.is(i.InputSimd128Register(0)));
2271 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
2272 __ vmov(dst.high(), src1.low());
2273 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23]
2274 break;
2275 }
2276 case kArmS8x16ZipRight: {
2277 Simd128Register dst = i.OutputSimd128Register(),
2278 src1 = i.InputSimd128Register(1);
2279 DCHECK(dst.is(i.InputSimd128Register(0)));
2280 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
2281 __ vmov(dst.low(), src1.high());
2282 __ vzip(Neon8, dst.low(), dst.high()); // dst = [8, 24, 9, 25, ... 31]
2283 break;
2284 }
2285 case kArmS8x16UnzipLeft: {
2286 Simd128Register dst = i.OutputSimd128Register(),
2287 src1 = i.InputSimd128Register(1);
2288 DCHECK(dst.is(i.InputSimd128Register(0)));
2289 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
2290 __ vmov(kScratchQuadReg, src1);
2291 __ vuzp(Neon8, dst, kScratchQuadReg); // dst = [0, 2, 4, 6, ... 30]
2292 break;
2293 }
2294 case kArmS8x16UnzipRight: {
2295 Simd128Register dst = i.OutputSimd128Register(),
2296 src1 = i.InputSimd128Register(1);
2297 DCHECK(dst.is(i.InputSimd128Register(0)));
2298 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
2299 __ vmov(kScratchQuadReg, src1);
2300 __ vuzp(Neon8, kScratchQuadReg, dst); // dst = [1, 3, 5, 7, ... 31]
2301 break;
2302 }
2303 case kArmS8x16TransposeLeft: {
2304 Simd128Register dst = i.OutputSimd128Register(),
2305 src1 = i.InputSimd128Register(1);
2306 DCHECK(dst.is(i.InputSimd128Register(0)));
2307 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
2308 __ vmov(kScratchQuadReg, src1);
2309 __ vtrn(Neon8, dst, kScratchQuadReg); // dst = [0, 16, 2, 18, ... 30]
2310 break;
2311 }
2312 case kArmS8x16TransposeRight: {
2313 Simd128Register dst = i.OutputSimd128Register(),
2314 src1 = i.InputSimd128Register(1);
2315 DCHECK(dst.is(i.InputSimd128Register(0)));
2316 // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
2317 __ vmov(kScratchQuadReg, src1);
2318 __ vtrn(Neon8, kScratchQuadReg, dst); // dst = [1, 17, 3, 19, ... 31]
2319 break;
2320 }
2321 case kArmS8x16Concat: {
2322 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
2323 i.InputSimd128Register(1), i.InputInt4(2));
2324 break;
2325 }
2326 case kArmS32x2Reverse: {
2327 __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
2328 break;
2329 }
2330 case kArmS16x4Reverse: {
2331 __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2332 break;
2333 }
2334 case kArmS16x2Reverse: {
2335 __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2336 break;
2337 }
2338 case kArmS8x8Reverse: {
2339 __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2340 break;
2341 }
2342 case kArmS8x4Reverse: {
2343 __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2344 break;
2345 }
2346 case kArmS8x2Reverse: {
2347 __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2348 break;
2349 }
2159 case kArmS1x4AnyTrue: { 2350 case kArmS1x4AnyTrue: {
2160 const QwNeonRegister& src = i.InputSimd128Register(0); 2351 const QwNeonRegister& src = i.InputSimd128Register(0);
2161 __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high()); 2352 __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high());
2162 __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg, 2353 __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg,
2163 kScratchDoubleReg); 2354 kScratchDoubleReg);
2164 __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0); 2355 __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0);
2165 break; 2356 break;
2166 } 2357 }
2167 case kArmS1x4AllTrue: { 2358 case kArmS1x4AllTrue: {
2168 const QwNeonRegister& src = i.InputSimd128Register(0); 2359 const QwNeonRegister& src = i.InputSimd128Register(0);
(...skipping 802 matching lines...) Expand 10 before | Expand all | Expand 10 after
2971 padding_size -= v8::internal::Assembler::kInstrSize; 3162 padding_size -= v8::internal::Assembler::kInstrSize;
2972 } 3163 }
2973 } 3164 }
2974 } 3165 }
2975 3166
2976 #undef __ 3167 #undef __
2977 3168
2978 } // namespace compiler 3169 } // namespace compiler
2979 } // namespace internal 3170 } // namespace internal
2980 } // namespace v8 3171 } // namespace v8
OLD | NEW
« no previous file with comments | « src/arm/macro-assembler-arm.cc ('k') | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698