Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(182)

Side by Side Diff: src/compiler/arm/code-generator-arm.cc

Issue 2923103003: [WASM] Simplify SIMD shuffle opcodes. (Closed)
Patch Set: Mircea's review comments. Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2014 the V8 project authors. All rights reserved. 1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "src/compiler/code-generator.h" 5 #include "src/compiler/code-generator.h"
6 6
7 #include "src/arm/macro-assembler-arm.h" 7 #include "src/arm/macro-assembler-arm.h"
8 #include "src/assembler-inl.h" 8 #include "src/assembler-inl.h"
9 #include "src/compilation-info.h" 9 #include "src/compilation-info.h"
10 #include "src/compiler/code-generator-impl.h" 10 #include "src/compiler/code-generator-impl.h"
(...skipping 322 matching lines...) Expand 10 before | Expand all | Expand 10 after
333 case kPositiveOrZero: 333 case kPositiveOrZero:
334 return pl; 334 return pl;
335 case kNegative: 335 case kNegative:
336 return mi; 336 return mi;
337 default: 337 default:
338 break; 338 break;
339 } 339 }
340 UNREACHABLE(); 340 UNREACHABLE();
341 } 341 }
342 342
// Returns the number of d-registers forming the vtbl lookup table for a
// shuffle of |src0| and |src1|: 2 when both operands are the same register
// (unary shuffle), 4 otherwise. In the binary case the DCHECK requires
// src1's register code to be exactly src0's code + 1.
343 int GetVtblTableSize(const Simd128Register& src0, const Simd128Register& src1) {
344 // If unary shuffle, table is src0 (2 d-registers).
345 if (src0.is(src1)) return 2;
346 // Binary shuffle, table is src0, src1. They must be consecutive.
347 DCHECK_EQ(src0.code() + 1, src1.code());
348 return 4; // 4 d-registers.
349 }
350
351 } // namespace 343 } // namespace
352 344
353 #define ASSEMBLE_CHECKED_LOAD_FP(Type) \ 345 #define ASSEMBLE_CHECKED_LOAD_FP(Type) \
354 do { \ 346 do { \
355 auto result = i.Output##Type##Register(); \ 347 auto result = i.Output##Type##Register(); \
356 auto offset = i.InputRegister(0); \ 348 auto offset = i.InputRegister(0); \
357 if (instr->InputAt(1)->IsRegister()) { \ 349 if (instr->InputAt(1)->IsRegister()) { \
358 __ cmp(offset, i.InputRegister(1)); \ 350 __ cmp(offset, i.InputRegister(1)); \
359 } else { \ 351 } else { \
360 __ cmp(offset, i.InputImmediate(1)); \ 352 __ cmp(offset, i.InputImmediate(1)); \
(...skipping 1923 matching lines...) Expand 10 before | Expand all | Expand 10 after
2284 } 2276 }
// 16-bit-lane transpose (right variant). The output register must be the
// same register as input 0 (DCHECK below). Input 1 is first copied into
// kScratchQuadReg, and vtrn is then issued on the scratch copy and dst.
// NOTE(review): the copy presumably keeps src1 itself from being modified
// by vtrn -- confirm against the vtrn semantics.
2285 case kArmS16x8TransposeRight: { 2277 case kArmS16x8TransposeRight: {
2286 Simd128Register dst = i.OutputSimd128Register(), 2278 Simd128Register dst = i.OutputSimd128Register(),
2287 src1 = i.InputSimd128Register(1); 2279 src1 = i.InputSimd128Register(1);
2288 DCHECK(dst.is(i.InputSimd128Register(0))); 2280 DCHECK(dst.is(i.InputSimd128Register(0)));
2289 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped). 2281 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
2290 __ vmov(kScratchQuadReg, src1); 2282 __ vmov(kScratchQuadReg, src1);
2291 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15] 2283 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15]
2292 break; 2284 break;
2293 } 2285 }
// General 16-bit-lane shuffle of src0/src1, emitted as two vtbl lookups (one
// per d-register half of the result). Inputs 2 and 3 are int32 immediates;
// each is consumed as four lane indices, one per byte, of which only the low
// 4 bits are used. The lane indices are expanded into a byte-index mask that
// is materialized in kScratchQuadReg and used as the vtbl index operand.
2294 case kArmS16x8Shuffle: {
2295 Simd128Register dst = i.OutputSimd128Register(),
2296 src0 = i.InputSimd128Register(0),
2297 src1 = i.InputSimd128Register(1);
// The table starts at the low d-register of src0; its length (2 or 4
// d-registers) comes from GetVtblTableSize.
2298 DwVfpRegister table_base = src0.low();
2299 int table_size = GetVtblTableSize(src0, src1);
2300 // Convert the shuffle lane masks to byte masks in kScratchQuadReg.
// Code of the first s-register written below (presumably four s-registers
// overlay one q-register -- confirm).
2301 int scratch_s_base = kScratchQuadReg.code() * 4;
2302 for (int j = 0; j < 2; j++) {
2303 int32_t four_lanes = i.InputInt32(2 + j);
// Each pass consumes two lane indices and builds one 32-bit mask holding
// four byte indices: (w0, w0 + 1, w1, w1 + 1).
2304 for (int k = 0; k < 2; k++) {
2305 uint8_t w0 = (four_lanes & 0xF) * kShortSize;
2306 four_lanes >>= 8;
2307 uint8_t w1 = (four_lanes & 0xF) * kShortSize;
2308 four_lanes >>= 8;
2309 int32_t mask = w0 | ((w0 + 1) << 8) | (w1 << 16) | ((w1 + 1) << 24);
2310 // Ensure byte indices are in [0, 31] so masks are never NaNs.
// NOTE(review): the statement below modifies |four_lanes| after |mask| has
// already been computed, and the remaining lanes are re-masked with 0xF on
// the next pass, so it does not appear to change the emitted mask. The
// [0, 31] bound would instead follow from the 0xF masking if kShortSize is
// 2 -- confirm.
2311 four_lanes &= 0x1F1F1F1F;
2312 __ vmov(SwVfpRegister::from_code(scratch_s_base + 2 * j + k),
2313 bit_cast<float>(mask));
2314 }
2315 }
2316 NeonListOperand table(table_base, table_size);
// When dst is neither input, look up straight into dst. Otherwise look up
// into the scratch register and move the result to dst afterwards
// (presumably so the table registers are not overwritten between the two
// vtbl instructions -- confirm).
2317 if (!dst.is(src0) && !dst.is(src1)) {
2318 __ vtbl(dst.low(), table, kScratchQuadReg.low());
2319 __ vtbl(dst.high(), table, kScratchQuadReg.high());
2320 } else {
2321 __ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low());
2322 __ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high());
2323 __ vmov(dst, kScratchQuadReg);
2324 }
2325 break;
2326 }
// 8-bit-lane zip of the low halves of the two inputs. The output register
// must be the same register as input 0 (DCHECK below). The low d-register of
// src1 is moved into the high d-register of dst, then vzip is applied to
// dst.low and dst.high.
2327 case kArmS8x16ZipLeft: { 2286 case kArmS8x16ZipLeft: {
2328 Simd128Register dst = i.OutputSimd128Register(), 2287 Simd128Register dst = i.OutputSimd128Register(),
2329 src1 = i.InputSimd128Register(1); 2288 src1 = i.InputSimd128Register(1);
2330 DCHECK(dst.is(i.InputSimd128Register(0))); 2289 DCHECK(dst.is(i.InputSimd128Register(0)));
2331 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31] 2290 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
2332 __ vmov(dst.high(), src1.low()); 2291 __ vmov(dst.high(), src1.low());
2333 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23] 2292 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23]
2334 break; 2293 break;
2335 } 2294 }
2336 case kArmS8x16ZipRight: { 2295 case kArmS8x16ZipRight: {
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
// Emits a single vext of inputs 0 and 1 into the output register. Input 2
// supplies the immediate operand, read via InputInt4 (presumably the byte
// offset at which extraction starts -- confirm).
2381 case kArmS8x16Concat: { 2340 case kArmS8x16Concat: {
2382 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0), 2341 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
2383 i.InputSimd128Register(1), i.InputInt4(2)); 2342 i.InputSimd128Register(1), i.InputInt4(2));
2384 break; 2343 break;
2385 } 2344 }
2386 case kArmS8x16Shuffle: { 2345 case kArmS8x16Shuffle: {
2387 Simd128Register dst = i.OutputSimd128Register(), 2346 Simd128Register dst = i.OutputSimd128Register(),
2388 src0 = i.InputSimd128Register(0), 2347 src0 = i.InputSimd128Register(0),
2389 src1 = i.InputSimd128Register(1); 2348 src1 = i.InputSimd128Register(1);
2390 DwVfpRegister table_base = src0.low(); 2349 DwVfpRegister table_base = src0.low();
2391 int table_size = GetVtblTableSize(src0, src1); 2350 // If unary shuffle, table is src0 (2 d-registers), otherwise src0 and
2351 // src1. They must be consecutive.
2352 int table_size = src0.is(src1) ? 2 : 4;
2353 DCHECK_IMPLIES(!src0.is(src1), src0.code() + 1 == src1.code());
2392 // The shuffle lane mask is a byte mask, materialize in kScratchQuadReg. 2354 // The shuffle lane mask is a byte mask, materialize in kScratchQuadReg.
2393 int scratch_s_base = kScratchQuadReg.code() * 4; 2355 int scratch_s_base = kScratchQuadReg.code() * 4;
2394 for (int j = 0; j < 4; j++) { 2356 for (int j = 0; j < 4; j++) {
2395 int32_t four_lanes = i.InputInt32(2 + j); 2357 int32_t four_lanes = i.InputInt32(2 + j);
2396 // Ensure byte indices are in [0, 31] so masks are never NaNs. 2358 // Ensure byte indices are in [0, 31] so masks are never NaNs.
2397 four_lanes &= 0x1F1F1F1F; 2359 four_lanes &= 0x1F1F1F1F;
2398 __ vmov(SwVfpRegister::from_code(scratch_s_base + j), 2360 __ vmov(SwVfpRegister::from_code(scratch_s_base + j),
2399 bit_cast<float>(four_lanes)); 2361 bit_cast<float>(four_lanes));
2400 } 2362 }
2401 NeonListOperand table(table_base, table_size); 2363 NeonListOperand table(table_base, table_size);
(...skipping 844 matching lines...) Expand 10 before | Expand all | Expand 10 after
3246 padding_size -= v8::internal::Assembler::kInstrSize; 3208 padding_size -= v8::internal::Assembler::kInstrSize;
3247 } 3209 }
3248 } 3210 }
3249 } 3211 }
3250 3212
3251 #undef __ 3213 #undef __
3252 3214
3253 } // namespace compiler 3215 } // namespace compiler
3254 } // namespace internal 3216 } // namespace internal
3255 } // namespace v8 3217 } // namespace v8
OLDNEW
« no previous file with comments | « no previous file | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698