src/compiler/arm/code-generator-arm.cc - Issue 2923103003: [WASM] Simplify SIMD shuffle opcodes.

Side by Side Diff: src/compiler/arm/code-generator-arm.cc

Issue 2923103003: [WASM] Simplify SIMD shuffle opcodes. (Closed)

Patch Set: Mircea's review comments. Created 3 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2014 the V8 project authors. All rights reserved.	1 // Copyright 2014 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/compiler/code-generator.h"	5 #include "src/compiler/code-generator.h"

6	6

7 #include "src/arm/macro-assembler-arm.h"	7 #include "src/arm/macro-assembler-arm.h"

8 #include "src/assembler-inl.h"	8 #include "src/assembler-inl.h"

9 #include "src/compilation-info.h"	9 #include "src/compilation-info.h"

10 #include "src/compiler/code-generator-impl.h"	10 #include "src/compiler/code-generator-impl.h"

(...skipping 322 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
333 case kPositiveOrZero:	333 case kPositiveOrZero:

334 return pl;	334 return pl;

335 case kNegative:	335 case kNegative:

336 return mi;	336 return mi;

337 default:	337 default:

338 break;	338 break;

339 }	339 }

340 UNREACHABLE();	340 UNREACHABLE();

341 }	341 }

342	342

343 int GetVtblTableSize(const Simd128Register& src0, const Simd128Register& src1) {

344 // If unary shuffle, table is src0 (2 d-registers).

345 if (src0.is(src1)) return 2;

346 // Binary shuffle, table is src0, src1. They must be consecutive

347 DCHECK_EQ(src0.code() + 1, src1.code());

348 return 4; // 4 d-registers.

349 }

350

351 } // namespace	343 } // namespace

352	344

353 #define ASSEMBLE_CHECKED_LOAD_FP(Type) \	345 #define ASSEMBLE_CHECKED_LOAD_FP(Type) \

354 do { \	346 do { \

355 auto result = i.Output##Type##Register(); \	347 auto result = i.Output##Type##Register(); \

356 auto offset = i.InputRegister(0); \	348 auto offset = i.InputRegister(0); \

357 if (instr->InputAt(1)->IsRegister()) { \	349 if (instr->InputAt(1)->IsRegister()) { \

358 __ cmp(offset, i.InputRegister(1)); \	350 __ cmp(offset, i.InputRegister(1)); \

359 } else { \	351 } else { \

360 __ cmp(offset, i.InputImmediate(1)); \	352 __ cmp(offset, i.InputImmediate(1)); \

(...skipping 1923 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2284 }	2276 }

2285 case kArmS16x8TransposeRight: {	2277 case kArmS16x8TransposeRight: {

2286 Simd128Register dst = i.OutputSimd128Register(),	2278 Simd128Register dst = i.OutputSimd128Register(),

2287 src1 = i.InputSimd128Register(1);	2279 src1 = i.InputSimd128Register(1);

2288 DCHECK(dst.is(i.InputSimd128Register(0)));	2280 DCHECK(dst.is(i.InputSimd128Register(0)));

2289 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).	2281 // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).

2290 __ vmov(kScratchQuadReg, src1);	2282 __ vmov(kScratchQuadReg, src1);

2291 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15]	2283 __ vtrn(Neon16, kScratchQuadReg, dst); // dst = [1, 9, 3, 11, ... 15]

2292 break;	2284 break;

2293 }	2285 }

2294 case kArmS16x8Shuffle: {

2295 Simd128Register dst = i.OutputSimd128Register(),

2296 src0 = i.InputSimd128Register(0),

2297 src1 = i.InputSimd128Register(1);

2298 DwVfpRegister table_base = src0.low();

2299 int table_size = GetVtblTableSize(src0, src1);

2300 // Convert the shuffle lane masks to byte masks in kScratchQuadReg.

2301 int scratch_s_base = kScratchQuadReg.code() * 4;

2302 for (int j = 0; j < 2; j++) {

2303 int32_t four_lanes = i.InputInt32(2 + j);

2304 for (int k = 0; k < 2; k++) {

2305 uint8_t w0 = (four_lanes & 0xF) * kShortSize;

2306 four_lanes >>= 8;

2307 uint8_t w1 = (four_lanes & 0xF) * kShortSize;

2308 four_lanes >>= 8;

2309 int32_t mask = w0 | ((w0 + 1) << 8) | (w1 << 16) | ((w1 + 1) << 24);

2310 // Ensure byte indices are in [0, 31] so masks are never NaNs.

2311 four_lanes &= 0x1F1F1F1F;

2312 __ vmov(SwVfpRegister::from_code(scratch_s_base + 2 * j + k),

2313 bit_cast<float>(mask));

2314 }

2315 }

2316 NeonListOperand table(table_base, table_size);

2317 if (!dst.is(src0) && !dst.is(src1)) {

2318 __ vtbl(dst.low(), table, kScratchQuadReg.low());

2319 __ vtbl(dst.high(), table, kScratchQuadReg.high());

2320 } else {

2321 __ vtbl(kScratchQuadReg.low(), table, kScratchQuadReg.low());

2322 __ vtbl(kScratchQuadReg.high(), table, kScratchQuadReg.high());

2323 __ vmov(dst, kScratchQuadReg);

2324 }

2325 break;

2326 }

2327 case kArmS8x16ZipLeft: {	2286 case kArmS8x16ZipLeft: {

2328 Simd128Register dst = i.OutputSimd128Register(),	2287 Simd128Register dst = i.OutputSimd128Register(),

2329 src1 = i.InputSimd128Register(1);	2288 src1 = i.InputSimd128Register(1);

2330 DCHECK(dst.is(i.InputSimd128Register(0)));	2289 DCHECK(dst.is(i.InputSimd128Register(0)));

2331 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]	2290 // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]

2332 __ vmov(dst.high(), src1.low());	2291 __ vmov(dst.high(), src1.low());

2333 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23]	2292 __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23]

2334 break;	2293 break;

2335 }	2294 }

2336 case kArmS8x16ZipRight: {	2295 case kArmS8x16ZipRight: {

(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2381 case kArmS8x16Concat: {	2340 case kArmS8x16Concat: {

2382 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),	2341 __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),

2383 i.InputSimd128Register(1), i.InputInt4(2));	2342 i.InputSimd128Register(1), i.InputInt4(2));

2384 break;	2343 break;

2385 }	2344 }

2386 case kArmS8x16Shuffle: {	2345 case kArmS8x16Shuffle: {

2387 Simd128Register dst = i.OutputSimd128Register(),	2346 Simd128Register dst = i.OutputSimd128Register(),

2388 src0 = i.InputSimd128Register(0),	2347 src0 = i.InputSimd128Register(0),

2389 src1 = i.InputSimd128Register(1);	2348 src1 = i.InputSimd128Register(1);

2390 DwVfpRegister table_base = src0.low();	2349 DwVfpRegister table_base = src0.low();

2391 int table_size = GetVtblTableSize(src0, src1);	2350 // If unary shuffle, table is src0 (2 d-registers), otherwise src0 and

	2351 // src1. They must be consecutive.

	2352 int table_size = src0.is(src1) ? 2 : 4;

	2353 DCHECK_IMPLIES(!src0.is(src1), src0.code() + 1 == src1.code());

2392 // The shuffle lane mask is a byte mask, materialize in kScratchQuadReg.	2354 // The shuffle lane mask is a byte mask, materialize in kScratchQuadReg.

2393 int scratch_s_base = kScratchQuadReg.code() * 4;	2355 int scratch_s_base = kScratchQuadReg.code() * 4;

2394 for (int j = 0; j < 4; j++) {	2356 for (int j = 0; j < 4; j++) {

2395 int32_t four_lanes = i.InputInt32(2 + j);	2357 int32_t four_lanes = i.InputInt32(2 + j);

2396 // Ensure byte indices are in [0, 31] so masks are never NaNs.	2358 // Ensure byte indices are in [0, 31] so masks are never NaNs.

2397 four_lanes &= 0x1F1F1F1F;	2359 four_lanes &= 0x1F1F1F1F;

2398 __ vmov(SwVfpRegister::from_code(scratch_s_base + j),	2360 __ vmov(SwVfpRegister::from_code(scratch_s_base + j),

2399 bit_cast<float>(four_lanes));	2361 bit_cast<float>(four_lanes));

2400 }	2362 }

2401 NeonListOperand table(table_base, table_size);	2363 NeonListOperand table(table_base, table_size);

(...skipping 844 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
3246 padding_size -= v8::internal::Assembler::kInstrSize;	3208 padding_size -= v8::internal::Assembler::kInstrSize;

3247 }	3209 }

3248 }	3210 }

3249 }	3211 }

3250	3212

3251 #undef __	3213 #undef __

3252	3214

3253 } // namespace compiler	3215 } // namespace compiler

3254 } // namespace internal	3216 } // namespace internal

3255 } // namespace v8	3217 } // namespace v8

OLD	NEW

« no previous file with comments | « no previous file | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »