src/compiler/arm/instruction-selector-arm.cc - Issue 2769083003: [ARM] Make Simd 128 bit load/store more like existing load/store.

Side by Side Diff: src/compiler/arm/instruction-selector-arm.cc

Issue 2769083003: [ARM] Make Simd 128 bit load/store more like existing load/store. (Closed)

Patch Set: Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2014 the V8 project authors. All rights reserved.	1 // Copyright 2014 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/base/adapters.h"	5 #include "src/base/adapters.h"

6 #include "src/base/bits.h"	6 #include "src/base/bits.h"

7 #include "src/compiler/instruction-selector-impl.h"	7 #include "src/compiler/instruction-selector-impl.h"

8 #include "src/compiler/node-matchers.h"	8 #include "src/compiler/node-matchers.h"

9 #include "src/compiler/node-properties.h"	9 #include "src/compiler/node-properties.h"

10	10

(...skipping 409 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
420 case MachineRepresentation::kWord16:	420 case MachineRepresentation::kWord16:

421 opcode = load_rep.IsUnsigned() ? kArmLdrh : kArmLdrsh;	421 opcode = load_rep.IsUnsigned() ? kArmLdrh : kArmLdrsh;

422 break;	422 break;

423 case MachineRepresentation::kTaggedSigned: // Fall through.	423 case MachineRepresentation::kTaggedSigned: // Fall through.

424 case MachineRepresentation::kTaggedPointer: // Fall through.	424 case MachineRepresentation::kTaggedPointer: // Fall through.

425 case MachineRepresentation::kTagged: // Fall through.	425 case MachineRepresentation::kTagged: // Fall through.

426 case MachineRepresentation::kWord32:	426 case MachineRepresentation::kWord32:

427 opcode = kArmLdr;	427 opcode = kArmLdr;

428 break;	428 break;

429 case MachineRepresentation::kSimd128:	429 case MachineRepresentation::kSimd128:

430 opcode = kArmSimd128Load;	430 opcode = kArmVld1S128;

431 break;	431 break;

432 case MachineRepresentation::kWord64: // Fall through.	432 case MachineRepresentation::kWord64: // Fall through.

433 case MachineRepresentation::kSimd1x4: // Fall through.	433 case MachineRepresentation::kSimd1x4: // Fall through.

434 case MachineRepresentation::kSimd1x8: // Fall through.	434 case MachineRepresentation::kSimd1x8: // Fall through.

435 case MachineRepresentation::kSimd1x16: // Fall through.	435 case MachineRepresentation::kSimd1x16: // Fall through.

436 case MachineRepresentation::kNone:	436 case MachineRepresentation::kNone:

437 UNREACHABLE();	437 UNREACHABLE();

438 return;	438 return;

439 }	439 }

440	440

(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
510 case MachineRepresentation::kWord16:	510 case MachineRepresentation::kWord16:

511 opcode = kArmStrh;	511 opcode = kArmStrh;

512 break;	512 break;

513 case MachineRepresentation::kTaggedSigned: // Fall through.	513 case MachineRepresentation::kTaggedSigned: // Fall through.

514 case MachineRepresentation::kTaggedPointer: // Fall through.	514 case MachineRepresentation::kTaggedPointer: // Fall through.

515 case MachineRepresentation::kTagged: // Fall through.	515 case MachineRepresentation::kTagged: // Fall through.

516 case MachineRepresentation::kWord32:	516 case MachineRepresentation::kWord32:

517 opcode = kArmStr;	517 opcode = kArmStr;

518 break;	518 break;

519 case MachineRepresentation::kSimd128:	519 case MachineRepresentation::kSimd128:

520 opcode = kArmSimd128Store;	520 opcode = kArmVst1S128;

521 break;	521 break;

522 case MachineRepresentation::kWord64: // Fall through.	522 case MachineRepresentation::kWord64: // Fall through.

523 case MachineRepresentation::kSimd1x4: // Fall through.	523 case MachineRepresentation::kSimd1x4: // Fall through.

524 case MachineRepresentation::kSimd1x8: // Fall through.	524 case MachineRepresentation::kSimd1x8: // Fall through.

525 case MachineRepresentation::kSimd1x16: // Fall through.	525 case MachineRepresentation::kSimd1x16: // Fall through.

526 case MachineRepresentation::kNone:	526 case MachineRepresentation::kNone:

527 UNREACHABLE();	527 UNREACHABLE();

528 return;	528 return;

529 }	529 }

530	530

531 InstructionOperand inputs[4];	531 InstructionOperand inputs[4];

532 size_t input_count = 0;	532 size_t input_count = 0;

533 inputs[input_count++] = g.UseRegister(value);	533 inputs[input_count++] = g.UseRegister(value);

534 inputs[input_count++] = g.UseRegister(base);	534 inputs[input_count++] = g.UseRegister(base);

535 EmitStore(this, opcode, input_count, inputs, index);	535 EmitStore(this, opcode, input_count, inputs, index);

536 }	536 }

537 }	537 }

538	538

539 void InstructionSelector::VisitProtectedStore(Node* node) {	539 void InstructionSelector::VisitProtectedStore(Node* node) {

540 // TODO(eholk)	540 // TODO(eholk)

541 UNIMPLEMENTED();	541 UNIMPLEMENTED();

542 }	542 }

543	543

544 void InstructionSelector::VisitUnalignedLoad(Node* node) {	544 void InstructionSelector::VisitUnalignedLoad(Node* node) {

545 UnalignedLoadRepresentation load_rep =	545 MachineRepresentation load_rep =

546 UnalignedLoadRepresentationOf(node->op());	546 UnalignedLoadRepresentationOf(node->op()).representation();

547 ArmOperandGenerator g(this);	547 ArmOperandGenerator g(this);

548 Node* base = node->InputAt(0);	548 Node* base = node->InputAt(0);

549 Node* index = node->InputAt(1);	549 Node* index = node->InputAt(1);

550	550

551 InstructionCode opcode = kArmLdr;	551 InstructionCode opcode = kArmLdr;

552 // Only floating point loads need to be specially handled; integer loads	552 // Only floating point loads need to be specially handled; integer loads

553 // support unaligned access. We support unaligned FP loads by loading to	553 // support unaligned access. We support unaligned FP loads by loading to

554 // integer registers first, then moving to the destination FP register.	554 // integer registers first, then moving to the destination FP register. If

555 switch (load_rep.representation()) {	555 // NEON is supported, we use the vld1.8 instruction.

	556 switch (load_rep) {

556 case MachineRepresentation::kFloat32: {	557 case MachineRepresentation::kFloat32: {

557 InstructionOperand temp = g.TempRegister();	558 InstructionOperand temp = g.TempRegister();

558 EmitLoad(this, opcode, &temp, base, index);	559 EmitLoad(this, opcode, &temp, base, index);

559 Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);	560 Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);

560 return;	561 return;

561 }	562 }

562 case MachineRepresentation::kFloat64: {	563 case MachineRepresentation::kFloat64:

563 // Compute the address of the least-significant half of the FP value.	564 case MachineRepresentation::kSimd128: {

	565 // Compute the address of the least-significant byte of the FP value.

564 // We assume that the base node is unlikely to be an encodable immediate	566 // We assume that the base node is unlikely to be an encodable immediate

565 // or the result of a shift operation, so only consider the addressing	567 // or the result of a shift operation, so only consider the addressing

566 // mode that should be used for the index node.	568 // mode that should be used for the index node.

567 InstructionCode add_opcode = kArmAdd;	569 InstructionCode add_opcode = kArmAdd;

568 InstructionOperand inputs[3];	570 InstructionOperand inputs[3];

569 inputs[0] = g.UseRegister(base);	571 inputs[0] = g.UseRegister(base);

570	572

571 size_t input_count;	573 size_t input_count;

572 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,	574 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,

573 &inputs[1])) {	575 &inputs[1])) {

574 // input_count has been set by TryMatchImmediateOrShift(), so	576 // input_count has been set by TryMatchImmediateOrShift(), so

575 // increment it to account for the base register in inputs[0].	577 // increment it to account for the base register in inputs[0].

576 input_count++;	578 input_count++;

577 } else {	579 } else {

578 add_opcode |= AddressingModeField::encode(kMode_Operand2_R);	580 add_opcode |= AddressingModeField::encode(kMode_Operand2_R);

579 inputs[1] = g.UseRegister(index);	581 inputs[1] = g.UseRegister(index);

580 input_count = 2; // Base register and index.	582 input_count = 2; // Base register and index.

581 }	583 }

582	584

583 InstructionOperand addr = g.TempRegister();	585 InstructionOperand addr = g.TempRegister();

584 Emit(add_opcode, 1, &addr, input_count, inputs);	586 Emit(add_opcode, 1, &addr, input_count, inputs);

585	587

586 if (CpuFeatures::IsSupported(NEON)) {	588 if (CpuFeatures::IsSupported(NEON)) {

587 // With NEON we can load directly from the calculated address.	589 // With NEON we can load directly from the calculated address.

588 Emit(kArmVld1F64, g.DefineAsRegister(node), addr);	590 ArchOpcode op = load_rep == MachineRepresentation::kFloat64

	591 ? kArmVld1F64

	592 : kArmVld1S128;

	593 Emit(op, g.DefineAsRegister(node), addr);

589 } else {	594 } else {

	595 DCHECK_NE(MachineRepresentation::kSimd128, load_rep);

590 // Load both halves and move to an FP register.	596 // Load both halves and move to an FP register.

591 InstructionOperand fp_lo = g.TempRegister();	597 InstructionOperand fp_lo = g.TempRegister();

592 InstructionOperand fp_hi = g.TempRegister();	598 InstructionOperand fp_hi = g.TempRegister();

593 opcode |= AddressingModeField::encode(kMode_Offset_RI);	599 opcode |= AddressingModeField::encode(kMode_Offset_RI);

594 Emit(opcode, fp_lo, addr, g.TempImmediate(0));	600 Emit(opcode, fp_lo, addr, g.TempImmediate(0));

595 Emit(opcode, fp_hi, addr, g.TempImmediate(4));	601 Emit(opcode, fp_hi, addr, g.TempImmediate(4));

596 Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi);	602 Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi);

597 }	603 }

598 return;	604 return;

599 }	605 }

(...skipping 12 matching lines...) Expand all Loading...
612	618

613 InstructionOperand inputs[4];	619 InstructionOperand inputs[4];

614 size_t input_count = 0;	620 size_t input_count = 0;

615	621

616 UnalignedStoreRepresentation store_rep =	622 UnalignedStoreRepresentation store_rep =

617 UnalignedStoreRepresentationOf(node->op());	623 UnalignedStoreRepresentationOf(node->op());

618	624

619 // Only floating point stores need to be specially handled; integer stores	625 // Only floating point stores need to be specially handled; integer stores

620 // support unaligned access. We support unaligned FP stores by moving the	626 // support unaligned access. We support unaligned FP stores by moving the

621 // value to integer registers first, then storing to the destination address.	627 // value to integer registers first, then storing to the destination address.

	628 // If NEON is supported, we use the vst1.8 instruction.

622 switch (store_rep) {	629 switch (store_rep) {

623 case MachineRepresentation::kFloat32: {	630 case MachineRepresentation::kFloat32: {

624 inputs[input_count++] = g.TempRegister();	631 inputs[input_count++] = g.TempRegister();

625 Emit(kArmVmovU32F32, inputs[0], g.UseRegister(value));	632 Emit(kArmVmovU32F32, inputs[0], g.UseRegister(value));

626 inputs[input_count++] = g.UseRegister(base);	633 inputs[input_count++] = g.UseRegister(base);

627 EmitStore(this, kArmStr, input_count, inputs, index);	634 EmitStore(this, kArmStr, input_count, inputs, index);

628 return;	635 return;

629 }	636 }

630 case MachineRepresentation::kFloat64: {	637 case MachineRepresentation::kFloat64:

	638 case MachineRepresentation::kSimd128: {

631 if (CpuFeatures::IsSupported(NEON)) {	639 if (CpuFeatures::IsSupported(NEON)) {

632 InstructionOperand address = g.TempRegister();	640 InstructionOperand address = g.TempRegister();

633 {	641 {

634 // First we have to calculate the actual address.	642 // First we have to calculate the actual address.

635 InstructionCode add_opcode = kArmAdd;	643 InstructionCode add_opcode = kArmAdd;

636 InstructionOperand inputs[3];	644 InstructionOperand inputs[3];

637 inputs[0] = g.UseRegister(base);	645 inputs[0] = g.UseRegister(base);

638	646

639 size_t input_count;	647 size_t input_count;

640 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,	648 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,

641 &inputs[1])) {	649 &inputs[1])) {

642 // input_count has been set by TryMatchImmediateOrShift(), so	650 // input_count has been set by TryMatchImmediateOrShift(), so

643 // increment it to account for the base register in inputs[0].	651 // increment it to account for the base register in inputs[0].

644 input_count++;	652 input_count++;

645 } else {	653 } else {

646 add_opcode |= AddressingModeField::encode(kMode_Operand2_R);	654 add_opcode |= AddressingModeField::encode(kMode_Operand2_R);

647 inputs[1] = g.UseRegister(index);	655 inputs[1] = g.UseRegister(index);

648 input_count = 2; // Base register and index.	656 input_count = 2; // Base register and index.

649 }	657 }

650	658

651 Emit(add_opcode, 1, &address, input_count, inputs);	659 Emit(add_opcode, 1, &address, input_count, inputs);

652 }	660 }

653	661

654 inputs[input_count++] = g.UseRegister(value);	662 inputs[input_count++] = g.UseRegister(value);

655 inputs[input_count++] = address;	663 inputs[input_count++] = address;

656 Emit(kArmVst1F64, 0, nullptr, input_count, inputs);	664 ArchOpcode op = store_rep == MachineRepresentation::kFloat64

	665 ? kArmVst1F64

	666 : kArmVst1S128;

	667 Emit(op, 0, nullptr, input_count, inputs);

657 } else {	668 } else {

	669 DCHECK_NE(MachineRepresentation::kSimd128, store_rep);

658 // Store a 64-bit floating point value using two 32-bit integer stores.	670 // Store a 64-bit floating point value using two 32-bit integer stores.

659 // Computing the store address here would require three live temporary	671 // Computing the store address here would require three live temporary

660 // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after	672 // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after

661 // storing the least-significant half of the value.	673 // storing the least-significant half of the value.

662	674

663 // First, move the 64-bit FP value into two temporary integer registers.	675 // First, move the 64-bit FP value into two temporary integer registers.

664 InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()};	676 InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()};

665 inputs[input_count++] = g.UseRegister(value);	677 inputs[input_count++] = g.UseRegister(value);

666 Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count, inputs);	678 Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count, inputs);

667	679

(...skipping 1836 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2504 Vector<MachineType> req_aligned = Vector<MachineType>::New(2);	2516 Vector<MachineType> req_aligned = Vector<MachineType>::New(2);

2505 req_aligned[0] = MachineType::Float32();	2517 req_aligned[0] = MachineType::Float32();

2506 req_aligned[1] = MachineType::Float64();	2518 req_aligned[1] = MachineType::Float64();

2507 return MachineOperatorBuilder::AlignmentRequirements::	2519 return MachineOperatorBuilder::AlignmentRequirements::

2508 SomeUnalignedAccessUnsupported(req_aligned, req_aligned);	2520 SomeUnalignedAccessUnsupported(req_aligned, req_aligned);

2509 }	2521 }

2510	2522

2511 } // namespace compiler	2523 } // namespace compiler

2512 } // namespace internal	2524 } // namespace internal

2513 } // namespace v8	2525 } // namespace v8

OLD	NEW

« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | no next file » | no next file with comments »