src/compiler/arm/instruction-selector-arm.cc - Issue 2769723003: [arm][turbofan] Use NEON for unaligned float64 memory accesses

Side by Side Diff: src/compiler/arm/instruction-selector-arm.cc

Issue 2769723003: [arm][turbofan] Use NEON for unaligned float64 memory accesses (Closed)

Patch Set: Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2014 the V8 project authors. All rights reserved.	1 // Copyright 2014 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/base/adapters.h"	5 #include "src/base/adapters.h"

6 #include "src/base/bits.h"	6 #include "src/base/bits.h"

7 #include "src/compiler/instruction-selector-impl.h"	7 #include "src/compiler/instruction-selector-impl.h"

8 #include "src/compiler/node-matchers.h"	8 #include "src/compiler/node-matchers.h"

9 #include "src/compiler/node-properties.h"	9 #include "src/compiler/node-properties.h"

10	10

(...skipping 542 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
553 // support unaligned access. We support unaligned FP loads by loading to	553 // support unaligned access. We support unaligned FP loads by loading to

554 // integer registers first, then moving to the destination FP register.	554 // integer registers first, then moving to the destination FP register.

555 switch (load_rep.representation()) {	555 switch (load_rep.representation()) {

556 case MachineRepresentation::kFloat32: {	556 case MachineRepresentation::kFloat32: {

557 InstructionOperand temp = g.TempRegister();	557 InstructionOperand temp = g.TempRegister();

558 EmitLoad(this, opcode, &temp, base, index);	558 EmitLoad(this, opcode, &temp, base, index);

559 Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);	559 Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);

560 return;	560 return;

561 }	561 }

562 case MachineRepresentation::kFloat64: {	562 case MachineRepresentation::kFloat64: {

563 // TODO(arm): use vld1.8 for this when NEON is available.	563 if (CpuFeatures::IsSupported(NEON)) {

564 // Compute the address of the least-significant half of the FP value.	564 InstructionOperand output = g.DefineAsRegister(node);

565 // We assume that the base node is unlikely to be an encodable immediate	565 EmitLoad(this, kArmVld1F64, &output, base, index);

566 // or the result of a shift operation, so only consider the addressing	566 } else {

567 // mode that should be used for the index node.	567 // Compute the address of the least-significant half of the FP value.

568 InstructionCode add_opcode = kArmAdd;	568 // We assume that the base node is unlikely to be an encodable immediate

569 InstructionOperand inputs[3];	569 // or the result of a shift operation, so only consider the addressing

570 inputs[0] = g.UseRegister(base);	570 // mode that should be used for the index node.

	571 InstructionCode add_opcode = kArmAdd;

	572 InstructionOperand inputs[3];

	573 inputs[0] = g.UseRegister(base);

571	574

572 size_t input_count;	575 size_t input_count;

573 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,	576 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,

574 &inputs[1])) {	577 &inputs[1])) {

575 // input_count has been set by TryMatchImmediateOrShift(), so increment	578 // input_count has been set by TryMatchImmediateOrShift(), so

576 // it to account for the base register in inputs[0].	579 // increment it to account for the base register in inputs[0].

577 input_count++;	580 input_count++;

578 } else {	581 } else {

579 add_opcode |= AddressingModeField::encode(kMode_Operand2_R);	582 add_opcode |= AddressingModeField::encode(kMode_Operand2_R);

580 inputs[1] = g.UseRegister(index);	583 inputs[1] = g.UseRegister(index);

581 input_count = 2; // Base register and index.	584 input_count = 2; // Base register and index.

	585 }

	586

	587 InstructionOperand addr = g.TempRegister();

	588 Emit(add_opcode, 1, &addr, input_count, inputs);

	589

	590 // Load both halves and move to an FP register.

	591 InstructionOperand fp_lo = g.TempRegister();

	592 InstructionOperand fp_hi = g.TempRegister();

	593 opcode |= AddressingModeField::encode(kMode_Offset_RI);

	594 Emit(opcode, fp_lo, addr, g.TempImmediate(0));

	595 Emit(opcode, fp_hi, addr, g.TempImmediate(4));

	596 Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi);

582 }	597 }

583

584 InstructionOperand addr = g.TempRegister();

585 Emit(add_opcode, 1, &addr, input_count, inputs);

586

587 // Load both halves and move to an FP register.

588 InstructionOperand fp_lo = g.TempRegister();

589 InstructionOperand fp_hi = g.TempRegister();

590 opcode |= AddressingModeField::encode(kMode_Offset_RI);

591 Emit(opcode, fp_lo, addr, g.TempImmediate(0));

592 Emit(opcode, fp_hi, addr, g.TempImmediate(4));

593 Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi);

594 return;	598 return;

595 }	599 }

596 default:	600 default:

597 // All other cases should support unaligned accesses.	601 // All other cases should support unaligned accesses.

598 UNREACHABLE();	602 UNREACHABLE();

599 return;	603 return;

600 }	604 }

601 }	605 }

602	606

603 void InstructionSelector::VisitUnalignedStore(Node* node) {	607 void InstructionSelector::VisitUnalignedStore(Node* node) {

(...skipping 13 matching lines...) Expand all Loading...
617 // value to integer registers first, then storing to the destination address.	621 // value to integer registers first, then storing to the destination address.

618 switch (store_rep) {	622 switch (store_rep) {

619 case MachineRepresentation::kFloat32: {	623 case MachineRepresentation::kFloat32: {

620 inputs[input_count++] = g.TempRegister();	624 inputs[input_count++] = g.TempRegister();

621 Emit(kArmVmovU32F32, inputs[0], g.UseRegister(value));	625 Emit(kArmVmovU32F32, inputs[0], g.UseRegister(value));

622 inputs[input_count++] = g.UseRegister(base);	626 inputs[input_count++] = g.UseRegister(base);

623 EmitStore(this, kArmStr, input_count, inputs, index);	627 EmitStore(this, kArmStr, input_count, inputs, index);

624 return;	628 return;

625 }	629 }

626 case MachineRepresentation::kFloat64: {	630 case MachineRepresentation::kFloat64: {

627 // TODO(arm): use vst1.8 for this when NEON is available.	631 if (CpuFeatures::IsSupported(NEON)) {

628 // Store a 64-bit floating point value using two 32-bit integer stores.	632 inputs[input_count++] = g.UseRegister(value);

629 // Computing the store address here would require three live temporary	633 inputs[input_count++] = g.UseRegister(base);

630 // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after	634 EmitStore(this, kArmVst1F64, input_count, inputs, index);

631 // storing the least-significant half of the value.	635 } else {

	636 // Store a 64-bit floating point value using two 32-bit integer stores.

	637 // Computing the store address here would require three live temporary

	638 // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after

	639 // storing the least-significant half of the value.

632	640

633 // First, move the 64-bit FP value into two temporary integer registers.	641 // First, move the 64-bit FP value into two temporary integer registers.

634 InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()};	642 InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()};

635 inputs[input_count++] = g.UseRegister(value);	643 inputs[input_count++] = g.UseRegister(value);

636 Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count,	644 Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count, inputs);

637 inputs);

638	645

639 // Store the least-significant half.	646 // Store the least-significant half.

640 inputs[0] = fp[0]; // Low 32-bits of FP value.	647 inputs[0] = fp[0]; // Low 32-bits of FP value.

641 inputs[input_count++] = g.UseRegister(base); // First store base address.	648 inputs[input_count++] =

642 EmitStore(this, kArmStr, input_count, inputs, index);	649 g.UseRegister(base); // First store base address.

	650 EmitStore(this, kArmStr, input_count, inputs, index);

643	651

644 // Store the most-significant half.	652 // Store the most-significant half.

645 InstructionOperand base4 = g.TempRegister();	653 InstructionOperand base4 = g.TempRegister();

646 Emit(kArmAdd | AddressingModeField::encode(kMode_Operand2_I), base4,	654 Emit(kArmAdd | AddressingModeField::encode(kMode_Operand2_I), base4,

647 g.UseRegister(base), g.TempImmediate(4)); // Compute base + 4.	655 g.UseRegister(base), g.TempImmediate(4)); // Compute base + 4.

648 inputs[0] = fp[1]; // High 32-bits of FP value.	656 inputs[0] = fp[1]; // High 32-bits of FP value.

649 inputs[1] = base4; // Second store base + 4 address.	657 inputs[1] = base4; // Second store base + 4 address.

650 EmitStore(this, kArmStr, input_count, inputs, index);	658 EmitStore(this, kArmStr, input_count, inputs, index);

	659 }

651 return;	660 return;

652 }	661 }

653 default:	662 default:

654 // All other cases should support unaligned accesses.	663 // All other cases should support unaligned accesses.

655 UNREACHABLE();	664 UNREACHABLE();

656 return;	665 return;

657 }	666 }

658 }	667 }

659	668

660 void InstructionSelector::VisitCheckedLoad(Node* node) {	669 void InstructionSelector::VisitCheckedLoad(Node* node) {

(...skipping 1812 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2473 Vector<MachineType> req_aligned = Vector<MachineType>::New(2);	2482 Vector<MachineType> req_aligned = Vector<MachineType>::New(2);

2474 req_aligned[0] = MachineType::Float32();	2483 req_aligned[0] = MachineType::Float32();

2475 req_aligned[1] = MachineType::Float64();	2484 req_aligned[1] = MachineType::Float64();

2476 return MachineOperatorBuilder::AlignmentRequirements::	2485 return MachineOperatorBuilder::AlignmentRequirements::

2477 SomeUnalignedAccessUnsupported(req_aligned, req_aligned);	2486 SomeUnalignedAccessUnsupported(req_aligned, req_aligned);

2478 }	2487 }

2479	2488

2480 } // namespace compiler	2489 } // namespace compiler

2481 } // namespace internal	2490 } // namespace internal

2482 } // namespace v8	2491 } // namespace v8

OLD	NEW

« src/compiler/arm/code-generator-arm.cc ('K') | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | no next file » | no next file with comments »