src/compiler/arm/instruction-selector-arm.cc - Issue 2769723003: [arm][turbofan] Use NEON for unaligned float64 memory accesses

Side by Side Diff: src/compiler/arm/instruction-selector-arm.cc

Issue 2769723003: [arm][turbofan] Use NEON for unaligned float64 memory accesses (Closed)

Patch Set: Calculate the address separately Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2014 the V8 project authors. All rights reserved.	1 // Copyright 2014 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "src/base/adapters.h"	5 #include "src/base/adapters.h"

6 #include "src/base/bits.h"	6 #include "src/base/bits.h"

7 #include "src/compiler/instruction-selector-impl.h"	7 #include "src/compiler/instruction-selector-impl.h"

8 #include "src/compiler/node-matchers.h"	8 #include "src/compiler/node-matchers.h"

9 #include "src/compiler/node-properties.h"	9 #include "src/compiler/node-properties.h"

10	10

(...skipping 542 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
553 // support unaligned access. We support unaligned FP loads by loading to	553 // support unaligned access. We support unaligned FP loads by loading to

554 // integer registers first, then moving to the destination FP register.	554 // integer registers first, then moving to the destination FP register.

555 switch (load_rep.representation()) {	555 switch (load_rep.representation()) {

556 case MachineRepresentation::kFloat32: {	556 case MachineRepresentation::kFloat32: {

557 InstructionOperand temp = g.TempRegister();	557 InstructionOperand temp = g.TempRegister();

558 EmitLoad(this, opcode, &temp, base, index);	558 EmitLoad(this, opcode, &temp, base, index);

559 Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);	559 Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);

560 return;	560 return;

561 }	561 }

562 case MachineRepresentation::kFloat64: {	562 case MachineRepresentation::kFloat64: {

563 // TODO(arm): use vld1.8 for this when NEON is available.

564 // Compute the address of the least-significant half of the FP value.	563 // Compute the address of the least-significant half of the FP value.

565 // We assume that the base node is unlikely to be an encodable immediate	564 // We assume that the base node is unlikely to be an encodable immediate

566 // or the result of a shift operation, so only consider the addressing	565 // or the result of a shift operation, so only consider the addressing

567 // mode that should be used for the index node.	566 // mode that should be used for the index node.

568 InstructionCode add_opcode = kArmAdd;	567 InstructionCode add_opcode = kArmAdd;

569 InstructionOperand inputs[3];	568 InstructionOperand inputs[3];

570 inputs[0] = g.UseRegister(base);	569 inputs[0] = g.UseRegister(base);

571	570

572 size_t input_count;	571 size_t input_count;

573 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,	572 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,

574 &inputs[1])) {	573 &inputs[1])) {

575 // input_count has been set by TryMatchImmediateOrShift(), so increment	574 // input_count has been set by TryMatchImmediateOrShift(), so

576 // it to account for the base register in inputs[0].	575 // increment it to account for the base register in inputs[0].

577 input_count++;	576 input_count++;

578 } else {	577 } else {

579 add_opcode |= AddressingModeField::encode(kMode_Operand2_R);	578 add_opcode |= AddressingModeField::encode(kMode_Operand2_R);

580 inputs[1] = g.UseRegister(index);	579 inputs[1] = g.UseRegister(index);

581 input_count = 2; // Base register and index.	580 input_count = 2; // Base register and index.

582 }	581 }

583	582

584 InstructionOperand addr = g.TempRegister();	583 InstructionOperand addr = g.TempRegister();

585 Emit(add_opcode, 1, &addr, input_count, inputs);	584 Emit(add_opcode, 1, &addr, input_count, inputs);

586	585

587 // Load both halves and move to an FP register.	586 if (CpuFeatures::IsSupported(NEON)) {

588 InstructionOperand fp_lo = g.TempRegister();	587 // With NEON we can load directly from the calculated address.

589 InstructionOperand fp_hi = g.TempRegister();	588 Emit(kArmVld1F64, g.DefineAsRegister(node), addr);

590 opcode |= AddressingModeField::encode(kMode_Offset_RI);	589 } else {

591 Emit(opcode, fp_lo, addr, g.TempImmediate(0));	590 // Load both halves and move to an FP register.

592 Emit(opcode, fp_hi, addr, g.TempImmediate(4));	591 InstructionOperand fp_lo = g.TempRegister();

593 Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi);	592 InstructionOperand fp_hi = g.TempRegister();

	593 opcode |= AddressingModeField::encode(kMode_Offset_RI);

	594 Emit(opcode, fp_lo, addr, g.TempImmediate(0));

	595 Emit(opcode, fp_hi, addr, g.TempImmediate(4));

	596 Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi);

	597 }

594 return;	598 return;

595 }	599 }

596 default:	600 default:

597 // All other cases should support unaligned accesses.	601 // All other cases should support unaligned accesses.

598 UNREACHABLE();	602 UNREACHABLE();

599 return;	603 return;

600 }	604 }

601 }	605 }

602	606

603 void InstructionSelector::VisitUnalignedStore(Node* node) {	607 void InstructionSelector::VisitUnalignedStore(Node* node) {

(...skipping 13 matching lines...) Expand all Loading...
617 // value to integer registers first, then storing to the destination address.	621 // value to integer registers first, then storing to the destination address.

618 switch (store_rep) {	622 switch (store_rep) {

619 case MachineRepresentation::kFloat32: {	623 case MachineRepresentation::kFloat32: {

620 inputs[input_count++] = g.TempRegister();	624 inputs[input_count++] = g.TempRegister();

621 Emit(kArmVmovU32F32, inputs[0], g.UseRegister(value));	625 Emit(kArmVmovU32F32, inputs[0], g.UseRegister(value));

622 inputs[input_count++] = g.UseRegister(base);	626 inputs[input_count++] = g.UseRegister(base);

623 EmitStore(this, kArmStr, input_count, inputs, index);	627 EmitStore(this, kArmStr, input_count, inputs, index);

624 return;	628 return;

625 }	629 }

626 case MachineRepresentation::kFloat64: {	630 case MachineRepresentation::kFloat64: {

627 // TODO(arm): use vst1.8 for this when NEON is available.	631 if (CpuFeatures::IsSupported(NEON)) {

628 // Store a 64-bit floating point value using two 32-bit integer stores.	632 InstructionOperand address = g.TempRegister();

629 // Computing the store address here would require three live temporary	633 {

630 // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after	634 // First we have to calculate the actual address.

631 // storing the least-significant half of the value.	635 InstructionCode add_opcode = kArmAdd;

	636 InstructionOperand inputs[3];

	637 inputs[0] = g.UseRegister(base);

632	638

633 // First, move the 64-bit FP value into two temporary integer registers.	639 size_t input_count;

634 InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()};	640 if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,

635 inputs[input_count++] = g.UseRegister(value);	641 &inputs[1])) {

636 Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count,	642 // input_count has been set by TryMatchImmediateOrShift(), so

637 inputs);	643 // increment it to account for the base register in inputs[0].

	644 input_count++;

	645 } else {

	646 add_opcode |= AddressingModeField::encode(kMode_Operand2_R);

	647 inputs[1] = g.UseRegister(index);

	648 input_count = 2; // Base register and index.

	649 }

638	650

639 // Store the least-significant half.	651 Emit(add_opcode, 1, &address, input_count, inputs);

640 inputs[0] = fp[0]; // Low 32-bits of FP value.	652 }

641 inputs[input_count++] = g.UseRegister(base); // First store base address.

642 EmitStore(this, kArmStr, input_count, inputs, index);

643	653

644 // Store the most-significant half.	654 inputs[input_count++] = g.UseRegister(value);

645 InstructionOperand base4 = g.TempRegister();	655 inputs[input_count++] = address;

646 Emit(kArmAdd | AddressingModeField::encode(kMode_Operand2_I), base4,	656 Emit(kArmVst1F64, 0, nullptr, input_count, inputs);

647 g.UseRegister(base), g.TempImmediate(4)); // Compute base + 4.	657 } else {

648 inputs[0] = fp[1]; // High 32-bits of FP value.	658 // Store a 64-bit floating point value using two 32-bit integer stores.

649 inputs[1] = base4; // Second store base + 4 address.	659 // Computing the store address here would require three live temporary

650 EmitStore(this, kArmStr, input_count, inputs, index);	660 // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after

	661 // storing the least-significant half of the value.

	662

	663 // First, move the 64-bit FP value into two temporary integer registers.

	664 InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()};

	665 inputs[input_count++] = g.UseRegister(value);

	666 Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count, inputs);

	667

	668 // Store the least-significant half.

	669 inputs[0] = fp[0]; // Low 32-bits of FP value.

	670 inputs[input_count++] =

	671 g.UseRegister(base); // First store base address.

	672 EmitStore(this, kArmStr, input_count, inputs, index);

	673

	674 // Store the most-significant half.

	675 InstructionOperand base4 = g.TempRegister();

	676 Emit(kArmAdd | AddressingModeField::encode(kMode_Operand2_I), base4,

	677 g.UseRegister(base), g.TempImmediate(4)); // Compute base + 4.

	678 inputs[0] = fp[1]; // High 32-bits of FP value.

	679 inputs[1] = base4; // Second store base + 4 address.

	680 EmitStore(this, kArmStr, input_count, inputs, index);

	681 }

651 return;	682 return;

652 }	683 }

653 default:	684 default:

654 // All other cases should support unaligned accesses.	685 // All other cases should support unaligned accesses.

655 UNREACHABLE();	686 UNREACHABLE();

656 return;	687 return;

657 }	688 }

658 }	689 }

659	690

660 void InstructionSelector::VisitCheckedLoad(Node* node) {	691 void InstructionSelector::VisitCheckedLoad(Node* node) {

(...skipping 1812 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
2473 Vector<MachineType> req_aligned = Vector<MachineType>::New(2);	2504 Vector<MachineType> req_aligned = Vector<MachineType>::New(2);

2474 req_aligned[0] = MachineType::Float32();	2505 req_aligned[0] = MachineType::Float32();

2475 req_aligned[1] = MachineType::Float64();	2506 req_aligned[1] = MachineType::Float64();

2476 return MachineOperatorBuilder::AlignmentRequirements::	2507 return MachineOperatorBuilder::AlignmentRequirements::

2477 SomeUnalignedAccessUnsupported(req_aligned, req_aligned);	2508 SomeUnalignedAccessUnsupported(req_aligned, req_aligned);

2478 }	2509 }

2479	2510

2480 } // namespace compiler	2511 } // namespace compiler

2481 } // namespace internal	2512 } // namespace internal

2482 } // namespace v8	2513 } // namespace v8

OLD	NEW

« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | no next file » | no next file with comments »