src/compiler/arm/instruction-selector-arm.cc - Issue 2769083003: [ARM] Make Simd 128 bit load/store more like existing load/store.

Unified Diff: src/compiler/arm/instruction-selector-arm.cc

Issue 2769083003: [ARM] Make Simd 128 bit load/store more like existing load/store. (Closed)

Patch Set: Created 3 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/compiler/arm/instruction-selector-arm.cc

diff --git a/src/compiler/arm/instruction-selector-arm.cc b/src/compiler/arm/instruction-selector-arm.cc

index 6f3957183f33d3b8b8e4700deaa726c55cb68076..c29afb87cab2baeb175349beb556dccd3f7ad264 100644

--- a/src/compiler/arm/instruction-selector-arm.cc

+++ b/src/compiler/arm/instruction-selector-arm.cc

@@ -427,7 +427,7 @@ void InstructionSelector::VisitLoad(Node* node) {

opcode = kArmLdr;

break;

case MachineRepresentation::kSimd128:

- opcode = kArmSimd128Load;

+ opcode = kArmVld1S128;

break;

case MachineRepresentation::kWord64: // Fall through.

case MachineRepresentation::kSimd1x4: // Fall through.

@@ -517,7 +517,7 @@ void InstructionSelector::VisitStore(Node* node) {

opcode = kArmStr;

break;

case MachineRepresentation::kSimd128:

- opcode = kArmSimd128Store;

+ opcode = kArmVst1S128;

break;

case MachineRepresentation::kWord64: // Fall through.

case MachineRepresentation::kSimd1x4: // Fall through.

@@ -542,8 +542,8 @@ void InstructionSelector::VisitProtectedStore(Node* node) {

}

void InstructionSelector::VisitUnalignedLoad(Node* node) {

- UnalignedLoadRepresentation load_rep =

- UnalignedLoadRepresentationOf(node->op());

+ MachineRepresentation load_rep =

+ UnalignedLoadRepresentationOf(node->op()).representation();

ArmOperandGenerator g(this);

Node* base = node->InputAt(0);

Node* index = node->InputAt(1);

@@ -551,16 +551,18 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {

InstructionCode opcode = kArmLdr;

// Only floating point loads need to be specially handled; integer loads

// support unaligned access. We support unaligned FP loads by loading to

- // integer registers first, then moving to the destination FP register.

- switch (load_rep.representation()) {

+ // integer registers first, then moving to the destination FP register. If

+ // NEON is supported, we use the vld1.8 instruction.

+ switch (load_rep) {

case MachineRepresentation::kFloat32: {

InstructionOperand temp = g.TempRegister();

EmitLoad(this, opcode, &temp, base, index);

Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);

return;

}

- case MachineRepresentation::kFloat64: {

- // Compute the address of the least-significant half of the FP value.

+ case MachineRepresentation::kFloat64:

+ case MachineRepresentation::kSimd128: {

+ // Compute the address of the least-significant byte of the loaded value.

// We assume that the base node is unlikely to be an encodable immediate

// or the result of a shift operation, so only consider the addressing

// mode that should be used for the index node.

@@ -585,8 +587,12 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {

if (CpuFeatures::IsSupported(NEON)) {

// With NEON we can load directly from the calculated address.

- Emit(kArmVld1F64, g.DefineAsRegister(node), addr);

+ ArchOpcode op = load_rep == MachineRepresentation::kFloat64

+ ? kArmVld1F64

+ : kArmVld1S128;

+ Emit(op, g.DefineAsRegister(node), addr);

} else {

+ DCHECK_NE(MachineRepresentation::kSimd128, load_rep);

// Load both halves and move to an FP register.

InstructionOperand fp_lo = g.TempRegister();

InstructionOperand fp_hi = g.TempRegister();

@@ -619,6 +625,7 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {

// Only floating point stores need to be specially handled; integer stores

// support unaligned access. We support unaligned FP stores by moving the

// value to integer registers first, then storing to the destination address.

+ // If NEON is supported, we use the vst1.8 instruction.

switch (store_rep) {

case MachineRepresentation::kFloat32: {

inputs[input_count++] = g.TempRegister();

@@ -627,7 +634,8 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {

EmitStore(this, kArmStr, input_count, inputs, index);

return;

}

- case MachineRepresentation::kFloat64: {

+ case MachineRepresentation::kFloat64:

+ case MachineRepresentation::kSimd128: {

if (CpuFeatures::IsSupported(NEON)) {

InstructionOperand address = g.TempRegister();

{

@@ -653,8 +661,12 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {

inputs[input_count++] = g.UseRegister(value);

inputs[input_count++] = address;

- Emit(kArmVst1F64, 0, nullptr, input_count, inputs);

+ ArchOpcode op = store_rep == MachineRepresentation::kFloat64

+ ? kArmVst1F64

+ : kArmVst1S128;

+ Emit(op, 0, nullptr, input_count, inputs);

} else {

+ DCHECK_NE(MachineRepresentation::kSimd128, store_rep);

// Store a 64-bit floating point value using two 32-bit integer stores.

// Computing the store address here would require three live temporary

// registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after

« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | no next file » | no next file with comments »