Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(113)

Unified Diff: src/compiler/arm/instruction-selector-arm.cc

Issue 2769083003: [ARM] Make Simd 128 bit load/store more like existing load/store. (Closed)
Patch Set: Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: src/compiler/arm/instruction-selector-arm.cc
diff --git a/src/compiler/arm/instruction-selector-arm.cc b/src/compiler/arm/instruction-selector-arm.cc
index 6f3957183f33d3b8b8e4700deaa726c55cb68076..c29afb87cab2baeb175349beb556dccd3f7ad264 100644
--- a/src/compiler/arm/instruction-selector-arm.cc
+++ b/src/compiler/arm/instruction-selector-arm.cc
@@ -427,7 +427,7 @@ void InstructionSelector::VisitLoad(Node* node) {
opcode = kArmLdr;
break;
case MachineRepresentation::kSimd128:
- opcode = kArmSimd128Load;
+ opcode = kArmVld1S128;
break;
case MachineRepresentation::kWord64: // Fall through.
case MachineRepresentation::kSimd1x4: // Fall through.
@@ -517,7 +517,7 @@ void InstructionSelector::VisitStore(Node* node) {
opcode = kArmStr;
break;
case MachineRepresentation::kSimd128:
- opcode = kArmSimd128Store;
+ opcode = kArmVst1S128;
break;
case MachineRepresentation::kWord64: // Fall through.
case MachineRepresentation::kSimd1x4: // Fall through.
@@ -542,8 +542,8 @@ void InstructionSelector::VisitProtectedStore(Node* node) {
}
void InstructionSelector::VisitUnalignedLoad(Node* node) {
- UnalignedLoadRepresentation load_rep =
- UnalignedLoadRepresentationOf(node->op());
+ MachineRepresentation load_rep =
+ UnalignedLoadRepresentationOf(node->op()).representation();
ArmOperandGenerator g(this);
Node* base = node->InputAt(0);
Node* index = node->InputAt(1);
@@ -551,16 +551,18 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {
InstructionCode opcode = kArmLdr;
// Only floating point loads need to be specially handled; integer loads
// support unaligned access. We support unaligned FP loads by loading to
- // integer registers first, then moving to the destination FP register.
- switch (load_rep.representation()) {
+ // integer registers first, then moving to the destination FP register. If
+ // NEON is supported, we use the vld1.8 instruction.
+ switch (load_rep) {
case MachineRepresentation::kFloat32: {
InstructionOperand temp = g.TempRegister();
EmitLoad(this, opcode, &temp, base, index);
Emit(kArmVmovF32U32, g.DefineAsRegister(node), temp);
return;
}
- case MachineRepresentation::kFloat64: {
- // Compute the address of the least-significant half of the FP value.
+ case MachineRepresentation::kFloat64:
+ case MachineRepresentation::kSimd128: {
+ // Compute the address of the least-significant byte of the FP value.
// We assume that the base node is unlikely to be an encodable immediate
// or the result of a shift operation, so only consider the addressing
// mode that should be used for the index node.
@@ -585,8 +587,12 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {
if (CpuFeatures::IsSupported(NEON)) {
// With NEON we can load directly from the calculated address.
- Emit(kArmVld1F64, g.DefineAsRegister(node), addr);
+ ArchOpcode op = load_rep == MachineRepresentation::kFloat64
+ ? kArmVld1F64
+ : kArmVld1S128;
+ Emit(op, g.DefineAsRegister(node), addr);
} else {
+ DCHECK_NE(MachineRepresentation::kSimd128, load_rep);
// Load both halves and move to an FP register.
InstructionOperand fp_lo = g.TempRegister();
InstructionOperand fp_hi = g.TempRegister();
@@ -619,6 +625,7 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
// Only floating point stores need to be specially handled; integer stores
// support unaligned access. We support unaligned FP stores by moving the
// value to integer registers first, then storing to the destination address.
+ // If NEON is supported, we use the vst1.8 instruction.
switch (store_rep) {
case MachineRepresentation::kFloat32: {
inputs[input_count++] = g.TempRegister();
@@ -627,7 +634,8 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
EmitStore(this, kArmStr, input_count, inputs, index);
return;
}
- case MachineRepresentation::kFloat64: {
+ case MachineRepresentation::kFloat64:
+ case MachineRepresentation::kSimd128: {
if (CpuFeatures::IsSupported(NEON)) {
InstructionOperand address = g.TempRegister();
{
@@ -653,8 +661,12 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
inputs[input_count++] = g.UseRegister(value);
inputs[input_count++] = address;
- Emit(kArmVst1F64, 0, nullptr, input_count, inputs);
+ ArchOpcode op = store_rep == MachineRepresentation::kFloat64
+ ? kArmVst1F64
+ : kArmVst1S128;
+ Emit(op, 0, nullptr, input_count, inputs);
} else {
+ DCHECK_NE(MachineRepresentation::kSimd128, store_rep);
// Store a 64-bit floating point value using two 32-bit integer stores.
// Computing the store address here would require three live temporary
// registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after
« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698