Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(78)

Unified Diff: src/compiler/arm/instruction-selector-arm.cc

Issue 2769723003: [arm][turbofan] Use NEON for unaligned float64 memory accesses (Closed)
Patch Set: Calculate the address separately Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/compiler/arm/instruction-selector-arm.cc
diff --git a/src/compiler/arm/instruction-selector-arm.cc b/src/compiler/arm/instruction-selector-arm.cc
index fb04eb1cf44c929eebdb9899e9a25a1db8a42a44..6f3957183f33d3b8b8e4700deaa726c55cb68076 100644
--- a/src/compiler/arm/instruction-selector-arm.cc
+++ b/src/compiler/arm/instruction-selector-arm.cc
@@ -560,7 +560,6 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {
return;
}
case MachineRepresentation::kFloat64: {
- // TODO(arm): use vld1.8 for this when NEON is available.
// Compute the address of the least-significant half of the FP value.
// We assume that the base node is unlikely to be an encodable immediate
// or the result of a shift operation, so only consider the addressing
@@ -572,8 +571,8 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {
size_t input_count;
if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,
&inputs[1])) {
- // input_count has been set by TryMatchImmediateOrShift(), so increment
- // it to account for the base register in inputs[0].
+ // input_count has been set by TryMatchImmediateOrShift(), so
+ // increment it to account for the base register in inputs[0].
input_count++;
} else {
add_opcode |= AddressingModeField::encode(kMode_Operand2_R);
@@ -584,13 +583,18 @@ void InstructionSelector::VisitUnalignedLoad(Node* node) {
InstructionOperand addr = g.TempRegister();
Emit(add_opcode, 1, &addr, input_count, inputs);
- // Load both halves and move to an FP register.
- InstructionOperand fp_lo = g.TempRegister();
- InstructionOperand fp_hi = g.TempRegister();
- opcode |= AddressingModeField::encode(kMode_Offset_RI);
- Emit(opcode, fp_lo, addr, g.TempImmediate(0));
- Emit(opcode, fp_hi, addr, g.TempImmediate(4));
- Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi);
+ if (CpuFeatures::IsSupported(NEON)) {
+ // With NEON we can load directly from the calculated address.
+ Emit(kArmVld1F64, g.DefineAsRegister(node), addr);
+ } else {
+ // Load both halves and move to an FP register.
+ InstructionOperand fp_lo = g.TempRegister();
+ InstructionOperand fp_hi = g.TempRegister();
+ opcode |= AddressingModeField::encode(kMode_Offset_RI);
+ Emit(opcode, fp_lo, addr, g.TempImmediate(0));
+ Emit(opcode, fp_hi, addr, g.TempImmediate(4));
+ Emit(kArmVmovF64U32U32, g.DefineAsRegister(node), fp_lo, fp_hi);
+ }
return;
}
default:
@@ -624,30 +628,57 @@ void InstructionSelector::VisitUnalignedStore(Node* node) {
return;
}
case MachineRepresentation::kFloat64: {
- // TODO(arm): use vst1.8 for this when NEON is available.
- // Store a 64-bit floating point value using two 32-bit integer stores.
- // Computing the store address here would require three live temporary
- // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after
- // storing the least-significant half of the value.
-
- // First, move the 64-bit FP value into two temporary integer registers.
- InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()};
- inputs[input_count++] = g.UseRegister(value);
- Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count,
- inputs);
-
- // Store the least-significant half.
- inputs[0] = fp[0]; // Low 32-bits of FP value.
- inputs[input_count++] = g.UseRegister(base); // First store base address.
- EmitStore(this, kArmStr, input_count, inputs, index);
+ if (CpuFeatures::IsSupported(NEON)) {
+ InstructionOperand address = g.TempRegister();
+ {
+ // First we have to calculate the actual address.
+ InstructionCode add_opcode = kArmAdd;
+ InstructionOperand inputs[3];
+ inputs[0] = g.UseRegister(base);
+
+ size_t input_count;
+ if (TryMatchImmediateOrShift(this, &add_opcode, index, &input_count,
+ &inputs[1])) {
+ // input_count has been set by TryMatchImmediateOrShift(), so
+ // increment it to account for the base register in inputs[0].
+ input_count++;
+ } else {
+ add_opcode |= AddressingModeField::encode(kMode_Operand2_R);
+ inputs[1] = g.UseRegister(index);
+ input_count = 2; // Base register and index.
+ }
- // Store the most-significant half.
- InstructionOperand base4 = g.TempRegister();
- Emit(kArmAdd | AddressingModeField::encode(kMode_Operand2_I), base4,
- g.UseRegister(base), g.TempImmediate(4)); // Compute base + 4.
- inputs[0] = fp[1]; // High 32-bits of FP value.
- inputs[1] = base4; // Second store base + 4 address.
- EmitStore(this, kArmStr, input_count, inputs, index);
+ Emit(add_opcode, 1, &address, input_count, inputs);
+ }
+
+ inputs[input_count++] = g.UseRegister(value);
+ inputs[input_count++] = address;
+ Emit(kArmVst1F64, 0, nullptr, input_count, inputs);
+ } else {
+ // Store a 64-bit floating point value using two 32-bit integer stores.
+ // Computing the store address here would require three live temporary
+ // registers (fp<63:32>, fp<31:0>, address), so compute base + 4 after
+ // storing the least-significant half of the value.
+
+ // First, move the 64-bit FP value into two temporary integer registers.
+ InstructionOperand fp[] = {g.TempRegister(), g.TempRegister()};
+ inputs[input_count++] = g.UseRegister(value);
+ Emit(kArmVmovU32U32F64, arraysize(fp), fp, input_count, inputs);
+
+ // Store the least-significant half.
+ inputs[0] = fp[0]; // Low 32-bits of FP value.
+ inputs[input_count++] =
+ g.UseRegister(base); // First store base address.
+ EmitStore(this, kArmStr, input_count, inputs, index);
+
+ // Store the most-significant half.
+ InstructionOperand base4 = g.TempRegister();
+ Emit(kArmAdd | AddressingModeField::encode(kMode_Operand2_I), base4,
+ g.UseRegister(base), g.TempImmediate(4)); // Compute base + 4.
+ inputs[0] = fp[1]; // High 32-bits of FP value.
+ inputs[1] = base4; // Second store base + 4 address.
+ EmitStore(this, kArmStr, input_count, inputs, index);
+ }
return;
}
default:
« no previous file with comments | « src/compiler/arm/instruction-scheduler-arm.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698