Chromium Code Reviews

Unified Diff: src/compiler/gap-resolver.cc

Issue 2410673002: [Turbofan] Add concept of FP register aliasing on ARM 32. (Closed)
Patch Set: Move helper fn / macro into OperandSet class. Created 4 years, 2 months ago
Index: src/compiler/gap-resolver.cc
diff --git a/src/compiler/gap-resolver.cc b/src/compiler/gap-resolver.cc
index 7b04198e818f2a8bc529079b25419305e1184e42..21188795b5f440b2902e1ce5e838026e548e6067 100644
--- a/src/compiler/gap-resolver.cc
+++ b/src/compiler/gap-resolver.cc
@@ -14,27 +14,119 @@ namespace compiler {
namespace {
+#define REP_BIT(rep) (1 << static_cast<int>(rep));
Jarin 2016/10/18 11:44:27 I think the semicolon at the end might be surprising.
bbudge 2016/10/19 00:32:20 Whoops. Done.
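For context on this thread: with the trailing semicolon, a use such as if (cond) bits |= REP_BIT(rep); else ... would expand into two statements and detach the else, the usual hazard with statement-like macros; the uses in this patch happen to be safe because the extra empty statement is harmless where it lands. A minimal sketch of the macro without the semicolon (matching the "Done" reply above):

  #define REP_BIT(rep) (1 << static_cast<int>(rep))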
+
+const int kFloat32Bit = REP_BIT(MachineRepresentation::kFloat32);
+const int kFloat64Bit = REP_BIT(MachineRepresentation::kFloat64);
+
inline bool Blocks(MoveOperands* move, InstructionOperand destination) {
- return move->Blocks(destination);
+ return !move->IsEliminated() && move->source().InterferesWith(destination);
}
+// Splits a FP move between two location operands into the equivalent series of
+// moves between smaller sub-operands, e.g. a double move to two single moves.
+// This helps reduce the number of cycles that would normally occur under FP
+// aliasing, and makes swaps much easier to implement.
+MoveOperands* Split(MoveOperands* move, MachineRepresentation smaller_rep,
+ ParallelMove* moves) {
+ DCHECK(!kSimpleFPAliasing);
Jarin 2016/10/18 11:44:27 STATIC_ASSERT?
bbudge 2016/10/19 00:32:20 Wouldn't that break non-Arm builds?
Jarin 2016/10/19 19:23:51 You are right, silly me.
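For context on this exchange: kSimpleFPAliasing is a per-architecture compile-time constant (false only where FP registers alias, e.g. arm), and this file is compiled for every target, so a static assert here would break the build on platforms whose code never reaches Split. A hedged sketch of the difference:

  // STATIC_ASSERT(!kSimpleFPAliasing);  // evaluated when the file is compiled; fails on targets where the constant is true
  // DCHECK(!kSimpleFPAliasing);         // evaluated only if Split() actually runs, which never happens on those targets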
+ const LocationOperand& src_loc = LocationOperand::cast(move->source());
+ const LocationOperand& dst_loc = LocationOperand::cast(move->destination());
+ MachineRepresentation dst_rep = dst_loc.representation();
+ DCHECK_NE(smaller_rep, dst_rep);
+ auto src_kind = src_loc.location_kind();
+ auto dst_kind = dst_loc.location_kind();
+
+ int base = -1;
+ int aliases = RegisterConfiguration::Turbofan()->GetAliases(
+ dst_rep, 0, smaller_rep, &base);
Jarin 2016/10/18 11:44:27 I think this code makes a lot of assumptions, which …
bbudge 2016/10/19 00:32:20 Yes, it's true I'm cheating by calling GetAliases
+ DCHECK(base::bits::IsPowerOfTwo32(aliases));
+
+ int src_index = -1;
+ const int kBaseRep = static_cast<int>(MachineRepresentation::kFloat32);
Jarin 2016/10/18 11:44:27 Nit: Should not this use MachineRepresentation::kF
bbudge 2016/10/19 00:32:20 Yep, but I changed the calculation to use ElementSizeLog2Of.
+ int slot_size = 1 << (static_cast<int>(smaller_rep) - kBaseRep);
Jarin 2016/10/18 11:44:27 Is this supposed to mean (element_size / kPointerSize)?
bbudge 2016/10/19 00:32:20 Changed to use ElementSizeLog2Of.
Jarin 2016/10/19 19:23:51 Actually, thinking about it more, I am not sure if
bbudge 2016/10/25 00:58:23 Agreed. Added DCHECK.
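For concreteness on the slot_size question (assuming kFloat32 and kFloat64 are adjacent MachineRepresentation values, which the subtraction above relies on): on a 32-bit target, smaller_rep == kFloat32 gives slot_size = 1 << 0 = 1 and smaller_rep == kFloat64 gives slot_size = 1 << 1 = 2, i.e. the fragment size measured in pointer-sized stack slots, matching the (element_size / kPointerSize) reading. A hedged sketch of the ElementSizeLog2Of form the replies refer to (not necessarily the exact final code):

  int slot_size = 1 << (ElementSizeLog2Of(smaller_rep) - kPointerSizeLog2);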
+ int src_step = 1;
+ if (src_kind == LocationOperand::REGISTER) {
+ src_index = src_loc.register_code() * aliases;
+ } else {
+ src_index = src_loc.index();
+ src_step = -slot_size;
Jarin 2016/10/18 11:44:27 To be honest, I was a bit confused how the indexing …
bbudge 2016/10/19 00:32:20 I observed that double-aligned slots on Arm are al
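Reading the stack-slot case purely from the code above: the fragments walk downward from the operand's index in steps of slot_size, mirroring how the register case walks upward through the aliasing registers. A hedged worked example, assuming a 32-bit target so a kFloat32 fragment occupies one slot:

  // destination: kFloat64 stack slot, index 6; smaller_rep = kFloat32, slot_size = 1
  // fragment 0:  kFloat32 stack slot, index 6
  // fragment 1:  kFloat32 stack slot, index 5   (dst_index += dst_step, with dst_step = -slot_size = -1)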
+ }
+ int dst_index = -1;
+ int dst_step = 1;
+ if (dst_kind == LocationOperand::REGISTER) {
+ dst_index = dst_loc.register_code() * aliases;
+ } else {
+ dst_index = dst_loc.index();
+ dst_step = -slot_size;
+ }
-inline bool IsRedundant(MoveOperands* move) { return move->IsRedundant(); }
+ // Reuse 'move' for the first fragment. It is not pending.
+ move->set_source(AllocatedOperand(src_kind, smaller_rep, src_index));
+ move->set_destination(AllocatedOperand(dst_kind, smaller_rep, dst_index));
+ // Add the remaining fragment moves.
+ for (int i = 1; i < aliases; ++i) {
+ src_index += src_step;
+ dst_index += dst_step;
+ moves->AddMove(AllocatedOperand(src_kind, smaller_rep, src_index),
+ AllocatedOperand(dst_kind, smaller_rep, dst_index));
+ }
Jarin 2016/10/18 11:44:27 Overall, I am wondering about the benefit of moving …
bbudge 2016/10/19 00:32:20 Yes, it's going to be at least 2x worse. At least
+ // Return the first fragment.
+ return move;
+}
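To make Split concrete, here is a hedged walk-through for arm, where each double register dN aliases the singles s2N and s2N+1 (register codes chosen for illustration only): splitting the kFloat64 register move d2 <- d7 with smaller_rep == kFloat32 gets aliases == 2 from GetAliases, src_index = 7 * 2 = 14 and dst_index = 2 * 2 = 4, so the original move is rewritten in place and one extra move is appended:

  d2 <- d7          // original kFloat64 move
  s4 <- s14         // 'move' reused as the first kFloat32 fragment
  s5 <- s15         // second fragment appended to 'moves'

The fragments cover exactly the same bits as the original move, just in split_rep_-sized pieces.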
} // namespace
+void GapResolver::Resolve(ParallelMove* moves) {
+ // Clear redundant moves, and collect FP move representations if aliasing
+ // is non-simple.
+ int reps = 0;
+ for (size_t i = 0; i < moves->size();) {
+ MoveOperands* move = (*moves)[i];
+ if (move->IsRedundant()) {
+ (*moves)[i] = moves->back();
+ moves->pop_back();
+ continue;
+ }
+ i++;
+ if (!kSimpleFPAliasing && move->destination().IsFPRegister()) {
+ reps |=
+ REP_BIT(LocationOperand::cast(move->destination()).representation());
+ }
+ }
+
+ if (!kSimpleFPAliasing) {
+ if (reps && !base::bits::IsPowerOfTwo32(reps)) {
+ // Start with the smallest FP moves, so we never encounter smaller moves
+ // in the middle of a cycle of larger moves.
+ if ((reps & kFloat32Bit) != 0) {
+ split_rep_ = MachineRepresentation::kFloat32;
+ for (size_t i = 0; i < moves->size(); ++i) {
+ auto move = (*moves)[i];
+ if (!move->IsEliminated() && move->destination().IsFloatRegister())
+ PerformMove(moves, move);
+ }
+ }
+ if ((reps & kFloat64Bit) != 0) {
+ split_rep_ = MachineRepresentation::kFloat64;
+ for (size_t i = 0; i < moves->size(); ++i) {
+ auto move = (*moves)[i];
+ if (!move->IsEliminated() && move->destination().IsDoubleRegister())
+ PerformMove(moves, move);
+ }
+ }
+ }
+ split_rep_ = MachineRepresentation::kSimd128;
+ }
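For context on this preamble: reps is a bitmask of the FP representations seen as move destinations, so the extra passes only run when at least two different FP sizes are present; a single set bit is a power of two and is skipped, since same-size moves cannot form the mixed-width cycles that splitting is meant to break. Illustrative values under the constants defined at the top of the file:

  only kFloat64 destinations   -> reps == kFloat64Bit                 (one bit set, pre-passes skipped)
  kFloat32 and kFloat64 mixed  -> reps == kFloat32Bit | kFloat64Bit   (two bits set, both pre-passes run)

Running the kFloat32 pass before the kFloat64 pass means any wider move encountered mid-traversal is split down to split_rep_, rather than a narrower move ever being found inside a cycle of wider ones.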
-void GapResolver::Resolve(ParallelMove* moves) const {
- // Clear redundant moves.
- auto it =
- std::remove_if(moves->begin(), moves->end(), std::ptr_fun(IsRedundant));
- moves->erase(it, moves->end());
- for (MoveOperands* move : *moves) {
+ for (size_t i = 0; i < moves->size(); ++i) {
+ auto move = (*moves)[i];
if (!move->IsEliminated()) PerformMove(moves, move);
}
}
-void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
+void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) {
// Each call to this function performs a move and deletes it from the move
// graph. We first recursively perform any move blocking this one. We mark a
// move as "pending" on entry to PerformMove in order to detect cycles in the
@@ -45,15 +137,32 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
// Clear this move's destination to indicate a pending move. The actual
// destination is saved on the side.
- DCHECK(!move->source().IsInvalid()); // Or else it will look eliminated.
+ InstructionOperand source = move->source();
+ DCHECK(!source.IsInvalid()); // Or else it will look eliminated.
InstructionOperand destination = move->destination();
move->SetPending();
+ // We may need to split moves between FP locations differently.
+ bool is_fp_loc_move = !kSimpleFPAliasing && destination.IsFPLocationOperand();
+
// Perform a depth-first traversal of the move graph to resolve dependencies.
// Any unperformed, unpending move with a source the same as this one's
// destination blocks this one so recursively perform all such moves.
- for (MoveOperands* other : *moves) {
- if (other->Blocks(destination) && !other->IsPending()) {
+ for (size_t i = 0; i < moves->size(); ++i) {
+ auto other = (*moves)[i];
+ if (other->IsEliminated()) continue;
+ if (other->IsPending()) continue;
+ if (other->source().InterferesWith(destination)) {
+ if (!kSimpleFPAliasing && is_fp_loc_move &&
+ LocationOperand::cast(other->source()).representation() >
+ split_rep_) {
+ // 'other' must also be an FP location move. Break it into fragments
+ // of the same size as 'move'. 'other' is set to one of the fragments,
+ // and the rest are appended to 'moves'.
+ other = Split(other, split_rep_, moves);
+ // 'other' may not block destination now.
+ if (!other->source().InterferesWith(destination)) continue;
+ }
// Though PerformMove can change any source operand in the move graph,
// this call cannot create a blocking move via a swap (this loop does not
// miss any). Assume there is a non-blocking move with source A and this
@@ -67,18 +176,18 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
}
}
- // We are about to resolve this move and don't need it marked as pending, so
- // restore its destination.
- move->set_destination(destination);
-
// This move's source may have changed due to swaps to resolve cycles and so
// it may now be the last move in the cycle. If so remove it.
- InstructionOperand source = move->source();
- if (source.InterferesWith(destination)) {
+ source = move->source();
+ if (source.EqualsCanonicalized(destination)) {
move->Eliminate();
return;
}
+ // We are about to resolve this move and don't need it marked as pending, so
+ // restore its destination.
+ move->set_destination(destination);
+
// The move may be blocked on a (at most one) pending move, in which case we
// have a cycle. Search for such a blocking move and perform a swap to
// resolve it.
@@ -91,7 +200,6 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
return;
}
- DCHECK((*blocker)->IsPending());
// Ensure source is a register or both are stack slots, to limit swap cases.
if (source.IsStackSlot() || source.IsFPStackSlot()) {
std::swap(source, destination);
@@ -99,14 +207,36 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
assembler_->AssembleSwap(&source, &destination);
move->Eliminate();
- // Any unperformed (including pending) move with a source of either this
- // move's source or destination needs to have their source changed to
- // reflect the state of affairs after the swap.
- for (MoveOperands* other : *moves) {
- if (other->Blocks(source)) {
- other->set_source(destination);
- } else if (other->Blocks(destination)) {
- other->set_source(source);
+ // Update outstanding moves whose source may now have been moved.
+ if (!kSimpleFPAliasing && is_fp_loc_move) {
+ // We may have to split larger moves.
+ for (size_t i = 0; i < moves->size(); ++i) {
+ auto other = (*moves)[i];
+ if (other->IsEliminated()) continue;
+ if (source.InterferesWith(other->source())) {
+ if (LocationOperand::cast(other->source()).representation() >
+ split_rep_) {
+ other = Split(other, split_rep_, moves);
+ if (!source.InterferesWith(other->source())) continue;
+ }
+ other->set_source(destination);
+ } else if (destination.InterferesWith(other->source())) {
+ if (LocationOperand::cast(other->source()).representation() >
+ split_rep_) {
+ other = Split(other, split_rep_, moves);
+ if (!destination.InterferesWith(other->source())) continue;
+ }
+ other->set_source(source);
+ }
+ }
+ } else {
+ for (auto other : *moves) {
+ if (other->IsEliminated()) continue;
+ if (source.EqualsCanonicalized(other->source())) {
+ other->set_source(destination);
+ } else if (destination.EqualsCanonicalized(other->source())) {
+ other->set_source(source);
+ }
}
}
}
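As a hedged end-to-end illustration (again assuming arm aliasing, d0 = {s0, s1} and d1 = {s2, s3}), consider the parallel move { s2 <- s0, d0 <- d1 }. The kFloat32 pass starts with s2 <- s0; the double move interferes (d1 contains s2) and is wider than split_rep_, so it is split into s0 <- s2 and s1 <- s3. The fragment s0 <- s2 then forms a two-element cycle with s2 <- s0, which is resolved with a single same-width swap, and the remaining fragment is emitted as an ordinary move:

  AssembleSwap(s2, s0)    // resolves the cycle between s2 <- s0 and the fragment s0 <- s2
  AssembleMove(s3, s1)    // remaining fragment, s1 <- s3 (arguments in source, destination order)

Without the split, the same cycle would involve operands of two different widths, which is exactly the case the comment on Split says is hard to lower to a swap.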
