Index: src/compiler/gap-resolver.cc
diff --git a/src/compiler/gap-resolver.cc b/src/compiler/gap-resolver.cc
index 7b04198e818f2a8bc529079b25419305e1184e42..1ba1044eabf076cba069dfc568113fe2e7687a32 100644
--- a/src/compiler/gap-resolver.cc
+++ b/src/compiler/gap-resolver.cc
@@ -14,27 +14,124 @@ namespace compiler {
 namespace {
+#define REP_BIT(rep) (1 << static_cast<int>(rep))
+
+const int kFloat32Bit = REP_BIT(MachineRepresentation::kFloat32);
+const int kFloat64Bit = REP_BIT(MachineRepresentation::kFloat64);
+
 inline bool Blocks(MoveOperands* move, InstructionOperand destination) {
-  return move->Blocks(destination);
+  return !move->IsEliminated() && move->source().InterferesWith(destination);
 }
+// Splits a FP move between two location operands into the equivalent series of
+// moves between smaller sub-operands, e.g. a double move to two single moves.
+// This helps reduce the number of cycles that would normally occur under FP
+// aliasing, and makes swaps much easier to implement.
+MoveOperands* Split(MoveOperands* move, MachineRepresentation smaller_rep,
+                    ParallelMove* moves) {
+  DCHECK(!kSimpleFPAliasing);
+  // Splitting is only possible when the slot size is the same as float size.
+  DCHECK_EQ(kPointerSize, kFloatSize);
+  const LocationOperand& src_loc = LocationOperand::cast(move->source());
+  const LocationOperand& dst_loc = LocationOperand::cast(move->destination());
+  MachineRepresentation dst_rep = dst_loc.representation();
+  DCHECK_NE(smaller_rep, dst_rep);
+  auto src_kind = src_loc.location_kind();
+  auto dst_kind = dst_loc.location_kind();
+
+  int aliases =
+      1 << (ElementSizeLog2Of(dst_rep) - ElementSizeLog2Of(smaller_rep));
+  int base = -1;
+  USE(base);
+  DCHECK_EQ(aliases, RegisterConfiguration::Turbofan()->GetAliases(
+                         dst_rep, 0, smaller_rep, &base));
+
+  int src_index = -1;
+  int slot_size = (1 << ElementSizeLog2Of(smaller_rep)) / kPointerSize;
+  int src_step = 1;
+  if (src_kind == LocationOperand::REGISTER) {
+    src_index = src_loc.register_code() * aliases;
+  } else {
+    src_index = src_loc.index();
+    // For operands that occupy multiple slots, the index refers to the last
+    // slot. On little-endian architectures, we start at the high slot and use
+    // a negative step so that register-to-slot moves are in the correct order.
+    src_step = -slot_size;
+  }
+  int dst_index = -1;
+  int dst_step = 1;
+  if (dst_kind == LocationOperand::REGISTER) {
+    dst_index = dst_loc.register_code() * aliases;
+  } else {
+    dst_index = dst_loc.index();
+    dst_step = -slot_size;
+  }
-inline bool IsRedundant(MoveOperands* move) { return move->IsRedundant(); }
+  // Reuse 'move' for the first fragment. It is not pending.
+  move->set_source(AllocatedOperand(src_kind, smaller_rep, src_index));
+  move->set_destination(AllocatedOperand(dst_kind, smaller_rep, dst_index));
+  // Add the remaining fragment moves.
+  for (int i = 1; i < aliases; ++i) {
+    src_index += src_step;
+    dst_index += dst_step;
+    moves->AddMove(AllocatedOperand(src_kind, smaller_rep, src_index),
+                   AllocatedOperand(dst_kind, smaller_rep, dst_index));
+  }
+  // Return the first fragment.
+  return move;
+}
 }  // namespace
+void GapResolver::Resolve(ParallelMove* moves) {
+  // Clear redundant moves, and collect FP move representations if aliasing
+  // is non-simple.
+  int reps = 0;
+  for (size_t i = 0; i < moves->size();) {
+    MoveOperands* move = (*moves)[i];
+    if (move->IsRedundant()) {
+      (*moves)[i] = moves->back();
+      moves->pop_back();
+      continue;
+    }
+    i++;
+    if (!kSimpleFPAliasing && move->destination().IsFPRegister()) {
+      reps |=
+          REP_BIT(LocationOperand::cast(move->destination()).representation());
+    }
+  }
+
+  if (!kSimpleFPAliasing) {
+    if (reps && !base::bits::IsPowerOfTwo32(reps)) {
+      // Start with the smallest FP moves, so we never encounter smaller moves
+      // in the middle of a cycle of larger moves.
+      if ((reps & kFloat32Bit) != 0) {
+        split_rep_ = MachineRepresentation::kFloat32;
+        for (size_t i = 0; i < moves->size(); ++i) {
+          auto move = (*moves)[i];
+          if (!move->IsEliminated() && move->destination().IsFloatRegister())
+            PerformMove(moves, move);
+        }
+      }
+      if ((reps & kFloat64Bit) != 0) {
+        split_rep_ = MachineRepresentation::kFloat64;
+        for (size_t i = 0; i < moves->size(); ++i) {
+          auto move = (*moves)[i];
+          if (!move->IsEliminated() && move->destination().IsDoubleRegister())
+            PerformMove(moves, move);
+        }
+      }
+    }
+    split_rep_ = MachineRepresentation::kSimd128;
+  }
-void GapResolver::Resolve(ParallelMove* moves) const {
-  // Clear redundant moves.
-  auto it =
-      std::remove_if(moves->begin(), moves->end(), std::ptr_fun(IsRedundant));
-  moves->erase(it, moves->end());
-  for (MoveOperands* move : *moves) {
+  for (size_t i = 0; i < moves->size(); ++i) {
+    auto move = (*moves)[i];
     if (!move->IsEliminated()) PerformMove(moves, move);
   }
 }
-void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
+void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) {
   // Each call to this function performs a move and deletes it from the move
   // graph. We first recursively perform any move blocking this one. We mark a
   // move as "pending" on entry to PerformMove in order to detect cycles in the
@@ -45,15 +142,32 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
   // Clear this move's destination to indicate a pending move. The actual
   // destination is saved on the side.
-  DCHECK(!move->source().IsInvalid());  // Or else it will look eliminated.
+  InstructionOperand source = move->source();
+  DCHECK(!source.IsInvalid());  // Or else it will look eliminated.
   InstructionOperand destination = move->destination();
   move->SetPending();
+  // We may need to split moves between FP locations differently.
+  bool is_fp_loc_move = !kSimpleFPAliasing && destination.IsFPLocationOperand();
+
   // Perform a depth-first traversal of the move graph to resolve dependencies.
   // Any unperformed, unpending move with a source the same as this one's
   // destination blocks this one so recursively perform all such moves.
-  for (MoveOperands* other : *moves) {
-    if (other->Blocks(destination) && !other->IsPending()) {
+  for (size_t i = 0; i < moves->size(); ++i) {
+    auto other = (*moves)[i];
+    if (other->IsEliminated()) continue;
+    if (other->IsPending()) continue;
+    if (other->source().InterferesWith(destination)) {
+      if (!kSimpleFPAliasing && is_fp_loc_move &&
+          LocationOperand::cast(other->source()).representation() >
+              split_rep_) {
+        // 'other' must also be an FP location move. Break it into fragments
+        // of the same size as 'move'. 'other' is set to one of the fragments,
+        // and the rest are appended to 'moves'.
+        other = Split(other, split_rep_, moves);
+        // 'other' may not block destination now.
+        if (!other->source().InterferesWith(destination)) continue;
+      }
       // Though PerformMove can change any source operand in the move graph,
       // this call cannot create a blocking move via a swap (this loop does not
       // miss any). Assume there is a non-blocking move with source A and this
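To make the Split() call above concrete, here is a minimal sketch (not V8 code) of its index arithmetic, under the patch's own assumption that kPointerSize == kFloatSize. Register fragments scale the register code by the alias count; multi-slot stack operands are indexed by their last slot and walked with a negative step, as the comment in Split() explains. The register and slot numbers are made up for illustration.

#include <cstdio>

int main() {
  // A float64 operand splits into two float32 fragments (aliases = 2).
  const int aliases = 2;
  const int slot_size = 1;  // one float32 per slot when kFloatSize == kPointerSize

  // Register case: float64 register d3 yields float32 fragments s6 and s7.
  int reg_index = 3 * aliases;
  for (int i = 0; i < aliases; ++i)
    std::printf("register fragment s%d\n", reg_index + i);

  // Stack case: a float64 operand whose index names its last slot (here 11)
  // yields float32 fragments at slots 11 and 10 (step = -slot_size).
  int slot_index = 11;
  for (int i = 0; i < aliases; ++i)
    std::printf("stack fragment at slot %d\n", slot_index - i * slot_size);
  return 0;
}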
@@ -67,18 +181,18 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
     }
   }
-  // We are about to resolve this move and don't need it marked as pending, so
-  // restore its destination.
-  move->set_destination(destination);
-
   // This move's source may have changed due to swaps to resolve cycles and so
   // it may now be the last move in the cycle. If so remove it.
-  InstructionOperand source = move->source();
-  if (source.InterferesWith(destination)) {
+  source = move->source();
+  if (source.EqualsCanonicalized(destination)) {
     move->Eliminate();
     return;
   }
+  // We are about to resolve this move and don't need it marked as pending, so
+  // restore its destination.
+  move->set_destination(destination);
+
   // The move may be blocked on a (at most one) pending move, in which case we
   // have a cycle. Search for such a blocking move and perform a swap to
   // resolve it.
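The change from InterferesWith to EqualsCanonicalized in the hunk above is the heart of the fix: under FP aliasing, overlapping locations interfere without being the same location, and only a genuine self-move may be eliminated as the last move of a cycle. Below is a toy model of that distinction, with byte ranges standing in for aliased FP registers (as on ARM, where s0 and s1 overlay d0); none of this is V8's actual operand type.

#include <cassert>

struct FpLoc {
  int start, size;  // byte offset and width within the aliased FP bank
};

bool InterferesWith(FpLoc a, FpLoc b) {
  return a.start < b.start + b.size && b.start < a.start + a.size;
}

bool EqualsCanonicalized(FpLoc a, FpLoc b) {
  return a.start == b.start && a.size == b.size;
}

int main() {
  FpLoc s1{4, 4};  // float32 register s1
  FpLoc d0{0, 8};  // float64 register d0, which overlays s0 and s1
  assert(InterferesWith(s1, d0));        // overlapping locations interfere...
  assert(!EqualsCanonicalized(s1, d0));  // ...but s1 -> d0 is not a no-op move
  return 0;
}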
@@ -91,7 +205,6 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
     return;
   }
-  DCHECK((*blocker)->IsPending());
   // Ensure source is a register or both are stack slots, to limit swap cases.
   if (source.IsStackSlot() || source.IsFPStackSlot()) {
     std::swap(source, destination);
@@ -99,14 +212,36 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const {
   assembler_->AssembleSwap(&source, &destination);
   move->Eliminate();
-  // Any unperformed (including pending) move with a source of either this
-  // move's source or destination needs to have their source changed to
-  // reflect the state of affairs after the swap.
-  for (MoveOperands* other : *moves) {
-    if (other->Blocks(source)) {
-      other->set_source(destination);
-    } else if (other->Blocks(destination)) {
-      other->set_source(source);
+  // Update outstanding moves whose source may now have been moved.
+  if (!kSimpleFPAliasing && is_fp_loc_move) {
+    // We may have to split larger moves.
+    for (size_t i = 0; i < moves->size(); ++i) {
+      auto other = (*moves)[i];
+      if (other->IsEliminated()) continue;
+      if (source.InterferesWith(other->source())) {
+        if (LocationOperand::cast(other->source()).representation() >
+            split_rep_) {
+          other = Split(other, split_rep_, moves);
+          if (!source.InterferesWith(other->source())) continue;
+        }
+        other->set_source(destination);
+      } else if (destination.InterferesWith(other->source())) {
+        if (LocationOperand::cast(other->source()).representation() >
+            split_rep_) {
+          other = Split(other, split_rep_, moves);
+          if (!destination.InterferesWith(other->source())) continue;
+        }
+        other->set_source(source);
+      }
+    }
+  } else {
+    for (auto other : *moves) {
+      if (other->IsEliminated()) continue;
+      if (source.EqualsCanonicalized(other->source())) {
+        other->set_source(destination);
+      } else if (destination.EqualsCanonicalized(other->source())) {
+        other->set_source(source);
+      }
     }
   }
 }
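Tying the hunks together, the following self-contained toy resolver (integer register moves only; no stack operands and no FP aliasing, hence no splitting; nothing here is V8 code) mirrors the control flow PerformMove keeps after this patch: mark the move pending, recurse into blockers, eliminate a move that became a self-move during cycle resolution, and break a remaining cycle with a swap followed by the source patch-up loop.

#include <algorithm>
#include <cstdio>
#include <vector>

struct Move {
  int src, dst;  // register numbers
  bool pending = false;
  bool eliminated = false;
};

void PerformMove(std::vector<Move>& moves, Move& move, int regs[]) {
  const int dst = move.dst;
  move.pending = true;
  // Depth-first: perform every non-pending move that reads our destination.
  for (Move& other : moves) {
    if (&other == &move || other.eliminated || other.pending) continue;
    if (other.src == dst) PerformMove(moves, other, regs);
  }
  move.pending = false;
  // Swaps during recursion may have redirected our source onto our
  // destination, making this the last move of a resolved cycle.
  if (move.src == dst) { move.eliminated = true; return; }
  const int src = move.src;
  // A still-pending move reading our destination means we are in a cycle:
  // resolve it with a swap, then redirect outstanding sources.
  for (Move& blocker : moves) {
    if (blocker.eliminated || !blocker.pending || blocker.src != dst) continue;
    std::swap(regs[src], regs[dst]);  // stands in for AssembleSwap
    move.eliminated = true;
    for (Move& other : moves) {
      if (other.eliminated) continue;
      if (other.src == src) other.src = dst;
      else if (other.src == dst) other.src = src;
    }
    return;
  }
  regs[dst] = regs[src];  // stands in for AssembleMove
  move.eliminated = true;
}

int main() {
  int regs[2] = {10, 20};
  std::vector<Move> moves = {{0, 1}, {1, 0}};  // the cycle r0 <-> r1
  for (Move& m : moves)
    if (!m.eliminated) PerformMove(moves, m, regs);
  std::printf("r0=%d r1=%d\n", regs[0], regs[1]);  // prints r0=20 r1=10
  return 0;
}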