Chromium Code Reviews
| Index: src/compiler/gap-resolver.cc |
| diff --git a/src/compiler/gap-resolver.cc b/src/compiler/gap-resolver.cc |
| index 7b04198e818f2a8bc529079b25419305e1184e42..21188795b5f440b2902e1ce5e838026e548e6067 100644 |
| --- a/src/compiler/gap-resolver.cc |
| +++ b/src/compiler/gap-resolver.cc |
| @@ -14,27 +14,119 @@ namespace compiler { |
| namespace { |
| +#define REP_BIT(rep) (1 << static_cast<int>(rep)); |
|
Jarin
2016/10/18 11:44:27
I think the semicolon at the end might be surprising.
bbudge
2016/10/19 00:32:20
Whoops. Done.
|
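A minimal standalone sketch of the point above, not the V8 sources (the macro parameter is a plain int here so the snippet compiles on its own): the trailing semicolon happens to be harmless when every expansion is already a full statement or declaration, as with the constants just below, but it silently becomes part of each expansion and breaks any use inside a larger expression.

#include <cassert>

// The macro as written in the patch, trailing semicolon included.
#define REP_BIT_WITH_SEMI(rep) (1 << (rep));
// The intended form from the follow-up ("Whoops. Done.").
#define REP_BIT(rep) (1 << (rep))

int main() {
  // int mask = REP_BIT_WITH_SEMI(0) | REP_BIT_WITH_SEMI(1);
  //   ...would expand to "(1 << (0)); | (1 << (1));;" and fail to compile.
  int mask = REP_BIT(0) | REP_BIT(1);  // expands cleanly to one expression
  assert(mask == 3);
  return 0;
}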
| + |
| +const int kFloat32Bit = REP_BIT(MachineRepresentation::kFloat32); |
| +const int kFloat64Bit = REP_BIT(MachineRepresentation::kFloat64); |
| + |
| inline bool Blocks(MoveOperands* move, InstructionOperand destination) { |
| - return move->Blocks(destination); |
| + return !move->IsEliminated() && move->source().InterferesWith(destination); |
| } |
| +// Splits a FP move between two location operands into the equivalent series of |
| +// moves between smaller sub-operands, e.g. a double move to two single moves. |
| +// This helps reduce the number of cycles that would normally occur under FP |
| +// aliasing, and makes swaps much easier to implement. |
| +MoveOperands* Split(MoveOperands* move, MachineRepresentation smaller_rep, |
| + ParallelMove* moves) { |
| + DCHECK(!kSimpleFPAliasing); |
|
Jarin
2016/10/18 11:44:27
STATIC_ASSERT?
bbudge
2016/10/19 00:32:20
Wouldn't that break non-Arm builds?
Jarin
2016/10/19 19:23:51
You are right, silly me.
|
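A hedged sketch of the trade-off discussed here, with kSimpleFPAliasing reduced to an illustrative stand-in constant: because it is a compile-time constant that is true on most targets, a static_assert inside Split would fail those builds even though Split is never reached there, whereas the DCHECK only fires at runtime if the function is actually called.

#include <cassert>

constexpr bool kSimpleFPAliasing = true;  // e.g. an x64-style configuration

void Split() {
  // static_assert(!kSimpleFPAliasing, "");  // would break this build outright
  assert(!kSimpleFPAliasing);  // runtime check: only trips if Split is reached
}

int main() {
  if (!kSimpleFPAliasing) Split();  // never called on this configuration
  return 0;
}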
| + const LocationOperand& src_loc = LocationOperand::cast(move->source()); |
| + const LocationOperand& dst_loc = LocationOperand::cast(move->destination()); |
| + MachineRepresentation dst_rep = dst_loc.representation(); |
| + DCHECK_NE(smaller_rep, dst_rep); |
| + auto src_kind = src_loc.location_kind(); |
| + auto dst_kind = dst_loc.location_kind(); |
| + |
| + int base = -1; |
| + int aliases = RegisterConfiguration::Turbofan()->GetAliases( |
| + dst_rep, 0, smaller_rep, &base); |
|
Jarin
2016/10/18 11:44:27
I think this code makes a lot of assumptions, whic
bbudge
2016/10/19 00:32:20
Yes, it's true I'm cheating by calling GetAliases
|
| + DCHECK(base::bits::IsPowerOfTwo32(aliases)); |
| + |
| + int src_index = -1; |
| + const int kBaseRep = static_cast<int>(MachineRepresentation::kFloat32); |
|
Jarin
2016/10/18 11:44:27
Nit: Should not this use MachineRepresentation::kF
bbudge
2016/10/19 00:32:20
Yep, but I changed the calculation to use ElementSizeLog2Of.
|
| + int slot_size = 1 << (static_cast<int>(smaller_rep) - kBaseRep); |
|
Jarin
2016/10/18 11:44:27
Is this supposed to mean (element_size / kPointerSize)?
bbudge
2016/10/19 00:32:20
Changed to use ElementSizeLog2Of.
Jarin
2016/10/19 19:23:51
Actually, thinking about it more, I am not sure if
bbudge
2016/10/25 00:58:23
Agreed. Added DCHECK.
|
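A standalone sketch of the calculation the thread converges on, using illustrative helper names rather than the real ElementSizeLog2Of: the number of stack slots one fragment of smaller_rep occupies, under the assumption (guarded by an assert here, in the spirit of the added DCHECK) that a stack slot is float32-sized.

#include <cassert>

constexpr int kSlotSizeLog2 = 2;  // assumed 4-byte stack slots

int SlotsPerFragment(int element_size_log2) {
  assert(element_size_log2 >= kSlotSizeLog2);  // slot must not exceed element
  return 1 << (element_size_log2 - kSlotSizeLog2);
}

int main() {
  assert(SlotsPerFragment(2) == 1);  // float32 fragment: one slot
  assert(SlotsPerFragment(3) == 2);  // float64 fragment: two slots
  assert(SlotsPerFragment(4) == 4);  // simd128 fragment: four slots
  return 0;
}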
| + int src_step = 1; |
| + if (src_kind == LocationOperand::REGISTER) { |
| + src_index = src_loc.register_code() * aliases; |
| + } else { |
| + src_index = src_loc.index(); |
| + src_step = -slot_size; |
|
Jarin
2016/10/18 11:44:27
To be honest, I was a bit confused how the indexing works here.
bbudge
2016/10/19 00:32:20
I observed that double-aligned slots on Arm are al
|
| + } |
| + int dst_index = -1; |
| + int dst_step = 1; |
| + if (dst_kind == LocationOperand::REGISTER) { |
| + dst_index = dst_loc.register_code() * aliases; |
| + } else { |
| + dst_index = dst_loc.index(); |
| + dst_step = -slot_size; |
| + } |
| -inline bool IsRedundant(MoveOperands* move) { return move->IsRedundant(); } |
| + // Reuse 'move' for the first fragment. It is not pending. |
| + move->set_source(AllocatedOperand(src_kind, smaller_rep, src_index)); |
| + move->set_destination(AllocatedOperand(dst_kind, smaller_rep, dst_index)); |
| + // Add the remaining fragment moves. |
| + for (int i = 1; i < aliases; ++i) { |
| + src_index += src_step; |
| + dst_index += dst_step; |
| + moves->AddMove(AllocatedOperand(src_kind, smaller_rep, src_index), |
| + AllocatedOperand(dst_kind, smaller_rep, dst_index)); |
| + } |
|
Jarin
2016/10/18 11:44:27
Overall, I am wondering about the benefit of movin
bbudge
2016/10/19 00:32:20
Yes, it's going to be at least 2x worse. At least
|
| + // Return the first fragment. |
| + return move; |
| +} |
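To make Split concrete, here is a standalone sketch (not the V8 code) of the register case under ARM-style combining aliasing, where double register d<n> overlaps single registers s<2n> and s<2n+1>: a double move d1 -> d2 becomes the pair of single moves s2 -> s4 and s3 -> s5, matching the register_code() * aliases indexing above.

#include <cstdio>

struct SingleMove { int src_s, dst_s; };

// Split one double-register move into its two single-register fragments.
void SplitDoubleMove(int src_d, int dst_d, SingleMove out[2]) {
  const int kAliases = 2;  // two float32 registers per float64 register
  out[0] = SingleMove{src_d * kAliases, dst_d * kAliases};
  out[1] = SingleMove{src_d * kAliases + 1, dst_d * kAliases + 1};
}

int main() {
  SingleMove parts[2];
  SplitDoubleMove(1, 2, parts);
  for (const SingleMove& m : parts)
    std::printf("s%d -> s%d\n", m.src_s, m.dst_s);  // s2 -> s4, s3 -> s5
  return 0;
}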
| } // namespace |
| +void GapResolver::Resolve(ParallelMove* moves) { |
| + // Clear redundant moves, and collect FP move representations if aliasing |
| + // is non-simple. |
| + int reps = 0; |
| + for (size_t i = 0; i < moves->size();) { |
| + MoveOperands* move = (*moves)[i]; |
| + if (move->IsRedundant()) { |
| + (*moves)[i] = moves->back(); |
| + moves->pop_back(); |
| + continue; |
| + } |
| + i++; |
| + if (!kSimpleFPAliasing && move->destination().IsFPRegister()) { |
| + reps |= |
| + REP_BIT(LocationOperand::cast(move->destination()).representation()); |
| + } |
| + } |
| + |
| + if (!kSimpleFPAliasing) { |
| + if (reps && !base::bits::IsPowerOfTwo32(reps)) { |
| + // Start with the smallest FP moves, so we never encounter smaller moves |
| + // in the middle of a cycle of larger moves. |
| + if ((reps & kFloat32Bit) != 0) { |
| + split_rep_ = MachineRepresentation::kFloat32; |
| + for (size_t i = 0; i < moves->size(); ++i) { |
| + auto move = (*moves)[i]; |
| + if (!move->IsEliminated() && move->destination().IsFloatRegister()) |
| + PerformMove(moves, move); |
| + } |
| + } |
| + if ((reps & kFloat64Bit) != 0) { |
| + split_rep_ = MachineRepresentation::kFloat64; |
| + for (size_t i = 0; i < moves->size(); ++i) { |
| + auto move = (*moves)[i]; |
| + if (!move->IsEliminated() && move->destination().IsDoubleRegister()) |
| + PerformMove(moves, move); |
| + } |
| + } |
| + } |
| + split_rep_ = MachineRepresentation::kSimd128; |
| + } |
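A small standalone sketch of the bookkeeping above, with illustrative bit positions rather than V8's MachineRepresentation values: reps collects one bit per FP representation seen among the destinations, so "non-zero and not a power of two" is exactly the "more than one FP representation present" condition under which the float32-first and float64 passes are run.

#include <cassert>

constexpr int kFloat32Bit = 1 << 0;  // illustrative bit assignments
constexpr int kFloat64Bit = 1 << 1;
constexpr int kSimd128Bit = 1 << 2;

bool IsPowerOfTwo32(unsigned x) { return x != 0 && !(x & (x - 1)); }

int main() {
  int reps = kFloat64Bit | kFloat32Bit;      // mixed FP reps among destinations
  assert(reps && !IsPowerOfTwo32(reps));     // -> run the split passes
  reps = kSimd128Bit;                        // a single FP rep
  assert(!(reps && !IsPowerOfTwo32(reps)));  // -> no splitting passes needed
  return 0;
}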
| -void GapResolver::Resolve(ParallelMove* moves) const { |
| - // Clear redundant moves. |
| - auto it = |
| - std::remove_if(moves->begin(), moves->end(), std::ptr_fun(IsRedundant)); |
| - moves->erase(it, moves->end()); |
| - for (MoveOperands* move : *moves) { |
| + for (size_t i = 0; i < moves->size(); ++i) { |
| + auto move = (*moves)[i]; |
| if (!move->IsEliminated()) PerformMove(moves, move); |
| } |
| } |
| -void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const { |
| +void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) { |
| // Each call to this function performs a move and deletes it from the move |
| // graph. We first recursively perform any move blocking this one. We mark a |
| // move as "pending" on entry to PerformMove in order to detect cycles in the |
| @@ -45,15 +137,32 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const { |
| // Clear this move's destination to indicate a pending move. The actual |
| // destination is saved on the side. |
| - DCHECK(!move->source().IsInvalid()); // Or else it will look eliminated. |
| + InstructionOperand source = move->source(); |
| + DCHECK(!source.IsInvalid()); // Or else it will look eliminated. |
| InstructionOperand destination = move->destination(); |
| move->SetPending(); |
| + // We may need to split moves between FP locations differently. |
| + bool is_fp_loc_move = !kSimpleFPAliasing && destination.IsFPLocationOperand(); |
| + |
| // Perform a depth-first traversal of the move graph to resolve dependencies. |
| // Any unperformed, unpending move with a source the same as this one's |
| // destination blocks this one so recursively perform all such moves. |
| - for (MoveOperands* other : *moves) { |
| - if (other->Blocks(destination) && !other->IsPending()) { |
| + for (size_t i = 0; i < moves->size(); ++i) { |
| + auto other = (*moves)[i]; |
| + if (other->IsEliminated()) continue; |
| + if (other->IsPending()) continue; |
| + if (other->source().InterferesWith(destination)) { |
| + if (!kSimpleFPAliasing && is_fp_loc_move && |
| + LocationOperand::cast(other->source()).representation() > |
| + split_rep_) { |
| + // 'other' must also be an FP location move. Break it into fragments |
| + // of the same size as 'move'. 'other' is set to one of the fragments, |
| + // and the rest are appended to 'moves'. |
| + other = Split(other, split_rep_, moves); |
| + // 'other' may not block destination now. |
| + if (!other->source().InterferesWith(destination)) continue; |
| + } |
| // Though PerformMove can change any source operand in the move graph, |
| // this call cannot create a blocking move via a swap (this loop does not |
| // miss any). Assume there is a non-blocking move with source A and this |
| @@ -67,18 +176,18 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const { |
| } |
| } |
| - // We are about to resolve this move and don't need it marked as pending, so |
| - // restore its destination. |
| - move->set_destination(destination); |
| - |
| // This move's source may have changed due to swaps to resolve cycles and so |
| // it may now be the last move in the cycle. If so remove it. |
| - InstructionOperand source = move->source(); |
| - if (source.InterferesWith(destination)) { |
| + source = move->source(); |
| + if (source.EqualsCanonicalized(destination)) { |
| move->Eliminate(); |
| return; |
| } |
| + // We are about to resolve this move and don't need it marked as pending, so |
| + // restore its destination. |
| + move->set_destination(destination); |
| + |
| // The move may be blocked on a (at most one) pending move, in which case we |
| // have a cycle. Search for such a blocking move and perform a swap to |
| // resolve it. |
| @@ -91,7 +200,6 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const { |
| return; |
| } |
| - DCHECK((*blocker)->IsPending()); |
| // Ensure source is a register or both are stack slots, to limit swap cases. |
| if (source.IsStackSlot() || source.IsFPStackSlot()) { |
| std::swap(source, destination); |
| @@ -99,14 +207,36 @@ void GapResolver::PerformMove(ParallelMove* moves, MoveOperands* move) const { |
| assembler_->AssembleSwap(&source, &destination); |
| move->Eliminate(); |
| - // Any unperformed (including pending) move with a source of either this |
| - // move's source or destination needs to have their source changed to |
| - // reflect the state of affairs after the swap. |
| - for (MoveOperands* other : *moves) { |
| - if (other->Blocks(source)) { |
| - other->set_source(destination); |
| - } else if (other->Blocks(destination)) { |
| - other->set_source(source); |
| + // Update outstanding moves whose source may now have been moved. |
| + if (!kSimpleFPAliasing && is_fp_loc_move) { |
| + // We may have to split larger moves. |
| + for (size_t i = 0; i < moves->size(); ++i) { |
| + auto other = (*moves)[i]; |
| + if (other->IsEliminated()) continue; |
| + if (source.InterferesWith(other->source())) { |
| + if (LocationOperand::cast(other->source()).representation() > |
| + split_rep_) { |
| + other = Split(other, split_rep_, moves); |
| + if (!source.InterferesWith(other->source())) continue; |
| + } |
| + other->set_source(destination); |
| + } else if (destination.InterferesWith(other->source())) { |
| + if (LocationOperand::cast(other->source()).representation() > |
| + split_rep_) { |
| + other = Split(other, split_rep_, moves); |
| + if (!destination.InterferesWith(other->source())) continue; |
| + } |
| + other->set_source(source); |
| + } |
| + } |
| + } else { |
| + for (auto other : *moves) { |
| + if (other->IsEliminated()) continue; |
| + if (source.EqualsCanonicalized(other->source())) { |
| + other->set_source(destination); |
| + } else if (destination.EqualsCanonicalized(other->source())) { |
| + other->set_source(source); |
| + } |
| } |
| } |
| } |
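Putting the pieces together, a hedged, self-contained sketch of the overall resolution strategy with plain integer registers and no FP aliasing; Move, PerformMove, and regs are illustrative names, not V8's. It mirrors the structure of PerformMove above: recurse on moves that block this one, detect a cycle via the pending mark, break the cycle with a swap, then rewrite the sources of outstanding moves that read from the swapped locations.

#include <cassert>
#include <utility>
#include <vector>

struct Move {
  int src, dst;
  bool pending = false, done = false;
};

void PerformMove(std::vector<int>& regs, std::vector<Move>& moves, Move& move) {
  move.pending = true;
  // Depth-first: perform every non-pending move that reads our destination.
  for (Move& other : moves)
    if (!other.done && !other.pending && other.src == move.dst)
      PerformMove(regs, moves, other);
  move.pending = false;
  // Swaps performed during the recursion may have made this move redundant.
  if (move.src == move.dst) { move.done = true; return; }
  // A still-pending move reading our destination means we are in a cycle.
  Move* blocker = nullptr;
  for (Move& other : moves)
    if (!other.done && other.pending && other.src == move.dst) blocker = &other;
  if (blocker == nullptr) {
    regs[move.dst] = regs[move.src];  // no cycle: just emit the move
  } else {
    std::swap(regs[move.src], regs[move.dst]);  // break the cycle with a swap
    // The swap relocated data, so redirect moves that read the swapped slots.
    for (Move& other : moves) {
      if (other.done || &other == &move) continue;
      if (other.src == move.src) other.src = move.dst;
      else if (other.src == move.dst) other.src = move.src;
    }
  }
  move.done = true;
}

int main() {
  std::vector<int> regs = {10, 20, 30};
  std::vector<Move> moves = {{0, 1}, {1, 2}, {2, 0}};  // a three-move cycle
  for (Move& m : moves)
    if (!m.done) PerformMove(regs, moves, m);
  assert(regs[1] == 10 && regs[2] == 20 && regs[0] == 30);
  return 0;
}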