Index: src/compiler/instruction-scheduler.cc
diff --git a/src/compiler/instruction-scheduler.cc b/src/compiler/instruction-scheduler.cc
new file mode 100644
index 0000000000000000000000000000000000000000..517204a2244257ceaea6de485f8fd84df7a423a0
--- /dev/null
+++ b/src/compiler/instruction-scheduler.cc
@@ -0,0 +1,197 @@
+// Copyright 2015 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/compiler/instruction-scheduler.h"
+
+namespace v8 {
+namespace internal {
+namespace compiler {
+
+const int InstructionScheduler::instruction_flags_[] = {
Jarin 2015/10/26 15:12:35:
  I think Chrome cannot have static variables. How a
baptiste.afsa1 2015/10/27 16:00:23:
  Done.
+#define ARCH_OPCODE_FLAGS(Name, Flags) Flags,
+  ARCH_OPCODE_LIST(ARCH_OPCODE_FLAGS)
+#undef ARCH_OPCODE_FLAGS
+};
+
+
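A minimal sketch of one way to resolve the static-variable comment above: compute the flags on demand instead of storing them in a static array. The function shape below is an assumption for illustration, not the CL's final code; it reuses the same ARCH_OPCODE_LIST macro the array is built from.

// Hypothetical alternative: generate a switch from ARCH_OPCODE_LIST so no
// static-duration array is needed.
int GetInstructionFlags(ArchOpcode opcode) {
  switch (opcode) {
#define ARCH_OPCODE_FLAGS(Name, Flags) \
  case k##Name:                        \
    return Flags;
    ARCH_OPCODE_LIST(ARCH_OPCODE_FLAGS)
#undef ARCH_OPCODE_FLAGS
  }
  UNREACHABLE();
  return 0;
}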
+InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(
+    Zone* zone,
+    Instruction* instr)
+    : instr_(instr),
+      successors_(zone),
+      dependency_count_(0),
+      latency_(GetInstructionLatency(instr)),
+      total_latency_(-1),
+      start_cycle_(-1) {
+}
+
+
+void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
+    ScheduleGraphNode* node) {
+  successors_.push_back(node);
+  node->dependency_count_++;
+}
+
+
+InstructionScheduler::InstructionScheduler(Zone* zone,
+                                           InstructionSequence* sequence)
+    : zone_(zone),
+      sequence_(sequence),
+      graph_(zone) {
+}
+
+
+void InstructionScheduler::StartBlock(RpoNumber rpo) {
+  DCHECK(graph_.empty());
+  sequence()->StartBlock(rpo);
+}
+
+
+void InstructionScheduler::EndBlock(RpoNumber rpo) {
+  ScheduleBlock();
+  sequence()->EndBlock(rpo);
+  graph_.clear();
+}
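For orientation, a hypothetical caller (illustrative only; the block and accessor names are assumptions, the actual driver in the CL is the code-generation pipeline): each basic block is bracketed by StartBlock() and EndBlock(), and EndBlock() is where the block is actually reordered and emitted.

// Hypothetical usage sketch, not the CL's code.
InstructionScheduler scheduler(zone, sequence);
scheduler.StartBlock(block->rpo_number());
for (Instruction* instr : instructions) {
  scheduler.AddInstruction(instr);  // Builds up the dependency graph.
}
scheduler.EndBlock(block->rpo_number());  // Schedules, emits, resets.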
+
+
+void InstructionScheduler::AddInstruction(Instruction* instr) {
+  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);
+
+  for (auto node : graph_) {
+    if (HasOperandDependency(node->instruction(), instr) ||
+        // Make sure that basic block terminators are not moved by adding them
+        // as a successor of every instruction.
+        IsBlockTerminator(instr) ||
+
+        // Instructions with side effects and memory operations can't be
+        // reordered with respect to each other.
+        (HasSideEffect(node->instruction()) && HasSideEffect(instr)) ||
+        (IsLoadOperation(node->instruction()) && HasSideEffect(instr)) ||
+        (HasSideEffect(node->instruction()) && IsLoadOperation(instr)) ||
+
+        // These nops are used to mark a defining instruction for some live
+        // ranges in the register allocator. They must not be moved.
+        ((node->instruction()->arch_opcode() == kArchNop) &&
+         (node->instruction()->OutputAt(0)->IsUnallocated()) &&
+         UnallocatedOperand::cast(
+             node->instruction()->OutputAt(0))->HasFixedRegisterPolicy())) {
+      node->AddSuccessor(new_node);
+    }
Jarin 2015/10/27 09:02:31:
  This loop is worrisome: for a basic block with n i
baptiste.afsa1 2015/10/27 16:00:24:
  Yes, good idea. I'll fix this.
+  }
+
+  graph_.push_back(new_node);
+}
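Regarding the quadratic loop flagged in the comment above, here is a sketch of one way to keep edge construction near-linear. The members last_side_effect_instr_ and pending_loads_ are assumptions for illustration, not the CL's final fix: the idea is that only a few earlier nodes can constrain a new instruction through memory, so remember just those instead of rescanning the whole graph.

// Illustrative rewrite of AddInstruction, assuming two extra members:
//   ScheduleGraphNode* last_side_effect_instr_;    // initially nullptr
//   ZoneVector<ScheduleGraphNode*> pending_loads_;
void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  if (IsBlockTerminator(instr)) {
    // Keep the terminator last by making it a successor of every node.
    for (auto node : graph_) node->AddSuccessor(new_node);
  } else if (HasSideEffect(instr)) {
    // A new side effect orders after the previous side effect and after all
    // loads issued since then: O(1) amortized edges instead of O(n).
    if (last_side_effect_instr_ != nullptr) {
      last_side_effect_instr_->AddSuccessor(new_node);
    }
    for (auto load : pending_loads_) {
      load->AddSuccessor(new_node);
    }
    pending_loads_.clear();
    last_side_effect_instr_ = new_node;
  } else if (IsLoadOperation(instr)) {
    // Loads only order after the last side effect; remember them so the next
    // side effect can order after them.
    if (last_side_effect_instr_ != nullptr) {
      last_side_effect_instr_->AddSuccessor(new_node);
    }
    pending_loads_.push_back(new_node);
  }

  // True operand dependencies would still need edges, e.g. via a map from
  // virtual register to defining node rather than a full scan.
  graph_.push_back(new_node);
}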
+
+
+bool InstructionScheduler::CompareNodes(ScheduleGraphNode* node1,
+                                        ScheduleGraphNode* node2) const {
+  return node1->total_latency() > node2->total_latency();
+}
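A quick worked example of what this ordering buys (the instructions and latencies are illustrative):

// Dependencies: A -> C and B -> C, with latency(A) = 2, latency(B) = 1,
// latency(C) = 1.
//   total_latency(C) = 1
//   total_latency(A) = 2 + 1 = 3   (critical path)
//   total_latency(B) = 1 + 1 = 2
// CompareNodes ranks A above B, so the longer chain is started first and B
// can execute in the shadow of A's latency.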
+
+
+void InstructionScheduler::ScheduleBlock() {
+  ZoneLinkedList<ScheduleGraphNode*> ready_list(zone());
+
+  // Compute each node's total latency so that we can schedule the critical
+  // path first, and add the nodes without dependencies to the ready list.
+  for (auto node : graph_) {
Jarin 2015/10/26 15:12:35:
  nit: If you went from the end of the graph, then y
baptiste.afsa1 2015/10/27 16:00:24:
  I would also need to be able to go from a node to
+    if (!node->HasDependency()) {
+      ComputeTotalLatency(node);
+      ready_list.push_back(node);
+    }
+  }
+
+  // Go through the ready list and schedule the instructions.
+  int cycle = 0;
+  while (!ready_list.empty()) {
+    auto candidate = ready_list.end();
+    for (auto iterator = ready_list.begin(); iterator != ready_list.end();
+         ++iterator) {
+      // Only consider instructions which are ready to be issued, i.e. whose
+      // inputs are all available by the current cycle.
+      if (cycle >= (*iterator)->start_cycle()) {
Jarin 2015/10/26 15:12:35:
  Could you add a comment here explaining that you o
baptiste.afsa1 2015/10/27 16:00:24:
  Done.
+        if ((candidate == ready_list.end()) ||
+            CompareNodes(*iterator, *candidate)) {
+          candidate = iterator;
+        }
+      }
+    }
+
+    if (candidate != ready_list.end()) {
+      sequence()->AddInstruction((*candidate)->instruction());
+
+      for (auto successor : (*candidate)->successors()) {
+        successor->DropDependency();
+        successor->set_start_cycle(
+            std::max(successor->start_cycle(),
+                     cycle + (*candidate)->latency()));
+
+        if (!successor->HasDependency()) {
+          ready_list.push_back(successor);
+        }
+      }
+
+      ready_list.erase(candidate);
+    }
+
+    cycle++;
+  }
+}
+
+
+bool InstructionScheduler::HasOperandDependency(
+    const Instruction* instr1, const Instruction* instr2) const {
+  for (size_t i = 0; i < instr1->OutputCount(); ++i) {
+    for (size_t j = 0; j < instr2->InputCount(); ++j) {
+      const InstructionOperand* output = instr1->OutputAt(i);
+      const InstructionOperand* input = instr2->InputAt(j);
+
+      if (output->IsUnallocated() && input->IsUnallocated() &&
+          (UnallocatedOperand::cast(output)->virtual_register() ==
+           UnallocatedOperand::cast(input)->virtual_register())) {
+        return true;
+      }
+
+      if (output->IsConstant() && input->IsUnallocated() &&
+          (ConstantOperand::cast(output)->virtual_register() ==
+           UnallocatedOperand::cast(input)->virtual_register())) {
+        return true;
+      }
+    }
+  }
+
+  // TODO(bafsa): Do we need to look for anti-dependencies/output-dependencies?
Jarin 2015/10/26 15:12:35:
  I think we don't because SSA (although I am not en
baptiste.afsa1 2015/10/27 16:00:23:
  It was my initial thought but I wasn't too sure.
+
+  return false;
+}
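For context on the TODO and the SSA discussion above: in SSA form each virtual register is defined exactly once, which is why anti- and output-dependencies between virtual registers should not arise. A tiny illustration (not taken from the CL):

// Non-SSA: v1 is written twice, so swapping the two instructions would
// create a write-after-write (output) hazard.
//   v1 = a + b
//   v1 = c + d
// SSA: every definition gets a fresh virtual register, leaving only true
// read-after-write dependencies for the scheduler to honor.
//   v1 = a + b
//   v2 = c + d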
+
+
+bool InstructionScheduler::IsBlockTerminator(const Instruction* instr) const {
+  return ((instruction_flags_[instr->arch_opcode()] & kIsBlockTerminator) ||
+          (instr->flags_mode() == kFlags_branch) ||
+          // TODO(all): TurboFan throw nodes are currently turned into nops.
+          // These nops must not be reordered when they occur at the end of a
+          // basic block. We need to find a more reliable way to catch those.
Jarin 2015/10/26 15:12:35:
  Why can't they be reordered? These should be unrea
baptiste.afsa1 2015/10/27 16:00:23:
  It triggers an assertion in the register allocator
+          ((instr->arch_opcode() == kArchNop) && (instr->OutputCount() == 0) &&
+           (instr->InputCount() == 0)));
+}
+
+
+void InstructionScheduler::ComputeTotalLatency(ScheduleGraphNode* node) {
+  int max_latency = 0;
+
+  for (auto successor : node->successors()) {
+    if (successor->total_latency() == -1) {
+      ComputeTotalLatency(successor);
+    }
+
+    if (successor->total_latency() > max_latency) {
+      max_latency = successor->total_latency();
+    }
+  }
+
+  node->set_total_latency(max_latency + node->latency());
+}
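On the review nit above about starting from the end of the graph: nodes are appended to graph_ in emission order and successor edges only point forward, so a single backward pass visits every successor before its predecessors and the recursion could be dropped. A sketch, assuming graph_ supports reverse iteration (not the CL's code):

// Illustrative iterative alternative to the recursive ComputeTotalLatency.
void InstructionScheduler::ComputeTotalLatencies() {
  for (auto it = graph_.rbegin(); it != graph_.rend(); ++it) {
    ScheduleGraphNode* node = *it;
    int max_latency = 0;
    for (auto successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());  // Already visited.
      max_latency = std::max(max_latency, successor->total_latency());
    }
    node->set_total_latency(max_latency + node->latency());
  }
}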
+
+}  // namespace compiler
+}  // namespace internal
+}  // namespace v8