| OLD | NEW |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 #include "vm/flow_graph_allocator.h" | 5 #include "vm/flow_graph_allocator.h" |
| 6 | 6 |
| 7 #include "vm/bit_vector.h" | 7 #include "vm/bit_vector.h" |
| 8 #include "vm/intermediate_language.h" | 8 #include "vm/intermediate_language.h" |
| 9 #include "vm/il_printer.h" | 9 #include "vm/il_printer.h" |
| 10 #include "vm/flow_graph.h" | 10 #include "vm/flow_graph.h" |
| 11 #include "vm/flow_graph_compiler.h" | 11 #include "vm/flow_graph_compiler.h" |
| (...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 65 reaching_defs_(flow_graph), | 65 reaching_defs_(flow_graph), |
| 66 mint_values_(NULL), | 66 mint_values_(NULL), |
| 67 block_order_(flow_graph.reverse_postorder()), | 67 block_order_(flow_graph.reverse_postorder()), |
| 68 postorder_(flow_graph.postorder()), | 68 postorder_(flow_graph.postorder()), |
| 69 live_out_(block_order_.length()), | 69 live_out_(block_order_.length()), |
| 70 kill_(block_order_.length()), | 70 kill_(block_order_.length()), |
| 71 live_in_(block_order_.length()), | 71 live_in_(block_order_.length()), |
| 72 vreg_count_(flow_graph.max_virtual_register_number()), | 72 vreg_count_(flow_graph.max_virtual_register_number()), |
| 73 live_ranges_(flow_graph.max_virtual_register_number()), | 73 live_ranges_(flow_graph.max_virtual_register_number()), |
| 74 cpu_regs_(), | 74 cpu_regs_(), |
| 75 xmm_regs_(), | 75 fpu_regs_(), |
| 76 blocked_cpu_registers_(), | 76 blocked_cpu_registers_(), |
| 77 blocked_xmm_registers_(), | 77 blocked_fpu_registers_(), |
| 78 cpu_spill_slot_count_(0) { | 78 cpu_spill_slot_count_(0) { |
| 79 for (intptr_t i = 0; i < vreg_count_; i++) live_ranges_.Add(NULL); | 79 for (intptr_t i = 0; i < vreg_count_; i++) live_ranges_.Add(NULL); |
| 80 | 80 |
| 81 blocked_cpu_registers_[CTX] = true; | 81 blocked_cpu_registers_[CTX] = true; |
| 82 if (TMP != kNoRegister) { | 82 if (TMP != kNoRegister) { |
| 83 blocked_cpu_registers_[TMP] = true; | 83 blocked_cpu_registers_[TMP] = true; |
| 84 } | 84 } |
| 85 blocked_cpu_registers_[SPREG] = true; | 85 blocked_cpu_registers_[SPREG] = true; |
| 86 blocked_cpu_registers_[FPREG] = true; | 86 blocked_cpu_registers_[FPREG] = true; |
| 87 | 87 |
| 88 // XMM0 is used as scratch by optimized code and parallel move resolver. | 88 // FpuTMP is used as scratch by optimized code and parallel move resolver. |
| 89 blocked_xmm_registers_[XMM0] = true; | 89 blocked_fpu_registers_[FpuTMP] = true; |
| 90 } | 90 } |
| 91 | 91 |
| 92 | 92 |
| 93 // Remove environments from the instructions which can't deoptimize. | 93 // Remove environments from the instructions which can't deoptimize. |
| 94 // Replace dead phis uses with null values in environments. | 94 // Replace dead phis uses with null values in environments. |
| 95 void FlowGraphAllocator::EliminateEnvironmentUses() { | 95 void FlowGraphAllocator::EliminateEnvironmentUses() { |
| 96 ConstantInstr* constant_null = | 96 ConstantInstr* constant_null = |
| 97 postorder_.Last()->AsGraphEntry()->constant_null(); | 97 postorder_.Last()->AsGraphEntry()->constant_null(); |
| 98 for (intptr_t i = 0; i < block_order_.length(); ++i) { | 98 for (intptr_t i = 0; i < block_order_.length(); ++i) { |
| 99 BlockEntryInstr* block = block_order_[i]; | 99 BlockEntryInstr* block = block_order_[i]; |
| (...skipping 342 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 442 blocking_ranges[loc.register_code()]->AddUseInterval(from, to); | 442 blocking_ranges[loc.register_code()]->AddUseInterval(from, to); |
| 443 } | 443 } |
| 444 | 444 |
| 445 | 445 |
| 446 // Block location from the start of the instruction to its end. | 446 // Block location from the start of the instruction to its end. |
| 447 void FlowGraphAllocator::BlockLocation(Location loc, | 447 void FlowGraphAllocator::BlockLocation(Location loc, |
| 448 intptr_t from, | 448 intptr_t from, |
| 449 intptr_t to) { | 449 intptr_t to) { |
| 450 if (loc.IsRegister()) { | 450 if (loc.IsRegister()) { |
| 451 BlockRegisterLocation(loc, from, to, blocked_cpu_registers_, cpu_regs_); | 451 BlockRegisterLocation(loc, from, to, blocked_cpu_registers_, cpu_regs_); |
| 452 } else if (loc.IsXmmRegister()) { | 452 } else if (loc.IsFpuRegister()) { |
| 453 BlockRegisterLocation(loc, from, to, blocked_xmm_registers_, xmm_regs_); | 453 BlockRegisterLocation(loc, from, to, blocked_fpu_registers_, fpu_regs_); |
| 454 } else { | 454 } else { |
| 455 UNREACHABLE(); | 455 UNREACHABLE(); |
| 456 } | 456 } |
| 457 } | 457 } |
| 458 | 458 |
| 459 | 459 |
| 460 void LiveRange::Print() { | 460 void LiveRange::Print() { |
| 461 if (first_use_interval() == NULL) { | 461 if (first_use_interval() == NULL) { |
| 462 return; | 462 return; |
| 463 } | 463 } |
| (...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 622 ConvertAllUses(range); | 622 ConvertAllUses(range); |
| 623 | 623 |
| 624 if (defn->IsParameter() && flow_graph_.num_copied_params() > 0) { | 624 if (defn->IsParameter() && flow_graph_.num_copied_params() > 0) { |
| 625 MarkAsObjectAtSafepoints(range); | 625 MarkAsObjectAtSafepoints(range); |
| 626 } | 626 } |
| 627 } | 627 } |
| 628 } | 628 } |
| 629 | 629 |
| 630 | 630 |
| 631 static Location::Kind RegisterKindFromPolicy(Location loc) { | 631 static Location::Kind RegisterKindFromPolicy(Location loc) { |
| 632 if (loc.policy() == Location::kRequiresXmmRegister) { | 632 if (loc.policy() == Location::kRequiresFpuRegister) { |
| 633 return Location::kXmmRegister; | 633 return Location::kFpuRegister; |
| 634 } else { | 634 } else { |
| 635 return Location::kRegister; | 635 return Location::kRegister; |
| 636 } | 636 } |
| 637 } | 637 } |
| 638 | 638 |
| 639 | 639 |
| 640 static Location::Kind RegisterKindForResult(Instruction* instr) { | 640 static Location::Kind RegisterKindForResult(Instruction* instr) { |
| 641 if ((instr->representation() == kUnboxedDouble) || | 641 if ((instr->representation() == kUnboxedDouble) || |
| 642 (instr->representation() == kUnboxedMint)) { | 642 (instr->representation() == kUnboxedMint)) { |
| 643 return Location::kXmmRegister; | 643 return Location::kFpuRegister; |
| 644 } else { | 644 } else { |
| 645 return Location::kRegister; | 645 return Location::kRegister; |
| 646 } | 646 } |
| 647 } | 647 } |
| 648 | 648 |
| 649 | 649 |
| 650 // | 650 // |
| 651 // When describing shape of live ranges in comments below we are going to use | 651 // When describing shape of live ranges in comments below we are going to use |
| 652 // the following notation: | 652 // the following notation: |
| 653 // | 653 // |
| (...skipping 317 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 971 // i i' | 971 // i i' |
| 972 // [--) | 972 // [--) |
| 973 // | 973 // |
| 974 // The stack bitmap describes the position i. | 974 // The stack bitmap describes the position i. |
| 975 for (intptr_t reg = 0; reg < kNumberOfCpuRegisters; reg++) { | 975 for (intptr_t reg = 0; reg < kNumberOfCpuRegisters; reg++) { |
| 976 BlockLocation(Location::RegisterLocation(static_cast<Register>(reg)), | 976 BlockLocation(Location::RegisterLocation(static_cast<Register>(reg)), |
| 977 pos, | 977 pos, |
| 978 pos + 1); | 978 pos + 1); |
| 979 } | 979 } |
| 980 | 980 |
| 981 for (intptr_t reg = 0; reg < kNumberOfXmmRegisters; reg++) { | 981 for (intptr_t reg = 0; reg < kNumberOfFpuRegisters; reg++) { |
| 982 Location::Representation ignored = Location::kDouble; | 982 Location::Representation ignored = Location::kDouble; |
| 983 BlockLocation( | 983 BlockLocation( |
| 984 Location::XmmRegisterLocation(static_cast<XmmRegister>(reg), ignored), | 984 Location::FpuRegisterLocation(static_cast<FpuRegister>(reg), ignored), |
| 985 pos, | 985 pos, |
| 986 pos + 1); | 986 pos + 1); |
| 987 } | 987 } |
| 988 | 988 |
| 989 | 989 |
| 990 #if defined(DEBUG) | 990 #if defined(DEBUG) |
| 991 // Verify that temps, inputs and output were specified as fixed | 991 // Verify that temps, inputs and output were specified as fixed |
| 992 // locations. Every register is blocked now so attempt to | 992 // locations. Every register is blocked now so attempt to |
| 993 // allocate will not succeed. | 993 // allocate will not succeed. |
| 994 for (intptr_t j = 0; j < locs->temp_count(); j++) { | 994 for (intptr_t j = 0; j < locs->temp_count(); j++) { |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1062 range->AddHintedUse(pos + 1, move->dest_slot(), out); | 1062 range->AddHintedUse(pos + 1, move->dest_slot(), out); |
| 1063 } else if (output_same_as_first_input) { | 1063 } else if (output_same_as_first_input) { |
| 1064 // Output register will contain a value of the first input at instruction's | 1064 // Output register will contain a value of the first input at instruction's |
| 1065 // start. Expected shape of live ranges: | 1065 // start. Expected shape of live ranges: |
| 1066 // | 1066 // |
| 1067 // i i' | 1067 // i i' |
| 1068 // input #0 --* | 1068 // input #0 --* |
| 1069 // output [---- | 1069 // output [---- |
| 1070 // | 1070 // |
| 1071 ASSERT(locs->in(0).Equals(Location::RequiresRegister()) || | 1071 ASSERT(locs->in(0).Equals(Location::RequiresRegister()) || |
| 1072 locs->in(0).Equals(Location::RequiresXmmRegister())); | 1072 locs->in(0).Equals(Location::RequiresFpuRegister())); |
| 1073 | 1073 |
| 1074 // Create move that will copy value between input and output. | 1074 // Create move that will copy value between input and output. |
| 1075 locs->set_out(Location::RequiresRegister()); | 1075 locs->set_out(Location::RequiresRegister()); |
| 1076 MoveOperands* move = AddMoveAt(pos, | 1076 MoveOperands* move = AddMoveAt(pos, |
| 1077 Location::RequiresRegister(), | 1077 Location::RequiresRegister(), |
| 1078 Location::Any()); | 1078 Location::Any()); |
| 1079 | 1079 |
| 1080 // Add uses to the live range of the input. | 1080 // Add uses to the live range of the input. |
| 1081 Definition* input = current->InputAt(0)->definition(); | 1081 Definition* input = current->InputAt(0)->definition(); |
| 1082 LiveRange* input_range = | 1082 LiveRange* input_range = |
| (...skipping 14 matching lines...) Expand all Loading... |
| 1097 interference_set->Add(input->ssa_temp_index()); | 1097 interference_set->Add(input->ssa_temp_index()); |
| 1098 } | 1098 } |
| 1099 } else { | 1099 } else { |
| 1100 // Normal unallocated location that requires a register. Expected shape of | 1100 // Normal unallocated location that requires a register. Expected shape of |
| 1101 // live range: | 1101 // live range: |
| 1102 // | 1102 // |
| 1103 // i i' | 1103 // i i' |
| 1104 // output [------- | 1104 // output [------- |
| 1105 // | 1105 // |
| 1106 ASSERT(locs->out().Equals(Location::RequiresRegister()) || | 1106 ASSERT(locs->out().Equals(Location::RequiresRegister()) || |
| 1107 locs->out().Equals(Location::RequiresXmmRegister())); | 1107 locs->out().Equals(Location::RequiresFpuRegister())); |
| 1108 | 1108 |
| 1109 // Shorten live range to the point of definition and add use to be filled by | 1109 // Shorten live range to the point of definition and add use to be filled by |
| 1110 // allocator. | 1110 // allocator. |
| 1111 range->DefineAt(pos); | 1111 range->DefineAt(pos); |
| 1112 range->AddUse(pos, out); | 1112 range->AddUse(pos, out); |
| 1113 } | 1113 } |
| 1114 | 1114 |
| 1115 AssignSafepoints(range); | 1115 AssignSafepoints(range); |
| 1116 CompleteRange(range, RegisterKindForResult(current)); | 1116 CompleteRange(range, RegisterKindForResult(current)); |
| 1117 } | 1117 } |
| (...skipping 190 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1308 } | 1308 } |
| 1309 | 1309 |
| 1310 | 1310 |
| 1311 UsePosition* AllocationFinger::FirstRegisterUse(intptr_t after) { | 1311 UsePosition* AllocationFinger::FirstRegisterUse(intptr_t after) { |
| 1312 for (UsePosition* use = FirstUseAfter(first_register_use_, after); | 1312 for (UsePosition* use = FirstUseAfter(first_register_use_, after); |
| 1313 use != NULL; | 1313 use != NULL; |
| 1314 use = use->next()) { | 1314 use = use->next()) { |
| 1315 Location* loc = use->location_slot(); | 1315 Location* loc = use->location_slot(); |
| 1316 if (loc->IsUnallocated() && | 1316 if (loc->IsUnallocated() && |
| 1317 ((loc->policy() == Location::kRequiresRegister) || | 1317 ((loc->policy() == Location::kRequiresRegister) || |
| 1318 (loc->policy() == Location::kRequiresXmmRegister))) { | 1318 (loc->policy() == Location::kRequiresFpuRegister))) { |
| 1319 first_register_use_ = use; | 1319 first_register_use_ = use; |
| 1320 return use; | 1320 return use; |
| 1321 } | 1321 } |
| 1322 } | 1322 } |
| 1323 return NULL; | 1323 return NULL; |
| 1324 } | 1324 } |
| 1325 | 1325 |
| 1326 | 1326 |
| 1327 UsePosition* AllocationFinger::FirstRegisterBeneficialUse(intptr_t after) { | 1327 UsePosition* AllocationFinger::FirstRegisterBeneficialUse(intptr_t after) { |
| 1328 for (UsePosition* use = FirstUseAfter(first_register_beneficial_use_, after); | 1328 for (UsePosition* use = FirstUseAfter(first_register_beneficial_use_, after); |
| (...skipping 412 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1741 | 1741 |
| 1742 // We have a very good candidate (either hinted to us or completely free). | 1742 // We have a very good candidate (either hinted to us or completely free). |
| 1743 // If we are in a loop try to reduce number of moves on the back edge by | 1743 // If we are in a loop try to reduce number of moves on the back edge by |
| 1744 // searching for a candidate that does not interfere with phis on the back | 1744 // searching for a candidate that does not interfere with phis on the back |
| 1745 // edge. | 1745 // edge. |
| 1746 BlockInfo* loop_header = BlockInfoAt(unallocated->Start())->loop_header(); | 1746 BlockInfo* loop_header = BlockInfoAt(unallocated->Start())->loop_header(); |
| 1747 if ((unallocated->vreg() >= 0) && | 1747 if ((unallocated->vreg() >= 0) && |
| 1748 (loop_header != NULL) && | 1748 (loop_header != NULL) && |
| 1749 (free_until >= loop_header->last_block()->end_pos()) && | 1749 (free_until >= loop_header->last_block()->end_pos()) && |
| 1750 loop_header->backedge_interference()->Contains(unallocated->vreg())) { | 1750 loop_header->backedge_interference()->Contains(unallocated->vreg())) { |
| 1751 ASSERT(static_cast<intptr_t>(kNumberOfXmmRegisters) <= | 1751 ASSERT(static_cast<intptr_t>(kNumberOfFpuRegisters) <= |
| 1752 kNumberOfCpuRegisters); | 1752 kNumberOfCpuRegisters); |
| 1753 bool used_on_backedge[kNumberOfCpuRegisters] = { false }; | 1753 bool used_on_backedge[kNumberOfCpuRegisters] = { false }; |
| 1754 | 1754 |
| 1755 for (PhiIterator it(loop_header->entry()->AsJoinEntry()); | 1755 for (PhiIterator it(loop_header->entry()->AsJoinEntry()); |
| 1756 !it.Done(); | 1756 !it.Done(); |
| 1757 it.Advance()) { | 1757 it.Advance()) { |
| 1758 PhiInstr* phi = it.Current(); | 1758 PhiInstr* phi = it.Current(); |
| 1759 if (phi->is_alive()) { | 1759 if (phi->is_alive()) { |
| 1760 const intptr_t phi_vreg = phi->ssa_temp_index(); | 1760 const intptr_t phi_vreg = phi->ssa_temp_index(); |
| 1761 LiveRange* range = GetLiveRange(phi_vreg); | 1761 LiveRange* range = GetLiveRange(phi_vreg); |
| (...skipping 430 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2192 AddToSortedListOfRanges(&unallocated_, range); | 2192 AddToSortedListOfRanges(&unallocated_, range); |
| 2193 } | 2193 } |
| 2194 | 2194 |
| 2195 | 2195 |
| 2196 void FlowGraphAllocator::CompleteRange(LiveRange* range, Location::Kind kind) { | 2196 void FlowGraphAllocator::CompleteRange(LiveRange* range, Location::Kind kind) { |
| 2197 switch (kind) { | 2197 switch (kind) { |
| 2198 case Location::kRegister: | 2198 case Location::kRegister: |
| 2199 AddToSortedListOfRanges(&unallocated_cpu_, range); | 2199 AddToSortedListOfRanges(&unallocated_cpu_, range); |
| 2200 break; | 2200 break; |
| 2201 | 2201 |
| 2202 case Location::kXmmRegister: | 2202 case Location::kFpuRegister: |
| 2203 AddToSortedListOfRanges(&unallocated_xmm_, range); | 2203 AddToSortedListOfRanges(&unallocated_xmm_, range); |
| 2204 break; | 2204 break; |
| 2205 | 2205 |
| 2206 default: | 2206 default: |
| 2207 UNREACHABLE(); | 2207 UNREACHABLE(); |
| 2208 } | 2208 } |
| 2209 } | 2209 } |
| 2210 | 2210 |
| 2211 | 2211 |
| 2212 #if defined(DEBUG) | 2212 #if defined(DEBUG) |
| (...skipping 263 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2476 PrepareForAllocation(Location::kRegister, | 2476 PrepareForAllocation(Location::kRegister, |
| 2477 kNumberOfCpuRegisters, | 2477 kNumberOfCpuRegisters, |
| 2478 unallocated_cpu_, | 2478 unallocated_cpu_, |
| 2479 cpu_regs_, | 2479 cpu_regs_, |
| 2480 blocked_cpu_registers_); | 2480 blocked_cpu_registers_); |
| 2481 AllocateUnallocatedRanges(); | 2481 AllocateUnallocatedRanges(); |
| 2482 | 2482 |
| 2483 cpu_spill_slot_count_ = spill_slots_.length(); | 2483 cpu_spill_slot_count_ = spill_slots_.length(); |
| 2484 spill_slots_.Clear(); | 2484 spill_slots_.Clear(); |
| 2485 | 2485 |
| 2486 PrepareForAllocation(Location::kXmmRegister, | 2486 PrepareForAllocation(Location::kFpuRegister, |
| 2487 kNumberOfXmmRegisters, | 2487 kNumberOfFpuRegisters, |
| 2488 unallocated_xmm_, | 2488 unallocated_xmm_, |
| 2489 xmm_regs_, | 2489 fpu_regs_, |
| 2490 blocked_xmm_registers_); | 2490 blocked_fpu_registers_); |
| 2491 AllocateUnallocatedRanges(); | 2491 AllocateUnallocatedRanges(); |
| 2492 | 2492 |
| 2493 ResolveControlFlow(); | 2493 ResolveControlFlow(); |
| 2494 | 2494 |
| 2495 GraphEntryInstr* entry = block_order_[0]->AsGraphEntry(); | 2495 GraphEntryInstr* entry = block_order_[0]->AsGraphEntry(); |
| 2496 ASSERT(entry != NULL); | 2496 ASSERT(entry != NULL); |
| 2497 intptr_t double_spill_slot_count = | 2497 intptr_t double_spill_slot_count = |
| 2498 spill_slots_.length() * kDoubleSpillSlotFactor; | 2498 spill_slots_.length() * kDoubleSpillSlotFactor; |
| 2499 entry->set_spill_slot_count(cpu_spill_slot_count_ + double_spill_slot_count); | 2499 entry->set_spill_slot_count(cpu_spill_slot_count_ + double_spill_slot_count); |
| 2500 | 2500 |
| 2501 if (FLAG_print_ssa_liveranges) { | 2501 if (FLAG_print_ssa_liveranges) { |
| 2502 const Function& function = flow_graph_.parsed_function().function(); | 2502 const Function& function = flow_graph_.parsed_function().function(); |
| 2503 | 2503 |
| 2504 OS::Print("-- [after ssa allocator] ranges [%s] ---------\n", | 2504 OS::Print("-- [after ssa allocator] ranges [%s] ---------\n", |
| 2505 function.ToFullyQualifiedCString()); | 2505 function.ToFullyQualifiedCString()); |
| 2506 PrintLiveRanges(); | 2506 PrintLiveRanges(); |
| 2507 OS::Print("----------------------------------------------\n"); | 2507 OS::Print("----------------------------------------------\n"); |
| 2508 | 2508 |
| 2509 OS::Print("-- [after ssa allocator] ir [%s] -------------\n", | 2509 OS::Print("-- [after ssa allocator] ir [%s] -------------\n", |
| 2510 function.ToFullyQualifiedCString()); | 2510 function.ToFullyQualifiedCString()); |
| 2511 FlowGraphPrinter printer(flow_graph_, true); | 2511 FlowGraphPrinter printer(flow_graph_, true); |
| 2512 printer.PrintBlocks(); | 2512 printer.PrintBlocks(); |
| 2513 OS::Print("----------------------------------------------\n"); | 2513 OS::Print("----------------------------------------------\n"); |
| 2514 } | 2514 } |
| 2515 } | 2515 } |
| 2516 | 2516 |
| 2517 | 2517 |
| 2518 } // namespace dart | 2518 } // namespace dart |
| OLD | NEW |