Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(879)

Side by Side Diff: runtime/vm/flow_graph_allocator.cc

Issue 13801014: Fix bug in ParallelMoveResolver::EmitSwap: implement swaps of FPU spill slots. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Add comments Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 #include "vm/flow_graph_allocator.h" 5 #include "vm/flow_graph_allocator.h"
6 6
7 #include "vm/bit_vector.h" 7 #include "vm/bit_vector.h"
8 #include "vm/intermediate_language.h" 8 #include "vm/intermediate_language.h"
9 #include "vm/il_printer.h" 9 #include "vm/il_printer.h"
10 #include "vm/flow_graph.h" 10 #include "vm/flow_graph.h"
(...skipping 588 matching lines...) Expand 10 before | Expand all | Expand 10 after
599 // Slot index for the leftmost copied parameter is 0. 599 // Slot index for the leftmost copied parameter is 0.
600 intptr_t slot_index = param->index(); 600 intptr_t slot_index = param->index();
601 // Slot index for the rightmost fixed parameter is -1. 601 // Slot index for the rightmost fixed parameter is -1.
602 slot_index -= flow_graph_.num_non_copied_params(); 602 slot_index -= flow_graph_.num_non_copied_params();
603 603
604 range->set_assigned_location(Location::StackSlot(slot_index)); 604 range->set_assigned_location(Location::StackSlot(slot_index));
605 range->set_spill_slot(Location::StackSlot(slot_index)); 605 range->set_spill_slot(Location::StackSlot(slot_index));
606 if (flow_graph_.num_copied_params() > 0) { 606 if (flow_graph_.num_copied_params() > 0) {
607 ASSERT(spill_slots_.length() == slot_index); 607 ASSERT(spill_slots_.length() == slot_index);
608 spill_slots_.Add(range->End()); 608 spill_slots_.Add(range->End());
609 quad_spill_slots_.Add(false);
609 } 610 }
610 AssignSafepoints(range); 611 AssignSafepoints(range);
611 } else { 612 } else {
612 ConstantInstr* constant = defn->AsConstant(); 613 ConstantInstr* constant = defn->AsConstant();
613 ASSERT(constant != NULL); 614 ASSERT(constant != NULL);
614 range->set_assigned_location(Location::Constant(constant->value())); 615 range->set_assigned_location(Location::Constant(constant->value()));
615 range->set_spill_slot(Location::Constant(constant->value())); 616 range->set_spill_slot(Location::Constant(constant->value()));
616 } 617 }
617 range->finger()->Initialize(range); 618 range->finger()->Initialize(range);
618 UsePosition* use = 619 UsePosition* use =
(...skipping 367 matching lines...) Expand 10 before | Expand all | Expand 10 after
986 // [--) 987 // [--)
987 // 988 //
988 // The stack bitmap describes the position i. 989 // The stack bitmap describes the position i.
989 for (intptr_t reg = 0; reg < kNumberOfCpuRegisters; reg++) { 990 for (intptr_t reg = 0; reg < kNumberOfCpuRegisters; reg++) {
990 BlockLocation(Location::RegisterLocation(static_cast<Register>(reg)), 991 BlockLocation(Location::RegisterLocation(static_cast<Register>(reg)),
991 pos, 992 pos,
992 pos + 1); 993 pos + 1);
993 } 994 }
994 995
995 for (intptr_t reg = 0; reg < kNumberOfFpuRegisters; reg++) { 996 for (intptr_t reg = 0; reg < kNumberOfFpuRegisters; reg++) {
996 Representation ignored = kNoRepresentation;
997 BlockLocation( 997 BlockLocation(
998 Location::FpuRegisterLocation(static_cast<FpuRegister>(reg), ignored), 998 Location::FpuRegisterLocation(static_cast<FpuRegister>(reg)),
999 pos, 999 pos,
1000 pos + 1); 1000 pos + 1);
1001 } 1001 }
1002 1002
1003 1003
1004 #if defined(DEBUG) 1004 #if defined(DEBUG)
1005 // Verify that temps, inputs and output were specified as fixed 1005 // Verify that temps, inputs and output were specified as fixed
1006 // locations. Every register is blocked now so attempt to 1006 // locations. Every register is blocked now so attempt to
1007 // allocate will not succeed. 1007 // allocate will not succeed.
1008 for (intptr_t j = 0; j < locs->temp_count(); j++) { 1008 for (intptr_t j = 0; j < locs->temp_count(); j++) {
(...skipping 565 matching lines...) Expand 10 before | Expand all | Expand 10 after
1574 } 1574 }
1575 1575
1576 LiveRange* tail = range->SplitAt(from); 1576 LiveRange* tail = range->SplitAt(from);
1577 Spill(tail); 1577 Spill(tail);
1578 } 1578 }
1579 1579
1580 1580
1581 void FlowGraphAllocator::AllocateSpillSlotFor(LiveRange* range) { 1581 void FlowGraphAllocator::AllocateSpillSlotFor(LiveRange* range) {
1582 ASSERT(range->spill_slot().IsInvalid()); 1582 ASSERT(range->spill_slot().IsInvalid());
1583 1583
1584 intptr_t idx = 0; 1584 // Compute range start and end.
1585 for (; idx < spill_slots_.length(); idx++) {
1586 if (spill_slots_[idx] <= range->Start()) break;
1587 }
1588
1589 if (idx == spill_slots_.length()) spill_slots_.Add(0);
1590
1591 LiveRange* last_sibling = range; 1585 LiveRange* last_sibling = range;
1592 while (last_sibling->next_sibling() != NULL) { 1586 while (last_sibling->next_sibling() != NULL) {
1593 last_sibling = last_sibling->next_sibling(); 1587 last_sibling = last_sibling->next_sibling();
1594 } 1588 }
1595 1589
1596 spill_slots_[idx] = last_sibling->End(); 1590 const intptr_t start = range->Start();
1591 const intptr_t end = last_sibling->End();
1597 1592
1593 // During fpu register allocation spill slot indices are computed in terms of
1594 // double (64bit) stack slots. We treat quad stack slot (128bit) as a
1595 // consecutive pair of two double spill slots.
1596 // Special care is taken to never allocate the same index to both
1597 // double and quad spill slots as it complicates disambiguation during
1598 // parallel move resolution.
1599 const bool need_quad = (register_kind_ == Location::kFpuRegister) &&
1600 ((range->representation() == kUnboxedFloat32x4) ||
1601 (range->representation() == kUnboxedUint32x4));
1602
1603 // Search for a free spill slot among allocated: the value in it should be
1604 // dead and its type should match (e.g. it should not be a part of the quad if
1605 // we are allocating normal double slot).
1606 intptr_t idx = 0;
1607 for (; idx < spill_slots_.length(); idx++) {
1608 if ((need_quad == quad_spill_slots_[idx]) &&
1609 (spill_slots_[idx] <= start)) {
1610 break;
1611 }
1612 }
1613
1614 if (idx == spill_slots_.length()) {
1615 // No free spill slot found. Allocate a new one.
1616 spill_slots_.Add(0);
1617 quad_spill_slots_.Add(need_quad);
1618 if (need_quad) { // Allocate two double stack slots if we need quad slot.
1619 spill_slots_.Add(0);
1620 quad_spill_slots_.Add(need_quad);
1621 }
1622 }
1623
1624
1625 // Set spill slot expiration boundary to the live range's end.
1626 spill_slots_[idx] = end;
1627 if (need_quad) {
1628 ASSERT(quad_spill_slots_[idx] && quad_spill_slots_[idx + 1]);
1629 idx++; // Use the higher index it corresponds to the lower stack address.
1630 spill_slots_[idx] = end;
1631 } else {
1632 ASSERT(!quad_spill_slots_[idx]);
1633 }
1634
1635 // Assign spill slot to the range.
1598 if (register_kind_ == Location::kRegister) { 1636 if (register_kind_ == Location::kRegister) {
1599 range->set_spill_slot(Location::StackSlot(idx)); 1637 range->set_spill_slot(Location::StackSlot(idx));
1600 } else { 1638 } else {
1601 // FPU register spill slots are essentially two (x64) or four (ia32) normal 1639 // We use the index of the slot with the lowest address as an index for the
1602 // word size spill slots. We use the index of the slot with the lowest 1640 // FPU register spill slot. In terms of indexes this relation is inverted:
1603 // address as an index for the FPU register spill slot. In terms of indexes 1641 // so we have to take the highest index.
1604 // this relation is inverted: so we have to take the highest index.
1605 const intptr_t slot_idx = cpu_spill_slot_count_ + 1642 const intptr_t slot_idx = cpu_spill_slot_count_ +
1606 idx * kFpuRegisterSpillFactor + (kFpuRegisterSpillFactor - 1); 1643 idx * kDoubleSpillFactor + (kDoubleSpillFactor - 1);
1607 1644
1608 Location location; 1645 Location location;
1609 if (range->representation() == kUnboxedFloat32x4) { 1646 if ((range->representation() == kUnboxedFloat32x4) ||
1610 location = Location::Float32x4StackSlot(slot_idx); 1647 (range->representation() == kUnboxedUint32x4)) {
1611 } else if (range->representation() == kUnboxedUint32x4) { 1648 ASSERT(need_quad);
1612 location = Location::Uint32x4StackSlot(slot_idx); 1649 location = Location::QuadStackSlot(slot_idx);
1613 } else { 1650 } else {
1614 ASSERT((range->representation() == kUnboxedDouble) || 1651 ASSERT((range->representation() == kUnboxedDouble) ||
1615 (range->representation() == kUnboxedMint)); 1652 (range->representation() == kUnboxedMint));
1616 location = Location::DoubleStackSlot(slot_idx, 1653 location = Location::DoubleStackSlot(slot_idx);
1617 range->representation());
1618 } 1654 }
1619 range->set_spill_slot(location); 1655 range->set_spill_slot(location);
1620 } 1656 }
1621 1657
1622 spilled_.Add(range); 1658 spilled_.Add(range);
1623 } 1659 }
1624 1660
1625 1661
1626 void FlowGraphAllocator::MarkAsObjectAtSafepoints(LiveRange* range) { 1662 void FlowGraphAllocator::MarkAsObjectAtSafepoints(LiveRange* range) {
1627 intptr_t stack_index = range->spill_slot().stack_index(); 1663 intptr_t stack_index = range->spill_slot().stack_index();
(...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after
1808 if (!reaching_defs_.Get(phi)->Contains(unallocated->vreg())) { 1844 if (!reaching_defs_.Get(phi)->Contains(unallocated->vreg())) {
1809 used_on_backedge[reg] = true; 1845 used_on_backedge[reg] = true;
1810 } 1846 }
1811 } 1847 }
1812 } 1848 }
1813 1849
1814 if (used_on_backedge[candidate]) { 1850 if (used_on_backedge[candidate]) {
1815 TRACE_ALLOC(OS::Print( 1851 TRACE_ALLOC(OS::Print(
1816 "considering %s for v%"Pd": has interference on the back edge" 1852 "considering %s for v%"Pd": has interference on the back edge"
1817 " {loop [%"Pd", %"Pd")}\n", 1853 " {loop [%"Pd", %"Pd")}\n",
1818 MakeRegisterLocation(candidate, kUnboxedDouble).Name(), 1854 MakeRegisterLocation(candidate).Name(),
1819 unallocated->vreg(), 1855 unallocated->vreg(),
1820 loop_header->entry()->start_pos(), 1856 loop_header->entry()->start_pos(),
1821 loop_header->last_block()->end_pos())); 1857 loop_header->last_block()->end_pos()));
1822 for (intptr_t reg = 0; reg < NumberOfRegisters(); ++reg) { 1858 for (intptr_t reg = 0; reg < NumberOfRegisters(); ++reg) {
1823 if (blocked_registers_[reg] || 1859 if (blocked_registers_[reg] ||
1824 (reg == candidate) || 1860 (reg == candidate) ||
1825 used_on_backedge[reg]) { 1861 used_on_backedge[reg]) {
1826 continue; 1862 continue;
1827 } 1863 }
1828 1864
1829 const intptr_t intersection = 1865 const intptr_t intersection =
1830 FirstIntersectionWithAllocated(reg, unallocated); 1866 FirstIntersectionWithAllocated(reg, unallocated);
1831 if (intersection >= free_until) { 1867 if (intersection >= free_until) {
1832 candidate = reg; 1868 candidate = reg;
1833 free_until = intersection; 1869 free_until = intersection;
1834 TRACE_ALLOC(OS::Print( 1870 TRACE_ALLOC(OS::Print(
1835 "found %s for v%"Pd" with no interference on the back edge\n", 1871 "found %s for v%"Pd" with no interference on the back edge\n",
1836 MakeRegisterLocation(candidate, kUnboxedDouble).Name(), 1872 MakeRegisterLocation(candidate).Name(),
1837 candidate)); 1873 candidate));
1838 break; 1874 break;
1839 } 1875 }
1840 } 1876 }
1841 } 1877 }
1842 } 1878 }
1843 1879
1844 TRACE_ALLOC(OS::Print("assigning free register ")); 1880 TRACE_ALLOC(OS::Print("assigning free register "));
1845 TRACE_ALLOC(MakeRegisterLocation(candidate, kUnboxedDouble).Print()); 1881 TRACE_ALLOC(MakeRegisterLocation(candidate).Print());
1846 TRACE_ALLOC(OS::Print(" to v%"Pd"\n", unallocated->vreg())); 1882 TRACE_ALLOC(OS::Print(" to v%"Pd"\n", unallocated->vreg()));
1847 1883
1848 if (free_until != kMaxPosition) { 1884 if (free_until != kMaxPosition) {
1849 // There was an intersection. Split unallocated. 1885 // There was an intersection. Split unallocated.
1850 TRACE_ALLOC(OS::Print(" splitting at %"Pd"\n", free_until)); 1886 TRACE_ALLOC(OS::Print(" splitting at %"Pd"\n", free_until));
1851 LiveRange* tail = unallocated->SplitAt(free_until); 1887 LiveRange* tail = unallocated->SplitAt(free_until);
1852 AddToUnallocated(tail); 1888 AddToUnallocated(tail);
1853 } 1889 }
1854 1890
1855 registers_[candidate].Add(unallocated); 1891 registers_[candidate].Add(unallocated);
1856 unallocated->set_assigned_location( 1892 unallocated->set_assigned_location(MakeRegisterLocation(candidate));
1857 MakeRegisterLocation(candidate, unallocated->representation()));
1858 1893
1859 return true; 1894 return true;
1860 } 1895 }
1861 1896
1862 1897
1863 bool FlowGraphAllocator::RangeHasOnlyUnconstrainedUsesInLoop(LiveRange* range, 1898 bool FlowGraphAllocator::RangeHasOnlyUnconstrainedUsesInLoop(LiveRange* range,
1864 intptr_t loop_id) { 1899 intptr_t loop_id) {
1865 if (range->vreg() >= 0) { 1900 if (range->vreg() >= 0) {
1866 return GetLiveRange(range->vreg())->HasOnlyUnconstrainedUsesInLoop(loop_id); 1901 return GetLiveRange(range->vreg())->HasOnlyUnconstrainedUsesInLoop(loop_id);
1867 } 1902 }
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
1938 (register_use != NULL) ? register_use->pos() 1973 (register_use != NULL) ? register_use->pos()
1939 : unallocated->Start(); 1974 : unallocated->Start();
1940 if (free_until < register_use_pos) { 1975 if (free_until < register_use_pos) {
1941 // Can't acquire free register. Spill until we really need one. 1976 // Can't acquire free register. Spill until we really need one.
1942 ASSERT(unallocated->Start() < ToInstructionStart(register_use_pos)); 1977 ASSERT(unallocated->Start() < ToInstructionStart(register_use_pos));
1943 SpillBetween(unallocated, unallocated->Start(), register_use->pos()); 1978 SpillBetween(unallocated, unallocated->Start(), register_use->pos());
1944 return; 1979 return;
1945 } 1980 }
1946 1981
1947 TRACE_ALLOC(OS::Print("assigning blocked register ")); 1982 TRACE_ALLOC(OS::Print("assigning blocked register "));
1948 TRACE_ALLOC(MakeRegisterLocation(candidate, kUnboxedDouble).Print()); 1983 TRACE_ALLOC(MakeRegisterLocation(candidate).Print());
1949 TRACE_ALLOC(OS::Print(" to live range v%"Pd" until %"Pd"\n", 1984 TRACE_ALLOC(OS::Print(" to live range v%"Pd" until %"Pd"\n",
1950 unallocated->vreg(), blocked_at)); 1985 unallocated->vreg(), blocked_at));
1951 1986
1952 if (blocked_at < unallocated->End()) { 1987 if (blocked_at < unallocated->End()) {
1953 // Register is blocked before the end of the live range. Split the range 1988 // Register is blocked before the end of the live range. Split the range
1954 // at latest at blocked_at position. 1989 // at latest at blocked_at position.
1955 LiveRange* tail = SplitBetween(unallocated, 1990 LiveRange* tail = SplitBetween(unallocated,
1956 unallocated->Start(), 1991 unallocated->Start(),
1957 blocked_at + 1); 1992 blocked_at + 1);
1958 AddToUnallocated(tail); 1993 AddToUnallocated(tail);
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
2044 } 2079 }
2045 registers_[reg][i] = NULL; 2080 registers_[reg][i] = NULL;
2046 first_evicted = i; 2081 first_evicted = i;
2047 } 2082 }
2048 } 2083 }
2049 2084
2050 // Remove evicted ranges from the array. 2085 // Remove evicted ranges from the array.
2051 if (first_evicted != -1) RemoveEvicted(reg, first_evicted); 2086 if (first_evicted != -1) RemoveEvicted(reg, first_evicted);
2052 2087
2053 registers_[reg].Add(unallocated); 2088 registers_[reg].Add(unallocated);
2054 unallocated->set_assigned_location( 2089 unallocated->set_assigned_location(MakeRegisterLocation(reg));
2055 MakeRegisterLocation(reg, unallocated->representation()));
2056 } 2090 }
2057 2091
2058 2092
2059 bool FlowGraphAllocator::EvictIntersection(LiveRange* allocated, 2093 bool FlowGraphAllocator::EvictIntersection(LiveRange* allocated,
2060 LiveRange* unallocated) { 2094 LiveRange* unallocated) {
2061 UseInterval* first_unallocated = 2095 UseInterval* first_unallocated =
2062 unallocated->finger()->first_pending_use_interval(); 2096 unallocated->finger()->first_pending_use_interval();
2063 const intptr_t intersection = FirstIntersection( 2097 const intptr_t intersection = FirstIntersection(
2064 allocated->finger()->first_pending_use_interval(), 2098 allocated->finger()->first_pending_use_interval(),
2065 first_unallocated); 2099 first_unallocated);
(...skipping 449 matching lines...) Expand 10 before | Expand all | Expand 10 after
2515 2549
2516 PrepareForAllocation(Location::kRegister, 2550 PrepareForAllocation(Location::kRegister,
2517 kNumberOfCpuRegisters, 2551 kNumberOfCpuRegisters,
2518 unallocated_cpu_, 2552 unallocated_cpu_,
2519 cpu_regs_, 2553 cpu_regs_,
2520 blocked_cpu_registers_); 2554 blocked_cpu_registers_);
2521 AllocateUnallocatedRanges(); 2555 AllocateUnallocatedRanges();
2522 2556
2523 cpu_spill_slot_count_ = spill_slots_.length(); 2557 cpu_spill_slot_count_ = spill_slots_.length();
2524 spill_slots_.Clear(); 2558 spill_slots_.Clear();
2559 quad_spill_slots_.Clear();
2525 2560
2526 PrepareForAllocation(Location::kFpuRegister, 2561 PrepareForAllocation(Location::kFpuRegister,
2527 kNumberOfFpuRegisters, 2562 kNumberOfFpuRegisters,
2528 unallocated_xmm_, 2563 unallocated_xmm_,
2529 fpu_regs_, 2564 fpu_regs_,
2530 blocked_fpu_registers_); 2565 blocked_fpu_registers_);
2531 AllocateUnallocatedRanges(); 2566 AllocateUnallocatedRanges();
2532 2567
2533 ResolveControlFlow(); 2568 ResolveControlFlow();
2534 2569
2535 GraphEntryInstr* entry = block_order_[0]->AsGraphEntry(); 2570 GraphEntryInstr* entry = block_order_[0]->AsGraphEntry();
2536 ASSERT(entry != NULL); 2571 ASSERT(entry != NULL);
2537 intptr_t double_spill_slot_count = 2572 intptr_t double_spill_slot_count = spill_slots_.length() * kDoubleSpillFactor;
2538 spill_slots_.length() * kFpuRegisterSpillFactor;
2539 entry->set_spill_slot_count(cpu_spill_slot_count_ + double_spill_slot_count); 2573 entry->set_spill_slot_count(cpu_spill_slot_count_ + double_spill_slot_count);
2540 2574
2541 if (FLAG_print_ssa_liveranges) { 2575 if (FLAG_print_ssa_liveranges) {
2542 const Function& function = flow_graph_.parsed_function().function(); 2576 const Function& function = flow_graph_.parsed_function().function();
2543 2577
2544 OS::Print("-- [after ssa allocator] ranges [%s] ---------\n", 2578 OS::Print("-- [after ssa allocator] ranges [%s] ---------\n",
2545 function.ToFullyQualifiedCString()); 2579 function.ToFullyQualifiedCString());
2546 PrintLiveRanges(); 2580 PrintLiveRanges();
2547 OS::Print("----------------------------------------------\n"); 2581 OS::Print("----------------------------------------------\n");
2548 2582
2549 OS::Print("-- [after ssa allocator] ir [%s] -------------\n", 2583 OS::Print("-- [after ssa allocator] ir [%s] -------------\n",
2550 function.ToFullyQualifiedCString()); 2584 function.ToFullyQualifiedCString());
2551 FlowGraphPrinter printer(flow_graph_, true); 2585 FlowGraphPrinter printer(flow_graph_, true);
2552 printer.PrintBlocks(); 2586 printer.PrintBlocks();
2553 OS::Print("----------------------------------------------\n"); 2587 OS::Print("----------------------------------------------\n");
2554 } 2588 }
2555 } 2589 }
2556 2590
2557 2591
2558 } // namespace dart 2592 } // namespace dart
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698