| OLD | NEW |
| 1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// | 1 //===- subzero/src/IceCfgNode.cpp - Basic block (node) implementation -----===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| 11 /// This file implements the CfgNode class, including the complexities | 11 /// This file implements the CfgNode class, including the complexities of |
| 12 /// of instruction insertion and in-edge calculation. | 12 /// instruction insertion and in-edge calculation. |
| 13 /// | 13 /// |
| 14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
| 15 | 15 |
| 16 #include "IceCfgNode.h" | 16 #include "IceCfgNode.h" |
| 17 | 17 |
| 18 #include "IceAssembler.h" | 18 #include "IceAssembler.h" |
| 19 #include "IceCfg.h" | 19 #include "IceCfg.h" |
| 20 #include "IceGlobalInits.h" | 20 #include "IceGlobalInits.h" |
| 21 #include "IceInst.h" | 21 #include "IceInst.h" |
| 22 #include "IceInstVarIter.h" | 22 #include "IceInstVarIter.h" |
| 23 #include "IceLiveness.h" | 23 #include "IceLiveness.h" |
| 24 #include "IceOperand.h" | 24 #include "IceOperand.h" |
| 25 #include "IceTargetLowering.h" | 25 #include "IceTargetLowering.h" |
| 26 | 26 |
| 27 namespace Ice { | 27 namespace Ice { |
| 28 | 28 |
| 29 CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber) | 29 CfgNode::CfgNode(Cfg *Func, SizeT LabelNumber) |
| 30 : Func(Func), Number(LabelNumber) {} | 30 : Func(Func), Number(LabelNumber) {} |
| 31 | 31 |
| 32 // Returns the name the node was created with. If no name was given, | 32 // Returns the name the node was created with. If no name was given, it |
| 33 // it synthesizes a (hopefully) unique name. | 33 // synthesizes a (hopefully) unique name. |
| 34 IceString CfgNode::getName() const { | 34 IceString CfgNode::getName() const { |
| 35 if (NameIndex >= 0) | 35 if (NameIndex >= 0) |
| 36 return Func->getIdentifierName(NameIndex); | 36 return Func->getIdentifierName(NameIndex); |
| 37 return "__" + std::to_string(getIndex()); | 37 return "__" + std::to_string(getIndex()); |
| 38 } | 38 } |
| 39 | 39 |
| 40 // Adds an instruction to either the Phi list or the regular | 40 // Adds an instruction to either the Phi list or the regular instruction list. |
| 41 // instruction list. Validates that all Phis are added before all | 41 // Validates that all Phis are added before all regular instructions. |
| 42 // regular instructions. | |
| 43 void CfgNode::appendInst(Inst *Inst) { | 42 void CfgNode::appendInst(Inst *Inst) { |
| 44 ++InstCountEstimate; | 43 ++InstCountEstimate; |
| 45 if (InstPhi *Phi = llvm::dyn_cast<InstPhi>(Inst)) { | 44 if (InstPhi *Phi = llvm::dyn_cast<InstPhi>(Inst)) { |
| 46 if (!Insts.empty()) { | 45 if (!Insts.empty()) { |
| 47 Func->setError("Phi instruction added to the middle of a block"); | 46 Func->setError("Phi instruction added to the middle of a block"); |
| 48 return; | 47 return; |
| 49 } | 48 } |
| 50 Phis.push_back(Phi); | 49 Phis.push_back(Phi); |
| 51 } else { | 50 } else { |
| 52 Insts.push_back(Inst); | 51 Insts.push_back(Inst); |
| 53 } | 52 } |
| 54 } | 53 } |
| 55 | 54 |
| 56 // Renumbers the non-deleted instructions in the node. This needs to | 55 // Renumbers the non-deleted instructions in the node. This needs to be done in |
| 57 // be done in preparation for live range analysis. The instruction | 56 // preparation for live range analysis. The instruction numbers in a block must |
| 58 // numbers in a block must be monotonically increasing. The range of | 57 // be monotonically increasing. The range of instruction numbers in a block, |
| 59 // instruction numbers in a block, from lowest to highest, must not | 58 // from lowest to highest, must not overlap with the range of any other block. |
| 60 // overlap with the range of any other block. | |
| 61 void CfgNode::renumberInstructions() { | 59 void CfgNode::renumberInstructions() { |
| 62 InstNumberT FirstNumber = Func->getNextInstNumber(); | 60 InstNumberT FirstNumber = Func->getNextInstNumber(); |
| 63 for (Inst &I : Phis) | 61 for (Inst &I : Phis) |
| 64 I.renumber(Func); | 62 I.renumber(Func); |
| 65 for (Inst &I : Insts) | 63 for (Inst &I : Insts) |
| 66 I.renumber(Func); | 64 I.renumber(Func); |
| 67 InstCountEstimate = Func->getNextInstNumber() - FirstNumber; | 65 InstCountEstimate = Func->getNextInstNumber() - FirstNumber; |
| 68 } | 66 } |
| 69 | 67 |
| 70 // When a node is created, the OutEdges are immediately known, but the | 68 // When a node is created, the OutEdges are immediately known, but the InEdges |
| 71 // InEdges have to be built up incrementally. After the CFG has been | 69 // have to be built up incrementally. After the CFG has been constructed, the |
| 72 // constructed, the computePredecessors() pass finalizes it by | 70 // computePredecessors() pass finalizes it by creating the InEdges list. |
| 73 // creating the InEdges list. | |
| 74 void CfgNode::computePredecessors() { | 71 void CfgNode::computePredecessors() { |
| 75 for (CfgNode *Succ : OutEdges) | 72 for (CfgNode *Succ : OutEdges) |
| 76 Succ->InEdges.push_back(this); | 73 Succ->InEdges.push_back(this); |
| 77 } | 74 } |
| 78 | 75 |
| 79 void CfgNode::computeSuccessors() { | 76 void CfgNode::computeSuccessors() { |
| 80 OutEdges = Insts.rbegin()->getTerminatorEdges(); | 77 OutEdges = Insts.rbegin()->getTerminatorEdges(); |
| 81 } | 78 } |
| 82 | 79 |
| 83 // This does part 1 of Phi lowering, by creating a new dest variable | 80 // This does part 1 of Phi lowering, by creating a new dest variable for each |
| 84 // for each Phi instruction, replacing the Phi instruction's dest with | 81 // Phi instruction, replacing the Phi instruction's dest with that variable, |
| 85 // that variable, and adding an explicit assignment of the old dest to | 82 // and adding an explicit assignment of the old dest to the new dest. For |
| 86 // the new dest. For example, | 83 // example, |
| 87 // a=phi(...) | 84 // a=phi(...) |
| 88 // changes to | 85 // changes to |
| 89 // "a_phi=phi(...); a=a_phi". | 86 // "a_phi=phi(...); a=a_phi". |
| 90 // | 87 // |
| 91 // This is in preparation for part 2 which deletes the Phi | 88 // This is in preparation for part 2 which deletes the Phi instructions and |
| 92 // instructions and appends assignment instructions to predecessor | 89 // appends assignment instructions to predecessor blocks. Note that this |
| 93 // blocks. Note that this transformation preserves SSA form. | 90 // transformation preserves SSA form. |
| 94 void CfgNode::placePhiLoads() { | 91 void CfgNode::placePhiLoads() { |
| 95 for (Inst &I : Phis) { | 92 for (Inst &I : Phis) { |
| 96 auto Phi = llvm::dyn_cast<InstPhi>(&I); | 93 auto Phi = llvm::dyn_cast<InstPhi>(&I); |
| 97 Insts.insert(Insts.begin(), Phi->lower(Func)); | 94 Insts.insert(Insts.begin(), Phi->lower(Func)); |
| 98 } | 95 } |
| 99 } | 96 } |
| 100 | 97 |
| 101 // This does part 2 of Phi lowering. For each Phi instruction at each | 98 // This does part 2 of Phi lowering. For each Phi instruction at each out-edge, |
| 102 // out-edge, create a corresponding assignment instruction, and add | 99 // create a corresponding assignment instruction, and add all the assignments |
| 103 // all the assignments near the end of this block. They need to be | 100 // near the end of this block. They need to be added before any branch |
| 104 // added before any branch instruction, and also if the block ends | 101 // instruction, and also if the block ends with a compare instruction followed |
| 105 // with a compare instruction followed by a branch instruction that we | 102 // by a branch instruction that we may want to fuse, it's better to insert the |
| 106 // may want to fuse, it's better to insert the new assignments before | 103 // new assignments before the compare instruction. The |
| 107 // the compare instruction. The tryOptimizedCmpxchgCmpBr() method | 104 // tryOptimizedCmpxchgCmpBr() method assumes this ordering of instructions. |
| 108 // assumes this ordering of instructions. | |
| 109 // | 105 // |
| 110 // Note that this transformation takes the Phi dest variables out of | 106 // Note that this transformation takes the Phi dest variables out of SSA form, |
| 111 // SSA form, as there may be assignments to the dest variable in | 107 // as there may be assignments to the dest variable in multiple blocks. |
| 112 // multiple blocks. | |
| 113 void CfgNode::placePhiStores() { | 108 void CfgNode::placePhiStores() { |
| 114 // Find the insertion point. | 109 // Find the insertion point. |
| 115 InstList::iterator InsertionPoint = Insts.end(); | 110 InstList::iterator InsertionPoint = Insts.end(); |
| 116 // Every block must end in a terminator instruction, and therefore | 111 // Every block must end in a terminator instruction, and therefore must have |
| 117 // must have at least one instruction, so it's valid to decrement | 112 // at least one instruction, so it's valid to decrement InsertionPoint (but |
| 118 // InsertionPoint (but assert just in case). | 113 // assert just in case). |
| 119 assert(InsertionPoint != Insts.begin()); | 114 assert(InsertionPoint != Insts.begin()); |
| 120 --InsertionPoint; | 115 --InsertionPoint; |
| 121 // Confirm that InsertionPoint is a terminator instruction. Calling | 116 // Confirm that InsertionPoint is a terminator instruction. Calling |
| 122 // getTerminatorEdges() on a non-terminator instruction will cause | 117 // getTerminatorEdges() on a non-terminator instruction will cause an |
| 123 // an llvm_unreachable(). | 118 // llvm_unreachable(). |
| 124 (void)InsertionPoint->getTerminatorEdges(); | 119 (void)InsertionPoint->getTerminatorEdges(); |
| 125 // SafeInsertionPoint is always immediately before the terminator | 120 // SafeInsertionPoint is always immediately before the terminator |
| 126 // instruction. If the block ends in a compare and conditional | 121 // instruction. If the block ends in a compare and conditional branch, it's |
| 127 // branch, it's better to place the Phi store before the compare so | 122 // better to place the Phi store before the compare so as not to interfere |
| 128 // as not to interfere with compare/branch fusing. However, if the | 123 // with compare/branch fusing. However, if the compare instruction's dest |
| 129 // compare instruction's dest operand is the same as the new | 124 // operand is the same as the new assignment statement's source operand, this |
| 130 // assignment statement's source operand, this can't be done due to | 125 // can't be done due to data dependences, so we need to fall back to the |
| 131 // data dependences, so we need to fall back to the | 126 // SafeInsertionPoint. To illustrate: |
| 132 // SafeInsertionPoint. To illustrate: | |
| 133 // ; <label>:95 | 127 // ; <label>:95 |
| 134 // %97 = load i8* %96, align 1 | 128 // %97 = load i8* %96, align 1 |
| 135 // %98 = icmp ne i8 %97, 0 | 129 // %98 = icmp ne i8 %97, 0 |
| 136 // br i1 %98, label %99, label %2132 | 130 // br i1 %98, label %99, label %2132 |
| 137 // ; <label>:99 | 131 // ; <label>:99 |
| 138 // %100 = phi i8 [ %97, %95 ], [ %110, %108 ] | 132 // %100 = phi i8 [ %97, %95 ], [ %110, %108 ] |
| 139 // %101 = phi i1 [ %98, %95 ], [ %111, %108 ] | 133 // %101 = phi i1 [ %98, %95 ], [ %111, %108 ] |
| 140 // would be Phi-lowered as: | 134 // would be Phi-lowered as: |
| 141 // ; <label>:95 | 135 // ; <label>:95 |
| 142 // %97 = load i8* %96, align 1 | 136 // %97 = load i8* %96, align 1 |
| 143 // %100_phi = %97 ; can be at InsertionPoint | 137 // %100_phi = %97 ; can be at InsertionPoint |
| 144 // %98 = icmp ne i8 %97, 0 | 138 // %98 = icmp ne i8 %97, 0 |
| 145 // %101_phi = %98 ; must be at SafeInsertionPoint | 139 // %101_phi = %98 ; must be at SafeInsertionPoint |
| 146 // br i1 %98, label %99, label %2132 | 140 // br i1 %98, label %99, label %2132 |
| 147 // ; <label>:99 | 141 // ; <label>:99 |
| 148 // %100 = %100_phi | 142 // %100 = %100_phi |
| 149 // %101 = %101_phi | 143 // %101 = %101_phi |
| 150 // | 144 // |
| 151 // TODO(stichnot): It may be possible to bypass this whole | 145 // TODO(stichnot): It may be possible to bypass this whole SafeInsertionPoint |
| 152 // SafeInsertionPoint mechanism. If a source basic block ends in a | 146 // mechanism. If a source basic block ends in a conditional branch: |
| 153 // conditional branch: | |
| 154 // labelSource: | 147 // labelSource: |
| 155 // ... | 148 // ... |
| 156 // br i1 %foo, label %labelTrue, label %labelFalse | 149 // br i1 %foo, label %labelTrue, label %labelFalse |
| 157 // and a branch target has a Phi involving the branch operand: | 150 // and a branch target has a Phi involving the branch operand: |
| 158 // labelTrue: | 151 // labelTrue: |
| 159 // %bar = phi i1 [ %foo, %labelSource ], ... | 152 // %bar = phi i1 [ %foo, %labelSource ], ... |
| 160 // then we actually know the constant i1 value of the Phi operand: | 153 // then we actually know the constant i1 value of the Phi operand: |
| 161 // labelTrue: | 154 // labelTrue: |
| 162 // %bar = phi i1 [ true, %labelSource ], ... | 155 // %bar = phi i1 [ true, %labelSource ], ... |
| 163 // It seems that this optimization should be done by clang or opt, | 156 // It seems that this optimization should be done by clang or opt, but we |
| 164 // but we could also do it here. | 157 // could also do it here. |
| 165 InstList::iterator SafeInsertionPoint = InsertionPoint; | 158 InstList::iterator SafeInsertionPoint = InsertionPoint; |
| 166 // Keep track of the dest variable of a compare instruction, so that | 159 // Keep track of the dest variable of a compare instruction, so that we |
| 167 // we insert the new instruction at the SafeInsertionPoint if the | 160 // insert the new instruction at the SafeInsertionPoint if the compare's dest |
| 168 // compare's dest matches the Phi-lowered assignment's source. | 161 // matches the Phi-lowered assignment's source. |
| 169 Variable *CmpInstDest = nullptr; | 162 Variable *CmpInstDest = nullptr; |
| 170 // If the current insertion point is at a conditional branch | 163 // If the current insertion point is at a conditional branch instruction, and |
| 171 // instruction, and the previous instruction is a compare | 164 // the previous instruction is a compare instruction, then we move the |
| 172 // instruction, then we move the insertion point before the compare | 165 // insertion point before the compare instruction so as not to interfere with |
| 173 // instruction so as not to interfere with compare/branch fusing. | 166 // compare/branch fusing. |
| 174 if (InstBr *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) { | 167 if (InstBr *Branch = llvm::dyn_cast<InstBr>(InsertionPoint)) { |
| 175 if (!Branch->isUnconditional()) { | 168 if (!Branch->isUnconditional()) { |
| 176 if (InsertionPoint != Insts.begin()) { | 169 if (InsertionPoint != Insts.begin()) { |
| 177 --InsertionPoint; | 170 --InsertionPoint; |
| 178 if (llvm::isa<InstIcmp>(InsertionPoint) || | 171 if (llvm::isa<InstIcmp>(InsertionPoint) || |
| 179 llvm::isa<InstFcmp>(InsertionPoint)) { | 172 llvm::isa<InstFcmp>(InsertionPoint)) { |
| 180 CmpInstDest = InsertionPoint->getDest(); | 173 CmpInstDest = InsertionPoint->getDest(); |
| 181 } else { | 174 } else { |
| 182 ++InsertionPoint; | 175 ++InsertionPoint; |
| 183 } | 176 } |
| (...skipping 18 matching lines...) Expand all Loading... |
| 202 } | 195 } |
| 203 } | 196 } |
| 204 } | 197 } |
| 205 | 198 |
| 206 // Deletes the phi instructions after the loads and stores are placed. | 199 // Deletes the phi instructions after the loads and stores are placed. |
| 207 void CfgNode::deletePhis() { | 200 void CfgNode::deletePhis() { |
| 208 for (Inst &I : Phis) | 201 for (Inst &I : Phis) |
| 209 I.setDeleted(); | 202 I.setDeleted(); |
| 210 } | 203 } |
| 211 | 204 |
| 212 // Splits the edge from Pred to this node by creating a new node and | 205 // Splits the edge from Pred to this node by creating a new node and hooking up |
| 213 // hooking up the in and out edges appropriately. (The EdgeIndex | 206 // the in and out edges appropriately. (The EdgeIndex parameter is only used to |
| 214 // parameter is only used to make the new node's name unique when | 207 // make the new node's name unique when there are multiple edges between the |
| 215 // there are multiple edges between the same pair of nodes.) The new | 208 // same pair of nodes.) The new node's instruction list is initialized to the |
| 216 // node's instruction list is initialized to the empty list, with no | 209 // empty list, with no terminator instruction. There must not be multiple edges |
| 217 // terminator instruction. There must not be multiple edges from Pred | 210 // from Pred to this node so all Inst::getTerminatorEdges implementations must |
| 218 // to this node so all Inst::getTerminatorEdges implementations must | |
| 219 // not contain duplicates. | 211 // not contain duplicates. |
| 220 CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) { | 212 CfgNode *CfgNode::splitIncomingEdge(CfgNode *Pred, SizeT EdgeIndex) { |
| 221 CfgNode *NewNode = Func->makeNode(); | 213 CfgNode *NewNode = Func->makeNode(); |
| 222 // Depth is the minimum as it works if both are the same, but if one is | 214 // Depth is the minimum as it works if both are the same, but if one is |
| 223 // outside the loop and the other is inside, the new node should be placed | 215 // outside the loop and the other is inside, the new node should be placed |
| 224 // outside and not be executed multiple times within the loop. | 216 // outside and not be executed multiple times within the loop. |
| 225 NewNode->setLoopNestDepth( | 217 NewNode->setLoopNestDepth( |
| 226 std::min(getLoopNestDepth(), Pred->getLoopNestDepth())); | 218 std::min(getLoopNestDepth(), Pred->getLoopNestDepth())); |
| 227 if (BuildDefs::dump()) | 219 if (BuildDefs::dump()) |
| 228 NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" + | 220 NewNode->setName("split_" + Pred->getName() + "_" + getName() + "_" + |
| 229 std::to_string(EdgeIndex)); | 221 std::to_string(EdgeIndex)); |
| 230 // The new node is added to the end of the node list, and will later | 222 // The new node is added to the end of the node list, and will later need to |
| 231 // need to be sorted into a reasonable topological order. | 223 // be sorted into a reasonable topological order. |
| 232 NewNode->setNeedsPlacement(true); | 224 NewNode->setNeedsPlacement(true); |
| 233 // Repoint Pred's out-edge. | 225 // Repoint Pred's out-edge. |
| 234 bool Found = false; | 226 bool Found = false; |
| 235 for (CfgNode *&I : Pred->OutEdges) { | 227 for (CfgNode *&I : Pred->OutEdges) { |
| 236 if (I == this) { | 228 if (I == this) { |
| 237 I = NewNode; | 229 I = NewNode; |
| 238 NewNode->InEdges.push_back(Pred); | 230 NewNode->InEdges.push_back(Pred); |
| 239 Found = true; | 231 Found = true; |
| 240 break; | 232 break; |
| 241 } | 233 } |
| (...skipping 30 matching lines...) Expand all Loading... |
| 272 return true; | 264 return true; |
| 273 if (const auto Var2 = llvm::dyn_cast<Variable>(Opnd)) { | 265 if (const auto Var2 = llvm::dyn_cast<Variable>(Opnd)) { |
| 274 if (Var->hasReg() && Var->getRegNum() == Var2->getRegNum()) | 266 if (Var->hasReg() && Var->getRegNum() == Var2->getRegNum()) |
| 275 return true; | 267 return true; |
| 276 } | 268 } |
| 277 return false; | 269 return false; |
| 278 } | 270 } |
| 279 | 271 |
| 280 } // end of anonymous namespace | 272 } // end of anonymous namespace |
| 281 | 273 |
| 282 // This is the "advanced" version of Phi lowering for a basic block, in contrast to | 274 // This is the "advanced" version of Phi lowering for a basic block, in contrast |
| 283 // the simple version that lowers through assignments involving temporaries. | 275 // to the simple version that lowers through assignments involving temporaries. |
| 284 // | 276 // |
| 285 // All Phi instructions in a basic block are conceptually executed in parallel. | 277 // All Phi instructions in a basic block are conceptually executed in parallel. |
| 286 // However, if we lower Phis early and commit to a sequential ordering, we may | 278 // However, if we lower Phis early and commit to a sequential ordering, we may |
| 287 // end up creating unnecessary interferences which lead to worse register | 279 // end up creating unnecessary interferences which lead to worse register |
| 288 // allocation. Delaying Phi scheduling until after register allocation can help | 280 // allocation. Delaying Phi scheduling until after register allocation can help |
| 289 // unless there are no free registers for shuffling registers or stack slots and | 281 // unless there are no free registers for shuffling registers or stack slots |
| 290 // spilling becomes necessary. | 282 // and spilling becomes necessary. |
| 291 // | 283 // |
| 292 // The advanced Phi lowering starts by finding a topological sort of the Phi | 284 // The advanced Phi lowering starts by finding a topological sort of the Phi |
| 293 // instructions, where "A=B" comes before "B=C" due to the anti-dependence on B. | 285 // instructions, where "A=B" comes before "B=C" due to the anti-dependence on |
| 294 // Preexisting register assignments are considered in the topological sort. If | 286 // B. Preexisting register assignments are considered in the topological sort. |
| 295 // a topological sort is not possible due to a cycle, the cycle is broken by | 287 // If a topological sort is not possible due to a cycle, the cycle is broken by |
| 296 // introducing a non-parallel temporary. For example, a cycle arising from a | 288 // introducing a non-parallel temporary. For example, a cycle arising from a |
| 297 // permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being | 289 // permutation like "A=B;B=C;C=A" can become "T=A;A=B;B=C;C=T". All else being |
| 298 // equal, prefer to schedule assignments with register-allocated Src operands | 290 // equal, prefer to schedule assignments with register-allocated Src operands |
| 299 // earlier, in case that register becomes free afterwards, and prefer to | 291 // earlier, in case that register becomes free afterwards, and prefer to |
| 300 // schedule assignments with register-allocated Dest variables later, to keep | 292 // schedule assignments with register-allocated Dest variables later, to keep |
| 301 // that register free for longer. | 293 // that register free for longer. |
| 302 // | 294 // |
| 303 // Once the ordering is determined, the Cfg edge is split and the assignment | 295 // Once the ordering is determined, the Cfg edge is split and the assignment |
| 304 // list is lowered by the target lowering layer. Since the assignment lowering | 296 // list is lowered by the target lowering layer. Since the assignment lowering |
| 305 // may create new infinite-weight temporaries, a follow-on register allocation | 297 // may create new infinite-weight temporaries, a follow-on register allocation |
| 306 // pass will be needed. To prepare for this, liveness (including live range | 298 // pass will be needed. To prepare for this, liveness (including live range |
| 307 // calculation) of the split nodes needs to be calculated, and liveness of the | 299 // calculation) of the split nodes needs to be calculated, and liveness of the |
| 308 // original node need to be updated to "undo" the effects of the phi | 300 // original node need to be updated to "undo" the effects of the phi |
| 309 // assignments. | 301 // assignments. |
| 310 | 302 |
| 311 // The specific placement of the new node within the Cfg node list is deferred | 303 // The specific placement of the new node within the Cfg node list is deferred |
| 312 // until later, including after empty node contraction. | 304 // until later, including after empty node contraction. |
| 313 // | 305 // |
| 314 // After phi assignments are lowered across all blocks, another register | 306 // After phi assignments are lowered across all blocks, another register |
| 315 // allocation pass is run, focusing only on pre-colored and infinite-weight | 307 // allocation pass is run, focusing only on pre-colored and infinite-weight |
| 316 // variables, similar to Om1 register allocation (except without the need to | 308 // variables, similar to Om1 register allocation (except without the need to |
| 317 // specially compute these variables' live ranges, since they have already been | 309 // specially compute these variables' live ranges, since they have already been |
| 318 // precisely calculated). The register allocator in this mode needs the ability | 310 // precisely calculated). The register allocator in this mode needs the ability |
| 319 // to forcibly spill and reload registers in case none are naturally available. | 311 // to forcibly spill and reload registers in case none are naturally available. |
| 320 void CfgNode::advancedPhiLowering() { | 312 void CfgNode::advancedPhiLowering() { |
| 321 if (getPhis().empty()) | 313 if (getPhis().empty()) |
| 322 return; | 314 return; |
| 323 | 315 |
| 324 // Count the number of non-deleted Phi instructions. | 316 // Count the number of non-deleted Phi instructions. |
| 325 struct PhiDesc { | 317 struct PhiDesc { |
| 326 InstPhi *Phi; | 318 InstPhi *Phi; |
| 327 Variable *Dest; | 319 Variable *Dest; |
| 328 Operand *Src; | 320 Operand *Src; |
| (...skipping 27 matching lines...) Expand all Loading... |
| 356 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); | 348 CfgNode *Split = splitIncomingEdge(Pred, InEdgeIndex++); |
| 357 SizeT Remaining = NumPhis; | 349 SizeT Remaining = NumPhis; |
| 358 | 350 |
| 359 // First pass computes Src and initializes NumPred. | 351 // First pass computes Src and initializes NumPred. |
| 360 for (size_t I = 0; I < NumPhis; ++I) { | 352 for (size_t I = 0; I < NumPhis; ++I) { |
| 361 Variable *Dest = Desc[I].Dest; | 353 Variable *Dest = Desc[I].Dest; |
| 362 Operand *Src = Desc[I].Phi->getOperandForTarget(Pred); | 354 Operand *Src = Desc[I].Phi->getOperandForTarget(Pred); |
| 363 Desc[I].Src = Src; | 355 Desc[I].Src = Src; |
| 364 Desc[I].Processed = false; | 356 Desc[I].Processed = false; |
| 365 Desc[I].NumPred = 0; | 357 Desc[I].NumPred = 0; |
| 366 // Cherry-pick any trivial assignments, so that they don't | 358 // Cherry-pick any trivial assignments, so that they don't contribute to |
| 367 // contribute to the running complexity of the topological sort. | 359 // the running complexity of the topological sort. |
| 368 if (sameVarOrReg(Dest, Src)) { | 360 if (sameVarOrReg(Dest, Src)) { |
| 369 Desc[I].Processed = true; | 361 Desc[I].Processed = true; |
| 370 --Remaining; | 362 --Remaining; |
| 371 if (Dest != Src) | 363 if (Dest != Src) |
| 372 // If Dest and Src are syntactically the same, don't bother | 364 // If Dest and Src are syntactically the same, don't bother adding |
| 373 // adding the assignment, because in all respects it would | 365 // the assignment, because in all respects it would be redundant, and |
| 374 // be redundant, and if Dest/Src are on the stack, the | 366 // if Dest/Src are on the stack, the target lowering may naively |
| 375 // target lowering may naively decide to lower it using a | 367 // decide to lower it using a temporary register. |
| 376 // temporary register. | |
| 377 Split->appendInst(InstAssign::create(Func, Dest, Src)); | 368 Split->appendInst(InstAssign::create(Func, Dest, Src)); |
| 378 } | 369 } |
| 379 } | 370 } |
| 380 // Second pass computes NumPred by comparing every pair of Phi | 371 // Second pass computes NumPred by comparing every pair of Phi |
| 381 // instructions. | 372 // instructions. |
| 382 for (size_t I = 0; I < NumPhis; ++I) { | 373 for (size_t I = 0; I < NumPhis; ++I) { |
| 383 if (Desc[I].Processed) | 374 if (Desc[I].Processed) |
| 384 continue; | 375 continue; |
| 385 const Variable *Dest = Desc[I].Dest; | 376 const Variable *Dest = Desc[I].Dest; |
| 386 for (size_t J = 0; J < NumPhis; ++J) { | 377 for (size_t J = 0; J < NumPhis; ++J) { |
| 387 if (Desc[J].Processed) | 378 if (Desc[J].Processed) |
| 388 continue; | 379 continue; |
| 389 if (I != J) { | 380 if (I != J) { |
| 390 // There shouldn't be two Phis with the same Dest variable | 381 // There shouldn't be two Phis with the same Dest variable or |
| 391 // or register. | 382 // register. |
| 392 assert(!sameVarOrReg(Dest, Desc[J].Dest)); | 383 assert(!sameVarOrReg(Dest, Desc[J].Dest)); |
| 393 } | 384 } |
| 394 const Operand *Src = Desc[J].Src; | 385 const Operand *Src = Desc[J].Src; |
| 395 if (sameVarOrReg(Dest, Src)) | 386 if (sameVarOrReg(Dest, Src)) |
| 396 ++Desc[I].NumPred; | 387 ++Desc[I].NumPred; |
| 397 } | 388 } |
| 398 } | 389 } |
| 399 | 390 |
| 400 // Another pass to compute initial Weight values. | 391 // Another pass to compute initial Weight values. |
| 401 | 392 |
| 402 // Always pick NumPred=0 over NumPred>0. | 393 // Always pick NumPred=0 over NumPred>0. |
| 403 constexpr int32_t WeightNoPreds = 4; | 394 constexpr int32_t WeightNoPreds = 4; |
| 404 // Prefer Src as a register because the register might free up. | 395 // Prefer Src as a register because the register might free up. |
| 405 constexpr int32_t WeightSrcIsReg = 2; | 396 constexpr int32_t WeightSrcIsReg = 2; |
| 406 // Prefer Dest not as a register because the register stays free | 397 // Prefer Dest not as a register because the register stays free longer. |
| 407 // longer. | |
| 408 constexpr int32_t WeightDestNotReg = 1; | 398 constexpr int32_t WeightDestNotReg = 1; |
| 409 | 399 |
| 410 for (size_t I = 0; I < NumPhis; ++I) { | 400 for (size_t I = 0; I < NumPhis; ++I) { |
| 411 if (Desc[I].Processed) | 401 if (Desc[I].Processed) |
| 412 continue; | 402 continue; |
| 413 int32_t Weight = 0; | 403 int32_t Weight = 0; |
| 414 if (Desc[I].NumPred == 0) | 404 if (Desc[I].NumPred == 0) |
| 415 Weight += WeightNoPreds; | 405 Weight += WeightNoPreds; |
| 416 if (auto Var = llvm::dyn_cast<Variable>(Desc[I].Src)) | 406 if (auto Var = llvm::dyn_cast<Variable>(Desc[I].Src)) |
| 417 if (Var->hasReg()) | 407 if (Var->hasReg()) |
| 418 Weight += WeightSrcIsReg; | 408 Weight += WeightSrcIsReg; |
| 419 if (!Desc[I].Dest->hasReg()) | 409 if (!Desc[I].Dest->hasReg()) |
| 420 Weight += WeightDestNotReg; | 410 Weight += WeightDestNotReg; |
| 421 Desc[I].Weight = Weight; | 411 Desc[I].Weight = Weight; |
| 422 } | 412 } |
| 423 | 413 |
| 424 // Repeatedly choose and process the best candidate in the | 414 // Repeatedly choose and process the best candidate in the topological |
| 425 // topological sort, until no candidates remain. This | 415 // sort, until no candidates remain. This implementation is O(N^2) where N |
| 426 // implementation is O(N^2) where N is the number of Phi | 416 // is the number of Phi instructions, but with a small constant factor |
| 427 // instructions, but with a small constant factor compared to a | 417 // compared to a likely implementation of O(N) topological sort. |
| 428 // likely implementation of O(N) topological sort. | |
| 429 for (; Remaining; --Remaining) { | 418 for (; Remaining; --Remaining) { |
| 430 size_t BestIndex = 0; | 419 size_t BestIndex = 0; |
| 431 int32_t BestWeight = -1; | 420 int32_t BestWeight = -1; |
| 432 // Find the best candidate. | 421 // Find the best candidate. |
| 433 for (size_t I = 0; I < NumPhis; ++I) { | 422 for (size_t I = 0; I < NumPhis; ++I) { |
| 434 if (Desc[I].Processed) | 423 if (Desc[I].Processed) |
| 435 continue; | 424 continue; |
| 436 int32_t Weight = 0; | 425 int32_t Weight = 0; |
| 437 Weight = Desc[I].Weight; | 426 Weight = Desc[I].Weight; |
| 438 if (Weight > BestWeight) { | 427 if (Weight > BestWeight) { |
| 439 BestIndex = I; | 428 BestIndex = I; |
| 440 BestWeight = Weight; | 429 BestWeight = Weight; |
| 441 } | 430 } |
| 442 } | 431 } |
| 443 assert(BestWeight >= 0); | 432 assert(BestWeight >= 0); |
| 444 assert(Desc[BestIndex].NumPred <= 1); | 433 assert(Desc[BestIndex].NumPred <= 1); |
| 445 Variable *Dest = Desc[BestIndex].Dest; | 434 Variable *Dest = Desc[BestIndex].Dest; |
| 446 Operand *Src = Desc[BestIndex].Src; | 435 Operand *Src = Desc[BestIndex].Src; |
| 447 assert(!sameVarOrReg(Dest, Src)); | 436 assert(!sameVarOrReg(Dest, Src)); |
| 448 // Break a cycle by introducing a temporary. | 437 // Break a cycle by introducing a temporary. |
| 449 if (Desc[BestIndex].NumPred) { | 438 if (Desc[BestIndex].NumPred) { |
| 450 bool Found = false; | 439 bool Found = false; |
| 451 // If the target instruction "A=B" is part of a cycle, find | 440 // If the target instruction "A=B" is part of a cycle, find the "X=A" |
| 452 // the "X=A" assignment in the cycle because it will have to | 441 // assignment in the cycle because it will have to be rewritten as |
| 453 // be rewritten as "X=tmp". | 442 // "X=tmp". |
| 454 for (size_t J = 0; !Found && J < NumPhis; ++J) { | 443 for (size_t J = 0; !Found && J < NumPhis; ++J) { |
| 455 if (Desc[J].Processed) | 444 if (Desc[J].Processed) |
| 456 continue; | 445 continue; |
| 457 Operand *OtherSrc = Desc[J].Src; | 446 Operand *OtherSrc = Desc[J].Src; |
| 458 if (Desc[J].NumPred && sameVarOrReg(Dest, OtherSrc)) { | 447 if (Desc[J].NumPred && sameVarOrReg(Dest, OtherSrc)) { |
| 459 SizeT VarNum = Func->getNumVariables(); | 448 SizeT VarNum = Func->getNumVariables(); |
| 460 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); | 449 Variable *Tmp = Func->makeVariable(OtherSrc->getType()); |
| 461 if (BuildDefs::dump()) | 450 if (BuildDefs::dump()) |
| 462 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); | 451 Tmp->setName(Func, "__split_" + std::to_string(VarNum)); |
| 463 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc)); | 452 Split->appendInst(InstAssign::create(Func, Tmp, OtherSrc)); |
| 464 Desc[J].Src = Tmp; | 453 Desc[J].Src = Tmp; |
| 465 Found = true; | 454 Found = true; |
| 466 } | 455 } |
| 467 } | 456 } |
| 468 assert(Found); | 457 assert(Found); |
| 469 } | 458 } |
| 470 // Now that a cycle (if any) has been broken, create the actual | 459 // Now that a cycle (if any) has been broken, create the actual |
| 471 // assignment. | 460 // assignment. |
| 472 Split->appendInst(InstAssign::create(Func, Dest, Src)); | 461 Split->appendInst(InstAssign::create(Func, Dest, Src)); |
| 473 // Update NumPred for all Phi assignments using this Phi's Src | 462 // Update NumPred for all Phi assignments using this Phi's Src as their |
| 474 // as their Dest variable. Also update Weight if NumPred | 463 // Dest variable. Also update Weight if NumPred dropped from 1 to 0. |
| 475 // dropped from 1 to 0. | |
| 476 if (auto Var = llvm::dyn_cast<Variable>(Src)) { | 464 if (auto Var = llvm::dyn_cast<Variable>(Src)) { |
| 477 for (size_t I = 0; I < NumPhis; ++I) { | 465 for (size_t I = 0; I < NumPhis; ++I) { |
| 478 if (Desc[I].Processed) | 466 if (Desc[I].Processed) |
| 479 continue; | 467 continue; |
| 480 if (sameVarOrReg(Var, Desc[I].Dest)) { | 468 if (sameVarOrReg(Var, Desc[I].Dest)) { |
| 481 if (--Desc[I].NumPred == 0) | 469 if (--Desc[I].NumPred == 0) |
| 482 Desc[I].Weight += WeightNoPreds; | 470 Desc[I].Weight += WeightNoPreds; |
| 483 } | 471 } |
| 484 } | 472 } |
| 485 } | 473 } |
| 486 Desc[BestIndex].Processed = true; | 474 Desc[BestIndex].Processed = true; |
| 487 } | 475 } |
| 488 Split->appendInst(InstBr::create(Func, this)); | 476 Split->appendInst(InstBr::create(Func, this)); |
| 489 | 477 |
| 490 Split->genCode(); | 478 Split->genCode(); |
| 491 Func->getVMetadata()->addNode(Split); | 479 Func->getVMetadata()->addNode(Split); |
| 492 } | 480 } |
| 493 } | 481 } |
| 494 | 482 |
| 495 // Does address mode optimization. Pass each instruction to the | 483 // Does address mode optimization. Pass each instruction to the TargetLowering |
| 496 // TargetLowering object. If it returns a new instruction | 484 // object. If it returns a new instruction (representing the optimized address |
| 497 // (representing the optimized address mode), then insert the new | 485 // mode), then insert the new instruction and delete the old. |
| 498 // instruction and delete the old. | |
| 499 void CfgNode::doAddressOpt() { | 486 void CfgNode::doAddressOpt() { |
| 500 TargetLowering *Target = Func->getTarget(); | 487 TargetLowering *Target = Func->getTarget(); |
| 501 LoweringContext &Context = Target->getContext(); | 488 LoweringContext &Context = Target->getContext(); |
| 502 Context.init(this); | 489 Context.init(this); |
| 503 while (!Context.atEnd()) { | 490 while (!Context.atEnd()) { |
| 504 Target->doAddressOpt(); | 491 Target->doAddressOpt(); |
| 505 } | 492 } |
| 506 } | 493 } |
| 507 | 494 |
| 508 void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) { | 495 void CfgNode::doNopInsertion(RandomNumberGenerator &RNG) { |
| (...skipping 11 matching lines...) Expand all Loading... |
| 520 } | 507 } |
| 521 if (!PauseNopInsertion) | 508 if (!PauseNopInsertion) |
| 522 Target->doNopInsertion(RNG); | 509 Target->doNopInsertion(RNG); |
| 523 // Ensure Cur=Next, so that the nops are inserted before the current | 510 // Ensure Cur=Next, so that the nops are inserted before the current |
| 524 // instruction rather than after. | 511 // instruction rather than after. |
| 525 Context.advanceCur(); | 512 Context.advanceCur(); |
| 526 Context.advanceNext(); | 513 Context.advanceNext(); |
| 527 } | 514 } |
| 528 } | 515 } |
| 529 | 516 |
| 530 // Drives the target lowering. Passes the current instruction and the | 517 // Drives the target lowering. Passes the current instruction and the next |
| 531 // next non-deleted instruction for target lowering. | 518 // non-deleted instruction for target lowering. |
| 532 void CfgNode::genCode() { | 519 void CfgNode::genCode() { |
| 533 TargetLowering *Target = Func->getTarget(); | 520 TargetLowering *Target = Func->getTarget(); |
| 534 LoweringContext &Context = Target->getContext(); | 521 LoweringContext &Context = Target->getContext(); |
| 535 // Lower the regular instructions. | 522 // Lower the regular instructions. |
| 536 Context.init(this); | 523 Context.init(this); |
| 537 Target->initNodeForLowering(this); | 524 Target->initNodeForLowering(this); |
| 538 while (!Context.atEnd()) { | 525 while (!Context.atEnd()) { |
| 539 InstList::iterator Orig = Context.getCur(); | 526 InstList::iterator Orig = Context.getCur(); |
| 540 if (llvm::isa<InstRet>(*Orig)) | 527 if (llvm::isa<InstRet>(*Orig)) |
| 541 setHasReturn(); | 528 setHasReturn(); |
| (...skipping 14 matching lines...) Expand all Loading... |
| 556 continue; | 543 continue; |
| 557 I.livenessLightweight(Func, Live); | 544 I.livenessLightweight(Func, Live); |
| 558 } | 545 } |
| 559 for (Inst &I : Phis) { | 546 for (Inst &I : Phis) { |
| 560 if (I.isDeleted()) | 547 if (I.isDeleted()) |
| 561 continue; | 548 continue; |
| 562 I.livenessLightweight(Func, Live); | 549 I.livenessLightweight(Func, Live); |
| 563 } | 550 } |
| 564 } | 551 } |
| 565 | 552 |
| 566 // Performs liveness analysis on the block. Returns true if the | 553 // Performs liveness analysis on the block. Returns true if the incoming |
| 567 // incoming liveness changed from before, false if it stayed the same. | 554 // liveness changed from before, false if it stayed the same. (If it changes, |
| 568 // (If it changes, the node's predecessors need to be processed | 555 // the node's predecessors need to be processed again.) |
| 569 // again.) | |
| 570 bool CfgNode::liveness(Liveness *Liveness) { | 556 bool CfgNode::liveness(Liveness *Liveness) { |
| 571 SizeT NumVars = Liveness->getNumVarsInNode(this); | 557 SizeT NumVars = Liveness->getNumVarsInNode(this); |
| 572 LivenessBV Live(NumVars); | 558 LivenessBV Live(NumVars); |
| 573 LiveBeginEndMap *LiveBegin = nullptr; | 559 LiveBeginEndMap *LiveBegin = nullptr; |
| 574 LiveBeginEndMap *LiveEnd = nullptr; | 560 LiveBeginEndMap *LiveEnd = nullptr; |
| 575 // Mark the beginning and ending of each variable's live range | 561 // Mark the beginning and ending of each variable's live range with the |
| 576 // with the sentinel instruction number 0. | 562 // sentinel instruction number 0. |
| 577 if (Liveness->getMode() == Liveness_Intervals) { | 563 if (Liveness->getMode() == Liveness_Intervals) { |
| 578 LiveBegin = Liveness->getLiveBegin(this); | 564 LiveBegin = Liveness->getLiveBegin(this); |
| 579 LiveEnd = Liveness->getLiveEnd(this); | 565 LiveEnd = Liveness->getLiveEnd(this); |
| 580 LiveBegin->clear(); | 566 LiveBegin->clear(); |
| 581 LiveEnd->clear(); | 567 LiveEnd->clear(); |
| 582 // Guess that the number of live ranges beginning is roughly the | 568 // Guess that the number of live ranges beginning is roughly the number of |
| 583 // number of instructions, and same for live ranges ending. | 569 // instructions, and same for live ranges ending. |
| 584 LiveBegin->reserve(getInstCountEstimate()); | 570 LiveBegin->reserve(getInstCountEstimate()); |
| 585 LiveEnd->reserve(getInstCountEstimate()); | 571 LiveEnd->reserve(getInstCountEstimate()); |
| 586 } | 572 } |
| 587 // Initialize Live to be the union of all successors' LiveIn. | 573 // Initialize Live to be the union of all successors' LiveIn. |
| 588 for (CfgNode *Succ : OutEdges) { | 574 for (CfgNode *Succ : OutEdges) { |
| 589 Live |= Liveness->getLiveIn(Succ); | 575 Live |= Liveness->getLiveIn(Succ); |
| 590 // Mark corresponding argument of phis in successor as live. | 576 // Mark corresponding argument of phis in successor as live. |
| 591 for (Inst &I : Succ->Phis) { | 577 for (Inst &I : Succ->Phis) { |
| 592 if (I.isDeleted()) | 578 if (I.isDeleted()) |
| 593 continue; | 579 continue; |
| 594 auto Phi = llvm::dyn_cast<InstPhi>(&I); | 580 auto Phi = llvm::dyn_cast<InstPhi>(&I); |
| 595 Phi->livenessPhiOperand(Live, this, Liveness); | 581 Phi->livenessPhiOperand(Live, this, Liveness); |
| 596 } | 582 } |
| 597 } | 583 } |
| 598 Liveness->getLiveOut(this) = Live; | 584 Liveness->getLiveOut(this) = Live; |
| 599 | 585 |
| 600 // Process regular instructions in reverse order. | 586 // Process regular instructions in reverse order. |
| 601 for (Inst &I : reverse_range(Insts)) { | 587 for (Inst &I : reverse_range(Insts)) { |
| 602 if (I.isDeleted()) | 588 if (I.isDeleted()) |
| 603 continue; | 589 continue; |
| 604 I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd); | 590 I.liveness(I.getNumber(), Live, Liveness, LiveBegin, LiveEnd); |
| 605 } | 591 } |
| 606 // Process phis in forward order so that we can override the | 592 // Process phis in forward order so that we can override the instruction |
| 607 // instruction number to be that of the earliest phi instruction in | 593 // number to be that of the earliest phi instruction in the block. |
| 608 // the block. | |
| 609 SizeT NumNonDeadPhis = 0; | 594 SizeT NumNonDeadPhis = 0; |
| 610 InstNumberT FirstPhiNumber = Inst::NumberSentinel; | 595 InstNumberT FirstPhiNumber = Inst::NumberSentinel; |
| 611 for (Inst &I : Phis) { | 596 for (Inst &I : Phis) { |
| 612 if (I.isDeleted()) | 597 if (I.isDeleted()) |
| 613 continue; | 598 continue; |
| 614 if (FirstPhiNumber == Inst::NumberSentinel) | 599 if (FirstPhiNumber == Inst::NumberSentinel) |
| 615 FirstPhiNumber = I.getNumber(); | 600 FirstPhiNumber = I.getNumber(); |
| 616 if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd)) | 601 if (I.liveness(FirstPhiNumber, Live, Liveness, LiveBegin, LiveEnd)) |
| 617 ++NumNonDeadPhis; | 602 ++NumNonDeadPhis; |
| 618 } | 603 } |
| 619 | 604 |
| 620 // When using the sparse representation, after traversing the | 605 // When using the sparse representation, after traversing the instructions in |
| 621 // instructions in the block, the Live bitvector should only contain | 606 // the block, the Live bitvector should only contain set bits for global |
| 622 // set bits for global variables upon block entry. We validate this | 607 // variables upon block entry. We validate this by shrinking the Live vector |
| 623 // by shrinking the Live vector and then testing it against the | 608 // and then testing it against the pre-shrunk version. (The shrinking is |
| 624 // pre-shrunk version. (The shrinking is required, but the | 609 // required, but the validation is not.) |
| 625 // validation is not.) | |
| 626 LivenessBV LiveOrig = Live; | 610 LivenessBV LiveOrig = Live; |
| 627 Live.resize(Liveness->getNumGlobalVars()); | 611 Live.resize(Liveness->getNumGlobalVars()); |
| 628 if (Live != LiveOrig) { | 612 if (Live != LiveOrig) { |
| 629 if (BuildDefs::dump()) { | 613 if (BuildDefs::dump()) { |
| 630 // This is a fatal liveness consistency error. Print some | 614 // This is a fatal liveness consistency error. Print some diagnostics and |
| 631 // diagnostics and abort. | 615 // abort. |
| 632 Ostream &Str = Func->getContext()->getStrDump(); | 616 Ostream &Str = Func->getContext()->getStrDump(); |
| 633 Func->resetCurrentNode(); | 617 Func->resetCurrentNode(); |
| 634 Str << "LiveOrig-Live ="; | 618 Str << "LiveOrig-Live ="; |
| 635 for (SizeT i = Live.size(); i < LiveOrig.size(); ++i) { | 619 for (SizeT i = Live.size(); i < LiveOrig.size(); ++i) { |
| 636 if (LiveOrig.test(i)) { | 620 if (LiveOrig.test(i)) { |
| 637 Str << " "; | 621 Str << " "; |
| 638 Liveness->getVariable(i, this)->dump(Func); | 622 Liveness->getVariable(i, this)->dump(Func); |
| 639 } | 623 } |
| 640 } | 624 } |
| 641 Str << "\n"; | 625 Str << "\n"; |
| 642 } | 626 } |
| 643 llvm::report_fatal_error("Fatal inconsistency in liveness analysis"); | 627 llvm::report_fatal_error("Fatal inconsistency in liveness analysis"); |
| 644 } | 628 } |
| 645 | 629 |
| 646 bool Changed = false; | 630 bool Changed = false; |
| 647 LivenessBV &LiveIn = Liveness->getLiveIn(this); | 631 LivenessBV &LiveIn = Liveness->getLiveIn(this); |
| 648 // Add in current LiveIn | 632 // Add in current LiveIn |
| 649 Live |= LiveIn; | 633 Live |= LiveIn; |
| 650 // Check result, set LiveIn=Live | 634 // Check result, set LiveIn=Live |
| 651 SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this); | 635 SizeT &PrevNumNonDeadPhis = Liveness->getNumNonDeadPhis(this); |
| 652 bool LiveInChanged = (Live != LiveIn); | 636 bool LiveInChanged = (Live != LiveIn); |
| 653 Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged); | 637 Changed = (NumNonDeadPhis != PrevNumNonDeadPhis || LiveInChanged); |
| 654 if (LiveInChanged) | 638 if (LiveInChanged) |
| 655 LiveIn = Live; | 639 LiveIn = Live; |
| 656 PrevNumNonDeadPhis = NumNonDeadPhis; | 640 PrevNumNonDeadPhis = NumNonDeadPhis; |
| 657 return Changed; | 641 return Changed; |
| 658 } | 642 } |
| 659 | 643 |
| 660 // Once basic liveness is complete, compute actual live ranges. It is | 644 // Once basic liveness is complete, compute actual live ranges. It is assumed |
| 661 // assumed that within a single basic block, a live range begins at | 645 // that within a single basic block, a live range begins at most once and ends |
| 662 // most once and ends at most once. This is certainly true for pure | 646 // at most once. This is certainly true for pure SSA form. It is also true once |
| 663 // SSA form. It is also true once phis are lowered, since each | 647 // phis are lowered, since each assignment to the phi-based temporary is in a |
| 664 // assignment to the phi-based temporary is in a different basic | 648 // different basic block, and there is a single read that ends the live in the |
| 665 // block, and there is a single read that ends the live in the basic | 649 // basic block that contained the actual phi instruction. |
| 666 // block that contained the actual phi instruction. | |
| 667 void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum, | 650 void CfgNode::livenessAddIntervals(Liveness *Liveness, InstNumberT FirstInstNum, |
| 668 InstNumberT LastInstNum) { | 651 InstNumberT LastInstNum) { |
| 669 TimerMarker T1(TimerStack::TT_liveRange, Func); | 652 TimerMarker T1(TimerStack::TT_liveRange, Func); |
| 670 | 653 |
| 671 SizeT NumVars = Liveness->getNumVarsInNode(this); | 654 SizeT NumVars = Liveness->getNumVarsInNode(this); |
| 672 LivenessBV &LiveIn = Liveness->getLiveIn(this); | 655 LivenessBV &LiveIn = Liveness->getLiveIn(this); |
| 673 LivenessBV &LiveOut = Liveness->getLiveOut(this); | 656 LivenessBV &LiveOut = Liveness->getLiveOut(this); |
| 674 LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this); | 657 LiveBeginEndMap &MapBegin = *Liveness->getLiveBegin(this); |
| 675 LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this); | 658 LiveBeginEndMap &MapEnd = *Liveness->getLiveEnd(this); |
| 676 std::sort(MapBegin.begin(), MapBegin.end()); | 659 std::sort(MapBegin.begin(), MapBegin.end()); |
| (...skipping 12 matching lines...) Expand all Loading... |
| 689 LivenessBV LiveInAndOut = LiveIn; | 672 LivenessBV LiveInAndOut = LiveIn; |
| 690 LiveInAndOut &= LiveOut; | 673 LiveInAndOut &= LiveOut; |
| 691 | 674 |
| 692 // Iterate in parallel across the sorted MapBegin[] and MapEnd[]. | 675 // Iterate in parallel across the sorted MapBegin[] and MapEnd[]. |
| 693 auto IBB = MapBegin.begin(), IEB = MapEnd.begin(); | 676 auto IBB = MapBegin.begin(), IEB = MapEnd.begin(); |
| 694 auto IBE = MapBegin.end(), IEE = MapEnd.end(); | 677 auto IBE = MapBegin.end(), IEE = MapEnd.end(); |
| 695 while (IBB != IBE || IEB != IEE) { | 678 while (IBB != IBE || IEB != IEE) { |
| 696 SizeT i1 = IBB == IBE ? NumVars : IBB->first; | 679 SizeT i1 = IBB == IBE ? NumVars : IBB->first; |
| 697 SizeT i2 = IEB == IEE ? NumVars : IEB->first; | 680 SizeT i2 = IEB == IEE ? NumVars : IEB->first; |
| 698 SizeT i = std::min(i1, i2); | 681 SizeT i = std::min(i1, i2); |
| 699 // i1 is the Variable number of the next MapBegin entry, and i2 is | 682 // i1 is the Variable number of the next MapBegin entry, and i2 is the |
| 700 // the Variable number of the next MapEnd entry. If i1==i2, then | 683 // Variable number of the next MapEnd entry. If i1==i2, then the Variable's |
| 701 // the Variable's live range begins and ends in this block. If | 684 // live range begins and ends in this block. If i1<i2, then i1's live range |
| 702 // i1<i2, then i1's live range begins at instruction IBB->second | 685 // begins at instruction IBB->second and extends through the end of the |
| 703 // and extends through the end of the block. If i1>i2, then i2's | 686 // block. If i1>i2, then i2's live range begins at the first instruction of |
| 704 // live range begins at the first instruction of the block and | 687 // the block and ends at IEB->second. In any case, we choose the lesser of |
| 705 // ends at IEB->second. In any case, we choose the lesser of i1 | 688 // i1 and i2 and proceed accordingly. |
| 706 // and i2 and proceed accordingly. | |
| 707 InstNumberT LB = i == i1 ? IBB->second : FirstInstNum; | 689 InstNumberT LB = i == i1 ? IBB->second : FirstInstNum; |
| 708 InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1; | 690 InstNumberT LE = i == i2 ? IEB->second : LastInstNum + 1; |
| 709 | 691 |
| 710 Variable *Var = Liveness->getVariable(i, this); | 692 Variable *Var = Liveness->getVariable(i, this); |
| 711 if (LB > LE) { | 693 if (LB > LE) { |
| 712 Var->addLiveRange(FirstInstNum, LE); | 694 Var->addLiveRange(FirstInstNum, LE); |
| 713 Var->addLiveRange(LB, LastInstNum + 1); | 695 Var->addLiveRange(LB, LastInstNum + 1); |
| 714 // Assert that Var is a global variable by checking that its | 696 // Assert that Var is a global variable by checking that its liveness |
| 715 // liveness index is less than the number of globals. This | 697 // index is less than the number of globals. This ensures that the |
| 716 // ensures that the LiveInAndOut[] access is valid. | 698 // LiveInAndOut[] access is valid. |
| 717 assert(i < Liveness->getNumGlobalVars()); | 699 assert(i < Liveness->getNumGlobalVars()); |
| 718 LiveInAndOut[i] = false; | 700 LiveInAndOut[i] = false; |
| 719 } else { | 701 } else { |
| 720 Var->addLiveRange(LB, LE); | 702 Var->addLiveRange(LB, LE); |
| 721 } | 703 } |
| 722 if (i == i1) | 704 if (i == i1) |
| 723 ++IBB; | 705 ++IBB; |
| 724 if (i == i2) | 706 if (i == i2) |
| 725 ++IEB; | 707 ++IEB; |
| 726 } | 708 } |
| 727 // Process the variables that are live across the entire block. | 709 // Process the variables that are live across the entire block. |
| 728 for (int i = LiveInAndOut.find_first(); i != -1; | 710 for (int i = LiveInAndOut.find_first(); i != -1; |
| 729 i = LiveInAndOut.find_next(i)) { | 711 i = LiveInAndOut.find_next(i)) { |
| 730 Variable *Var = Liveness->getVariable(i, this); | 712 Variable *Var = Liveness->getVariable(i, this); |
| 731 if (Liveness->getRangeMask(Var->getIndex())) | 713 if (Liveness->getRangeMask(Var->getIndex())) |
| 732 Var->addLiveRange(FirstInstNum, LastInstNum + 1); | 714 Var->addLiveRange(FirstInstNum, LastInstNum + 1); |
| 733 } | 715 } |
| 734 } | 716 } |
| 735 | 717 |
| 736 // If this node contains only deleted instructions, and ends in an | 718 // If this node contains only deleted instructions, and ends in an |
| 737 // unconditional branch, contract the node by repointing all its | 719 // unconditional branch, contract the node by repointing all its in-edges to |
| 738 // in-edges to its successor. | 720 // its successor. |
| 739 void CfgNode::contractIfEmpty() { | 721 void CfgNode::contractIfEmpty() { |
| 740 if (InEdges.empty()) | 722 if (InEdges.empty()) |
| 741 return; | 723 return; |
| 742 Inst *Branch = nullptr; | 724 Inst *Branch = nullptr; |
| 743 for (Inst &I : Insts) { | 725 for (Inst &I : Insts) { |
| 744 if (I.isDeleted()) | 726 if (I.isDeleted()) |
| 745 continue; | 727 continue; |
| 746 if (I.isUnconditionalBranch()) | 728 if (I.isUnconditionalBranch()) |
| 747 Branch = &I; | 729 Branch = &I; |
| 748 else if (!I.isRedundantAssign()) | 730 else if (!I.isRedundantAssign()) |
| 749 return; | 731 return; |
| 750 } | 732 } |
| 751 Branch->setDeleted(); | 733 Branch->setDeleted(); |
| 752 assert(OutEdges.size() == 1); | 734 assert(OutEdges.size() == 1); |
| 753 CfgNode *Successor = OutEdges.front(); | 735 CfgNode *Successor = OutEdges.front(); |
| 754 // Repoint all this node's in-edges to this node's successor, unless | 736 // Repoint all this node's in-edges to this node's successor, unless this |
| 755 // this node's successor is actually itself (in which case the | 737 // node's successor is actually itself (in which case the statement |
| 756 // statement "OutEdges.front()->InEdges.push_back(Pred)" could | 738 // "OutEdges.front()->InEdges.push_back(Pred)" could invalidate the iterator |
| 757 // invalidate the iterator over this->InEdges). | 739 // over this->InEdges). |
| 758 if (Successor != this) { | 740 if (Successor != this) { |
| 759 for (CfgNode *Pred : InEdges) { | 741 for (CfgNode *Pred : InEdges) { |
| 760 for (CfgNode *&I : Pred->OutEdges) { | 742 for (CfgNode *&I : Pred->OutEdges) { |
| 761 if (I == this) { | 743 if (I == this) { |
| 762 I = Successor; | 744 I = Successor; |
| 763 Successor->InEdges.push_back(Pred); | 745 Successor->InEdges.push_back(Pred); |
| 764 } | 746 } |
| 765 } | 747 } |
| 766 for (Inst &I : Pred->getInsts()) { | 748 for (Inst &I : Pred->getInsts()) { |
| 767 if (!I.isDeleted()) | 749 if (!I.isDeleted()) |
| 768 I.repointEdges(this, Successor); | 750 I.repointEdges(this, Successor); |
| 769 } | 751 } |
| 770 } | 752 } |
| 771 | 753 |
| 772 // Remove the in-edge to the successor to allow node reordering to make | 754 // Remove the in-edge to the successor to allow node reordering to make |
| 773 // better decisions. For example it's more helpful to place a node after | 755 // better decisions. For example it's more helpful to place a node after a |
| 774 // a reachable predecessor than an unreachable one (like the one we just | 756 // reachable predecessor than an unreachable one (like the one we just |
| 775 // contracted). | 757 // contracted). |
| 776 Successor->InEdges.erase( | 758 Successor->InEdges.erase( |
| 777 std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this)); | 759 std::find(Successor->InEdges.begin(), Successor->InEdges.end(), this)); |
| 778 } | 760 } |
| 779 InEdges.clear(); | 761 InEdges.clear(); |
| 780 } | 762 } |
| 781 | 763 |
| 782 void CfgNode::doBranchOpt(const CfgNode *NextNode) { | 764 void CfgNode::doBranchOpt(const CfgNode *NextNode) { |
| 783 TargetLowering *Target = Func->getTarget(); | 765 TargetLowering *Target = Func->getTarget(); |
| 784 // Find the first opportunity for branch optimization (which will be the last | 766 // Find the first opportunity for branch optimization (which will be the last |
| 785 // instruction in the block) and stop. This is sufficient unless there is some | 767 // instruction in the block) and stop. This is sufficient unless there is |
| 786 // target lowering where we have the possibility of multiple optimizations per | 768 // some target lowering where we have the possibility of multiple |
| 787 // block. Take care with switch lowering as there are multiple unconditional | 769 // optimizations per block. Take care with switch lowering as there are |
| 788 // branches and only the last can be deleted. | 770 // multiple unconditional branches and only the last can be deleted. |
| 789 for (Inst &I : reverse_range(Insts)) { | 771 for (Inst &I : reverse_range(Insts)) { |
| 790 if (!I.isDeleted()) { | 772 if (!I.isDeleted()) { |
| 791 Target->doBranchOpt(&I, NextNode); | 773 Target->doBranchOpt(&I, NextNode); |
| 792 return; | 774 return; |
| 793 } | 775 } |
| 794 } | 776 } |
| 795 } | 777 } |
| 796 | 778 |
| 797 // ======================== Dump routines ======================== // | 779 // ======================== Dump routines ======================== // |
| 798 | 780 |
| (...skipping 19 matching lines...) Expand all Loading... |
| 818 for (SizeT i = 0; i < Live->size(); ++i) { | 800 for (SizeT i = 0; i < Live->size(); ++i) { |
| 819 if ((*Live)[i]) { | 801 if ((*Live)[i]) { |
| 820 Variable *Var = Liveness->getVariable(i, Node); | 802 Variable *Var = Liveness->getVariable(i, Node); |
| 821 if (Var->hasReg()) { | 803 if (Var->hasReg()) { |
| 822 if (IsLiveIn) | 804 if (IsLiveIn) |
| 823 ++LiveRegCount[Var->getRegNum()]; | 805 ++LiveRegCount[Var->getRegNum()]; |
| 824 LiveRegs.push_back(Var); | 806 LiveRegs.push_back(Var); |
| 825 } | 807 } |
| 826 } | 808 } |
| 827 } | 809 } |
| 828 // Sort the variables by regnum so they are always printed in a | 810 // Sort the variables by regnum so they are always printed in a familiar |
| 829 // familiar order. | 811 // order. |
| 830 std::sort(LiveRegs.begin(), LiveRegs.end(), | 812 std::sort(LiveRegs.begin(), LiveRegs.end(), |
| 831 [](const Variable *V1, const Variable *V2) { | 813 [](const Variable *V1, const Variable *V2) { |
| 832 return V1->getRegNum() < V2->getRegNum(); | 814 return V1->getRegNum() < V2->getRegNum(); |
| 833 }); | 815 }); |
| 834 bool First = true; | 816 bool First = true; |
| 835 for (Variable *Var : LiveRegs) { | 817 for (Variable *Var : LiveRegs) { |
| 836 if (!First) | 818 if (!First) |
| 837 Str << ","; | 819 Str << ","; |
| 838 First = false; | 820 First = false; |
| 839 Var->emit(Func); | 821 Var->emit(Func); |
| 840 } | 822 } |
| 841 } | 823 } |
| 842 Str << "\n"; | 824 Str << "\n"; |
| 843 } | 825 } |
| 844 | 826 |
| 845 void emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr, | 827 void emitLiveRangesEnded(Ostream &Str, const Cfg *Func, const Inst *Instr, |
| 846 std::vector<SizeT> &LiveRegCount) { | 828 std::vector<SizeT> &LiveRegCount) { |
| 847 if (!BuildDefs::dump()) | 829 if (!BuildDefs::dump()) |
| 848 return; | 830 return; |
| 849 bool First = true; | 831 bool First = true; |
| 850 Variable *Dest = Instr->getDest(); | 832 Variable *Dest = Instr->getDest(); |
| 851 // Normally we increment the live count for the dest register. But | 833 // Normally we increment the live count for the dest register. But we |
| 852 // we shouldn't if the instruction's IsDestNonKillable flag is set, | 834 // shouldn't if the instruction's IsDestNonKillable flag is set, because this |
| 853 // because this means that the target lowering created this | 835 // means that the target lowering created this instruction as a non-SSA |
| 854 // instruction as a non-SSA assignment; i.e., a different, previous | 836 // assignment; i.e., a different, previous instruction started the dest |
| 855 // instruction started the dest variable's live range. | 837 // variable's live range. |
| 856 if (!Instr->isDestNonKillable() && Dest && Dest->hasReg()) | 838 if (!Instr->isDestNonKillable() && Dest && Dest->hasReg()) |
| 857 ++LiveRegCount[Dest->getRegNum()]; | 839 ++LiveRegCount[Dest->getRegNum()]; |
| 858 FOREACH_VAR_IN_INST(Var, *Instr) { | 840 FOREACH_VAR_IN_INST(Var, *Instr) { |
| 859 bool ShouldReport = Instr->isLastUse(Var); | 841 bool ShouldReport = Instr->isLastUse(Var); |
| 860 if (ShouldReport && Var->hasReg()) { | 842 if (ShouldReport && Var->hasReg()) { |
| 861 // Don't report end of live range until the live count reaches 0. | 843 // Don't report end of live range until the live count reaches 0. |
| 862 SizeT NewCount = --LiveRegCount[Var->getRegNum()]; | 844 SizeT NewCount = --LiveRegCount[Var->getRegNum()]; |
| 863 if (NewCount) | 845 if (NewCount) |
| 864 ShouldReport = false; | 846 ShouldReport = false; |
| 865 } | 847 } |
| 866 if (ShouldReport) { | 848 if (ShouldReport) { |
| 867 if (First) | 849 if (First) |
| 868 Str << " \t# END="; | 850 Str << " \t# END="; |
| 869 else | 851 else |
| 870 Str << ","; | 852 Str << ","; |
| 871 Var->emit(Func); | 853 Var->emit(Func); |
| 872 First = false; | 854 First = false; |
| 873 } | 855 } |
| 874 } | 856 } |
| 875 } | 857 } |
| 876 | 858 |
| 877 void updateStats(Cfg *Func, const Inst *I) { | 859 void updateStats(Cfg *Func, const Inst *I) { |
| 878 if (!BuildDefs::dump()) | 860 if (!BuildDefs::dump()) |
| 879 return; | 861 return; |
| 880 // Update emitted instruction count, plus fill/spill count for | 862 // Update emitted instruction count, plus fill/spill count for Variable |
| 881 // Variable operands without a physical register. | 863 // operands without a physical register. |
| 882 if (uint32_t Count = I->getEmitInstCount()) { | 864 if (uint32_t Count = I->getEmitInstCount()) { |
| 883 Func->getContext()->statsUpdateEmitted(Count); | 865 Func->getContext()->statsUpdateEmitted(Count); |
| 884 if (Variable *Dest = I->getDest()) { | 866 if (Variable *Dest = I->getDest()) { |
| 885 if (!Dest->hasReg()) | 867 if (!Dest->hasReg()) |
| 886 Func->getContext()->statsUpdateFills(); | 868 Func->getContext()->statsUpdateFills(); |
| 887 } | 869 } |
| 888 for (SizeT S = 0; S < I->getSrcSize(); ++S) { | 870 for (SizeT S = 0; S < I->getSrcSize(); ++S) { |
| 889 if (Variable *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) { | 871 if (Variable *Src = llvm::dyn_cast<Variable>(I->getSrc(S))) { |
| 890 if (!Src->hasReg()) | 872 if (!Src->hasReg()) |
| 891 Func->getContext()->statsUpdateSpills(); | 873 Func->getContext()->statsUpdateSpills(); |
| 892 } | 874 } |
| 893 } | 875 } |
| 894 } | 876 } |
| 895 } | 877 } |
| 896 | 878 |
| 897 } // end of anonymous namespace | 879 } // end of anonymous namespace |
| 898 | 880 |
| 899 void CfgNode::emit(Cfg *Func) const { | 881 void CfgNode::emit(Cfg *Func) const { |
| 900 if (!BuildDefs::dump()) | 882 if (!BuildDefs::dump()) |
| 901 return; | 883 return; |
| 902 Func->setCurrentNode(this); | 884 Func->setCurrentNode(this); |
| 903 Ostream &Str = Func->getContext()->getStrEmit(); | 885 Ostream &Str = Func->getContext()->getStrEmit(); |
| 904 Liveness *Liveness = Func->getLiveness(); | 886 Liveness *Liveness = Func->getLiveness(); |
| 905 bool DecorateAsm = | 887 bool DecorateAsm = |
| 906 Liveness && Func->getContext()->getFlags().getDecorateAsm(); | 888 Liveness && Func->getContext()->getFlags().getDecorateAsm(); |
| 907 Str << getAsmName() << ":\n"; | 889 Str << getAsmName() << ":\n"; |
| 908 // LiveRegCount keeps track of the number of currently live | 890 // LiveRegCount keeps track of the number of currently live variables that |
| 909 // variables that each register is assigned to. Normally that would | 891 // each register is assigned to. Normally that would be only 0 or 1, but the |
| 910 // be only 0 or 1, but the register allocator's AllowOverlap | 892 // register allocator's AllowOverlap inference allows it to be greater than 1 |
| 911 // inference allows it to be greater than 1 for short periods. | 893 // for short periods. |
| 912 std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters()); | 894 std::vector<SizeT> LiveRegCount(Func->getTarget()->getNumRegisters()); |
| 913 if (DecorateAsm) { | 895 if (DecorateAsm) { |
| 914 constexpr bool IsLiveIn = true; | 896 constexpr bool IsLiveIn = true; |
| 915 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount); | 897 emitRegisterUsage(Str, Func, this, IsLiveIn, LiveRegCount); |
| 916 } | 898 } |
| 917 | 899 |
| 918 for (const Inst &I : Phis) { | 900 for (const Inst &I : Phis) { |
| 919 if (I.isDeleted()) | 901 if (I.isDeleted()) |
| 920 continue; | 902 continue; |
| 921 // Emitting a Phi instruction should cause an error. | 903 // Emitting a Phi instruction should cause an error. |
| 922 I.emit(Func); | 904 I.emit(Func); |
| 923 } | 905 } |
| 924 for (const Inst &I : Insts) { | 906 for (const Inst &I : Insts) { |
| 925 if (I.isDeleted()) | 907 if (I.isDeleted()) |
| 926 continue; | 908 continue; |
| 927 if (I.isRedundantAssign()) { | 909 if (I.isRedundantAssign()) { |
| 928 // Usually, redundant assignments end the live range of the src | 910 // Usually, redundant assignments end the live range of the src variable |
| 929 // variable and begin the live range of the dest variable, with | 911 // and begin the live range of the dest variable, with no net effect on |
| 930 // no net effect on the liveness of their register. However, if | 912 // the liveness of their register. However, if the register allocator |
| 931 // the register allocator infers the AllowOverlap condition, | 913 // infers the AllowOverlap condition, then this may be a redundant |
| 932 // then this may be a redundant assignment that does not end the | 914 // assignment that does not end the src variable's live range, in which |
| 933 // src variable's live range, in which case the active variable | 915 // case the active variable count for that register needs to be bumped. |
| 934 // count for that register needs to be bumped. That normally | 916 // That normally would have happened as part of emitLiveRangesEnded(), |
| 935 // would have happened as part of emitLiveRangesEnded(), but | 917 // but that isn't called for redundant assignments. |
| 936 // that isn't called for redundant assignments. | |
| 937 Variable *Dest = I.getDest(); | 918 Variable *Dest = I.getDest(); |
| 938 if (DecorateAsm && Dest->hasReg() && !I.isLastUse(I.getSrc(0))) | 919 if (DecorateAsm && Dest->hasReg() && !I.isLastUse(I.getSrc(0))) |
| 939 ++LiveRegCount[Dest->getRegNum()]; | 920 ++LiveRegCount[Dest->getRegNum()]; |
| 940 continue; | 921 continue; |
| 941 } | 922 } |
| 942 I.emit(Func); | 923 I.emit(Func); |
| 943 if (DecorateAsm) | 924 if (DecorateAsm) |
| 944 emitLiveRangesEnded(Str, Func, &I, LiveRegCount); | 925 emitLiveRangesEnded(Str, Func, &I, LiveRegCount); |
| 945 Str << "\n"; | 926 Str << "\n"; |
| 946 updateStats(Func, &I); | 927 updateStats(Func, &I); |
| (...skipping 12 matching lines...) Expand all Loading... |
| 959 BundleEmitHelper &operator=(const BundleEmitHelper &) = delete; | 940 BundleEmitHelper &operator=(const BundleEmitHelper &) = delete; |
| 960 | 941 |
| 961 public: | 942 public: |
| 962 BundleEmitHelper(Assembler *Asm, TargetLowering *Target, | 943 BundleEmitHelper(Assembler *Asm, TargetLowering *Target, |
| 963 const InstList &Insts) | 944 const InstList &Insts) |
| 964 : Asm(Asm), Target(Target), End(Insts.end()), BundleLockStart(End), | 945 : Asm(Asm), Target(Target), End(Insts.end()), BundleLockStart(End), |
| 965 BundleSize(1 << Asm->getBundleAlignLog2Bytes()), | 946 BundleSize(1 << Asm->getBundleAlignLog2Bytes()), |
| 966 BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {} | 947 BundleMaskLo(BundleSize - 1), BundleMaskHi(~BundleMaskLo) {} |
| 967 // Check whether we're currently within a bundle_lock region. | 948 // Check whether we're currently within a bundle_lock region. |
| 968 bool isInBundleLockRegion() const { return BundleLockStart != End; } | 949 bool isInBundleLockRegion() const { return BundleLockStart != End; } |
| 969 // Check whether the current bundle_lock region has the align_to_end | 950 // Check whether the current bundle_lock region has the align_to_end option. |
| 970 // option. | |
| 971 bool isAlignToEnd() const { | 951 bool isAlignToEnd() const { |
| 972 assert(isInBundleLockRegion()); | 952 assert(isInBundleLockRegion()); |
| 973 return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() == | 953 return llvm::cast<InstBundleLock>(getBundleLockStart())->getOption() == |
| 974 InstBundleLock::Opt_AlignToEnd; | 954 InstBundleLock::Opt_AlignToEnd; |
| 975 } | 955 } |
| 976 // Check whether the entire bundle_lock region falls within the same | 956 // Check whether the entire bundle_lock region falls within the same bundle. |
| 977 // bundle. | |
| 978 bool isSameBundle() const { | 957 bool isSameBundle() const { |
| 979 assert(isInBundleLockRegion()); | 958 assert(isInBundleLockRegion()); |
| 980 return SizeSnapshotPre == SizeSnapshotPost || | 959 return SizeSnapshotPre == SizeSnapshotPost || |
| 981 (SizeSnapshotPre & BundleMaskHi) == | 960 (SizeSnapshotPre & BundleMaskHi) == |
| 982 ((SizeSnapshotPost - 1) & BundleMaskHi); | 961 ((SizeSnapshotPost - 1) & BundleMaskHi); |
| 983 } | 962 } |
| 984 // Get the bundle alignment of the first instruction of the | 963 // Get the bundle alignment of the first instruction of the bundle_lock |
| 985 // bundle_lock region. | 964 // region. |
| 986 intptr_t getPreAlignment() const { | 965 intptr_t getPreAlignment() const { |
| 987 assert(isInBundleLockRegion()); | 966 assert(isInBundleLockRegion()); |
| 988 return SizeSnapshotPre & BundleMaskLo; | 967 return SizeSnapshotPre & BundleMaskLo; |
| 989 } | 968 } |
| 990 // Get the bundle alignment of the first instruction past the | 969 // Get the bundle alignment of the first instruction past the bundle_lock |
| 991 // bundle_lock region. | 970 // region. |
| 992 intptr_t getPostAlignment() const { | 971 intptr_t getPostAlignment() const { |
| 993 assert(isInBundleLockRegion()); | 972 assert(isInBundleLockRegion()); |
| 994 return SizeSnapshotPost & BundleMaskLo; | 973 return SizeSnapshotPost & BundleMaskLo; |
| 995 } | 974 } |
| 996 // Get the iterator pointing to the bundle_lock instruction, e.g. to | 975 // Get the iterator pointing to the bundle_lock instruction, e.g. to roll |
| 997 // roll back the instruction iteration to that point. | 976 // back the instruction iteration to that point. |
| 998 InstList::const_iterator getBundleLockStart() const { | 977 InstList::const_iterator getBundleLockStart() const { |
| 999 assert(isInBundleLockRegion()); | 978 assert(isInBundleLockRegion()); |
| 1000 return BundleLockStart; | 979 return BundleLockStart; |
| 1001 } | 980 } |
| 1002 // Set up bookkeeping when the bundle_lock instruction is first | 981 // Set up bookkeeping when the bundle_lock instruction is first processed. |
| 1003 // processed. | |
| 1004 void enterBundleLock(InstList::const_iterator I) { | 982 void enterBundleLock(InstList::const_iterator I) { |
| 1005 assert(!isInBundleLockRegion()); | 983 assert(!isInBundleLockRegion()); |
| 1006 BundleLockStart = I; | 984 BundleLockStart = I; |
| 1007 SizeSnapshotPre = Asm->getBufferSize(); | 985 SizeSnapshotPre = Asm->getBufferSize(); |
| 1008 Asm->setPreliminary(true); | 986 Asm->setPreliminary(true); |
| 1009 Target->snapshotEmitState(); | 987 Target->snapshotEmitState(); |
| 1010 assert(isInBundleLockRegion()); | 988 assert(isInBundleLockRegion()); |
| 1011 } | 989 } |
| 1012 // Update bookkeeping when the bundle_unlock instruction is | 990 // Update bookkeeping when the bundle_unlock instruction is processed. |
| 1013 // processed. | |
| 1014 void enterBundleUnlock() { | 991 void enterBundleUnlock() { |
| 1015 assert(isInBundleLockRegion()); | 992 assert(isInBundleLockRegion()); |
| 1016 SizeSnapshotPost = Asm->getBufferSize(); | 993 SizeSnapshotPost = Asm->getBufferSize(); |
| 1017 } | 994 } |
| 1018 // Update bookkeeping when we are completely finished with the | 995 // Update bookkeeping when we are completely finished with the bundle_lock |
| 1019 // bundle_lock region. | 996 // region. |
| 1020 void leaveBundleLockRegion() { BundleLockStart = End; } | 997 void leaveBundleLockRegion() { BundleLockStart = End; } |
| 1021 // Check whether the instruction sequence fits within the current | 998 // Check whether the instruction sequence fits within the current bundle, and |
| 1022 // bundle, and if not, add nop padding to the end of the current | 999 // if not, add nop padding to the end of the current bundle. |
| 1023 // bundle. | |
| 1024 void padToNextBundle() { | 1000 void padToNextBundle() { |
| 1025 assert(isInBundleLockRegion()); | 1001 assert(isInBundleLockRegion()); |
| 1026 if (!isSameBundle()) { | 1002 if (!isSameBundle()) { |
| 1027 intptr_t PadToNextBundle = BundleSize - getPreAlignment(); | 1003 intptr_t PadToNextBundle = BundleSize - getPreAlignment(); |
| 1028 Asm->padWithNop(PadToNextBundle); | 1004 Asm->padWithNop(PadToNextBundle); |
| 1029 SizeSnapshotPre += PadToNextBundle; | 1005 SizeSnapshotPre += PadToNextBundle; |
| 1030 SizeSnapshotPost += PadToNextBundle; | 1006 SizeSnapshotPost += PadToNextBundle; |
| 1031 assert((Asm->getBufferSize() & BundleMaskLo) == 0); | 1007 assert((Asm->getBufferSize() & BundleMaskLo) == 0); |
| 1032 assert(Asm->getBufferSize() == SizeSnapshotPre); | 1008 assert(Asm->getBufferSize() == SizeSnapshotPre); |
| 1033 } | 1009 } |
| 1034 } | 1010 } |
| 1035 // If align_to_end is specified, add padding such that the | 1011 // If align_to_end is specified, add padding such that the instruction |
| 1036 // instruction sequences ends precisely at a bundle boundary. | 1012 // sequences ends precisely at a bundle boundary. |
| 1037 void padForAlignToEnd() { | 1013 void padForAlignToEnd() { |
| 1038 assert(isInBundleLockRegion()); | 1014 assert(isInBundleLockRegion()); |
| 1039 if (isAlignToEnd()) { | 1015 if (isAlignToEnd()) { |
| 1040 if (intptr_t Offset = getPostAlignment()) { | 1016 if (intptr_t Offset = getPostAlignment()) { |
| 1041 Asm->padWithNop(BundleSize - Offset); | 1017 Asm->padWithNop(BundleSize - Offset); |
| 1042 SizeSnapshotPre = Asm->getBufferSize(); | 1018 SizeSnapshotPre = Asm->getBufferSize(); |
| 1043 } | 1019 } |
| 1044 } | 1020 } |
| 1045 } | 1021 } |
| 1046 // Update bookkeeping when rolling back for the second pass. | 1022 // Update bookkeeping when rolling back for the second pass. |
| 1047 void rollback() { | 1023 void rollback() { |
| 1048 assert(isInBundleLockRegion()); | 1024 assert(isInBundleLockRegion()); |
| 1049 Asm->setBufferSize(SizeSnapshotPre); | 1025 Asm->setBufferSize(SizeSnapshotPre); |
| 1050 Asm->setPreliminary(false); | 1026 Asm->setPreliminary(false); |
| 1051 Target->rollbackEmitState(); | 1027 Target->rollbackEmitState(); |
| 1052 } | 1028 } |
| 1053 | 1029 |
| 1054 private: | 1030 private: |
| 1055 Assembler *const Asm; | 1031 Assembler *const Asm; |
| 1056 TargetLowering *const Target; | 1032 TargetLowering *const Target; |
| 1057 // End is a sentinel value such that BundleLockStart==End implies | 1033 // End is a sentinel value such that BundleLockStart==End implies that we are |
| 1058 // that we are not in a bundle_lock region. | 1034 // not in a bundle_lock region. |
| 1059 const InstList::const_iterator End; | 1035 const InstList::const_iterator End; |
| 1060 InstList::const_iterator BundleLockStart; | 1036 InstList::const_iterator BundleLockStart; |
| 1061 const intptr_t BundleSize; | 1037 const intptr_t BundleSize; |
| 1062 // Masking with BundleMaskLo identifies an address's bundle offset. | 1038 // Masking with BundleMaskLo identifies an address's bundle offset. |
| 1063 const intptr_t BundleMaskLo; | 1039 const intptr_t BundleMaskLo; |
| 1064 // Masking with BundleMaskHi identifies an address's bundle. | 1040 // Masking with BundleMaskHi identifies an address's bundle. |
| 1065 const intptr_t BundleMaskHi; | 1041 const intptr_t BundleMaskHi; |
| 1066 intptr_t SizeSnapshotPre = 0; | 1042 intptr_t SizeSnapshotPre = 0; |
| 1067 intptr_t SizeSnapshotPost = 0; | 1043 intptr_t SizeSnapshotPost = 0; |
| 1068 }; | 1044 }; |
| 1069 | 1045 |
| 1070 } // end of anonymous namespace | 1046 } // end of anonymous namespace |
| 1071 | 1047 |
| 1072 void CfgNode::emitIAS(Cfg *Func) const { | 1048 void CfgNode::emitIAS(Cfg *Func) const { |
| 1073 Func->setCurrentNode(this); | 1049 Func->setCurrentNode(this); |
| 1074 Assembler *Asm = Func->getAssembler<>(); | 1050 Assembler *Asm = Func->getAssembler<>(); |
| 1075 // TODO(stichnot): When sandboxing, defer binding the node label | 1051 // TODO(stichnot): When sandboxing, defer binding the node label until just |
| 1076 // until just before the first instruction is emitted, to reduce the | 1052 // before the first instruction is emitted, to reduce the chance that a |
| 1077 // chance that a padding nop is a branch target. | 1053 // padding nop is a branch target. |
| 1078 Asm->bindCfgNodeLabel(getIndex()); | 1054 Asm->bindCfgNodeLabel(getIndex()); |
| 1079 for (const Inst &I : Phis) { | 1055 for (const Inst &I : Phis) { |
| 1080 if (I.isDeleted()) | 1056 if (I.isDeleted()) |
| 1081 continue; | 1057 continue; |
| 1082 // Emitting a Phi instruction should cause an error. | 1058 // Emitting a Phi instruction should cause an error. |
| 1083 I.emitIAS(Func); | 1059 I.emitIAS(Func); |
| 1084 } | 1060 } |
| 1085 | 1061 |
| 1086 // Do the simple emission if not sandboxed. | 1062 // Do the simple emission if not sandboxed. |
| 1087 if (!Func->getContext()->getFlags().getUseSandboxing()) { | 1063 if (!Func->getContext()->getFlags().getUseSandboxing()) { |
| 1088 for (const Inst &I : Insts) { | 1064 for (const Inst &I : Insts) { |
| 1089 if (!I.isDeleted() && !I.isRedundantAssign()) { | 1065 if (!I.isDeleted() && !I.isRedundantAssign()) { |
| 1090 I.emitIAS(Func); | 1066 I.emitIAS(Func); |
| 1091 updateStats(Func, &I); | 1067 updateStats(Func, &I); |
| 1092 } | 1068 } |
| 1093 } | 1069 } |
| 1094 return; | 1070 return; |
| 1095 } | 1071 } |
| 1096 | 1072 |
| 1097 // The remainder of the function handles emission with sandboxing. | 1073 // The remainder of the function handles emission with sandboxing. There are |
| 1098 // There are explicit bundle_lock regions delimited by bundle_lock | 1074 // explicit bundle_lock regions delimited by bundle_lock and bundle_unlock |
| 1099 // and bundle_unlock instructions. All other instructions are | 1075 // instructions. All other instructions are treated as an implicit |
| 1100 // treated as an implicit one-instruction bundle_lock region. | 1076 // one-instruction bundle_lock region. Emission is done twice for each |
| 1101 // Emission is done twice for each bundle_lock region. The first | 1077 // bundle_lock region. The first pass is a preliminary pass, after which we |
| 1102 // pass is a preliminary pass, after which we can figure out what | 1078 // can figure out what nop padding is needed, then roll back, and make the |
| 1103 // nop padding is needed, then roll back, and make the final pass. | 1079 // final pass. |
| 1104 // | 1080 // |
| 1105 // Ideally, the first pass would be speculative and the second pass | 1081 // Ideally, the first pass would be speculative and the second pass would |
| 1106 // would only be done if nop padding were needed, but the structure | 1082 // only be done if nop padding were needed, but the structure of the |
| 1107 // of the integrated assembler makes it hard to roll back the state | 1083 // integrated assembler makes it hard to roll back the state of label |
| 1108 // of label bindings, label links, and relocation fixups. Instead, | 1084 // bindings, label links, and relocation fixups. Instead, the first pass just |
| 1109 // the first pass just disables all mutation of that state. | 1085 // disables all mutation of that state. |
| 1110 | 1086 |
| 1111 BundleEmitHelper Helper(Asm, Func->getTarget(), Insts); | 1087 BundleEmitHelper Helper(Asm, Func->getTarget(), Insts); |
| 1112 InstList::const_iterator End = Insts.end(); | 1088 InstList::const_iterator End = Insts.end(); |
| 1113 // Retrying indicates that we had to roll back to the bundle_lock | 1089 // Retrying indicates that we had to roll back to the bundle_lock instruction |
| 1114 // instruction to apply padding before the bundle_lock sequence. | 1090 // to apply padding before the bundle_lock sequence. |
| 1115 bool Retrying = false; | 1091 bool Retrying = false; |
| 1116 for (InstList::const_iterator I = Insts.begin(); I != End; ++I) { | 1092 for (InstList::const_iterator I = Insts.begin(); I != End; ++I) { |
| 1117 if (I->isDeleted() || I->isRedundantAssign()) | 1093 if (I->isDeleted() || I->isRedundantAssign()) |
| 1118 continue; | 1094 continue; |
| 1119 | 1095 |
| 1120 if (llvm::isa<InstBundleLock>(I)) { | 1096 if (llvm::isa<InstBundleLock>(I)) { |
| 1121 // Set up the initial bundle_lock state. This should not happen | 1097 // Set up the initial bundle_lock state. This should not happen while |
| 1122 // while retrying, because the retry rolls back to the | 1098 // retrying, because the retry rolls back to the instruction following |
| 1123 // instruction following the bundle_lock instruction. | 1099 // the bundle_lock instruction. |
| 1124 assert(!Retrying); | 1100 assert(!Retrying); |
| 1125 Helper.enterBundleLock(I); | 1101 Helper.enterBundleLock(I); |
| 1126 continue; | 1102 continue; |
| 1127 } | 1103 } |
| 1128 | 1104 |
| 1129 if (llvm::isa<InstBundleUnlock>(I)) { | 1105 if (llvm::isa<InstBundleUnlock>(I)) { |
| 1130 Helper.enterBundleUnlock(); | 1106 Helper.enterBundleUnlock(); |
| 1131 if (Retrying) { | 1107 if (Retrying) { |
| 1132 // Make sure all instructions are in the same bundle. | 1108 // Make sure all instructions are in the same bundle. |
| 1133 assert(Helper.isSameBundle()); | 1109 assert(Helper.isSameBundle()); |
| 1134 // If align_to_end is specified, make sure the next | 1110 // If align_to_end is specified, make sure the next instruction begins |
| 1135 // instruction begins the bundle. | 1111 // the bundle. |
| 1136 assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0); | 1112 assert(!Helper.isAlignToEnd() || Helper.getPostAlignment() == 0); |
| 1137 Helper.leaveBundleLockRegion(); | 1113 Helper.leaveBundleLockRegion(); |
| 1138 Retrying = false; | 1114 Retrying = false; |
| 1139 } else { | 1115 } else { |
| 1140 // This is the first pass, so roll back for the retry pass. | 1116 // This is the first pass, so roll back for the retry pass. |
| 1141 Helper.rollback(); | 1117 Helper.rollback(); |
| 1142 // Pad to the next bundle if the instruction sequence crossed | 1118 // Pad to the next bundle if the instruction sequence crossed a bundle |
| 1143 // a bundle boundary. | 1119 // boundary. |
| 1144 Helper.padToNextBundle(); | 1120 Helper.padToNextBundle(); |
| 1145 // Insert additional padding to make AlignToEnd work. | 1121 // Insert additional padding to make AlignToEnd work. |
| 1146 Helper.padForAlignToEnd(); | 1122 Helper.padForAlignToEnd(); |
| 1147 // Prepare for the retry pass after padding is done. | 1123 // Prepare for the retry pass after padding is done. |
| 1148 Retrying = true; | 1124 Retrying = true; |
| 1149 I = Helper.getBundleLockStart(); | 1125 I = Helper.getBundleLockStart(); |
| 1150 } | 1126 } |
| 1151 continue; | 1127 continue; |
| 1152 } | 1128 } |
| 1153 | 1129 |
| (...skipping 10 matching lines...) Expand all Loading... |
| 1164 I->emitIAS(Func); | 1140 I->emitIAS(Func); |
| 1165 Helper.enterBundleUnlock(); | 1141 Helper.enterBundleUnlock(); |
| 1166 Helper.rollback(); | 1142 Helper.rollback(); |
| 1167 Helper.padToNextBundle(); | 1143 Helper.padToNextBundle(); |
| 1168 I->emitIAS(Func); | 1144 I->emitIAS(Func); |
| 1169 updateStats(Func, I); | 1145 updateStats(Func, I); |
| 1170 Helper.leaveBundleLockRegion(); | 1146 Helper.leaveBundleLockRegion(); |
| 1171 } | 1147 } |
| 1172 } | 1148 } |
| 1173 | 1149 |
| 1174 // Don't allow bundle locking across basic blocks, to keep the | 1150 // Don't allow bundle locking across basic blocks, to keep the backtracking |
| 1175 // backtracking mechanism simple. | 1151 // mechanism simple. |
| 1176 assert(!Helper.isInBundleLockRegion()); | 1152 assert(!Helper.isInBundleLockRegion()); |
| 1177 assert(!Retrying); | 1153 assert(!Retrying); |
| 1178 } | 1154 } |
| 1179 | 1155 |
| 1180 void CfgNode::dump(Cfg *Func) const { | 1156 void CfgNode::dump(Cfg *Func) const { |
| 1181 if (!BuildDefs::dump()) | 1157 if (!BuildDefs::dump()) |
| 1182 return; | 1158 return; |
| 1183 Func->setCurrentNode(this); | 1159 Func->setCurrentNode(this); |
| 1184 Ostream &Str = Func->getContext()->getStrDump(); | 1160 Ostream &Str = Func->getContext()->getStrDump(); |
| 1185 Liveness *Liveness = Func->getLiveness(); | 1161 Liveness *Liveness = Func->getLiveness(); |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1283 InstIntrinsicCall *Inst = InstIntrinsicCall::create( | 1259 InstIntrinsicCall *Inst = InstIntrinsicCall::create( |
| 1284 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); | 1260 Func, 5, Func->makeVariable(IceType_i64), RMWI64Name, Info->Info); |
| 1285 Inst->addArg(AtomicRMWOp); | 1261 Inst->addArg(AtomicRMWOp); |
| 1286 Inst->addArg(Counter); | 1262 Inst->addArg(Counter); |
| 1287 Inst->addArg(One); | 1263 Inst->addArg(One); |
| 1288 Inst->addArg(OrderAcquireRelease); | 1264 Inst->addArg(OrderAcquireRelease); |
| 1289 Insts.push_front(Inst); | 1265 Insts.push_front(Inst); |
| 1290 } | 1266 } |
| 1291 | 1267 |
| 1292 } // end of namespace Ice | 1268 } // end of namespace Ice |
| OLD | NEW |