OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 192 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
203 TypeToRegisterSet[IceType_void] = InvalidRegisters; | 203 TypeToRegisterSet[IceType_void] = InvalidRegisters; |
204 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8; | 204 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8; |
205 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8; | 205 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8; |
206 TypeToRegisterSet[IceType_i16] = IntegerRegisters; | 206 TypeToRegisterSet[IceType_i16] = IntegerRegisters; |
207 TypeToRegisterSet[IceType_i32] = IntegerRegisters; | 207 TypeToRegisterSet[IceType_i32] = IntegerRegisters; |
208 TypeToRegisterSet[IceType_i64] = IntegerRegisters; | 208 TypeToRegisterSet[IceType_i64] = IntegerRegisters; |
209 TypeToRegisterSet[IceType_f32] = FloatRegisters; | 209 TypeToRegisterSet[IceType_f32] = FloatRegisters; |
210 TypeToRegisterSet[IceType_f64] = FloatRegisters; | 210 TypeToRegisterSet[IceType_f64] = FloatRegisters; |
211 } | 211 } |
212 | 212 |
// Runs the O2 translation pipeline on Func, in this order: Phi lowering,
// address mode optimization, renumbering + Liveness_Basic, code
// generation, renumbering + Liveness_Intervals (validated via assert),
// register allocation, and stack frame mapping.  After each checked step
// the function returns early if Func->hasError() is set; in that case the
// step's elapsed time is not printed.  Func->dump() is called with a title
// after the major phases.
| 213 void TargetX8632::translateO2() { |
| 214 GlobalContext *Context = Func->getContext(); |
| 215 |
| 216 // Lower Phi instructions. |
| 217 Timer T_placePhiLoads; |
| 218 Func->placePhiLoads(); |
| 219 if (Func->hasError()) |
| 220 return; |
| 221 T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()"); |
| 222 Timer T_placePhiStores; |
| 223 Func->placePhiStores(); |
| 224 if (Func->hasError()) |
| 225 return; |
| 226 T_placePhiStores.printElapsedUs(Context, "placePhiStores()"); |
| 227 Timer T_deletePhis; |
| 228 Func->deletePhis(); |
| 229 if (Func->hasError()) |
| 230 return; |
| 231 T_deletePhis.printElapsedUs(Context, "deletePhis()"); |
| 232 Func->dump("After Phi lowering"); |
| 233 |
| 234 // Address mode optimization. |
| 235 Timer T_doAddressOpt; |
| 236 Func->doAddressOpt(); |
// Note: unlike the other passes in this function, doAddressOpt() is not
// followed by a Func->hasError() check.
| 237 T_doAddressOpt.printElapsedUs(Context, "doAddressOpt()"); |
| 238 |
| 239 // Target lowering. This requires liveness analysis for some parts |
| 240 // of the lowering decisions, such as compare/branch fusing. If |
| 241 // non-lightweight liveness analysis is used, the instructions need |
| 242 // to be renumbered first. TODO: This renumbering should only be |
| 243 // necessary if we're actually calculating live intervals, which we |
| 244 // only do for register allocation. |
| 245 Timer T_renumber1; |
| 246 Func->renumberInstructions(); |
| 247 if (Func->hasError()) |
| 248 return; |
| 249 T_renumber1.printElapsedUs(Context, "renumberInstructions()"); |
| 250 // TODO: It should be sufficient to use the fastest liveness |
| 251 // calculation, i.e. livenessLightweight(). However, for some |
| 252 // reason that slows down the rest of the translation. Investigate. |
| 253 Timer T_liveness1; |
| 254 Func->liveness(Liveness_Basic); |
| 255 if (Func->hasError()) |
| 256 return; |
| 257 T_liveness1.printElapsedUs(Context, "liveness()"); |
// The address-mode-opt dump is emitted here, after renumbering and basic
// liveness, rather than directly after doAddressOpt().
| 258 Func->dump("After x86 address mode opt"); |
| 259 Timer T_genCode; |
| 260 Func->genCode(); |
| 261 if (Func->hasError()) |
| 262 return; |
| 263 T_genCode.printElapsedUs(Context, "genCode()"); |
| 264 |
| 265 // Register allocation. This requires instruction renumbering and |
| 266 // full liveness analysis. |
| 267 Timer T_renumber2; |
| 268 Func->renumberInstructions(); |
| 269 if (Func->hasError()) |
| 270 return; |
| 271 T_renumber2.printElapsedUs(Context, "renumberInstructions()"); |
| 272 Timer T_liveness2; |
| 273 Func->liveness(Liveness_Intervals); |
| 274 if (Func->hasError()) |
| 275 return; |
| 276 T_liveness2.printElapsedUs(Context, "liveness()"); |
| 277 // Validate the live range computations. Do it outside the timing |
| 278 // code. TODO: Put this under a flag. |
| 279 bool ValidLiveness = Func->validateLiveness(); |
| 280 assert(ValidLiveness); |
| 281 (void)ValidLiveness; // used only in assert() |
// ComputedLiveRanges is consulted elsewhere in this file to skip stack
// slots for variables whose live range is empty.
| 282 ComputedLiveRanges = true; |
| 283 // The post-codegen dump is done here, after liveness analysis and |
| 284 // associated cleanup, to make the dump cleaner and more useful. |
| 285 Func->dump("After initial x8632 codegen"); |
| 286 Timer T_regAlloc; |
| 287 regAlloc(); |
| 288 if (Func->hasError()) |
| 289 return; |
| 290 T_regAlloc.printElapsedUs(Context, "regAlloc()"); |
| 291 Func->dump("After linear scan regalloc"); |
| 292 |
| 293 // Stack frame mapping. |
| 294 Timer T_genFrame; |
| 295 Func->genFrame(); |
| 296 if (Func->hasError()) |
| 297 return; |
| 298 T_genFrame.printElapsedUs(Context, "genFrame()"); |
| 299 Func->dump("After stack frame mapping"); |
| 300 } |
| 301 |
// Runs the Om1 translation pipeline on Func: Phi lowering (place loads,
// place stores, delete Phis), code generation, and stack frame mapping.
// No address mode optimization, liveness analysis, or register allocation
// step appears in this path.  Each step returns early when
// Func->hasError() is set.  In the NEW column the verbose-guarded
// "Str << banner; Func->dump();" sequences are replaced by single
// Func->dump(title) calls, and the local Str is removed.
213 void TargetX8632::translateOm1() { | 302 void TargetX8632::translateOm1() { |
214 GlobalContext *Context = Func->getContext(); | 303 GlobalContext *Context = Func->getContext(); |
215 Ostream &Str = Context->getStrDump(); | |
216 Timer T_placePhiLoads; | 304 Timer T_placePhiLoads; |
217 Func->placePhiLoads(); | 305 Func->placePhiLoads(); |
218 if (Func->hasError()) | 306 if (Func->hasError()) |
219 return; | 307 return; |
220 T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()"); | 308 T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()"); |
221 Timer T_placePhiStores; | 309 Timer T_placePhiStores; |
222 Func->placePhiStores(); | 310 Func->placePhiStores(); |
223 if (Func->hasError()) | 311 if (Func->hasError()) |
224 return; | 312 return; |
225 T_placePhiStores.printElapsedUs(Context, "placePhiStores()"); | 313 T_placePhiStores.printElapsedUs(Context, "placePhiStores()"); |
226 Timer T_deletePhis; | 314 Timer T_deletePhis; |
227 Func->deletePhis(); | 315 Func->deletePhis(); |
228 if (Func->hasError()) | 316 if (Func->hasError()) |
229 return; | 317 return; |
230 T_deletePhis.printElapsedUs(Context, "deletePhis()"); | 318 T_deletePhis.printElapsedUs(Context, "deletePhis()"); |
231 if (Context->isVerbose()) { | 319 Func->dump("After Phi lowering"); |
232 Str << "================ After Phi lowering ================\n"; | |
233 Func->dump(); | |
234 } | |
235 | 320 |
// Code generation.
236 Timer T_genCode; | 321 Timer T_genCode; |
237 Func->genCode(); | 322 Func->genCode(); |
238 if (Func->hasError()) | 323 if (Func->hasError()) |
239 return; | 324 return; |
240 T_genCode.printElapsedUs(Context, "genCode()"); | 325 T_genCode.printElapsedUs(Context, "genCode()"); |
241 if (Context->isVerbose()) { | 326 Func->dump("After initial x8632 codegen"); |
242 Str << "================ After initial x8632 codegen ================\n"; | |
243 Func->dump(); | |
244 } | |
245 | 327 |
// Stack frame mapping.
246 Timer T_genFrame; | 328 Timer T_genFrame; |
247 Func->genFrame(); | 329 Func->genFrame(); |
248 if (Func->hasError()) | 330 if (Func->hasError()) |
249 return; | 331 return; |
250 T_genFrame.printElapsedUs(Context, "genFrame()"); | 332 T_genFrame.printElapsedUs(Context, "genFrame()"); |
251 if (Context->isVerbose()) { | 333 Func->dump("After stack frame mapping"); |
252 Str << "================ After stack frame mapping ================\n"; | |
253 Func->dump(); | |
254 } | |
255 } | 334 } |
256 | 335 |
// Table of register name strings.  It is built by expanding
// REGX8632_TABLE with an X macro that keeps only the "name" field of each
// entry, so the array has one element per table row, in table order.
257 IceString TargetX8632::RegNames[] = { | 336 IceString TargetX8632::RegNames[] = { |
258 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ | 337 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ |
259 frameptr, isI8, isInt, isFP) \ | 338 frameptr, isI8, isInt, isFP) \ |
260 name, | 339 name, |
261 REGX8632_TABLE | 340 REGX8632_TABLE |
262 #undef X | 341 #undef X |
263 }; | 342 }; |
264 | 343 |
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
391 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); | 470 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); |
392 I != E; ++I) { | 471 I != E; ++I) { |
393 Variable *Var = *I; | 472 Variable *Var = *I; |
394 if (Var->hasReg()) { | 473 if (Var->hasReg()) { |
395 RegsUsed[Var->getRegNum()] = true; | 474 RegsUsed[Var->getRegNum()] = true; |
396 continue; | 475 continue; |
397 } | 476 } |
398 // An argument passed on the stack already has a stack slot. | 477 // An argument passed on the stack already has a stack slot. |
399 if (Var->getIsArg()) | 478 if (Var->getIsArg()) |
400 continue; | 479 continue; |
| 480 // An unreferenced variable doesn't need a stack slot. |
| 481 if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) |
| 482 continue; |
401 // A spill slot linked to a variable with a stack slot should reuse | 483 // A spill slot linked to a variable with a stack slot should reuse |
402 // that stack slot. | 484 // that stack slot. |
403 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { | 485 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { |
404 if (Variable *Linked = Var->getPreferredRegister()) { | 486 if (Variable *Linked = Var->getPreferredRegister()) { |
405 if (!Linked->hasReg()) | 487 if (!Linked->hasReg()) |
406 continue; | 488 continue; |
407 } | 489 } |
408 } | 490 } |
409 int32_t Increment = typeWidthInBytesOnStack(Var->getType()); | 491 int32_t Increment = typeWidthInBytesOnStack(Var->getType()); |
410 if (SimpleCoalescing) { | 492 if (SimpleCoalescing) { |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
476 int32_t NextStackOffset = 0; | 558 int32_t NextStackOffset = 0; |
477 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); | 559 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); |
478 I != E; ++I) { | 560 I != E; ++I) { |
479 Variable *Var = *I; | 561 Variable *Var = *I; |
480 if (Var->hasReg()) { | 562 if (Var->hasReg()) { |
481 RegsUsed[Var->getRegNum()] = true; | 563 RegsUsed[Var->getRegNum()] = true; |
482 continue; | 564 continue; |
483 } | 565 } |
484 if (Var->getIsArg()) | 566 if (Var->getIsArg()) |
485 continue; | 567 continue; |
| 568 if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) |
| 569 continue; |
486 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { | 570 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { |
487 if (Variable *Linked = Var->getPreferredRegister()) { | 571 if (Variable *Linked = Var->getPreferredRegister()) { |
488 if (!Linked->hasReg()) { | 572 if (!Linked->hasReg()) { |
489 // TODO: Make sure Linked has already been assigned a stack | 573 // TODO: Make sure Linked has already been assigned a stack |
490 // slot. | 574 // slot. |
491 Var->setStackOffset(Linked->getStackOffset()); | 575 Var->setStackOffset(Linked->getStackOffset()); |
492 continue; | 576 continue; |
493 } | 577 } |
494 } | 578 } |
495 } | 579 } |
(...skipping 1098 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1594 _mov(Dest, NonDefault); | 1678 _mov(Dest, NonDefault); |
1595 Context.insert(Label); | 1679 Context.insert(Label); |
1596 } | 1680 } |
1597 } | 1681 } |
1598 | 1682 |
// Lowers an icmp instruction.  In the NEW column the lowering first tries
// to fuse the compare with an immediately following conditional branch
// (non-i64 operands only), emitting cmp + conditional br and deleting the
// original branch.  Otherwise, i64 operands take a TableIcmp64-driven
// sequence of compares on the lo/hi halves (partly elided in this view),
// and all other types take cmp / mov Dest,1 / br cond / mov Dest,0.
// The IsSrc1ImmOrReg computation is moved up from its OLD position (old
// lines 1640-1651) so that the fused path can use it as well.
1599 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { | 1683 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
1600 Operand *Src0 = legalize(Inst->getSrc(0)); | 1684 Operand *Src0 = legalize(Inst->getSrc(0)); |
1601 Operand *Src1 = legalize(Inst->getSrc(1)); | 1685 Operand *Src1 = legalize(Inst->getSrc(1)); |
1602 Variable *Dest = Inst->getDest(); | 1686 Variable *Dest = Inst->getDest(); |
1603 | 1687 |
| 1688 // If Src1 is an immediate, or known to be a physical register, we can |
| 1689 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into |
| 1690 // a physical register. (Actually, either Src0 or Src1 can be chosen for |
| 1691 // the physical register, but unfortunately we have to commit to one or |
| 1692 // the other before register allocation.) |
| 1693 bool IsSrc1ImmOrReg = false; |
| 1694 if (llvm::isa<Constant>(Src1)) { |
| 1695 IsSrc1ImmOrReg = true; |
| 1696 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| 1697 if (Var->hasReg()) |
| 1698 IsSrc1ImmOrReg = true; |
| 1699 } |
| 1700 |
| 1701 // Try to fuse a compare immediately followed by a conditional branch. This |
| 1702 // is possible when the compare dest and the branch source operands are the |
| 1703 // same, and are their only uses. TODO: implement this optimization for i64. |
| 1704 if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { |
| 1705 if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && |
| 1706 Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { |
| 1707 Operand *Src0New = |
| 1708 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
| 1709 _cmp(Src0New, Src1); |
| 1710 _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), |
| 1711 NextBr->getTargetFalse()); |
| 1712 // Skip over the following branch instruction. |
| 1713 NextBr->setDeleted(); |
| 1714 Context.advanceNext(); |
| 1715 return; |
| 1716 } |
| 1717 } |
| 1718 |
1604 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | 1719 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
1605 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | 1720 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); |
1606 Constant *One = Ctx->getConstantInt(IceType_i32, 1); | 1721 Constant *One = Ctx->getConstantInt(IceType_i32, 1); |
// 64-bit compare: operate on the lo/hi halves, with branch conditions
// taken from TableIcmp64[Index].
1607 if (Src0->getType() == IceType_i64) { | 1722 if (Src0->getType() == IceType_i64) { |
1608 InstIcmp::ICond Condition = Inst->getCondition(); | 1723 InstIcmp::ICond Condition = Inst->getCondition(); |
1609 size_t Index = static_cast<size_t>(Condition); | 1724 size_t Index = static_cast<size_t>(Condition); |
1610 assert(Index < TableIcmp64Size); | 1725 assert(Index < TableIcmp64Size); |
1611 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | 1726 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
1612 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | 1727 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
1613 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { | 1728 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { |
(...skipping 16 matching lines...) Expand all Loading... |
1630 _cmp(loOperand(Src0), Src1LoRI); | 1745 _cmp(loOperand(Src0), Src1LoRI); |
1631 _br(TableIcmp64[Index].C3, LabelTrue); | 1746 _br(TableIcmp64[Index].C3, LabelTrue); |
1632 Context.insert(LabelFalse); | 1747 Context.insert(LabelFalse); |
1633 Context.insert(InstFakeUse::create(Func, Dest)); | 1748 Context.insert(InstFakeUse::create(Func, Dest)); |
1634 _mov(Dest, Zero); | 1749 _mov(Dest, Zero); |
1635 Context.insert(LabelTrue); | 1750 Context.insert(LabelTrue); |
1636 } | 1751 } |
1637 return; | 1752 return; |
1638 } | 1753 } |
1639 | 1754 |
1640 // If Src1 is an immediate, or known to be a physical register, we can | |
1641 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into | |
1642 // a physical register. (Actually, either Src0 or Src1 can be chosen for | |
1643 // the physical register, but unfortunately we have to commit to one or | |
1644 // the other before register allocation.) | |
1645 bool IsSrc1ImmOrReg = false; | |
1646 if (llvm::isa<Constant>(Src1)) { | |
1647 IsSrc1ImmOrReg = true; | |
1648 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | |
1649 if (Var->hasReg()) | |
1650 IsSrc1ImmOrReg = true; | |
1651 } | |
1652 | |
1653 // cmp b, c | 1755 // cmp b, c |
1654 Operand *Src0New = | 1756 Operand *Src0New = |
1655 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); | 1757 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
1656 InstX8632Label *Label = InstX8632Label::create(Func, this); | 1758 InstX8632Label *Label = InstX8632Label::create(Func, this); |
1657 _cmp(Src0New, Src1); | 1759 _cmp(Src0New, Src1); |
1658 _mov(Dest, One); | 1760 _mov(Dest, One); |
1659 _br(getIcmp32Mapping(Inst->getCondition()), Label); | 1761 _br(getIcmp32Mapping(Inst->getCondition()), Label); |
1660 Context.insert(InstFakeUse::create(Func, Dest)); | 1762 Context.insert(InstFakeUse::create(Func, Dest)); |
1661 _mov(Dest, Zero); | 1763 _mov(Dest, Zero); |
1662 Context.insert(Label); | 1764 Context.insert(Label); |
1663 } | 1765 } |
1664 | 1766 |
| 1767 namespace { |
| 1768 |
// Returns true iff Inst is an InstArithmetic whose opcode is
// InstArithmetic::Add.  A null Inst is accepted (dyn_cast_or_null) and
// yields false.
| 1769 bool isAdd(const Inst *Inst) { |
| 1770 if (const InstArithmetic *Arith = |
| 1771 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { |
| 1772 return (Arith->getOp() == InstArithmetic::Add); |
| 1773 } |
| 1774 return false; |
| 1775 } |
| 1776 |
// Pattern-matches the definition chain of Base (and Index) to fold address
// arithmetic into a Base + Index<<Shift form.  All four parameters are
// in/out: on return Base/Index/Shift may have been rewritten; Offset is
// currently never modified.  Nothing is done if Base is NULL or
// Base->isMultiblockLife() is true; note that this check is applied only
// to the initial Base, not to variables substituted later (see the TODOs).
//
// Patterns actually implemented in the loop below:
//   1. Base defined by an InstAssign from a Variable  ==> Base = that var.
//   2. Index == NULL and Base defined by Add of two Variables
//      ==> Base = operand 0, Index = operand 1, Shift = 0.
//   3. Index defined by Mul of a Variable (operand 0) and an i32
//      ConstantInteger (operand 1) equal to 1, 2, 4 or 8, with the
//      combined shift <= 3  ==> Index = that var, Shift += log2(Const).
// The remaining pattern comments near the end of the loop describe
// transformations that are not implemented yet.
//
// NOTE(review): Shift is int32_t while LogMult is uint32_t, so
// "Shift + LogMult <= 3" is evaluated in unsigned arithmetic.  The callers
// visible in this file pass Shift == 0; confirm Shift can never be
// negative on entry.
| 1777 void computeAddressOpt(Variable *&Base, Variable *&Index, int32_t &Shift, |
| 1778 int32_t &Offset) { |
| 1779 (void)Offset; // TODO: pattern-match for non-zero offsets. |
| 1780 if (Base == NULL) |
| 1781 return; |
| 1782 // If the Base has more than one use or is live across multiple |
| 1783 // blocks, then don't go further. Alternatively (?), never consider |
| 1784 // a transformation that would change a variable that is currently |
| 1785 // *not* live across basic block boundaries into one that *is*. |
| 1786 if (Base->isMultiblockLife() /* || Base->getUseCount() > 1*/) |
| 1787 return; |
| 1788 |
// Each successful match rewrites Base/Index/Shift and restarts the loop;
// the loop exits via the final break once no pattern matches.
| 1789 while (true) { |
| 1790 // Base is Base=Var ==> |
| 1791 // set Base=Var |
| 1792 const Inst *BaseInst = Base->getDefinition(); |
| 1793 Operand *BaseOperand0 = BaseInst ? BaseInst->getSrc(0) : NULL; |
| 1794 Variable *BaseVariable0 = llvm::dyn_cast_or_null<Variable>(BaseOperand0); |
| 1795 // TODO: Helper function for all instances of assignment |
| 1796 // transitivity. |
| 1797 if (BaseInst && llvm::isa<InstAssign>(BaseInst) && BaseVariable0 && |
| 1798 // TODO: ensure BaseVariable0 stays single-BB |
| 1799 true) { |
| 1800 Base = BaseVariable0; |
| 1801 continue; |
| 1802 } |
| 1803 |
| 1804 // Index is Index=Var ==> |
| 1805 // set Index=Var |
| 1806 |
| 1807 // Index==NULL && Base is Base=Var1+Var2 ==> |
| 1808 // set Base=Var1, Index=Var2, Shift=0 |
| 1809 Operand *BaseOperand1 = |
| 1810 BaseInst && BaseInst->getSrcSize() >= 2 ? BaseInst->getSrc(1) : NULL; |
| 1811 Variable *BaseVariable1 = llvm::dyn_cast_or_null<Variable>(BaseOperand1); |
| 1812 if (Index == NULL && isAdd(BaseInst) && BaseVariable0 && BaseVariable1 && |
| 1813 // TODO: ensure BaseVariable0 and BaseVariable1 stay single-BB |
| 1814 true) { |
| 1815 Base = BaseVariable0; |
| 1816 Index = BaseVariable1; |
| 1817 Shift = 0; // should already have been 0 |
| 1818 continue; |
| 1819 } |
| 1820 |
| 1821 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==> |
| 1822 // Index=Var, Shift+=log2(Const) |
| 1823 const Inst *IndexInst = Index ? Index->getDefinition() : NULL; |
| 1824 if (const InstArithmetic *ArithInst = |
| 1825 llvm::dyn_cast_or_null<InstArithmetic>(IndexInst)) { |
| 1826 Operand *IndexOperand0 = ArithInst->getSrc(0); |
| 1827 Variable *IndexVariable0 = llvm::dyn_cast<Variable>(IndexOperand0); |
| 1828 Operand *IndexOperand1 = ArithInst->getSrc(1); |
| 1829 ConstantInteger *IndexConstant1 = |
| 1830 llvm::dyn_cast<ConstantInteger>(IndexOperand1); |
| 1831 if (ArithInst->getOp() == InstArithmetic::Mul && IndexVariable0 && |
| 1832 IndexOperand1->getType() == IceType_i32 && IndexConstant1) { |
| 1833 uint64_t Mult = IndexConstant1->getValue(); |
| 1834 uint32_t LogMult; |
// Map the multiplier to its log2.  Any multiplier other than 1, 2, 4 or 8
// gets LogMult = 4, which makes the "<= 3" test below fail for any
// non-negative Shift.
| 1835 switch (Mult) { |
| 1836 case 1: |
| 1837 LogMult = 0; |
| 1838 break; |
| 1839 case 2: |
| 1840 LogMult = 1; |
| 1841 break; |
| 1842 case 4: |
| 1843 LogMult = 2; |
| 1844 break; |
| 1845 case 8: |
| 1846 LogMult = 3; |
| 1847 break; |
| 1848 default: |
| 1849 LogMult = 4; |
| 1850 break; |
| 1851 } |
| 1852 if (Shift + LogMult <= 3) { |
| 1853 Index = IndexVariable0; |
| 1854 Shift += LogMult; |
| 1855 continue; |
| 1856 } |
| 1857 } |
| 1858 } |
| 1859 |
// The patterns listed from here to the break are comments only; no code
// implements them yet.
| 1860 // Index is Index=Var<<Const && Const+Shift<=3 ==> |
| 1861 // Index=Var, Shift+=Const |
| 1862 |
| 1863 // Index is Index=Const*Var && log2(Const)+Shift<=3 ==> |
| 1864 // Index=Var, Shift+=log2(Const) |
| 1865 |
| 1866 // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==> |
| 1867 // swap(Index,Base) |
| 1868 // Similar for Base=Const*Var and Base=Var<<Const |
| 1869 |
| 1870 // Base is Base=Var+Const ==> |
| 1871 // set Base=Var, Offset+=Const |
| 1872 |
| 1873 // Base is Base=Const+Var ==> |
| 1874 // set Base=Var, Offset+=Const |
| 1875 |
| 1876 // Base is Base=Var-Const ==> |
| 1877 // set Base=Var, Offset-=Const |
| 1878 |
| 1879 // Index is Index=Var+Const ==> |
| 1880 // set Index=Var, Offset+=(Const<<Shift) |
| 1881 |
| 1882 // Index is Index=Const+Var ==> |
| 1883 // set Index=Var, Offset+=(Const<<Shift) |
| 1884 |
| 1885 // Index is Index=Var-Const ==> |
| 1886 // set Index=Var, Offset-=(Const<<Shift) |
| 1887 |
| 1888 // TODO: consider overflow issues with respect to Offset. |
| 1889 // TODO: handle symbolic constants. |
| 1890 break; |
| 1891 } |
| 1892 } |
| 1893 |
| 1894 } // anonymous namespace |
| 1895 |
// Lowers a load.  The source address is wrapped in an OperandX8632Mem
// unless it already is one.  In the NEW column, if the next instruction is
// an InstArithmetic that consumes the loaded value as its last use, the
// load is folded into a newly created arithmetic instruction (the original
// arithmetic is marked deleted and skipped) and lowered through
// lowerArithmetic().  Otherwise the load is lowered as an assignment from
// the memory operand.
1665 void TargetX8632::lowerLoad(const InstLoad *Inst) { | 1896 void TargetX8632::lowerLoad(const InstLoad *Inst) { |
1666 // A Load instruction can be treated the same as an Assign | 1897 // A Load instruction can be treated the same as an Assign |
1667 // instruction, after the source operand is transformed into an | 1898 // instruction, after the source operand is transformed into an |
1668 // OperandX8632Mem operand. Note that the address mode | 1899 // OperandX8632Mem operand. Note that the address mode |
1669 // optimization already creates an OperandX8632Mem operand, so it | 1900 // optimization already creates an OperandX8632Mem operand, so it |
1670 // doesn't need another level of transformation. | 1901 // doesn't need another level of transformation. |
1671 Type Ty = Inst->getDest()->getType(); | 1902 Type Ty = Inst->getDest()->getType(); |
1672 Operand *Src0 = Inst->getSourceAddress(); | 1903 Operand *Src0 = Inst->getSourceAddress(); |
1673 // Address mode optimization already creates an OperandX8632Mem | 1904 // Address mode optimization already creates an OperandX8632Mem |
1674 // operand, so it doesn't need another level of transformation. | 1905 // operand, so it doesn't need another level of transformation. |
1675 if (!llvm::isa<OperandX8632Mem>(Src0)) { | 1906 if (!llvm::isa<OperandX8632Mem>(Src0)) { |
// The address is expected to be either a Variable (used as the base) or a
// Constant (used as the offset); the assert enforces that one matches.
1676 Variable *Base = llvm::dyn_cast<Variable>(Src0); | 1907 Variable *Base = llvm::dyn_cast<Variable>(Src0); |
1677 Constant *Offset = llvm::dyn_cast<Constant>(Src0); | 1908 Constant *Offset = llvm::dyn_cast<Constant>(Src0); |
1678 assert(Base || Offset); | 1909 assert(Base || Offset); |
1679 Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset); | 1910 Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset); |
1680 } | 1911 } |
1681 | 1912 |
| 1913 // Fuse this load with a subsequent Arithmetic instruction in the |
| 1914 // following situations: |
| 1915 // a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b |
| 1916 // a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true |
| 1917 // |
| 1918 // TODO: Clean up and test thoroughly. |
| 1919 // |
| 1920 // TODO: Why limit to Arithmetic instructions? This could probably be |
| 1921 // applied to most any instruction type. Look at all source operands |
| 1922 // in the following instruction, and if there is one instance of the |
| 1923 // load instruction's dest variable, and that instruction ends that |
| 1924 // variable's live range, then make the substitution. Deal with |
| 1925 // commutativity optimization in the arithmetic instruction lowering. |
| 1926 InstArithmetic *NewArith = NULL; |
| 1927 if (InstArithmetic *Arith = |
| 1928 llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) { |
| 1929 Variable *DestLoad = Inst->getDest(); |
| 1930 Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0)); |
| 1931 Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1)); |
// In both cases the memory operand ends up as the second source of the new
// arithmetic instruction.
| 1932 if (Src1Arith == DestLoad && Arith->isLastUse(Src1Arith) && |
| 1933 DestLoad != Src0Arith) { |
| 1934 NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(), |
| 1935 Arith->getSrc(0), Src0); |
| 1936 } else if (Src0Arith == DestLoad && Arith->isCommutative() && |
| 1937 Arith->isLastUse(Src0Arith) && DestLoad != Src1Arith) { |
| 1938 NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(), |
| 1939 Arith->getSrc(1), Src0); |
| 1940 } |
| 1941 if (NewArith) { |
| 1942 Arith->setDeleted(); |
| 1943 Context.advanceNext(); |
| 1944 lowerArithmetic(NewArith); |
| 1945 return; |
| 1946 } |
| 1947 } |
| 1948 |
1682 InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0); | 1949 InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0); |
1683 lowerAssign(Assign); | 1950 lowerAssign(Assign); |
1684 } | 1951 } |
1685 | 1952 |
// Address mode optimization for the instruction at Context.getCur(), which
// is treated as a load: its source 0 is the address and its dest is the
// loaded value.  computeAddressOpt() is run with Base seeded from the
// address when that address is a Variable (Base is NULL otherwise, and
// nothing happens).  If the optimization changed Base, the original
// instruction is marked deleted and a new InstLoad is inserted whose
// address is an OperandX8632Mem built from Base, Offset, Index and Shift,
// typed with Dest's type.
| 1953 void TargetX8632::doAddressOptLoad() { |
| 1954 Inst *Inst = *Context.getCur(); |
| 1955 Variable *Dest = Inst->getDest(); |
| 1956 Operand *Addr = Inst->getSrc(0); |
| 1957 Variable *Index = NULL; |
| 1958 int32_t Shift = 0; |
| 1959 int32_t Offset = 0; // TODO: make Constant |
| 1960 Variable *Base = llvm::dyn_cast<Variable>(Addr); |
| 1961 computeAddressOpt(Base, Index, Shift, Offset); |
// Addr != Base holds only when computeAddressOpt() replaced Base.
| 1962 if (Base && Addr != Base) { |
| 1963 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset); |
| 1964 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, |
| 1965 Shift); |
| 1966 Inst->setDeleted(); |
| 1967 Context.insert(InstLoad::create(Func, Dest, Addr)); |
| 1968 } |
| 1969 } |
| 1970 |
// Phi instructions are not lowered here; reaching this function records an
// error on Func.  (translateO2/translateOm1 place Phi loads/stores and
// delete Phis before code generation.)
1686 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) { | 1971 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) { |
1687 Func->setError("Phi found in regular instruction list"); | 1972 Func->setError("Phi found in regular instruction list"); |
1688 } | 1973 } |
1689 | 1974 |
1690 void TargetX8632::lowerRet(const InstRet *Inst) { | 1975 void TargetX8632::lowerRet(const InstRet *Inst) { |
1691 Variable *Reg = NULL; | 1976 Variable *Reg = NULL; |
1692 if (Inst->hasRetValue()) { | 1977 if (Inst->hasRetValue()) { |
1693 Operand *Src0 = legalize(Inst->getRetValue()); | 1978 Operand *Src0 = legalize(Inst->getRetValue()); |
1694 if (Src0->getType() == IceType_i64) { | 1979 if (Src0->getType() == IceType_i64) { |
1695 Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax); | 1980 Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax); |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1774 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); | 2059 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); |
1775 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); | 2060 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); |
1776 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | 2061 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); |
1777 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | 2062 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); |
1778 } else { | 2063 } else { |
1779 Value = legalize(Value, Legal_Reg | Legal_Imm, true); | 2064 Value = legalize(Value, Legal_Reg | Legal_Imm, true); |
1780 _store(Value, NewAddr); | 2065 _store(Value, NewAddr); |
1781 } | 2066 } |
1782 } | 2067 } |
1783 | 2068 |
// Address mode optimization for the store at Context.getCur().  The
// instruction is cast to InstStore with llvm::cast, so it must be a store.
// computeAddressOpt() is run with Base seeded from the store address when
// that address is a Variable (Base is NULL otherwise, and nothing
// happens).  If the optimization changed Base, the original store is
// marked deleted and a new InstStore is inserted whose address is an
// OperandX8632Mem built from Base, Offset, Index and Shift, typed with the
// stored data's type.
| 2069 void TargetX8632::doAddressOptStore() { |
| 2070 InstStore *Inst = llvm::cast<InstStore>(*Context.getCur()); |
| 2071 Operand *Data = Inst->getData(); |
| 2072 Operand *Addr = Inst->getAddr(); |
| 2073 Variable *Index = NULL; |
| 2074 int32_t Shift = 0; |
| 2075 int32_t Offset = 0; // TODO: make Constant |
| 2076 Variable *Base = llvm::dyn_cast<Variable>(Addr); |
| 2077 computeAddressOpt(Base, Index, Shift, Offset); |
// Addr != Base holds only when computeAddressOpt() replaced Base.
| 2078 if (Base && Addr != Base) { |
| 2079 Constant *OffsetOp = Ctx->getConstantInt(IceType_i32, Offset); |
| 2080 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, |
| 2081 Shift); |
| 2082 Inst->setDeleted(); |
| 2083 Context.insert(InstStore::create(Func, Data, Addr)); |
| 2084 } |
| 2085 } |
| 2086 |
1784 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { | 2087 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { |
1785 // This implements the most naive possible lowering. | 2088 // This implements the most naive possible lowering. |
1786 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default | 2089 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default |
1787 Operand *Src0 = Inst->getComparison(); | 2090 Operand *Src0 = Inst->getComparison(); |
1788 SizeT NumCases = Inst->getNumCases(); | 2091 SizeT NumCases = Inst->getNumCases(); |
1789 // OK, we'll be slightly less naive by forcing Src into a physical | 2092 // OK, we'll be slightly less naive by forcing Src into a physical |
1790 // register if there are 2 or more uses. | 2093 // register if there are 2 or more uses. |
1791 if (NumCases >= 2) | 2094 if (NumCases >= 2) |
1792 Src0 = legalizeToVar(Src0, true); | 2095 Src0 = legalizeToVar(Src0, true); |
1793 else | 2096 else |
(...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1897 // there was some prior register allocation pass that made register | 2200 // there was some prior register allocation pass that made register |
1898 // assignments, those registers need to be black-listed here as | 2201 // assignments, those registers need to be black-listed here as |
1899 // well. | 2202 // well. |
1900 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E; | 2203 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E; |
1901 ++I) { | 2204 ++I) { |
1902 const Inst *Inst = *I; | 2205 const Inst *Inst = *I; |
1903 if (Inst->isDeleted()) | 2206 if (Inst->isDeleted()) |
1904 continue; | 2207 continue; |
1905 if (llvm::isa<InstFakeKill>(Inst)) | 2208 if (llvm::isa<InstFakeKill>(Inst)) |
1906 continue; | 2209 continue; |
1907 SizeT VarIndex = 0; | |
1908 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) { | 2210 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) { |
1909 Operand *Src = Inst->getSrc(SrcNum); | 2211 Operand *Src = Inst->getSrc(SrcNum); |
1910 SizeT NumVars = Src->getNumVars(); | 2212 SizeT NumVars = Src->getNumVars(); |
1911 for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) { | 2213 for (SizeT J = 0; J < NumVars; ++J) { |
1912 const Variable *Var = Src->getVar(J); | 2214 const Variable *Var = Src->getVar(J); |
1913 if (!Var->hasReg()) | 2215 if (!Var->hasReg()) |
1914 continue; | 2216 continue; |
1915 WhiteList[Var->getRegNum()] = false; | 2217 WhiteList[Var->getRegNum()] = false; |
1916 } | 2218 } |
1917 } | 2219 } |
1918 } | 2220 } |
1919 // The second pass colors infinite-weight variables. | 2221 // The second pass colors infinite-weight variables. |
1920 llvm::SmallBitVector AvailableRegisters = WhiteList; | 2222 llvm::SmallBitVector AvailableRegisters = WhiteList; |
1921 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E; | 2223 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E; |
1922 ++I) { | 2224 ++I) { |
1923 const Inst *Inst = *I; | 2225 const Inst *Inst = *I; |
1924 if (Inst->isDeleted()) | 2226 if (Inst->isDeleted()) |
1925 continue; | 2227 continue; |
1926 SizeT VarIndex = 0; | |
1927 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) { | 2228 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) { |
1928 Operand *Src = Inst->getSrc(SrcNum); | 2229 Operand *Src = Inst->getSrc(SrcNum); |
1929 SizeT NumVars = Src->getNumVars(); | 2230 SizeT NumVars = Src->getNumVars(); |
1930 for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) { | 2231 for (SizeT J = 0; J < NumVars; ++J) { |
1931 Variable *Var = Src->getVar(J); | 2232 Variable *Var = Src->getVar(J); |
1932 if (Var->hasReg()) | 2233 if (Var->hasReg()) |
1933 continue; | 2234 continue; |
1934 if (!Var->getWeight().isInf()) | 2235 if (!Var->getWeight().isInf()) |
1935 continue; | 2236 continue; |
1936 llvm::SmallBitVector AvailableTypedRegisters = | 2237 llvm::SmallBitVector AvailableTypedRegisters = |
1937 AvailableRegisters & getRegisterSetForType(Var->getType()); | 2238 AvailableRegisters & getRegisterSetForType(Var->getType()); |
1938 if (!AvailableTypedRegisters.any()) { | 2239 if (!AvailableTypedRegisters.any()) { |
1939 // This is a hack in case we run out of physical registers | 2240 // This is a hack in case we run out of physical registers |
1940 // due to an excessive number of "push" instructions from | 2241 // due to an excessive number of "push" instructions from |
1941 // lowering a call. | 2242 // lowering a call. |
1942 AvailableRegisters = WhiteList; | 2243 AvailableRegisters = WhiteList; |
1943 AvailableTypedRegisters = | 2244 AvailableTypedRegisters = |
1944 AvailableRegisters & getRegisterSetForType(Var->getType()); | 2245 AvailableRegisters & getRegisterSetForType(Var->getType()); |
1945 } | 2246 } |
1946 assert(AvailableTypedRegisters.any()); | 2247 assert(AvailableTypedRegisters.any()); |
1947 int32_t RegNum = AvailableTypedRegisters.find_first(); | 2248 int32_t RegNum = AvailableTypedRegisters.find_first(); |
1948 Var->setRegNum(RegNum); | 2249 Var->setRegNum(RegNum); |
1949 AvailableRegisters[RegNum] = false; | 2250 AvailableRegisters[RegNum] = false; |
1950 } | 2251 } |
1951 } | 2252 } |
1952 } | 2253 } |
1953 } | 2254 } |
1954 | 2255 |
// Emits an f32 constant as a memory reference to its pool label:
// "dword ptr [L$" + IceType_f32 + "$" + pool entry ID + "]".  The NEW
// column takes the GlobalContext directly instead of reaching it through
// a Cfg.
1955 template <> void ConstantFloat::emit(const Cfg *Func) const { | 2256 template <> void ConstantFloat::emit(GlobalContext *Ctx) const { |
1956 Ostream &Str = Func->getContext()->getStrEmit(); | 2257 Ostream &Str = Ctx->getStrEmit(); |
1957 // It would be better to prefix with ".L$" instead of "L$", but | 2258 // It would be better to prefix with ".L$" instead of "L$", but |
1958 // llvm-mc doesn't parse "dword ptr [.L$foo]". | 2259 // llvm-mc doesn't parse "dword ptr [.L$foo]". |
1959 Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]"; | 2260 Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]"; |
1960 } | 2261 } |
1961 | 2262 |
// Emits an f64 constant as a memory reference to its pool label:
// "qword ptr [L$" + IceType_f64 + "$" + pool entry ID + "]".  The NEW
// column takes the GlobalContext directly instead of reaching it through
// a Cfg.
1962 template <> void ConstantDouble::emit(const Cfg *Func) const { | 2263 template <> void ConstantDouble::emit(GlobalContext *Ctx) const { |
1963 Ostream &Str = Func->getContext()->getStrEmit(); | 2264 Ostream &Str = Ctx->getStrEmit(); |
1964 Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]"; | 2265 Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]"; |
1965 } | 2266 } |
1966 | 2267 |
1967 } // end of namespace Ice | 2268 } // end of namespace Ice |
OLD | NEW |