| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
| 11 // consists almost entirely of the lowering sequence for each | 11 // consists almost entirely of the lowering sequence for each |
| 12 // high-level instruction. It also implements | 12 // high-level instruction. |
| 13 // TargetX8632Fast::postLower() which does the simplest possible | |
| 14 // register allocation for the "fast" target. | |
| 15 // | 13 // |
| 16 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
| 17 | 15 |
| 18 #include "llvm/ADT/DenseMap.h" | 16 #include "llvm/ADT/DenseMap.h" |
| 19 #include "llvm/Support/CommandLine.h" | 17 #include "llvm/Support/CommandLine.h" |
| 20 #include "llvm/Support/MathExtras.h" | 18 #include "llvm/Support/MathExtras.h" |
| 21 | 19 |
| 22 #include "IceCfg.h" | 20 #include "IceCfg.h" |
| 23 #include "IceCfgNode.h" | 21 #include "IceCfgNode.h" |
| 24 #include "IceClFlags.h" | 22 #include "IceClFlags.h" |
| (...skipping 343 matching lines...) |
| 368 Func->liveness(Liveness_Intervals); | 366 Func->liveness(Liveness_Intervals); |
| 369 if (Func->hasError()) | 367 if (Func->hasError()) |
| 370 return; | 368 return; |
| 371 // Validate the live range computations. The expensive validation | 369 // Validate the live range computations. The expensive validation |
| 372 // call is deliberately only made when assertions are enabled. | 370 // call is deliberately only made when assertions are enabled. |
| 373 assert(Func->validateLiveness()); | 371 assert(Func->validateLiveness()); |
| 374 // The post-codegen dump is done here, after liveness analysis and | 372 // The post-codegen dump is done here, after liveness analysis and |
| 375 // associated cleanup, to make the dump cleaner and more useful. | 373 // associated cleanup, to make the dump cleaner and more useful. |
| 376 Func->dump("After initial x8632 codegen"); | 374 Func->dump("After initial x8632 codegen"); |
| 377 Func->getVMetadata()->init(VMK_All); | 375 Func->getVMetadata()->init(VMK_All); |
| 378 regAlloc(); | 376 regAlloc(RAK_Global); |
| 379 if (Func->hasError()) | 377 if (Func->hasError()) |
| 380 return; | 378 return; |
| 381 Func->dump("After linear scan regalloc"); | 379 Func->dump("After linear scan regalloc"); |
| 382 | 380 |
| 383 if (Ctx->getFlags().PhiEdgeSplit) { | 381 if (Ctx->getFlags().PhiEdgeSplit) { |
| 384 Func->advancedPhiLowering(); | 382 Func->advancedPhiLowering(); |
| 385 Func->dump("After advanced Phi lowering"); | 383 Func->dump("After advanced Phi lowering"); |
| 386 } | 384 } |
| 387 | 385 |
| 388 // Stack frame mapping. | 386 // Stack frame mapping. |
| (...skipping 33 matching lines...) |
| 422 return; | 420 return; |
| 423 Func->dump("After Phi lowering"); | 421 Func->dump("After Phi lowering"); |
| 424 | 422 |
| 425 Func->doArgLowering(); | 423 Func->doArgLowering(); |
| 426 | 424 |
| 427 Func->genCode(); | 425 Func->genCode(); |
| 428 if (Func->hasError()) | 426 if (Func->hasError()) |
| 429 return; | 427 return; |
| 430 Func->dump("After initial x8632 codegen"); | 428 Func->dump("After initial x8632 codegen"); |
| 431 | 429 |
| 430 regAlloc(RAK_InfOnly); |
| 431 if (Func->hasError()) |
| 432 return; |
| 433 Func->dump("After regalloc of infinite-weight variables"); |
| 434 |
| 432 Func->genFrame(); | 435 Func->genFrame(); |
| 433 if (Func->hasError()) | 436 if (Func->hasError()) |
| 434 return; | 437 return; |
| 435 Func->dump("After stack frame mapping"); | 438 Func->dump("After stack frame mapping"); |
| 436 | 439 |
| 437 // Nop insertion | 440 // Nop insertion |
| 438 if (shouldDoNopInsertion()) { | 441 if (shouldDoNopInsertion()) { |
| 439 Func->doNopInsertion(); | 442 Func->doNopInsertion(); |
| 440 } | 443 } |
| 441 } | 444 } |
| (...skipping 1367 matching lines...) |
| 1809 // eliminated after lowering, we would need to ensure that the | 1812 // eliminated after lowering, we would need to ensure that the |
| 1810 // pre-call and the post-call esp adjustment get eliminated as well. | 1813 // pre-call and the post-call esp adjustment get eliminated as well. |
| 1811 if (ParameterAreaSizeBytes) { | 1814 if (ParameterAreaSizeBytes) { |
| 1812 _adjust_stack(ParameterAreaSizeBytes); | 1815 _adjust_stack(ParameterAreaSizeBytes); |
| 1813 } | 1816 } |
| 1814 | 1817 |
| 1815 // Copy arguments that are passed on the stack to the appropriate | 1818 // Copy arguments that are passed on the stack to the appropriate |
| 1816 // stack locations. | 1819 // stack locations. |
| 1817 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | 1820 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { |
| 1818 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | 1821 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); |
| 1819 // TODO: Consider calling postLower() here to reduce the register | |
| 1820 // pressure associated with using too many infinite weight | |
| 1821 // temporaries when lowering the call sequence in -Om1 mode. | |
| 1822 } | 1822 } |
| 1823 | 1823 |
| 1824 // Copy arguments to be passed in registers to the appropriate | 1824 // Copy arguments to be passed in registers to the appropriate |
| 1825 // registers. | 1825 // registers. |
| 1826 // TODO: Investigate the impact of lowering arguments passed in | 1826 // TODO: Investigate the impact of lowering arguments passed in |
| 1827 // registers after lowering stack arguments as opposed to the other | 1827 // registers after lowering stack arguments as opposed to the other |
| 1828 // way around. Lowering register arguments after stack arguments may | 1828 // way around. Lowering register arguments after stack arguments may |
| 1829 // reduce register pressure. On the other hand, lowering register | 1829 // reduce register pressure. On the other hand, lowering register |
| 1830 // arguments first (before stack arguments) may result in more compact | 1830 // arguments first (before stack arguments) may result in more compact |
| 1831 // code, as the memory operand displacements may end up being smaller | 1831 // code, as the memory operand displacements may end up being smaller |
| (...skipping 2273 matching lines...) |
| 4105 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); | 4105 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); |
| 4106 | 4106 |
| 4107 // Perform the arithmetic as a scalar operation. | 4107 // Perform the arithmetic as a scalar operation. |
| 4108 Variable *Res = Func->makeVariable(ElementTy); | 4108 Variable *Res = Func->makeVariable(ElementTy); |
| 4109 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); | 4109 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); |
| 4110 | 4110 |
| 4111 // Insert the result into position. | 4111 // Insert the result into position. |
| 4112 Variable *DestT = Func->makeVariable(Ty); | 4112 Variable *DestT = Func->makeVariable(Ty); |
| 4113 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); | 4113 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); |
| 4114 T = DestT; | 4114 T = DestT; |
| 4115 // TODO(stichnot): Use postLower() in -Om1 mode to avoid buildup of | |
| 4116 // infinite weight temporaries. | |
| 4117 } | 4115 } |
| 4118 | 4116 |
| 4119 lowerAssign(InstAssign::create(Func, Dest, T)); | 4117 lowerAssign(InstAssign::create(Func, Dest, T)); |
| 4120 } | 4118 } |
| 4121 | 4119 |
| 4122 // The following pattern occurs often in lowered C and C++ code: | 4120 // The following pattern occurs often in lowered C and C++ code: |
| 4123 // | 4121 // |
| 4124 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 4122 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
| 4125 // %cmp.ext = sext <n x i1> %cmp to <n x ty> | 4123 // %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| 4126 // | 4124 // |
| (...skipping 66 matching lines...) |
| 4193 // Lower the pre-ordered list of assignments into mov instructions. | 4191 // Lower the pre-ordered list of assignments into mov instructions. |
| 4194 // Also has to do some ad-hoc register allocation as necessary. | 4192 // Also has to do some ad-hoc register allocation as necessary. |
| 4195 void TargetX8632::lowerPhiAssignments(CfgNode *Node, | 4193 void TargetX8632::lowerPhiAssignments(CfgNode *Node, |
| 4196 const AssignList &Assignments) { | 4194 const AssignList &Assignments) { |
| 4197 // Check that this is a properly initialized shell of a node. | 4195 // Check that this is a properly initialized shell of a node. |
| 4198 assert(Node->getOutEdges().size() == 1); | 4196 assert(Node->getOutEdges().size() == 1); |
| 4199 assert(Node->getInsts().empty()); | 4197 assert(Node->getInsts().empty()); |
| 4200 assert(Node->getPhis().empty()); | 4198 assert(Node->getPhis().empty()); |
| 4201 CfgNode *Succ = Node->getOutEdges().front(); | 4199 CfgNode *Succ = Node->getOutEdges().front(); |
| 4202 getContext().init(Node); | 4200 getContext().init(Node); |
| 4203 // Register set setup similar to regAlloc() and postLower(). | 4201 // Register set setup similar to regAlloc(). |
| 4204 RegSetMask RegInclude = RegSet_All; | 4202 RegSetMask RegInclude = RegSet_All; |
| 4205 RegSetMask RegExclude = RegSet_StackPointer; | 4203 RegSetMask RegExclude = RegSet_StackPointer; |
| 4206 if (hasFramePointer()) | 4204 if (hasFramePointer()) |
| 4207 RegExclude |= RegSet_FramePointer; | 4205 RegExclude |= RegSet_FramePointer; |
| 4208 llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude); | 4206 llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude); |
| 4209 bool NeedsRegs = false; | 4207 bool NeedsRegs = false; |
| 4210 // Initialize the set of available registers to the set of what is | 4208 // Initialize the set of available registers to the set of what is |
| 4211 // available (not live) at the beginning of the successor block, | 4209 // available (not live) at the beginning of the successor block, |
| 4212 // minus all registers used as Dest operands in the Assignments. To | 4210 // minus all registers used as Dest operands in the Assignments. To |
| 4213 // do this, we start off assuming all registers are available, then | 4211 // do this, we start off assuming all registers are available, then |
| (...skipping 291 matching lines...) |
| 4505 assert(Type != IceType_i64); | 4503 assert(Type != IceType_i64); |
| 4506 Variable *Reg = Func->makeVariable(Type); | 4504 Variable *Reg = Func->makeVariable(Type); |
| 4507 if (RegNum == Variable::NoRegister) | 4505 if (RegNum == Variable::NoRegister) |
| 4508 Reg->setWeightInfinite(); | 4506 Reg->setWeightInfinite(); |
| 4509 else | 4507 else |
| 4510 Reg->setRegNum(RegNum); | 4508 Reg->setRegNum(RegNum); |
| 4511 return Reg; | 4509 return Reg; |
| 4512 } | 4510 } |
| 4513 | 4511 |
| 4514 void TargetX8632::postLower() { | 4512 void TargetX8632::postLower() { |
| 4515 if (Ctx->getOptLevel() != Opt_m1) { | 4513 if (Ctx->getOptLevel() == Opt_m1) |
| 4516 // Find two-address non-SSA instructions where Dest==Src0, and set | |
| 4517 // the DestNonKillable flag to keep liveness analysis consistent. | |
| 4518 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) { | |
| 4519 if (Inst->isDeleted()) | |
| 4520 continue; | |
| 4521 if (Variable *Dest = Inst->getDest()) { | |
| 4522 // TODO(stichnot): We may need to consider all source | |
| 4523 // operands, not just the first one, if using 3-address | |
| 4524 // instructions. | |
| 4525 if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest) | |
| 4526 Inst->setDestNonKillable(); | |
| 4527 } | |
| 4528 } | |
| 4529 return; | 4514 return; |
| 4530 } | 4515 // Find two-address non-SSA instructions where Dest==Src0, and set |
| 4531 // TODO: Avoid recomputing WhiteList every instruction. | 4516 // the DestNonKillable flag to keep liveness analysis consistent. |
| 4532 RegSetMask RegInclude = RegSet_All; | |
| 4533 RegSetMask RegExclude = RegSet_StackPointer; | |
| 4534 if (hasFramePointer()) | |
| 4535 RegExclude |= RegSet_FramePointer; | |
| 4536 llvm::SmallBitVector WhiteList = getRegisterSet(RegInclude, RegExclude); | |
| 4537 // Make one pass to black-list pre-colored registers. TODO: If | |
| 4538 // there was some prior register allocation pass that made register | |
| 4539 // assignments, those registers need to be black-listed here as | |
| 4540 // well. | |
| 4541 llvm::DenseMap<const Variable *, const Inst *> LastUses; | |
| 4542 // The first pass also keeps track of which instruction is the last | |
| 4543 // use for each infinite-weight variable. After the last use, the | |
| 4544 // variable is released to the free list. | |
| 4545 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) { | 4517 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) { |
| 4546 if (Inst->isDeleted()) | 4518 if (Inst->isDeleted()) |
| 4547 continue; | 4519 continue; |
| 4548 // Don't consider a FakeKill instruction, because (currently) it | 4520 if (Variable *Dest = Inst->getDest()) { |
| 4549 // is only used to kill all scratch registers at a call site, and | 4521 // TODO(stichnot): We may need to consider all source |
| 4550 // we don't want to black-list all scratch registers during the | 4522 // operands, not just the first one, if using 3-address |
| 4551 // call lowering. This could become a problem since it relies on | 4523 // instructions. |
| 4552 // the lowering sequence not keeping any infinite-weight variables | 4524 if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest) |
| 4553 // live across a call. TODO(stichnot): Consider replacing this | 4525 Inst->setDestNonKillable(); |
| 4554 // whole postLower() implementation with a robust local register | |
| 4555 // allocator, for example compute live ranges only for pre-colored | |
| 4556 // and infinite-weight variables and run the existing linear-scan | |
| 4557 // allocator. | |
| 4558 assert(!llvm::isa<InstFakeKill>(Inst) || Inst->getSrcSize() == 0); | |
| 4559 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) { | |
| 4560 Operand *Src = Inst->getSrc(SrcNum); | |
| 4561 SizeT NumVars = Src->getNumVars(); | |
| 4562 for (SizeT J = 0; J < NumVars; ++J) { | |
| 4563 const Variable *Var = Src->getVar(J); | |
| 4564 // Track last uses of all variables, regardless of whether | |
| 4565 // they are pre-colored or infinite-weight. | |
| 4566 LastUses[Var] = Inst; | |
| 4567 if (!Var->hasReg()) | |
| 4568 continue; | |
| 4569 WhiteList[Var->getRegNum()] = false; | |
| 4570 } | |
| 4571 } | 4526 } |
| 4572 } | 4527 } |
| 4573 // The second pass colors infinite-weight variables. | |
| 4574 llvm::SmallBitVector AvailableRegisters = WhiteList; | |
| 4575 llvm::SmallBitVector FreedRegisters(WhiteList.size()); | |
| 4576 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) { | |
| 4577 FreedRegisters.reset(); | |
| 4578 if (Inst->isDeleted()) | |
| 4579 continue; | |
| 4580 // Iterate over all variables referenced in the instruction, | |
| 4581 // including the Dest variable (if any). If the variable is | |
| 4582 // marked as infinite-weight, find it a register. If this | |
| 4583 // instruction is the last use of the variable in the lowered | |
| 4584 // sequence, release the register to the free list after this | |
| 4585 // instruction is completely processed. Note that the first pass | |
| 4586 // ignores the Dest operand, under the assumption that a | |
| 4587 // pre-colored Dest will appear as a source operand in some | |
| 4588 // subsequent instruction in the lowered sequence. | |
| 4589 Variable *Dest = Inst->getDest(); | |
| 4590 SizeT NumSrcs = Inst->getSrcSize(); | |
| 4591 if (Dest) | |
| 4592 ++NumSrcs; | |
| 4593 if (NumSrcs == 0) | |
| 4594 continue; | |
| 4595 OperandList Srcs(NumSrcs); | |
| 4596 for (SizeT i = 0; i < Inst->getSrcSize(); ++i) | |
| 4597 Srcs[i] = Inst->getSrc(i); | |
| 4598 if (Dest) | |
| 4599 Srcs[NumSrcs - 1] = Dest; | |
| 4600 for (SizeT SrcNum = 0; SrcNum < NumSrcs; ++SrcNum) { | |
| 4601 Operand *Src = Srcs[SrcNum]; | |
| 4602 SizeT NumVars = Src->getNumVars(); | |
| 4603 for (SizeT J = 0; J < NumVars; ++J) { | |
| 4604 Variable *Var = Src->getVar(J); | |
| 4605 if (!Var->hasReg() && Var->getWeight().isInf()) { | |
| 4606 llvm::SmallBitVector AvailableTypedRegisters = | |
| 4607 AvailableRegisters & getRegisterSetForType(Var->getType()); | |
| 4608 assert(AvailableTypedRegisters.any()); | |
| 4609 int32_t RegNum = AvailableTypedRegisters.find_first(); | |
| 4610 Var->setRegNum(RegNum); | |
| 4611 AvailableRegisters[RegNum] = false; | |
| 4612 } | |
| 4613 if (Var->hasReg()) { | |
| 4614 int32_t RegNum = Var->getRegNum(); | |
| 4615 assert(!AvailableRegisters[RegNum]); | |
| 4616 if (LastUses[Var] == Inst) { | |
| 4617 if (WhiteList[RegNum]) | |
| 4618 FreedRegisters[RegNum] = true; | |
| 4619 } | |
| 4620 } | |
| 4621 } | |
| 4622 } | |
| 4623 AvailableRegisters |= FreedRegisters; | |
| 4624 } | |
| 4625 } | 4528 } |
| 4626 | 4529 |
| 4627 template <> void ConstantInteger32::emit(GlobalContext *Ctx) const { | 4530 template <> void ConstantInteger32::emit(GlobalContext *Ctx) const { |
| 4628 Ostream &Str = Ctx->getStrEmit(); | 4531 Ostream &Str = Ctx->getStrEmit(); |
| 4629 Str << "$" << (int32_t)getValue(); | 4532 Str << "$" << (int32_t)getValue(); |
| 4630 } | 4533 } |
| 4631 | 4534 |
| 4632 template <> void ConstantInteger64::emit(GlobalContext *) const { | 4535 template <> void ConstantInteger64::emit(GlobalContext *) const { |
| 4633 llvm_unreachable("Not expecting to emit 64-bit integers"); | 4536 llvm_unreachable("Not expecting to emit 64-bit integers"); |
| 4634 } | 4537 } |
| (...skipping 102 matching lines...) |
| 4737 } else if (IsConstant || IsExternal) | 4640 } else if (IsConstant || IsExternal) |
| 4738 Str << "\t.zero\t" << Size << "\n"; | 4641 Str << "\t.zero\t" << Size << "\n"; |
| 4739 // Size is part of .comm. | 4642 // Size is part of .comm. |
| 4740 | 4643 |
| 4741 if (IsConstant || HasNonzeroInitializer || IsExternal) | 4644 if (IsConstant || HasNonzeroInitializer || IsExternal) |
| 4742 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4645 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
| 4743 // Size is part of .comm. | 4646 // Size is part of .comm. |
| 4744 } | 4647 } |
| 4745 | 4648 |
| 4746 } // end of namespace Ice | 4649 } // end of namespace Ice |
| OLD | NEW |