OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
11 // consists almost entirely of the lowering sequence for each | 11 // consists almost entirely of the lowering sequence for each |
12 // high-level instruction. It also implements | 12 // high-level instruction. It also implements |
13 // TargetX8632Fast::postLower() which does the simplest possible | 13 // TargetX8632Fast::postLower() which does the simplest possible |
jvoung (off chromium)
2014/11/14 23:26:22
This comment about what postLower() does can be updated.
Jim Stichnoth
2014/11/14 23:51:35
Done.
| |
14 // register allocation for the "fast" target. | 14 // register allocation for the "fast" target. |
15 // | 15 // |
16 //===----------------------------------------------------------------------===// | 16 //===----------------------------------------------------------------------===// |
17 | 17 |
18 #include "llvm/ADT/DenseMap.h" | 18 #include "llvm/ADT/DenseMap.h" |
19 #include "llvm/Support/CommandLine.h" | 19 #include "llvm/Support/CommandLine.h" |
20 #include "llvm/Support/MathExtras.h" | 20 #include "llvm/Support/MathExtras.h" |
21 | 21 |
22 #include "IceCfg.h" | 22 #include "IceCfg.h" |
23 #include "IceCfgNode.h" | 23 #include "IceCfgNode.h" |
(...skipping 344 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
368 Func->liveness(Liveness_Intervals); | 368 Func->liveness(Liveness_Intervals); |
369 if (Func->hasError()) | 369 if (Func->hasError()) |
370 return; | 370 return; |
371 // Validate the live range computations. The expensive validation | 371 // Validate the live range computations. The expensive validation |
372 // call is deliberately only made when assertions are enabled. | 372 // call is deliberately only made when assertions are enabled. |
373 assert(Func->validateLiveness()); | 373 assert(Func->validateLiveness()); |
374 // The post-codegen dump is done here, after liveness analysis and | 374 // The post-codegen dump is done here, after liveness analysis and |
375 // associated cleanup, to make the dump cleaner and more useful. | 375 // associated cleanup, to make the dump cleaner and more useful. |
376 Func->dump("After initial x8632 codegen"); | 376 Func->dump("After initial x8632 codegen"); |
377 Func->getVMetadata()->init(VMK_All); | 377 Func->getVMetadata()->init(VMK_All); |
378 regAlloc(); | 378 regAlloc(RAK_Global); |
379 if (Func->hasError()) | 379 if (Func->hasError()) |
380 return; | 380 return; |
381 Func->dump("After linear scan regalloc"); | 381 Func->dump("After linear scan regalloc"); |
382 | 382 |
383 if (Ctx->getFlags().PhiEdgeSplit) { | 383 if (Ctx->getFlags().PhiEdgeSplit) { |
384 Func->advancedPhiLowering(); | 384 Func->advancedPhiLowering(); |
385 Func->dump("After advanced Phi lowering"); | 385 Func->dump("After advanced Phi lowering"); |
386 } | 386 } |
387 | 387 |
388 // Stack frame mapping. | 388 // Stack frame mapping. |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
422 return; | 422 return; |
423 Func->dump("After Phi lowering"); | 423 Func->dump("After Phi lowering"); |
424 | 424 |
425 Func->doArgLowering(); | 425 Func->doArgLowering(); |
426 | 426 |
427 Func->genCode(); | 427 Func->genCode(); |
428 if (Func->hasError()) | 428 if (Func->hasError()) |
429 return; | 429 return; |
430 Func->dump("After initial x8632 codegen"); | 430 Func->dump("After initial x8632 codegen"); |
431 | 431 |
432 regAlloc(RAK_InfOnly); | |
433 if (Func->hasError()) | |
434 return; | |
435 Func->dump("After regalloc of infinite-weight variables"); | |
436 | |
432 Func->genFrame(); | 437 Func->genFrame(); |
433 if (Func->hasError()) | 438 if (Func->hasError()) |
434 return; | 439 return; |
435 Func->dump("After stack frame mapping"); | 440 Func->dump("After stack frame mapping"); |
436 | 441 |
437 // Nop insertion | 442 // Nop insertion |
438 if (shouldDoNopInsertion()) { | 443 if (shouldDoNopInsertion()) { |
439 Func->doNopInsertion(); | 444 Func->doNopInsertion(); |
440 } | 445 } |
441 } | 446 } |
(...skipping 1367 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1809 // eliminated after lowering, we would need to ensure that the | 1814 // eliminated after lowering, we would need to ensure that the |
1810 // pre-call and the post-call esp adjustment get eliminated as well. | 1815 // pre-call and the post-call esp adjustment get eliminated as well. |
1811 if (ParameterAreaSizeBytes) { | 1816 if (ParameterAreaSizeBytes) { |
1812 _adjust_stack(ParameterAreaSizeBytes); | 1817 _adjust_stack(ParameterAreaSizeBytes); |
1813 } | 1818 } |
1814 | 1819 |
1815 // Copy arguments that are passed on the stack to the appropriate | 1820 // Copy arguments that are passed on the stack to the appropriate |
1816 // stack locations. | 1821 // stack locations. |
1817 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | 1822 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { |
1818 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | 1823 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); |
1819 // TODO: Consider calling postLower() here to reduce the register | 1824 // TODO: Consider calling postLower() here to reduce the register |
jvoung (off chromium)
2014/11/14 23:26:22
This postLower comment may not apply anymore?
Jim Stichnoth
2014/11/14 23:51:35
Done.
| |
1820 // pressure associated with using too many infinite weight | 1825 // pressure associated with using too many infinite weight |
1821 // temporaries when lowering the call sequence in -Om1 mode. | 1826 // temporaries when lowering the call sequence in -Om1 mode. |
1822 } | 1827 } |
1823 | 1828 |
1824 // Copy arguments to be passed in registers to the appropriate | 1829 // Copy arguments to be passed in registers to the appropriate |
1825 // registers. | 1830 // registers. |
1826 // TODO: Investigate the impact of lowering arguments passed in | 1831 // TODO: Investigate the impact of lowering arguments passed in |
1827 // registers after lowering stack arguments as opposed to the other | 1832 // registers after lowering stack arguments as opposed to the other |
1828 // way around. Lowering register arguments after stack arguments may | 1833 // way around. Lowering register arguments after stack arguments may |
1829 // reduce register pressure. On the other hand, lowering register | 1834 // reduce register pressure. On the other hand, lowering register |
(...skipping 2275 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4105 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); | 4110 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); |
4106 | 4111 |
4107 // Perform the arithmetic as a scalar operation. | 4112 // Perform the arithmetic as a scalar operation. |
4108 Variable *Res = Func->makeVariable(ElementTy); | 4113 Variable *Res = Func->makeVariable(ElementTy); |
4109 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); | 4114 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); |
4110 | 4115 |
4111 // Insert the result into position. | 4116 // Insert the result into position. |
4112 Variable *DestT = Func->makeVariable(Ty); | 4117 Variable *DestT = Func->makeVariable(Ty); |
4113 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); | 4118 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); |
4114 T = DestT; | 4119 T = DestT; |
4115 // TODO(stichnot): Use postLower() in -Om1 mode to avoid buildup of | 4120 // TODO(stichnot): Use postLower() in -Om1 mode to avoid buildup of |
jvoung (off chromium)
2014/11/14 23:26:22
similar?
Jim Stichnoth
2014/11/14 23:51:35
Done.
| |
4116 // infinite weight temporaries. | 4121 // infinite weight temporaries. |
4117 } | 4122 } |
4118 | 4123 |
4119 lowerAssign(InstAssign::create(Func, Dest, T)); | 4124 lowerAssign(InstAssign::create(Func, Dest, T)); |
4120 } | 4125 } |
4121 | 4126 |
4122 // The following pattern occurs often in lowered C and C++ code: | 4127 // The following pattern occurs often in lowered C and C++ code: |
4123 // | 4128 // |
4124 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 4129 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
4125 // %cmp.ext = sext <n x i1> %cmp to <n x ty> | 4130 // %cmp.ext = sext <n x i1> %cmp to <n x ty> |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4193 // Lower the pre-ordered list of assignments into mov instructions. | 4198 // Lower the pre-ordered list of assignments into mov instructions. |
4194 // Also has to do some ad-hoc register allocation as necessary. | 4199 // Also has to do some ad-hoc register allocation as necessary. |
4195 void TargetX8632::lowerPhiAssignments(CfgNode *Node, | 4200 void TargetX8632::lowerPhiAssignments(CfgNode *Node, |
4196 const AssignList &Assignments) { | 4201 const AssignList &Assignments) { |
4197 // Check that this is a properly initialized shell of a node. | 4202 // Check that this is a properly initialized shell of a node. |
4198 assert(Node->getOutEdges().size() == 1); | 4203 assert(Node->getOutEdges().size() == 1); |
4199 assert(Node->getInsts().empty()); | 4204 assert(Node->getInsts().empty()); |
4200 assert(Node->getPhis().empty()); | 4205 assert(Node->getPhis().empty()); |
4201 CfgNode *Succ = Node->getOutEdges().front(); | 4206 CfgNode *Succ = Node->getOutEdges().front(); |
4202 getContext().init(Node); | 4207 getContext().init(Node); |
4203 // Register set setup similar to regAlloc() and postLower(). | 4208 // Register set setup similar to regAlloc() and postLower(). |
jvoung (off chromium)
2014/11/14 23:26:22
similar comment about postLower
Jim Stichnoth
2014/11/14 23:51:35
Done.
| |
4204 RegSetMask RegInclude = RegSet_All; | 4209 RegSetMask RegInclude = RegSet_All; |
4205 RegSetMask RegExclude = RegSet_StackPointer; | 4210 RegSetMask RegExclude = RegSet_StackPointer; |
4206 if (hasFramePointer()) | 4211 if (hasFramePointer()) |
4207 RegExclude |= RegSet_FramePointer; | 4212 RegExclude |= RegSet_FramePointer; |
4208 llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude); | 4213 llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude); |
4209 bool NeedsRegs = false; | 4214 bool NeedsRegs = false; |
4210 // Initialize the set of available registers to the set of what is | 4215 // Initialize the set of available registers to the set of what is |
4211 // available (not live) at the beginning of the successor block, | 4216 // available (not live) at the beginning of the successor block, |
4212 // minus all registers used as Dest operands in the Assignments. To | 4217 // minus all registers used as Dest operands in the Assignments. To |
4213 // do this, we start off assuming all registers are available, then | 4218 // do this, we start off assuming all registers are available, then |
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4505 assert(Type != IceType_i64); | 4510 assert(Type != IceType_i64); |
4506 Variable *Reg = Func->makeVariable(Type); | 4511 Variable *Reg = Func->makeVariable(Type); |
4507 if (RegNum == Variable::NoRegister) | 4512 if (RegNum == Variable::NoRegister) |
4508 Reg->setWeightInfinite(); | 4513 Reg->setWeightInfinite(); |
4509 else | 4514 else |
4510 Reg->setRegNum(RegNum); | 4515 Reg->setRegNum(RegNum); |
4511 return Reg; | 4516 return Reg; |
4512 } | 4517 } |
4513 | 4518 |
4514 void TargetX8632::postLower() { | 4519 void TargetX8632::postLower() { |
4515 if (Ctx->getOptLevel() != Opt_m1) { | 4520 if (Ctx->getOptLevel() == Opt_m1) |
4516 // Find two-address non-SSA instructions where Dest==Src0, and set | |
4517 // the DestNonKillable flag to keep liveness analysis consistent. | |
4518 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) { | |
4519 if (Inst->isDeleted()) | |
4520 continue; | |
4521 if (Variable *Dest = Inst->getDest()) { | |
4522 // TODO(stichnot): We may need to consider all source | |
4523 // operands, not just the first one, if using 3-address | |
4524 // instructions. | |
4525 if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest) | |
4526 Inst->setDestNonKillable(); | |
4527 } | |
4528 } | |
4529 return; | 4521 return; |
4530 } | 4522 // Find two-address non-SSA instructions where Dest==Src0, and set |
4531 // TODO: Avoid recomputing WhiteList every instruction. | 4523 // the DestNonKillable flag to keep liveness analysis consistent. |
4532 RegSetMask RegInclude = RegSet_All; | |
4533 RegSetMask RegExclude = RegSet_StackPointer; | |
4534 if (hasFramePointer()) | |
4535 RegExclude |= RegSet_FramePointer; | |
4536 llvm::SmallBitVector WhiteList = getRegisterSet(RegInclude, RegExclude); | |
4537 // Make one pass to black-list pre-colored registers. TODO: If | |
4538 // there was some prior register allocation pass that made register | |
4539 // assignments, those registers need to be black-listed here as | |
4540 // well. | |
4541 llvm::DenseMap<const Variable *, const Inst *> LastUses; | |
4542 // The first pass also keeps track of which instruction is the last | |
4543 // use for each infinite-weight variable. After the last use, the | |
4544 // variable is released to the free list. | |
4545 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) { | 4524 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) { |
4546 if (Inst->isDeleted()) | 4525 if (Inst->isDeleted()) |
4547 continue; | 4526 continue; |
4548 // Don't consider a FakeKill instruction, because (currently) it | 4527 if (Variable *Dest = Inst->getDest()) { |
4549 // is only used to kill all scratch registers at a call site, and | 4528 // TODO(stichnot): We may need to consider all source |
4550 // we don't want to black-list all scratch registers during the | 4529 // operands, not just the first one, if using 3-address |
4551 // call lowering. This could become a problem since it relies on | 4530 // instructions. |
4552 // the lowering sequence not keeping any infinite-weight variables | 4531 if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest) |
4553 // live across a call. TODO(stichnot): Consider replacing this | 4532 Inst->setDestNonKillable(); |
4554 // whole postLower() implementation with a robust local register | |
4555 // allocator, for example compute live ranges only for pre-colored | |
4556 // and infinite-weight variables and run the existing linear-scan | |
4557 // allocator. | |
4558 assert(!llvm::isa<InstFakeKill>(Inst) || Inst->getSrcSize() == 0); | |
4559 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) { | |
4560 Operand *Src = Inst->getSrc(SrcNum); | |
4561 SizeT NumVars = Src->getNumVars(); | |
4562 for (SizeT J = 0; J < NumVars; ++J) { | |
4563 const Variable *Var = Src->getVar(J); | |
4564 // Track last uses of all variables, regardless of whether | |
4565 // they are pre-colored or infinite-weight. | |
4566 LastUses[Var] = Inst; | |
4567 if (!Var->hasReg()) | |
4568 continue; | |
4569 WhiteList[Var->getRegNum()] = false; | |
4570 } | |
4571 } | 4533 } |
4572 } | 4534 } |
4573 // The second pass colors infinite-weight variables. | |
4574 llvm::SmallBitVector AvailableRegisters = WhiteList; | |
4575 llvm::SmallBitVector FreedRegisters(WhiteList.size()); | |
4576 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) { | |
4577 FreedRegisters.reset(); | |
4578 if (Inst->isDeleted()) | |
4579 continue; | |
4580 // Iterate over all variables referenced in the instruction, | |
4581 // including the Dest variable (if any). If the variable is | |
4582 // marked as infinite-weight, find it a register. If this | |
4583 // instruction is the last use of the variable in the lowered | |
4584 // sequence, release the register to the free list after this | |
4585 // instruction is completely processed. Note that the first pass | |
4586 // ignores the Dest operand, under the assumption that a | |
4587 // pre-colored Dest will appear as a source operand in some | |
4588 // subsequent instruction in the lowered sequence. | |
4589 Variable *Dest = Inst->getDest(); | |
4590 SizeT NumSrcs = Inst->getSrcSize(); | |
4591 if (Dest) | |
4592 ++NumSrcs; | |
4593 if (NumSrcs == 0) | |
4594 continue; | |
4595 OperandList Srcs(NumSrcs); | |
4596 for (SizeT i = 0; i < Inst->getSrcSize(); ++i) | |
4597 Srcs[i] = Inst->getSrc(i); | |
4598 if (Dest) | |
4599 Srcs[NumSrcs - 1] = Dest; | |
4600 for (SizeT SrcNum = 0; SrcNum < NumSrcs; ++SrcNum) { | |
4601 Operand *Src = Srcs[SrcNum]; | |
4602 SizeT NumVars = Src->getNumVars(); | |
4603 for (SizeT J = 0; J < NumVars; ++J) { | |
4604 Variable *Var = Src->getVar(J); | |
4605 if (!Var->hasReg() && Var->getWeight().isInf()) { | |
4606 llvm::SmallBitVector AvailableTypedRegisters = | |
4607 AvailableRegisters & getRegisterSetForType(Var->getType()); | |
4608 assert(AvailableTypedRegisters.any()); | |
4609 int32_t RegNum = AvailableTypedRegisters.find_first(); | |
4610 Var->setRegNum(RegNum); | |
4611 AvailableRegisters[RegNum] = false; | |
4612 } | |
4613 if (Var->hasReg()) { | |
4614 int32_t RegNum = Var->getRegNum(); | |
4615 assert(!AvailableRegisters[RegNum]); | |
4616 if (LastUses[Var] == Inst) { | |
4617 if (WhiteList[RegNum]) | |
4618 FreedRegisters[RegNum] = true; | |
4619 } | |
4620 } | |
4621 } | |
4622 } | |
4623 AvailableRegisters |= FreedRegisters; | |
4624 } | |
4625 } | 4535 } |
4626 | 4536 |
4627 template <> void ConstantInteger32::emit(GlobalContext *Ctx) const { | 4537 template <> void ConstantInteger32::emit(GlobalContext *Ctx) const { |
4628 Ostream &Str = Ctx->getStrEmit(); | 4538 Ostream &Str = Ctx->getStrEmit(); |
4629 Str << "$" << (int32_t)getValue(); | 4539 Str << "$" << (int32_t)getValue(); |
4630 } | 4540 } |
4631 | 4541 |
4632 template <> void ConstantInteger64::emit(GlobalContext *) const { | 4542 template <> void ConstantInteger64::emit(GlobalContext *) const { |
4633 llvm_unreachable("Not expecting to emit 64-bit integers"); | 4543 llvm_unreachable("Not expecting to emit 64-bit integers"); |
4634 } | 4544 } |
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4737 } else if (IsConstant || IsExternal) | 4647 } else if (IsConstant || IsExternal) |
4738 Str << "\t.zero\t" << Size << "\n"; | 4648 Str << "\t.zero\t" << Size << "\n"; |
4739 // Size is part of .comm. | 4649 // Size is part of .comm. |
4740 | 4650 |
4741 if (IsConstant || HasNonzeroInitializer || IsExternal) | 4651 if (IsConstant || HasNonzeroInitializer || IsExternal) |
4742 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4652 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
4743 // Size is part of .comm. | 4653 // Size is part of .comm. |
4744 } | 4654 } |
4745 | 4655 |
4746 } // end of namespace Ice | 4656 } // end of namespace Ice |
OLD | NEW |