Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(56)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 733643005: Subzero: Use the linear-scan register allocator for Om1 as well. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Refactor some common initializations into init() Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
11 // consists almost entirely of the lowering sequence for each 11 // consists almost entirely of the lowering sequence for each
12 // high-level instruction. It also implements 12 // high-level instruction. It also implements
13 // TargetX8632Fast::postLower() which does the simplest possible 13 // TargetX8632Fast::postLower() which does the simplest possible
jvoung (off chromium) 2014/11/14 23:26:22 This comment about what postLower() does can be up…
Jim Stichnoth 2014/11/14 23:51:35 Done.
14 // register allocation for the "fast" target. 14 // register allocation for the "fast" target.
15 // 15 //
16 //===----------------------------------------------------------------------===// 16 //===----------------------------------------------------------------------===//
17 17
18 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/Support/CommandLine.h" 19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/MathExtras.h" 20 #include "llvm/Support/MathExtras.h"
21 21
22 #include "IceCfg.h" 22 #include "IceCfg.h"
23 #include "IceCfgNode.h" 23 #include "IceCfgNode.h"
(...skipping 344 matching lines...) Expand 10 before | Expand all | Expand 10 after
368 Func->liveness(Liveness_Intervals); 368 Func->liveness(Liveness_Intervals);
369 if (Func->hasError()) 369 if (Func->hasError())
370 return; 370 return;
371 // Validate the live range computations. The expensive validation 371 // Validate the live range computations. The expensive validation
372 // call is deliberately only made when assertions are enabled. 372 // call is deliberately only made when assertions are enabled.
373 assert(Func->validateLiveness()); 373 assert(Func->validateLiveness());
374 // The post-codegen dump is done here, after liveness analysis and 374 // The post-codegen dump is done here, after liveness analysis and
375 // associated cleanup, to make the dump cleaner and more useful. 375 // associated cleanup, to make the dump cleaner and more useful.
376 Func->dump("After initial x8632 codegen"); 376 Func->dump("After initial x8632 codegen");
377 Func->getVMetadata()->init(VMK_All); 377 Func->getVMetadata()->init(VMK_All);
378 regAlloc(); 378 regAlloc(RAK_Global);
379 if (Func->hasError()) 379 if (Func->hasError())
380 return; 380 return;
381 Func->dump("After linear scan regalloc"); 381 Func->dump("After linear scan regalloc");
382 382
383 if (Ctx->getFlags().PhiEdgeSplit) { 383 if (Ctx->getFlags().PhiEdgeSplit) {
384 Func->advancedPhiLowering(); 384 Func->advancedPhiLowering();
385 Func->dump("After advanced Phi lowering"); 385 Func->dump("After advanced Phi lowering");
386 } 386 }
387 387
388 // Stack frame mapping. 388 // Stack frame mapping.
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
422 return; 422 return;
423 Func->dump("After Phi lowering"); 423 Func->dump("After Phi lowering");
424 424
425 Func->doArgLowering(); 425 Func->doArgLowering();
426 426
427 Func->genCode(); 427 Func->genCode();
428 if (Func->hasError()) 428 if (Func->hasError())
429 return; 429 return;
430 Func->dump("After initial x8632 codegen"); 430 Func->dump("After initial x8632 codegen");
431 431
432 regAlloc(RAK_InfOnly);
433 if (Func->hasError())
434 return;
435 Func->dump("After regalloc of infinite-weight variables");
436
432 Func->genFrame(); 437 Func->genFrame();
433 if (Func->hasError()) 438 if (Func->hasError())
434 return; 439 return;
435 Func->dump("After stack frame mapping"); 440 Func->dump("After stack frame mapping");
436 441
437 // Nop insertion 442 // Nop insertion
438 if (shouldDoNopInsertion()) { 443 if (shouldDoNopInsertion()) {
439 Func->doNopInsertion(); 444 Func->doNopInsertion();
440 } 445 }
441 } 446 }
(...skipping 1367 matching lines...) Expand 10 before | Expand all | Expand 10 after
1809 // eliminated after lowering, we would need to ensure that the 1814 // eliminated after lowering, we would need to ensure that the
1810 // pre-call and the post-call esp adjustment get eliminated as well. 1815 // pre-call and the post-call esp adjustment get eliminated as well.
1811 if (ParameterAreaSizeBytes) { 1816 if (ParameterAreaSizeBytes) {
1812 _adjust_stack(ParameterAreaSizeBytes); 1817 _adjust_stack(ParameterAreaSizeBytes);
1813 } 1818 }
1814 1819
1815 // Copy arguments that are passed on the stack to the appropriate 1820 // Copy arguments that are passed on the stack to the appropriate
1816 // stack locations. 1821 // stack locations.
1817 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { 1822 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
1818 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); 1823 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
1819 // TODO: Consider calling postLower() here to reduce the register 1824 // TODO: Consider calling postLower() here to reduce the register
jvoung (off chromium) 2014/11/14 23:26:22 This postLower comment may not apply anymore?
Jim Stichnoth 2014/11/14 23:51:35 Done.
1820 // pressure associated with using too many infinite weight 1825 // pressure associated with using too many infinite weight
1821 // temporaries when lowering the call sequence in -Om1 mode. 1826 // temporaries when lowering the call sequence in -Om1 mode.
1822 } 1827 }
1823 1828
1824 // Copy arguments to be passed in registers to the appropriate 1829 // Copy arguments to be passed in registers to the appropriate
1825 // registers. 1830 // registers.
1826 // TODO: Investigate the impact of lowering arguments passed in 1831 // TODO: Investigate the impact of lowering arguments passed in
1827 // registers after lowering stack arguments as opposed to the other 1832 // registers after lowering stack arguments as opposed to the other
1828 // way around. Lowering register arguments after stack arguments may 1833 // way around. Lowering register arguments after stack arguments may
1829 // reduce register pressure. On the other hand, lowering register 1834 // reduce register pressure. On the other hand, lowering register
(...skipping 2275 matching lines...) Expand 10 before | Expand all | Expand 10 after
4105 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); 4110 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
4106 4111
4107 // Perform the arithmetic as a scalar operation. 4112 // Perform the arithmetic as a scalar operation.
4108 Variable *Res = Func->makeVariable(ElementTy); 4113 Variable *Res = Func->makeVariable(ElementTy);
4109 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); 4114 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
4110 4115
4111 // Insert the result into position. 4116 // Insert the result into position.
4112 Variable *DestT = Func->makeVariable(Ty); 4117 Variable *DestT = Func->makeVariable(Ty);
4113 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); 4118 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
4114 T = DestT; 4119 T = DestT;
4115 // TODO(stichnot): Use postLower() in -Om1 mode to avoid buildup of 4120 // TODO(stichnot): Use postLower() in -Om1 mode to avoid buildup of
jvoung (off chromium) 2014/11/14 23:26:22 similar?
Jim Stichnoth 2014/11/14 23:51:35 Done.
4116 // infinite weight temporaries. 4121 // infinite weight temporaries.
4117 } 4122 }
4118 4123
4119 lowerAssign(InstAssign::create(Func, Dest, T)); 4124 lowerAssign(InstAssign::create(Func, Dest, T));
4120 } 4125 }
4121 4126
4122 // The following pattern occurs often in lowered C and C++ code: 4127 // The following pattern occurs often in lowered C and C++ code:
4123 // 4128 //
4124 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 4129 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1
4125 // %cmp.ext = sext <n x i1> %cmp to <n x ty> 4130 // %cmp.ext = sext <n x i1> %cmp to <n x ty>
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
4193 // Lower the pre-ordered list of assignments into mov instructions. 4198 // Lower the pre-ordered list of assignments into mov instructions.
4194 // Also has to do some ad-hoc register allocation as necessary. 4199 // Also has to do some ad-hoc register allocation as necessary.
4195 void TargetX8632::lowerPhiAssignments(CfgNode *Node, 4200 void TargetX8632::lowerPhiAssignments(CfgNode *Node,
4196 const AssignList &Assignments) { 4201 const AssignList &Assignments) {
4197 // Check that this is a properly initialized shell of a node. 4202 // Check that this is a properly initialized shell of a node.
4198 assert(Node->getOutEdges().size() == 1); 4203 assert(Node->getOutEdges().size() == 1);
4199 assert(Node->getInsts().empty()); 4204 assert(Node->getInsts().empty());
4200 assert(Node->getPhis().empty()); 4205 assert(Node->getPhis().empty());
4201 CfgNode *Succ = Node->getOutEdges().front(); 4206 CfgNode *Succ = Node->getOutEdges().front();
4202 getContext().init(Node); 4207 getContext().init(Node);
4203 // Register set setup similar to regAlloc() and postLower(). 4208 // Register set setup similar to regAlloc() and postLower().
jvoung (off chromium) 2014/11/14 23:26:22 similar comment about postLower
Jim Stichnoth 2014/11/14 23:51:35 Done.
4204 RegSetMask RegInclude = RegSet_All; 4209 RegSetMask RegInclude = RegSet_All;
4205 RegSetMask RegExclude = RegSet_StackPointer; 4210 RegSetMask RegExclude = RegSet_StackPointer;
4206 if (hasFramePointer()) 4211 if (hasFramePointer())
4207 RegExclude |= RegSet_FramePointer; 4212 RegExclude |= RegSet_FramePointer;
4208 llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude); 4213 llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude);
4209 bool NeedsRegs = false; 4214 bool NeedsRegs = false;
4210 // Initialize the set of available registers to the set of what is 4215 // Initialize the set of available registers to the set of what is
4211 // available (not live) at the beginning of the successor block, 4216 // available (not live) at the beginning of the successor block,
4212 // minus all registers used as Dest operands in the Assignments. To 4217 // minus all registers used as Dest operands in the Assignments. To
4213 // do this, we start off assuming all registers are available, then 4218 // do this, we start off assuming all registers are available, then
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after
4505 assert(Type != IceType_i64); 4510 assert(Type != IceType_i64);
4506 Variable *Reg = Func->makeVariable(Type); 4511 Variable *Reg = Func->makeVariable(Type);
4507 if (RegNum == Variable::NoRegister) 4512 if (RegNum == Variable::NoRegister)
4508 Reg->setWeightInfinite(); 4513 Reg->setWeightInfinite();
4509 else 4514 else
4510 Reg->setRegNum(RegNum); 4515 Reg->setRegNum(RegNum);
4511 return Reg; 4516 return Reg;
4512 } 4517 }
4513 4518
4514 void TargetX8632::postLower() { 4519 void TargetX8632::postLower() {
4515 if (Ctx->getOptLevel() != Opt_m1) { 4520 if (Ctx->getOptLevel() == Opt_m1)
4516 // Find two-address non-SSA instructions where Dest==Src0, and set
4517 // the DestNonKillable flag to keep liveness analysis consistent.
4518 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) {
4519 if (Inst->isDeleted())
4520 continue;
4521 if (Variable *Dest = Inst->getDest()) {
4522 // TODO(stichnot): We may need to consider all source
4523 // operands, not just the first one, if using 3-address
4524 // instructions.
4525 if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest)
4526 Inst->setDestNonKillable();
4527 }
4528 }
4529 return; 4521 return;
4530 } 4522 // Find two-address non-SSA instructions where Dest==Src0, and set
4531 // TODO: Avoid recomputing WhiteList every instruction. 4523 // the DestNonKillable flag to keep liveness analysis consistent.
4532 RegSetMask RegInclude = RegSet_All;
4533 RegSetMask RegExclude = RegSet_StackPointer;
4534 if (hasFramePointer())
4535 RegExclude |= RegSet_FramePointer;
4536 llvm::SmallBitVector WhiteList = getRegisterSet(RegInclude, RegExclude);
4537 // Make one pass to black-list pre-colored registers. TODO: If
4538 // there was some prior register allocation pass that made register
4539 // assignments, those registers need to be black-listed here as
4540 // well.
4541 llvm::DenseMap<const Variable *, const Inst *> LastUses;
4542 // The first pass also keeps track of which instruction is the last
4543 // use for each infinite-weight variable. After the last use, the
4544 // variable is released to the free list.
4545 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) { 4524 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) {
4546 if (Inst->isDeleted()) 4525 if (Inst->isDeleted())
4547 continue; 4526 continue;
4548 // Don't consider a FakeKill instruction, because (currently) it 4527 if (Variable *Dest = Inst->getDest()) {
4549 // is only used to kill all scratch registers at a call site, and 4528 // TODO(stichnot): We may need to consider all source
4550 // we don't want to black-list all scratch registers during the 4529 // operands, not just the first one, if using 3-address
4551 // call lowering. This could become a problem since it relies on 4530 // instructions.
4552 // the lowering sequence not keeping any infinite-weight variables 4531 if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest)
4553 // live across a call. TODO(stichnot): Consider replacing this 4532 Inst->setDestNonKillable();
4554 // whole postLower() implementation with a robust local register
4555 // allocator, for example compute live ranges only for pre-colored
4556 // and infinite-weight variables and run the existing linear-scan
4557 // allocator.
4558 assert(!llvm::isa<InstFakeKill>(Inst) || Inst->getSrcSize() == 0);
4559 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
4560 Operand *Src = Inst->getSrc(SrcNum);
4561 SizeT NumVars = Src->getNumVars();
4562 for (SizeT J = 0; J < NumVars; ++J) {
4563 const Variable *Var = Src->getVar(J);
4564 // Track last uses of all variables, regardless of whether
4565 // they are pre-colored or infinite-weight.
4566 LastUses[Var] = Inst;
4567 if (!Var->hasReg())
4568 continue;
4569 WhiteList[Var->getRegNum()] = false;
4570 }
4571 } 4533 }
4572 } 4534 }
4573 // The second pass colors infinite-weight variables.
4574 llvm::SmallBitVector AvailableRegisters = WhiteList;
4575 llvm::SmallBitVector FreedRegisters(WhiteList.size());
4576 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) {
4577 FreedRegisters.reset();
4578 if (Inst->isDeleted())
4579 continue;
4580 // Iterate over all variables referenced in the instruction,
4581 // including the Dest variable (if any). If the variable is
4582 // marked as infinite-weight, find it a register. If this
4583 // instruction is the last use of the variable in the lowered
4584 // sequence, release the register to the free list after this
4585 // instruction is completely processed. Note that the first pass
4586 // ignores the Dest operand, under the assumption that a
4587 // pre-colored Dest will appear as a source operand in some
4588 // subsequent instruction in the lowered sequence.
4589 Variable *Dest = Inst->getDest();
4590 SizeT NumSrcs = Inst->getSrcSize();
4591 if (Dest)
4592 ++NumSrcs;
4593 if (NumSrcs == 0)
4594 continue;
4595 OperandList Srcs(NumSrcs);
4596 for (SizeT i = 0; i < Inst->getSrcSize(); ++i)
4597 Srcs[i] = Inst->getSrc(i);
4598 if (Dest)
4599 Srcs[NumSrcs - 1] = Dest;
4600 for (SizeT SrcNum = 0; SrcNum < NumSrcs; ++SrcNum) {
4601 Operand *Src = Srcs[SrcNum];
4602 SizeT NumVars = Src->getNumVars();
4603 for (SizeT J = 0; J < NumVars; ++J) {
4604 Variable *Var = Src->getVar(J);
4605 if (!Var->hasReg() && Var->getWeight().isInf()) {
4606 llvm::SmallBitVector AvailableTypedRegisters =
4607 AvailableRegisters & getRegisterSetForType(Var->getType());
4608 assert(AvailableTypedRegisters.any());
4609 int32_t RegNum = AvailableTypedRegisters.find_first();
4610 Var->setRegNum(RegNum);
4611 AvailableRegisters[RegNum] = false;
4612 }
4613 if (Var->hasReg()) {
4614 int32_t RegNum = Var->getRegNum();
4615 assert(!AvailableRegisters[RegNum]);
4616 if (LastUses[Var] == Inst) {
4617 if (WhiteList[RegNum])
4618 FreedRegisters[RegNum] = true;
4619 }
4620 }
4621 }
4622 }
4623 AvailableRegisters |= FreedRegisters;
4624 }
4625 } 4535 }
4626 4536
4627 template <> void ConstantInteger32::emit(GlobalContext *Ctx) const { 4537 template <> void ConstantInteger32::emit(GlobalContext *Ctx) const {
4628 Ostream &Str = Ctx->getStrEmit(); 4538 Ostream &Str = Ctx->getStrEmit();
4629 Str << "$" << (int32_t)getValue(); 4539 Str << "$" << (int32_t)getValue();
4630 } 4540 }
4631 4541
4632 template <> void ConstantInteger64::emit(GlobalContext *) const { 4542 template <> void ConstantInteger64::emit(GlobalContext *) const {
4633 llvm_unreachable("Not expecting to emit 64-bit integers"); 4543 llvm_unreachable("Not expecting to emit 64-bit integers");
4634 } 4544 }
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
4737 } else if (IsConstant || IsExternal) 4647 } else if (IsConstant || IsExternal)
4738 Str << "\t.zero\t" << Size << "\n"; 4648 Str << "\t.zero\t" << Size << "\n";
4739 // Size is part of .comm. 4649 // Size is part of .comm.
4740 4650
4741 if (IsConstant || HasNonzeroInitializer || IsExternal) 4651 if (IsConstant || HasNonzeroInitializer || IsExternal)
4742 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4652 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4743 // Size is part of .comm. 4653 // Size is part of .comm.
4744 } 4654 }
4745 4655
4746 } // end of namespace Ice 4656 } // end of namespace Ice
OLDNEW
« src/IceTargetLowering.h ('K') | « src/IceTargetLowering.cpp ('k') | src/IceTranslator.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698