Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(66)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 733643005: Subzero: Use the linear-scan register allocator for Om1 as well. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Update/fix some comments Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLowering.cpp ('k') | src/IceTranslator.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLD | NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
11 // consists almost entirely of the lowering sequence for each 11 // consists almost entirely of the lowering sequence for each
12 // high-level instruction. It also implements 12 // high-level instruction.
13 // TargetX8632Fast::postLower() which does the simplest possible
14 // register allocation for the "fast" target.
15 // 13 //
16 //===----------------------------------------------------------------------===// 14 //===----------------------------------------------------------------------===//
17 15
18 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseMap.h"
19 #include "llvm/Support/CommandLine.h" 17 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/MathExtras.h" 18 #include "llvm/Support/MathExtras.h"
21 19
22 #include "IceCfg.h" 20 #include "IceCfg.h"
23 #include "IceCfgNode.h" 21 #include "IceCfgNode.h"
24 #include "IceClFlags.h" 22 #include "IceClFlags.h"
(...skipping 343 matching lines...) Expand 10 before | Expand all | Expand 10 after
368 Func->liveness(Liveness_Intervals); 366 Func->liveness(Liveness_Intervals);
369 if (Func->hasError()) 367 if (Func->hasError())
370 return; 368 return;
371 // Validate the live range computations. The expensive validation 369 // Validate the live range computations. The expensive validation
372 // call is deliberately only made when assertions are enabled. 370 // call is deliberately only made when assertions are enabled.
373 assert(Func->validateLiveness()); 371 assert(Func->validateLiveness());
374 // The post-codegen dump is done here, after liveness analysis and 372 // The post-codegen dump is done here, after liveness analysis and
375 // associated cleanup, to make the dump cleaner and more useful. 373 // associated cleanup, to make the dump cleaner and more useful.
376 Func->dump("After initial x8632 codegen"); 374 Func->dump("After initial x8632 codegen");
377 Func->getVMetadata()->init(VMK_All); 375 Func->getVMetadata()->init(VMK_All);
378 regAlloc(); 376 regAlloc(RAK_Global);
379 if (Func->hasError()) 377 if (Func->hasError())
380 return; 378 return;
381 Func->dump("After linear scan regalloc"); 379 Func->dump("After linear scan regalloc");
382 380
383 if (Ctx->getFlags().PhiEdgeSplit) { 381 if (Ctx->getFlags().PhiEdgeSplit) {
384 Func->advancedPhiLowering(); 382 Func->advancedPhiLowering();
385 Func->dump("After advanced Phi lowering"); 383 Func->dump("After advanced Phi lowering");
386 } 384 }
387 385
388 // Stack frame mapping. 386 // Stack frame mapping.
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
422 return; 420 return;
423 Func->dump("After Phi lowering"); 421 Func->dump("After Phi lowering");
424 422
425 Func->doArgLowering(); 423 Func->doArgLowering();
426 424
427 Func->genCode(); 425 Func->genCode();
428 if (Func->hasError()) 426 if (Func->hasError())
429 return; 427 return;
430 Func->dump("After initial x8632 codegen"); 428 Func->dump("After initial x8632 codegen");
431 429
430 regAlloc(RAK_InfOnly);
431 if (Func->hasError())
432 return;
433 Func->dump("After regalloc of infinite-weight variables");
434
432 Func->genFrame(); 435 Func->genFrame();
433 if (Func->hasError()) 436 if (Func->hasError())
434 return; 437 return;
435 Func->dump("After stack frame mapping"); 438 Func->dump("After stack frame mapping");
436 439
437 // Nop insertion 440 // Nop insertion
438 if (shouldDoNopInsertion()) { 441 if (shouldDoNopInsertion()) {
439 Func->doNopInsertion(); 442 Func->doNopInsertion();
440 } 443 }
441 } 444 }
(...skipping 1367 matching lines...) Expand 10 before | Expand all | Expand 10 after
1809 // eliminated after lowering, we would need to ensure that the 1812 // eliminated after lowering, we would need to ensure that the
1810 // pre-call and the post-call esp adjustment get eliminated as well. 1813 // pre-call and the post-call esp adjustment get eliminated as well.
1811 if (ParameterAreaSizeBytes) { 1814 if (ParameterAreaSizeBytes) {
1812 _adjust_stack(ParameterAreaSizeBytes); 1815 _adjust_stack(ParameterAreaSizeBytes);
1813 } 1816 }
1814 1817
1815 // Copy arguments that are passed on the stack to the appropriate 1818 // Copy arguments that are passed on the stack to the appropriate
1816 // stack locations. 1819 // stack locations.
1817 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { 1820 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
1818 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); 1821 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
1819 // TODO: Consider calling postLower() here to reduce the register
1820 // pressure associated with using too many infinite weight
1821 // temporaries when lowering the call sequence in -Om1 mode.
1822 } 1822 }
1823 1823
1824 // Copy arguments to be passed in registers to the appropriate 1824 // Copy arguments to be passed in registers to the appropriate
1825 // registers. 1825 // registers.
1826 // TODO: Investigate the impact of lowering arguments passed in 1826 // TODO: Investigate the impact of lowering arguments passed in
1827 // registers after lowering stack arguments as opposed to the other 1827 // registers after lowering stack arguments as opposed to the other
1828 // way around. Lowering register arguments after stack arguments may 1828 // way around. Lowering register arguments after stack arguments may
1829 // reduce register pressure. On the other hand, lowering register 1829 // reduce register pressure. On the other hand, lowering register
1830 // arguments first (before stack arguments) may result in more compact 1830 // arguments first (before stack arguments) may result in more compact
1831 // code, as the memory operand displacements may end up being smaller 1831 // code, as the memory operand displacements may end up being smaller
(...skipping 2273 matching lines...) Expand 10 before | Expand all | Expand 10 after
4105 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); 4105 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
4106 4106
4107 // Perform the arithmetic as a scalar operation. 4107 // Perform the arithmetic as a scalar operation.
4108 Variable *Res = Func->makeVariable(ElementTy); 4108 Variable *Res = Func->makeVariable(ElementTy);
4109 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); 4109 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
4110 4110
4111 // Insert the result into position. 4111 // Insert the result into position.
4112 Variable *DestT = Func->makeVariable(Ty); 4112 Variable *DestT = Func->makeVariable(Ty);
4113 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); 4113 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
4114 T = DestT; 4114 T = DestT;
4115 // TODO(stichnot): Use postLower() in -Om1 mode to avoid buildup of
4116 // infinite weight temporaries.
4117 } 4115 }
4118 4116
4119 lowerAssign(InstAssign::create(Func, Dest, T)); 4117 lowerAssign(InstAssign::create(Func, Dest, T));
4120 } 4118 }
4121 4119
4122 // The following pattern occurs often in lowered C and C++ code: 4120 // The following pattern occurs often in lowered C and C++ code:
4123 // 4121 //
4124 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 4122 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1
4125 // %cmp.ext = sext <n x i1> %cmp to <n x ty> 4123 // %cmp.ext = sext <n x i1> %cmp to <n x ty>
4126 // 4124 //
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
4193 // Lower the pre-ordered list of assignments into mov instructions. 4191 // Lower the pre-ordered list of assignments into mov instructions.
4194 // Also has to do some ad-hoc register allocation as necessary. 4192 // Also has to do some ad-hoc register allocation as necessary.
4195 void TargetX8632::lowerPhiAssignments(CfgNode *Node, 4193 void TargetX8632::lowerPhiAssignments(CfgNode *Node,
4196 const AssignList &Assignments) { 4194 const AssignList &Assignments) {
4197 // Check that this is a properly initialized shell of a node. 4195 // Check that this is a properly initialized shell of a node.
4198 assert(Node->getOutEdges().size() == 1); 4196 assert(Node->getOutEdges().size() == 1);
4199 assert(Node->getInsts().empty()); 4197 assert(Node->getInsts().empty());
4200 assert(Node->getPhis().empty()); 4198 assert(Node->getPhis().empty());
4201 CfgNode *Succ = Node->getOutEdges().front(); 4199 CfgNode *Succ = Node->getOutEdges().front();
4202 getContext().init(Node); 4200 getContext().init(Node);
4203 // Register set setup similar to regAlloc() and postLower(). 4201 // Register set setup similar to regAlloc().
4204 RegSetMask RegInclude = RegSet_All; 4202 RegSetMask RegInclude = RegSet_All;
4205 RegSetMask RegExclude = RegSet_StackPointer; 4203 RegSetMask RegExclude = RegSet_StackPointer;
4206 if (hasFramePointer()) 4204 if (hasFramePointer())
4207 RegExclude |= RegSet_FramePointer; 4205 RegExclude |= RegSet_FramePointer;
4208 llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude); 4206 llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude);
4209 bool NeedsRegs = false; 4207 bool NeedsRegs = false;
4210 // Initialize the set of available registers to the set of what is 4208 // Initialize the set of available registers to the set of what is
4211 // available (not live) at the beginning of the successor block, 4209 // available (not live) at the beginning of the successor block,
4212 // minus all registers used as Dest operands in the Assignments. To 4210 // minus all registers used as Dest operands in the Assignments. To
4213 // do this, we start off assuming all registers are available, then 4211 // do this, we start off assuming all registers are available, then
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after
4505 assert(Type != IceType_i64); 4503 assert(Type != IceType_i64);
4506 Variable *Reg = Func->makeVariable(Type); 4504 Variable *Reg = Func->makeVariable(Type);
4507 if (RegNum == Variable::NoRegister) 4505 if (RegNum == Variable::NoRegister)
4508 Reg->setWeightInfinite(); 4506 Reg->setWeightInfinite();
4509 else 4507 else
4510 Reg->setRegNum(RegNum); 4508 Reg->setRegNum(RegNum);
4511 return Reg; 4509 return Reg;
4512 } 4510 }
4513 4511
4514 void TargetX8632::postLower() { 4512 void TargetX8632::postLower() {
4515 if (Ctx->getOptLevel() != Opt_m1) { 4513 if (Ctx->getOptLevel() == Opt_m1)
4516 // Find two-address non-SSA instructions where Dest==Src0, and set
4517 // the DestNonKillable flag to keep liveness analysis consistent.
4518 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) {
4519 if (Inst->isDeleted())
4520 continue;
4521 if (Variable *Dest = Inst->getDest()) {
4522 // TODO(stichnot): We may need to consider all source
4523 // operands, not just the first one, if using 3-address
4524 // instructions.
4525 if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest)
4526 Inst->setDestNonKillable();
4527 }
4528 }
4529 return; 4514 return;
4530 } 4515 // Find two-address non-SSA instructions where Dest==Src0, and set
4531 // TODO: Avoid recomputing WhiteList every instruction. 4516 // the DestNonKillable flag to keep liveness analysis consistent.
4532 RegSetMask RegInclude = RegSet_All;
4533 RegSetMask RegExclude = RegSet_StackPointer;
4534 if (hasFramePointer())
4535 RegExclude |= RegSet_FramePointer;
4536 llvm::SmallBitVector WhiteList = getRegisterSet(RegInclude, RegExclude);
4537 // Make one pass to black-list pre-colored registers. TODO: If
4538 // there was some prior register allocation pass that made register
4539 // assignments, those registers need to be black-listed here as
4540 // well.
4541 llvm::DenseMap<const Variable *, const Inst *> LastUses;
4542 // The first pass also keeps track of which instruction is the last
4543 // use for each infinite-weight variable. After the last use, the
4544 // variable is released to the free list.
4545 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) { 4517 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) {
4546 if (Inst->isDeleted()) 4518 if (Inst->isDeleted())
4547 continue; 4519 continue;
4548 // Don't consider a FakeKill instruction, because (currently) it 4520 if (Variable *Dest = Inst->getDest()) {
4549 // is only used to kill all scratch registers at a call site, and 4521 // TODO(stichnot): We may need to consider all source
4550 // we don't want to black-list all scratch registers during the 4522 // operands, not just the first one, if using 3-address
4551 // call lowering. This could become a problem since it relies on 4523 // instructions.
4552 // the lowering sequence not keeping any infinite-weight variables 4524 if (Inst->getSrcSize() > 0 && Inst->getSrc(0) == Dest)
4553 // live across a call. TODO(stichnot): Consider replacing this 4525 Inst->setDestNonKillable();
4554 // whole postLower() implementation with a robust local register
4555 // allocator, for example compute live ranges only for pre-colored
4556 // and infinite-weight variables and run the existing linear-scan
4557 // allocator.
4558 assert(!llvm::isa<InstFakeKill>(Inst) || Inst->getSrcSize() == 0);
4559 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
4560 Operand *Src = Inst->getSrc(SrcNum);
4561 SizeT NumVars = Src->getNumVars();
4562 for (SizeT J = 0; J < NumVars; ++J) {
4563 const Variable *Var = Src->getVar(J);
4564 // Track last uses of all variables, regardless of whether
4565 // they are pre-colored or infinite-weight.
4566 LastUses[Var] = Inst;
4567 if (!Var->hasReg())
4568 continue;
4569 WhiteList[Var->getRegNum()] = false;
4570 }
4571 } 4526 }
4572 } 4527 }
4573 // The second pass colors infinite-weight variables.
4574 llvm::SmallBitVector AvailableRegisters = WhiteList;
4575 llvm::SmallBitVector FreedRegisters(WhiteList.size());
4576 for (auto Inst = Context.begin(), E = Context.end(); Inst != E; ++Inst) {
4577 FreedRegisters.reset();
4578 if (Inst->isDeleted())
4579 continue;
4580 // Iterate over all variables referenced in the instruction,
4581 // including the Dest variable (if any). If the variable is
4582 // marked as infinite-weight, find it a register. If this
4583 // instruction is the last use of the variable in the lowered
4584 // sequence, release the register to the free list after this
4585 // instruction is completely processed. Note that the first pass
4586 // ignores the Dest operand, under the assumption that a
4587 // pre-colored Dest will appear as a source operand in some
4588 // subsequent instruction in the lowered sequence.
4589 Variable *Dest = Inst->getDest();
4590 SizeT NumSrcs = Inst->getSrcSize();
4591 if (Dest)
4592 ++NumSrcs;
4593 if (NumSrcs == 0)
4594 continue;
4595 OperandList Srcs(NumSrcs);
4596 for (SizeT i = 0; i < Inst->getSrcSize(); ++i)
4597 Srcs[i] = Inst->getSrc(i);
4598 if (Dest)
4599 Srcs[NumSrcs - 1] = Dest;
4600 for (SizeT SrcNum = 0; SrcNum < NumSrcs; ++SrcNum) {
4601 Operand *Src = Srcs[SrcNum];
4602 SizeT NumVars = Src->getNumVars();
4603 for (SizeT J = 0; J < NumVars; ++J) {
4604 Variable *Var = Src->getVar(J);
4605 if (!Var->hasReg() && Var->getWeight().isInf()) {
4606 llvm::SmallBitVector AvailableTypedRegisters =
4607 AvailableRegisters & getRegisterSetForType(Var->getType());
4608 assert(AvailableTypedRegisters.any());
4609 int32_t RegNum = AvailableTypedRegisters.find_first();
4610 Var->setRegNum(RegNum);
4611 AvailableRegisters[RegNum] = false;
4612 }
4613 if (Var->hasReg()) {
4614 int32_t RegNum = Var->getRegNum();
4615 assert(!AvailableRegisters[RegNum]);
4616 if (LastUses[Var] == Inst) {
4617 if (WhiteList[RegNum])
4618 FreedRegisters[RegNum] = true;
4619 }
4620 }
4621 }
4622 }
4623 AvailableRegisters |= FreedRegisters;
4624 }
4625 } 4528 }
4626 4529
4627 template <> void ConstantInteger32::emit(GlobalContext *Ctx) const { 4530 template <> void ConstantInteger32::emit(GlobalContext *Ctx) const {
4628 Ostream &Str = Ctx->getStrEmit(); 4531 Ostream &Str = Ctx->getStrEmit();
4629 Str << "$" << (int32_t)getValue(); 4532 Str << "$" << (int32_t)getValue();
4630 } 4533 }
4631 4534
4632 template <> void ConstantInteger64::emit(GlobalContext *) const { 4535 template <> void ConstantInteger64::emit(GlobalContext *) const {
4633 llvm_unreachable("Not expecting to emit 64-bit integers"); 4536 llvm_unreachable("Not expecting to emit 64-bit integers");
4634 } 4537 }
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
4737 } else if (IsConstant || IsExternal) 4640 } else if (IsConstant || IsExternal)
4738 Str << "\t.zero\t" << Size << "\n"; 4641 Str << "\t.zero\t" << Size << "\n";
4739 // Size is part of .comm. 4642 // Size is part of .comm.
4740 4643
4741 if (IsConstant || HasNonzeroInitializer || IsExternal) 4644 if (IsConstant || HasNonzeroInitializer || IsExternal)
4742 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4645 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4743 // Size is part of .comm. 4646 // Size is part of .comm.
4744 } 4647 }
4745 4648
4746 } // end of namespace Ice 4649 } // end of namespace Ice
OLD | NEW
« no previous file with comments | « src/IceTargetLowering.cpp ('k') | src/IceTranslator.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698