Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1352)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1253833002: Subzero: Cleanly implement register allocation after phi lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Improve translation-time performance Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | src/IceTimerTree.def » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 369 matching lines...) Expand 10 before | Expand all | Expand 10 after
380 // The post-codegen dump is done here, after liveness analysis and associated 380 // The post-codegen dump is done here, after liveness analysis and associated
381 // cleanup, to make the dump cleaner and more useful. 381 // cleanup, to make the dump cleaner and more useful.
382 Func->dump("After initial x8632 codegen"); 382 Func->dump("After initial x8632 codegen");
383 Func->getVMetadata()->init(VMK_All); 383 Func->getVMetadata()->init(VMK_All);
384 regAlloc(RAK_Global); 384 regAlloc(RAK_Global);
385 if (Func->hasError()) 385 if (Func->hasError())
386 return; 386 return;
387 Func->dump("After linear scan regalloc"); 387 Func->dump("After linear scan regalloc");
388 388
389 if (Ctx->getFlags().getPhiEdgeSplit()) { 389 if (Ctx->getFlags().getPhiEdgeSplit()) {
390 // We need to pause constant blinding or pooling during advanced phi 390 Func->advancedPhiLowering();
391 // lowering, unless the lowering assignment has a physical register for the
392 // dest Variable.
393 {
394 BoolFlagSaver B(RandomizationPoolingPaused, true);
395 Func->advancedPhiLowering();
396 }
397 Func->dump("After advanced Phi lowering"); 391 Func->dump("After advanced Phi lowering");
398 } 392 }
399 393
400 // Stack frame mapping. 394 // Stack frame mapping.
401 Func->genFrame(); 395 Func->genFrame();
402 if (Func->hasError()) 396 if (Func->hasError())
403 return; 397 return;
404 Func->dump("After stack frame mapping"); 398 Func->dump("After stack frame mapping");
405 399
406 Func->contractEmptyNodes(); 400 Func->contractEmptyNodes();
(...skipping 1650 matching lines...) Expand 10 before | Expand all | Expand 10 after
2057 Operand *Src0Lo = loOperand(Src0); 2051 Operand *Src0Lo = loOperand(Src0);
2058 Operand *Src0Hi = hiOperand(Src0); 2052 Operand *Src0Hi = hiOperand(Src0);
2059 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2053 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2060 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2054 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2061 Variable *T_Lo = nullptr, *T_Hi = nullptr; 2055 Variable *T_Lo = nullptr, *T_Hi = nullptr;
2062 _mov(T_Lo, Src0Lo); 2056 _mov(T_Lo, Src0Lo);
2063 _mov(DestLo, T_Lo); 2057 _mov(DestLo, T_Lo);
2064 _mov(T_Hi, Src0Hi); 2058 _mov(T_Hi, Src0Hi);
2065 _mov(DestHi, T_Hi); 2059 _mov(DestHi, T_Hi);
2066 } else { 2060 } else {
2067 Operand *RI; 2061 Operand *Src0Legal;
2068 if (Dest->hasReg()) { 2062 if (Dest->hasReg()) {
2069 // If Dest already has a physical register, then legalize the 2063 // If Dest already has a physical register, then only basic legalization
2070 // Src operand into a Variable with the same register 2064 // is needed, as the source operand can be a register, immediate, or
2071 // assignment. This is mostly a workaround for advanced phi 2065 // memory.
2072 // lowering's ad-hoc register allocation which assumes no 2066 Src0Legal = legalize(Src0);
2073 // register allocation is needed when at least one of the
2074 // operands is non-memory.
2075
2076 // If we have a physical register for the dest variable, we can
2077 // enable our constant blinding or pooling again. Note this is
2078 // only for advancedPhiLowering(), the flag flip should leave
2079 // no other side effect.
2080 {
2081 BoolFlagSaver B(RandomizationPoolingPaused, false);
2082 RI = legalize(Src0, Legal_Reg, Dest->getRegNum());
2083 }
2084 } else { 2067 } else {
2085 // If Dest could be a stack operand, then RI must be a physical 2068 // If Dest could be a stack operand, then RI must be a physical
2086 // register or a scalar integer immediate. 2069 // register or a scalar integer immediate.
2087 RI = legalize(Src0, Legal_Reg | Legal_Imm); 2070 Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm);
2088 } 2071 }
2089 if (isVectorType(Dest->getType())) 2072 if (isVectorType(Dest->getType()))
2090 _movp(Dest, RI); 2073 _movp(Dest, Src0Legal);
2091 else 2074 else
2092 _mov(Dest, RI); 2075 _mov(Dest, Src0Legal);
2093 } 2076 }
2094 } 2077 }
2095 2078
2096 template <class Machine> 2079 template <class Machine>
2097 void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) { 2080 void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {
2098 if (Inst->isUnconditional()) { 2081 if (Inst->isUnconditional()) {
2099 _br(Inst->getTargetUnconditional()); 2082 _br(Inst->getTargetUnconditional());
2100 return; 2083 return;
2101 } 2084 }
2102 Operand *Cond = Inst->getCondition(); 2085 Operand *Cond = Inst->getCondition();
(...skipping 2828 matching lines...) Expand 10 before | Expand all | Expand 10 after
4931 /// turned into zeroes, since loOperand() and hiOperand() don't expect 4914 /// turned into zeroes, since loOperand() and hiOperand() don't expect
4932 /// Undef input. 4915 /// Undef input.
4933 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { 4916 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
4934 // Pause constant blinding or pooling; blinding or pooling will be done later 4917 // Pause constant blinding or pooling; blinding or pooling will be done later
4935 // during phi lowering assignments. 4918 // during phi lowering assignments.
4936 BoolFlagSaver B(RandomizationPoolingPaused, true); 4919 BoolFlagSaver B(RandomizationPoolingPaused, true);
4937 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( 4920 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
4938 this, Context.getNode(), Func); 4921 this, Context.getNode(), Func);
4939 } 4922 }
4940 4923
4941 inline bool isMemoryOperand(const Operand *Opnd) {
4942 if (const auto Var = llvm::dyn_cast<Variable>(Opnd))
4943 return !Var->hasReg();
4944 // We treat vector undef values the same as a memory operand,
4945 // because they do in fact need a register to materialize the vector
4946 // of zeroes into.
4947 if (llvm::isa<ConstantUndef>(Opnd))
4948 return isScalarFloatingType(Opnd->getType()) ||
4949 isVectorType(Opnd->getType());
4950 if (llvm::isa<Constant>(Opnd))
4951 return isScalarFloatingType(Opnd->getType());
4952 return true;
4953 }
4954
4955 /// Lower the pre-ordered list of assignments into mov instructions.
4956 /// Also has to do some ad-hoc register allocation as necessary.
4957 template <class Machine>
4958 void TargetX86Base<Machine>::lowerPhiAssignments(
4959 CfgNode *Node, const AssignList &Assignments) {
4960 // Check that this is a properly initialized shell of a node.
4961 assert(Node->getOutEdges().size() == 1);
4962 assert(Node->getInsts().empty());
4963 assert(Node->getPhis().empty());
4964 CfgNode *Succ = Node->getOutEdges().front();
4965 getContext().init(Node);
4966 // Register set setup similar to regAlloc().
4967 RegSetMask RegInclude = RegSet_All;
4968 RegSetMask RegExclude = RegSet_StackPointer;
4969 if (hasFramePointer())
4970 RegExclude |= RegSet_FramePointer;
4971 llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude);
4972 bool NeedsRegs = false;
4973 // Initialize the set of available registers to the set of what is
4974 // available (not live) at the beginning of the successor block,
4975 // minus all registers used as Dest operands in the Assignments. To
4976 // do this, we start off assuming all registers are available, then
4977 // iterate through the Assignments and remove Dest registers.
4978 // During this iteration, we also determine whether we will actually
4979 // need any extra registers for memory-to-memory copies. If so, we
4980 // do the actual work of removing the live-in registers from the
4981 // set. TODO(stichnot): This work is being repeated for every split
4982 // edge to the successor, so consider updating LiveIn just once
4983 // after all the edges are split.
4984 for (const Inst &I : Assignments) {
4985 Variable *Dest = I.getDest();
4986 if (Dest->hasReg()) {
4987 Available[Dest->getRegNum()] = false;
4988 } else if (isMemoryOperand(I.getSrc(0))) {
4989 NeedsRegs = true; // Src and Dest are both in memory
4990 }
4991 }
4992 if (NeedsRegs) {
4993 LivenessBV &LiveIn = Func->getLiveness()->getLiveIn(Succ);
4994 for (int i = LiveIn.find_first(); i != -1; i = LiveIn.find_next(i)) {
4995 Variable *Var = Func->getLiveness()->getVariable(i, Succ);
4996 if (Var->hasReg())
4997 Available[Var->getRegNum()] = false;
4998 }
4999 }
5000 // Iterate backwards through the Assignments. After lowering each
5001 // assignment, add Dest to the set of available registers, and
5002 // remove Src from the set of available registers. Iteration is
5003 // done backwards to enable incremental updates of the available
5004 // register set, and the lowered instruction numbers may be out of
5005 // order, but that can be worked around by renumbering the block
5006 // afterwards if necessary.
5007 for (const Inst &I : reverse_range(Assignments)) {
5008 Context.rewind();
5009 auto Assign = llvm::dyn_cast<InstAssign>(&I);
5010 Variable *Dest = Assign->getDest();
5011
5012 // If the source operand is ConstantUndef, do not legalize it. In function
5013 // test_split_undef_int_vec, the advanced phi lowering process will find an
5014 // assignment of undefined vector. This vector, as the Src here, will crash
5015 // if it goes through legalize(). legalize() will create a new variable with
5016 // makeVectorOfZeros(), but this new variable will be assigned a stack
5017 // slot. This will fail with pxor(Var, Var) because it is an illegal
5018 // instruction form. Note this failure is irrelevant to randomization or
5019 // pooling of constants. So, we do not call legalize() to add pool label
5020 // for the src operands of phi assignment instructions. Instead, we
5021 // manually add pool label for constant float and constant double values
5022 // here. Note going through legalize() does not affect the testing results
5023 // of SPEC2K and xtests.
5024 Operand *Src = Assign->getSrc(0);
5025 if (!llvm::isa<ConstantUndef>(Assign->getSrc(0))) {
5026 Src = legalize(Src);
5027 }
5028
5029 Variable *SrcVar = llvm::dyn_cast<Variable>(Src);
5030 // Use normal assignment lowering, except lower mem=mem specially
5031 // so we can register-allocate at the same time.
5032 if (!isMemoryOperand(Dest) || !isMemoryOperand(Src)) {
5033 lowerAssign(Assign);
5034 } else {
5035 assert(Dest->getType() == Src->getType());
5036 const llvm::SmallBitVector &RegsForType =
5037 getRegisterSetForType(Dest->getType());
5038 llvm::SmallBitVector AvailRegsForType = RegsForType & Available;
5039 Variable *SpillLoc = nullptr;
5040 Variable *Preg = nullptr;
5041 // TODO(stichnot): Opportunity for register randomization.
5042 int32_t RegNum = AvailRegsForType.find_first();
5043 bool IsVector = isVectorType(Dest->getType());
5044 bool NeedSpill = (RegNum == -1);
5045 if (NeedSpill) {
5046 // Pick some register to spill and update RegNum.
5047 // TODO(stichnot): Opportunity for register randomization.
5048 RegNum = RegsForType.find_first();
5049 Preg = getPhysicalRegister(RegNum, Dest->getType());
5050 SpillLoc = Func->makeVariable(Dest->getType());
5051 // Create a fake def of the physical register to avoid
5052 // liveness inconsistency problems during late-stage liveness
5053 // analysis (e.g. asm-verbose mode).
5054 Context.insert(InstFakeDef::create(Func, Preg));
5055 if (IsVector)
5056 _movp(SpillLoc, Preg);
5057 else
5058 _mov(SpillLoc, Preg);
5059 }
5060 assert(RegNum >= 0);
5061 if (llvm::isa<ConstantUndef>(Src))
5062 // Materialize an actual constant instead of undef. RegNum is
5063 // passed in for vector types because undef vectors are
5064 // lowered to vector register of zeroes.
5065 Src =
5066 legalize(Src, Legal_All, IsVector ? RegNum : Variable::NoRegister);
5067 Variable *Tmp = makeReg(Dest->getType(), RegNum);
5068 if (IsVector) {
5069 _movp(Tmp, Src);
5070 _movp(Dest, Tmp);
5071 } else {
5072 _mov(Tmp, Src);
5073 _mov(Dest, Tmp);
5074 }
5075 if (NeedSpill) {
5076 // Restore the spilled register.
5077 if (IsVector)
5078 _movp(Preg, SpillLoc);
5079 else
5080 _mov(Preg, SpillLoc);
5081 // Create a fake use of the physical register to keep it live
5082 // for late-stage liveness analysis (e.g. asm-verbose mode).
5083 Context.insert(InstFakeUse::create(Func, Preg));
5084 }
5085 }
5086 // Update register availability before moving to the previous
5087 // instruction on the Assignments list.
5088 if (Dest->hasReg())
5089 Available[Dest->getRegNum()] = true;
5090 if (SrcVar && SrcVar->hasReg())
5091 Available[SrcVar->getRegNum()] = false;
5092 }
5093
5094 // Add the terminator branch instruction to the end.
5095 Context.setInsertPoint(Context.getEnd());
5096 _br(Succ);
5097 }
5098
5099 // There is no support for loading or emitting vector constants, so the 4924 // There is no support for loading or emitting vector constants, so the
5100 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, 4925 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,
5101 // etc. are initialized with register operations. 4926 // etc. are initialized with register operations.
5102 // 4927 //
5103 // TODO(wala): Add limited support for vector constants so that 4928 // TODO(wala): Add limited support for vector constants so that
5104 // complex initialization in registers is unnecessary. 4929 // complex initialization in registers is unnecessary.
5105 4930
5106 template <class Machine> 4931 template <class Machine>
5107 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) { 4932 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {
5108 Variable *Reg = makeReg(Ty, RegNum); 4933 Variable *Reg = makeReg(Ty, RegNum);
(...skipping 534 matching lines...) Expand 10 before | Expand all | Expand 10 after
5643 } 5468 }
5644 // the offset is not eligible for blinding or pooling, return the original 5469 // the offset is not eligible for blinding or pooling, return the original
5645 // mem operand 5470 // mem operand
5646 return MemOperand; 5471 return MemOperand;
5647 } 5472 }
5648 5473
5649 } // end of namespace X86Internal 5474 } // end of namespace X86Internal
5650 } // end of namespace Ice 5475 } // end of namespace Ice
5651 5476
5652 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5477 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | src/IceTimerTree.def » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698