Chromium Code Reviews| Index: src/IceTargetLoweringX8632.cpp |
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
| index 4b8bf275e72f5bab532d187b0a3c6bff1f640606..dad6497bce118140071868d80d3d6892232279ff 100644 |
| --- a/src/IceTargetLoweringX8632.cpp |
| +++ b/src/IceTargetLoweringX8632.cpp |
| @@ -210,6 +210,93 @@ TargetX8632::TargetX8632(Cfg *Func) |
| TypeToRegisterSet[IceType_f64] = FloatRegisters; |
| } |
| +// Runs the translateO2 pass sequence over Func, in order: Phi lowering
| +// (placePhiLoads, placePhiStores, deletePhis), address mode optimization,
| +// liveness analysis, code generation, a second liveness pass, register
| +// allocation, and stack frame generation. Each pass is bracketed by its
| +// own Timer whose elapsed time is reported through printElapsedUs().
| +// After every pass except doAddressOpt(), translation returns immediately
| +// if Func reports an error.
| +void TargetX8632::translateO2() {
| + GlobalContext *Context = Func->getContext();
| + Ostream &Str = Context->getStrDump();
| + Timer T_placePhiLoads;
| + Func->placePhiLoads();
| + if (Func->hasError())
| + return;
| + T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
| + Timer T_placePhiStores;
| + Func->placePhiStores();
| + if (Func->hasError())
| + return;
| + T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
| + Timer T_deletePhis;
| + Func->deletePhis();
| + if (Func->hasError())
| + return;
| + T_deletePhis.printElapsedUs(Context, "deletePhis()");
| + Timer T_renumber1;
| + Func->renumberInstructions();
| + if (Func->hasError())
| + return;
| + T_renumber1.printElapsedUs(Context, "renumberInstructions()");
| + // Only the banner is guarded by isVerbose(); Func->dump() below is called
| + // unconditionally. NOTE(review): presumably dump() applies its own
| + // verbosity filtering -- confirm. The same pattern repeats after every
| + // stage in this function.
| + if (Context->isVerbose())
| + Str << "================ After Phi lowering ================\n";
| + Func->dump();

jvoung (off chromium)
2014/05/28 17:48:15
dump() vs isVerbose() consistency?
Jim Stichnoth
2014/05/29 01:39:46
The Cfg-level dumping should be cleaned up now.
|
| +
| + // Unlike the other passes, no hasError() check follows doAddressOpt().
| + Timer T_doAddressOpt;
| + Func->doAddressOpt();
| + T_doAddressOpt.printElapsedUs(Context, "doAddressOpt()");

jvoung (off chromium)
2014/05/28 17:48:15
Does doAddressOpt() depend on the instruction numb
Jim Stichnoth
2014/05/29 01:39:46
You're right. Hopefully this is cleaned up now an
|
| + // Liveness may be incorrect after address mode optimization.
| + Timer T_renumber2;
| + Func->renumberInstructions();
| + if (Func->hasError())
| + return;
| + T_renumber2.printElapsedUs(Context, "renumberInstructions()");
| + // TODO: It should be sufficient to use the fastest liveness
| + // calculation, i.e. Liveness_LREndLightweight. However, for
| + // some reason that slows down the rest of the translation.
| + // Investigate.
| + Timer T_liveness1;
| + Func->liveness(Liveness_LREndFull);
| + if (Func->hasError())
| + return;
| + T_liveness1.printElapsedUs(Context, "liveness()");
| + if (Context->isVerbose())
| + Str << "================ After x86 address mode opt ================\n";
| + Func->dump();
| + Timer T_genCode;
| + Func->genCode();
| + if (Func->hasError())
| + return;
| + T_genCode.printElapsedUs(Context, "genCode()");
| + // Renumber and recompute liveness (this time with Liveness_RangesFull)
| + // on the freshly generated code, ahead of register allocation.
| + Timer T_renumber3;
| + Func->renumberInstructions();
| + if (Func->hasError())
| + return;
| + T_renumber3.printElapsedUs(Context, "renumberInstructions()");
| + Timer T_liveness2;
| + Func->liveness(Liveness_RangesFull);
| + if (Func->hasError())
| + return;
| + T_liveness2.printElapsedUs(Context, "liveness()");
| + // Record that live ranges have been computed; addProlog() checks this
| + // flag before skipping variables whose live range is empty.
| + ComputedLiveRanges = true;
| + if (Context->isVerbose())
| + Str << "================ After initial x8632 codegen ================\n";
| + Func->dump();
| +
| + Timer T_regAlloc;
| + regAlloc();
| + if (Func->hasError())
| + return;
| + T_regAlloc.printElapsedUs(Context, "regAlloc()");
| + if (Context->isVerbose())
| + Str << "================ After linear scan regalloc ================\n";
| + Func->dump();
| +
| + Timer T_genFrame;
| + Func->genFrame();
| + if (Func->hasError())
| + return;
| + T_genFrame.printElapsedUs(Context, "genFrame()");
| + if (Context->isVerbose())
| + Str << "================ After stack frame mapping ================\n";
| + Func->dump();
| +}
| + |
| void TargetX8632::translateOm1() { |
| GlobalContext *Context = Func->getContext(); |
| Ostream &Str = Context->getStrDump(); |
| @@ -398,6 +485,9 @@ void TargetX8632::addProlog(CfgNode *Node) { |
| // An argument passed on the stack already has a stack slot. |
| if (Var->getIsArg()) |
| continue; |
| + // An unreferenced variable doesn't need a stack slot. |
| + if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) |
| + continue; |
| // A spill slot linked to a variable with a stack slot should reuse |
| // that stack slot. |
| if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { |
| @@ -483,6 +573,8 @@ void TargetX8632::addProlog(CfgNode *Node) { |
| } |
| if (Var->getIsArg()) |
| continue; |
| + if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) |
| + continue; |
| if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { |
| if (Variable *Linked = Var->getPreferredRegister()) { |
| if (!Linked->hasReg()) { |
| @@ -1601,6 +1693,37 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
| Operand *Src1 = legalize(Inst->getSrc(1)); |
| Variable *Dest = Inst->getDest(); |
| + // If Src1 is an immediate, or known to be a physical register, we can |
| + // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into |
| + // a physical register. (Actually, either Src0 or Src1 can be chosen for |
| + // the physical register, but unfortunately we have to commit to one or |
| + // the other before register allocation.) |
| + bool IsSrc1ImmOrReg = false; |
| + if (llvm::isa<Constant>(Src1)) { |
| + IsSrc1ImmOrReg = true; |
| + } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| + if (Var->hasReg()) |
| + IsSrc1ImmOrReg = true; |
| + } |
| + |
| + // Try to fuse a compare immediately followed by a conditional branch. This |
| + // is possible when the compare dest and the branch source operands are the |
| + // same, and are their only uses. TODO: implement this optimization for i64. |
| + if (InstBr *NextBr = llvm::dyn_cast_or_null<InstBr>(Context.getNextInst())) { |
| + if (Src0->getType() != IceType_i64 && !NextBr->isUnconditional() && |
| + Dest == NextBr->getSrc(0) && NextBr->isLastUse(Dest)) { |
| + Operand *Src0New = |
| + legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
| + _cmp(Src0New, Src1); |
| + _br(getIcmp32Mapping(Inst->getCondition()), NextBr->getTargetTrue(), |
| + NextBr->getTargetFalse()); |
| + // Skip over the following branch instruction. |
| + NextBr->setDeleted(); |
| + Context.advanceNext(); |
| + return; |
| + } |
| + } |
| + |
| // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: |
| Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); |
| Constant *One = Ctx->getConstantInt(IceType_i32, 1); |
| @@ -1637,19 +1760,6 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
| return; |
| } |
| - // If Src1 is an immediate, or known to be a physical register, we can |
| - // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into |
| - // a physical register. (Actually, either Src0 or Src1 can be chosen for |
| - // the physical register, but unfortunately we have to commit to one or |
| - // the other before register allocation.) |
| - bool IsSrc1ImmOrReg = false; |
| - if (llvm::isa<Constant>(Src1)) { |
| - IsSrc1ImmOrReg = true; |
| - } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| - if (Var->hasReg()) |
| - IsSrc1ImmOrReg = true; |
| - } |
| - |
| // cmp b, c |
| Operand *Src0New = |
| legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); |
| @@ -1662,6 +1772,134 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
| Context.insert(Label); |
| } |
| +namespace { |
| + |
| +// Returns true only when Inst is non-null, is an InstArithmetic, and its
| +// opcode is InstArithmetic::Add. A null Inst yields false.
| +bool isAdd(const Inst *Inst) {
| + const InstArithmetic *MaybeArith =
| + llvm::dyn_cast_or_null<const InstArithmetic>(Inst);
| + if (MaybeArith == NULL)
| + return false;
| + return MaybeArith->getOp() == InstArithmetic::Add;
| +}
| + |
| +// Tries to fold the instructions that compute an address into x86
| +// addressing-mode components by walking variable definitions.
| +//
| +// Base, Index and Shift are in/out parameters. Both callers in this file
| +// pass Index==NULL, Shift==0 and Offset==0. On return Base/Index/Shift may
| +// have been rewritten; a caller detects a change by comparing Base against
| +// the value it passed in. The patterns implemented so far are:
| +//   Base=Var (assignment)         ==> Base=Var
| +//   Index==NULL && Base=Var1+Var2 ==> Base=Var1, Index=Var2, Shift=0
| +//   Index=Var*Const, Const in {1,2,4,8}, resulting Shift<=3
| +//                                 ==> Index=Var, Shift+=log2(Const)
| +// Offset is not modified yet; the remaining patterns are listed as
| +// comments at the bottom of the loop. Nothing is done when Base is NULL or
| +// when Base is live across multiple blocks.
| +void computeAddressOpt(Cfg * /*Func*/, Variable *&Base, Variable *&Index,

jvoung (off chromium)
2014/05/28 17:48:15
Is this expected to change to use the Func paramet
Jim Stichnoth
2014/05/29 01:39:46
Removed. I think this was originally meant for th
|
| + int32_t &Shift, int32_t &Offset) {
| + (void)Offset; // TODO: pattern-match for non-zero offsets.
| + if (Base == NULL)
| + return;
| + // If the Base has more than one use or is live across multiple
| + // blocks, then don't go further. Alternatively (?), never consider
| + // a transformation that would change a variable that is currently
| + // *not* live across basic block boundaries into one that *is*.
| + if (Base->isMultiblockLife() /* || Base->getUseCount() > 1*/)
| + return;
| +
| + // Each successful match rewrites Base/Index/Shift and restarts the loop
| + // so that all patterns are retried against the new values; the loop ends
| + // once no pattern matches.
| + while (true) {
| + // Base is Base=Var ==>
| + // set Base=Var
| + const Inst *BaseInst = Base->getDefinition();
| + // Only read operand 0 when the defining instruction actually has one,
| + // mirroring the getSrcSize() guard used for operand 1 below.
| + Operand *BaseOperand0 =
| + BaseInst && BaseInst->getSrcSize() >= 1 ? BaseInst->getSrc(0) : NULL;
| + Variable *BaseVariable0 = llvm::dyn_cast_or_null<Variable>(BaseOperand0);
| + if (BaseInst && llvm::isa<InstAssign>(BaseInst) && BaseVariable0 &&
| + // TODO: ensure BaseVariable0 stays single-BB
| + true) {
| + Base = BaseVariable0;
| +
| + continue;
| + }
| +
| + // Index is Index=Var ==>
| + // set Index=Var

jvoung (off chromium)
2014/05/28 17:48:15
Maybe the assignment transitivity checks could be
Jim Stichnoth
2014/05/29 01:39:46
Added a TODO (since it's actually only implemented
|
| +
| + // Index==NULL && Base is Base=Var1+Var2 ==>
| + // set Base=Var1, Index=Var2, Shift=0
| + Operand *BaseOperand1 =
| + BaseInst && BaseInst->getSrcSize() >= 2 ? BaseInst->getSrc(1) : NULL;
| + Variable *BaseVariable1 = llvm::dyn_cast_or_null<Variable>(BaseOperand1);
| + if (Index == NULL && isAdd(BaseInst) && BaseVariable0 && BaseVariable1 &&
| + // TODO: ensure BaseVariable0 and BaseVariable1 stay single-BB
| + true) {
| + Base = BaseVariable0;
| + Index = BaseVariable1;
| + Shift = 0; // should already have been 0
| + continue;
| + }
| +
| + // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
| + // Index=Var, Shift+=log2(Const)
| + const Inst *IndexInst = Index ? Index->getDefinition() : NULL;
| + if (const InstArithmetic *ArithInst =
| + llvm::dyn_cast_or_null<InstArithmetic>(IndexInst)) {
| + Operand *IndexOperand0 = ArithInst->getSrc(0);
| + Variable *IndexVariable0 = llvm::dyn_cast<Variable>(IndexOperand0);
| + Operand *IndexOperand1 = ArithInst->getSrc(1);
| + ConstantInteger *IndexConstant1 =
| + llvm::dyn_cast<ConstantInteger>(IndexOperand1);
| + if (ArithInst->getOp() == InstArithmetic::Mul && IndexVariable0 &&
| + IndexOperand1->getType() == IceType_i32 && IndexConstant1) {
| + uint64_t Mult = IndexConstant1->getValue();
| + uint32_t LogMult;
| + // Map the multiplier to its log2. Any multiplier other than 1, 2, 4
| + // or 8 gets the sentinel 4, which always fails the "<= 3" test below.
| + switch (Mult) {
| + case 1:
| + LogMult = 0;
| + break;
| + case 2:
| + LogMult = 1;
| + break;
| + case 4:
| + LogMult = 2;
| + break;
| + case 8:
| + LogMult = 3;
| + break;
| + default:
| + LogMult = 4;
| + break;
| + }
| + if (Shift + LogMult <= 3) {
| + Index = IndexVariable0;
| + Shift += LogMult;
| + continue;
| + }
| + }
| + }
| +
| + // Index is Index=Var<<Const && Const+Shift<=3 ==>
| + // Index=Var, Shift+=Const
| +
| + // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
| + // Index=Var, Shift+=log2(Const)
| +
| + // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
| + // swap(Index,Base)
| + // Similar for Base=Const*Var and Base=Var<<Const
| +
| + // Base is Base=Var+Const ==>
| + // set Base=Var, Offset+=Const
| +
| + // Base is Base=Const+Var ==>
| + // set Base=Var, Offset+=Const
| +
| + // Base is Base=Var-Const ==>
| + // set Base=Var, Offset-=Const
| +
| + // Index is Index=Var+Const ==>
| + // set Index=Var, Offset+=(Const<<Shift)
| +
| + // Index is Index=Const+Var ==>
| + // set Index=Var, Offset+=(Const<<Shift)
| +
| + // Index is Index=Var-Const ==>
| + // set Index=Var, Offset-=(Const<<Shift)
| +
| + // TODO: consider overflow issues with respect to Offset.
| + // TODO: handle symbolic constants.
| + break;
| + }
| +}
| + |
| +} // anonymous namespace |
| + |
| void TargetX8632::lowerLoad(const InstLoad *Inst) { |
| // A Load instruction can be treated the same as an Assign |
| // instruction, after the source operand is transformed into an |
| @@ -1679,10 +1917,64 @@ void TargetX8632::lowerLoad(const InstLoad *Inst) { |
| Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset); |
| } |
| + // Fuse this load with a subsequent Arithmetic instruction in the |
| + // following situations: |
| + // a=[mem]; c=b+a ==> c=b+[mem] if last use of a and a not in b |
| + // a=[mem]; c=a+b ==> c=b+[mem] if commutative and above is true |
| + // |
| + // TODO: Clean up and test thoroughly. |
| + // |
| + // TODO: Why limit to Arithmetic instructions? This could probably be |
| + // applied to most any instruction type. Look at all source operands |
| + // in the following instruction, and if there is one instance of the |
| + // load instruction's dest variable, and that instruction ends that |
| + // variable's live range, then make the substitution. Deal with |
| + // commutativity optimization in the arithmetic instruction lowering. |
| + InstArithmetic *NewArith = NULL; |
| + if (InstArithmetic *Arith = |
| + llvm::dyn_cast_or_null<InstArithmetic>(Context.getNextInst())) { |
| + Variable *DestLoad = Inst->getDest(); |
| + Variable *Src0Arith = llvm::dyn_cast<Variable>(Arith->getSrc(0)); |
| + Variable *Src1Arith = llvm::dyn_cast<Variable>(Arith->getSrc(1)); |
| + if (Src1Arith == DestLoad && Arith->isLastUse(Src1Arith) && |
| + DestLoad != Src0Arith) { |
| + NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(), |
| + Arith->getSrc(0), Src0); |
| + } else if (Src0Arith == DestLoad && Arith->isCommutative() && |
| + Arith->isLastUse(Src0Arith) && DestLoad != Src1Arith) { |
| + NewArith = InstArithmetic::create(Func, Arith->getOp(), Arith->getDest(), |
| + Arith->getSrc(1), Src0); |
| + } |
| + if (NewArith) { |
| + Arith->setDeleted(); |
| + Context.advanceNext(); |
| + lowerArithmetic(NewArith); |
| + return; |
| + } |
| + } |
| + |
| InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0); |
| lowerAssign(Assign); |
| } |
| +// Address mode optimization for the instruction at the current position,
| +// treated as a load: its dest is the loaded value and its source 0 is the
| +// address. If computeAddressOpt() ends up with a base variable different
| +// from the original address operand, the original instruction is marked
| +// deleted and a new InstLoad is inserted whose address is an
| +// OperandX8632Mem built from the computed base, offset, index and shift.
| +void TargetX8632::doAddressOptLoad() {
| + Inst *LoadInst = *Context.getCur();
| + Variable *LoadDest = LoadInst->getDest();
| + Operand *OrigAddr = LoadInst->getSrc(0);
| + // Start from a bare base: no index, no scaling, no displacement.
| + Variable *IndexVar = NULL;
| + int32_t ShiftAmount = 0;
| + int32_t Disp = 0; // TODO: make Constant
| + Variable *BaseVar = llvm::dyn_cast<Variable>(OrigAddr);
| + computeAddressOpt(Func, BaseVar, IndexVar, ShiftAmount, Disp);
| + // Nothing to rewrite when the address is not a variable, or when the
| + // base came back unchanged.
| + if (BaseVar == NULL || OrigAddr == BaseVar)
| + return;
| + Constant *DispOp = Ctx->getConstantInt(IceType_i32, Disp);
| + Operand *FoldedAddr = OperandX8632Mem::create(
| + Func, LoadDest->getType(), BaseVar, DispOp, IndexVar, ShiftAmount);
| + LoadInst->setDeleted();
| + Context.insert(InstLoad::create(Func, LoadDest, FoldedAddr));
| +}
| + |
| // Phi instructions are not lowered here; reaching this function records an
| // error on Func instead. The instruction argument is unused.
| void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
| const char *const Message = "Phi found in regular instruction list";
| Func->setError(Message);
| }
| @@ -1781,6 +2073,24 @@ void TargetX8632::lowerStore(const InstStore *Inst) { |
| } |
| } |
| +// Address mode optimization for the InstStore at the current position. If
| +// computeAddressOpt() ends up with a base variable different from the
| +// store's original address operand, the original store is marked deleted
| +// and a new InstStore of the same data is inserted whose address is an
| +// OperandX8632Mem built from the computed base, offset, index and shift.
| +void TargetX8632::doAddressOptStore() {
| + InstStore *StoreInst = llvm::cast<InstStore>(*Context.getCur());
| + Operand *StoredData = StoreInst->getData();
| + Operand *OrigAddr = StoreInst->getAddr();
| + // Start from a bare base: no index, no scaling, no displacement.
| + Variable *IndexVar = NULL;
| + int32_t ShiftAmount = 0;
| + int32_t Disp = 0; // TODO: make Constant
| + Variable *BaseVar = llvm::dyn_cast<Variable>(OrigAddr);
| + computeAddressOpt(Func, BaseVar, IndexVar, ShiftAmount, Disp);
| + // Nothing to rewrite when the address is not a variable, or when the
| + // base came back unchanged.
| + if (BaseVar == NULL || OrigAddr == BaseVar)
| + return;
| + Constant *DispOp = Ctx->getConstantInt(IceType_i32, Disp);
| + Operand *FoldedAddr = OperandX8632Mem::create(
| + Func, StoredData->getType(), BaseVar, DispOp, IndexVar, ShiftAmount);
| + StoreInst->setDeleted();
| + Context.insert(InstStore::create(Func, StoredData, FoldedAddr));
| +}
| + |
| void TargetX8632::lowerSwitch(const InstSwitch *Inst) { |
| // This implements the most naive possible lowering. |
| // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default |
| @@ -1952,15 +2262,15 @@ void TargetX8632::postLower() { |
| } |
| } |
| // Writes this constant to the emit stream as a memory operand of the form
| // "dword ptr [L$<IceType_f32>$<pool entry ID>]", i.e. a reference to a
| // label built from the type and getPoolEntryID(). In this change the
| // parameter becomes a GlobalContext* (previously a const Cfg*), so the
| // stream is obtained directly from Ctx.
| -template <> void ConstantFloat::emit(const Cfg *Func) const {
| - Ostream &Str = Func->getContext()->getStrEmit();
| +template <> void ConstantFloat::emit(GlobalContext *Ctx) const {
| + Ostream &Str = Ctx->getStrEmit();
| // It would be better to prefix with ".L$" instead of "L$", but
| // llvm-mc doesn't parse "dword ptr [.L$foo]".
| Str << "dword ptr [L$" << IceType_f32 << "$" << getPoolEntryID() << "]";
| }
| // Writes this constant to the emit stream as a memory operand of the form
| // "qword ptr [L$<IceType_f64>$<pool entry ID>]", i.e. a reference to a
| // label built from the type and getPoolEntryID(). In this change the
| // parameter becomes a GlobalContext* (previously a const Cfg*), so the
| // stream is obtained directly from Ctx.
| -template <> void ConstantDouble::emit(const Cfg *Func) const {
| - Ostream &Str = Func->getContext()->getStrEmit();
| +template <> void ConstantDouble::emit(GlobalContext *Ctx) const {
| + Ostream &Str = Ctx->getStrEmit();
| Str << "qword ptr [L$" << IceType_f64 << "$" << getPoolEntryID() << "]";
| }