Index: src/IceTargetLoweringARM32.cpp |
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp |
index 9caa2ff06f05963a73211406777946a1214609f0..0568d051a5c679511e8cd319538723980f47c62d 100644 |
--- a/src/IceTargetLoweringARM32.cpp |
+++ b/src/IceTargetLoweringARM32.cpp |
@@ -22,6 +22,7 @@ |
#include "IceGlobalInits.h" |
#include "IceInstARM32.def" |
#include "IceInstARM32.h" |
+#include "IceInstVarIter.h" |
#include "IceLiveness.h" |
#include "IceOperand.h" |
#include "IcePhiLoweringImpl.h" |
@@ -1803,22 +1804,46 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) { |
} |
} |
-void TargetARM32::lowerBr(const InstBr *Inst) { |
- if (Inst->isUnconditional()) { |
- _br(Inst->getTargetUnconditional()); |
+void TargetARM32::lowerBr(const InstBr *Instr) { |
+ if (Instr->isUnconditional()) { |
+ _br(Instr->getTargetUnconditional()); |
+ return; |
+ } |
+ Operand *Cond = Instr->getCondition(); |
+ |
+ CondARM32::Cond BrCondTrue0 = CondARM32::NE; |
+ CondARM32::Cond BrCondTrue1 = CondARM32::kNone; |
+ CondARM32::Cond BrCondFalse = CondARM32::kNone; |
+ if (!_mov_i1_to_flags(Cond, &BrCondTrue0, &BrCondTrue1, &BrCondFalse)) { |
+ // "Cond" was not folded. |
+ Type Ty = Cond->getType(); |
+ Variable *Src0R = legalizeToReg(Cond); |
+ assert(Ty == IceType_i1); |
+ if (Ty != IceType_i32) |
+ _uxt(Src0R, Src0R); |
+ Constant *_0 = Ctx->getConstantZero(IceType_i32); |
+ _cmp(Src0R, _0); |
+ BrCondTrue0 = CondARM32::NE; |
+ } |
+ |
+ if (BrCondTrue1 != CondARM32::kNone) { |
+ _br(Instr->getTargetTrue(), BrCondTrue1); |
+ } |
+ |
+ if (BrCondTrue0 == CondARM32::kNone) { |
+ assert(BrCondTrue1 == CondARM32::kNone); |
+ _br(Instr->getTargetFalse()); |
+ return; |
+ } |
+ |
+ if (BrCondTrue0 == CondARM32::AL) { |
+ assert(BrCondTrue1 == CondARM32::kNone); |
+ assert(BrCondFalse == CondARM32::kNone); |
+ _br(Instr->getTargetTrue()); |
return; |
} |
- Operand *Cond = Inst->getCondition(); |
- // TODO(jvoung): Handle folding opportunities. |
- Type Ty = Cond->getType(); |
- Variable *Src0R = legalizeToReg(Cond); |
- assert(Ty == IceType_i1); |
- if (Ty != IceType_i32) |
- _uxt(Src0R, Src0R); |
- Constant *Zero = Ctx->getConstantZero(IceType_i32); |
- _cmp(Src0R, Zero); |
- _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE); |
+ _br(Instr->getTargetTrue(), Instr->getTargetFalse(), BrCondTrue0); |
} |
void TargetARM32::lowerCall(const InstCall *Instr) { |
@@ -2050,13 +2075,22 @@ void TargetARM32::lowerCast(const InstCast *Inst) { |
if (Src0->getType() == IceType_i32) { |
Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
_mov(T_Lo, Src0RF); |
- } else if (Src0->getType() == IceType_i1) { |
- Variable *Src0R = legalizeToReg(Src0); |
- _lsl(T_Lo, Src0R, ShiftAmt); |
- _asr(T_Lo, T_Lo, ShiftAmt); |
- } else { |
+ } else if (Src0->getType() != IceType_i1) { |
Variable *Src0R = legalizeToReg(Src0); |
_sxt(T_Lo, Src0R); |
+ } else { |
+ CondARM32::Cond CondTrue0, CondTrue1, CondFalse; |
+ if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) { |
+ // Handle bool folding. |
+ Constant *_0 = Ctx->getConstantZero(IceType_i32); |
+ Operand *_m1 = |
+ legalize(Ctx->getConstantInt32(-1), Legal_Reg | Legal_Flex); |
+ _cmov(T_Lo, _m1, CondTrue0, CondTrue1, _0, CondFalse); |
+ } else { |
+ Variable *Src0R = legalizeToReg(Src0); |
+ _lsl(T_Lo, Src0R, ShiftAmt); |
+ _asr(T_Lo, T_Lo, ShiftAmt); |
+ } |
} |
_mov(DestLo, T_Lo); |
Variable *T_Hi = makeReg(DestHi->getType()); |
@@ -2068,22 +2102,31 @@ void TargetARM32::lowerCast(const InstCast *Inst) { |
_mov(T_Hi, T_Lo); |
} |
_mov(DestHi, T_Hi); |
- } else if (Src0->getType() == IceType_i1) { |
- // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1. |
- // lsl t1, src_reg, 31 |
- // asr t1, t1, 31 |
- // dst = t1 |
+ } else if (Src0->getType() != IceType_i1) { |
+ // t1 = sxt src; dst = t1 |
Variable *Src0R = legalizeToReg(Src0); |
- Constant *ShiftAmt = Ctx->getConstantInt32(31); |
Variable *T = makeReg(Dest->getType()); |
- _lsl(T, Src0R, ShiftAmt); |
- _asr(T, T, ShiftAmt); |
+ _sxt(T, Src0R); |
_mov(Dest, T); |
} else { |
- // t1 = sxt src; dst = t1 |
- Variable *Src0R = legalizeToReg(Src0); |
Variable *T = makeReg(Dest->getType()); |
- _sxt(T, Src0R); |
+ CondARM32::Cond CondTrue0, CondTrue1, CondFalse; |
+ if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) { |
+ // Handle bool folding. |
+ Constant *_0 = Ctx->getConstantZero(IceType_i32); |
+ Operand *_m1 = |
+ legalize(Ctx->getConstantInt32(-1), Legal_Reg | Legal_Flex); |
+ _cmov(T, _m1, CondTrue0, CondTrue1, _0, CondFalse); |
+ } else { |
+ // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1. |
+ // lsl t1, src_reg, 31 |
+ // asr t1, t1, 31 |
+ // dst = t1 |
+ Variable *Src0R = legalizeToReg(Src0); |
+ Constant *ShiftAmt = Ctx->getConstantInt32(31); |
+ _lsl(T, Src0R, ShiftAmt); |
+ _asr(T, T, ShiftAmt); |
+ } |
_mov(Dest, T); |
} |
break; |
@@ -2096,10 +2139,23 @@ void TargetARM32::lowerCast(const InstCast *Inst) { |
UnimplementedError(Func->getContext()->getFlags()); |
} else if (Dest->getType() == IceType_i64) { |
// t1=uxtb src; dst.lo=t1; dst.hi=0 |
- Constant *Zero = Ctx->getConstantZero(IceType_i32); |
+ Constant *_0 = Ctx->getConstantZero(IceType_i32); |
+ Constant *_1 = Ctx->getConstantInt32(1); |
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
Variable *T_Lo = makeReg(DestLo->getType()); |
+ |
+ CondARM32::Cond CondTrue0, CondTrue1, CondFalse; |
+ if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) { |
+ // Handle folding opportunities. |
+ Variable *T_Hi = makeReg(DestLo->getType()); |
+ _mov(T_Hi, _0); |
+ _mov(DestHi, T_Hi); |
+ _cmov(T_Lo, _1, CondTrue0, CondTrue1, _0, CondFalse); |
+ _mov(DestLo, T_Lo); |
+ return; |
+ } |
+ |
// i32 and i1 can just take up the whole register. i32 doesn't need uxt, |
// while i1 will have an and mask later anyway. |
if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { |
@@ -2115,18 +2171,28 @@ void TargetARM32::lowerCast(const InstCast *Inst) { |
} |
_mov(DestLo, T_Lo); |
Variable *T_Hi = makeReg(DestLo->getType()); |
- _mov(T_Hi, Zero); |
+ _mov(T_Hi, _0); |
_mov(DestHi, T_Hi); |
} else if (Src0->getType() == IceType_i1) { |
+ Constant *_1 = Ctx->getConstantInt32(1); |
+ Variable *T = makeReg(Dest->getType()); |
+ |
+ CondARM32::Cond CondTrue0, CondTrue1, CondFalse; |
+ if (_mov_i1_to_flags(Src0, &CondTrue0, &CondTrue1, &CondFalse)) { |
+ // Handle folding opportunities. |
+ Constant *_0 = Ctx->getConstantZero(IceType_i32); |
+ _cmov(T, _1, CondTrue0, CondTrue1, _0, CondFalse); |
+ _mov(Dest, T); |
+ return; |
+ } |
+ |
// t = Src0; t &= 1; Dest = t |
Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
- Constant *One = Ctx->getConstantInt32(1); |
- Variable *T = makeReg(Dest->getType()); |
// Just use _mov instead of _uxt since all registers are 32-bit. _uxt |
// requires the source to be a register so could have required a _mov |
// from legalize anyway. |
_mov(T, Src0RF); |
- _and(T, T, One); |
+ _and(T, T, _1); |
_mov(Dest, T); |
} else { |
// t1 = uxt src; dst = t1 |
@@ -2397,8 +2463,37 @@ struct { |
}; |
} // end of anonymous namespace |
-void TargetARM32::lowerFcmp(const InstFcmp *Inst) { |
- Variable *Dest = Inst->getDest(); |
+void TargetARM32::lowerFcmpCond(const InstFcmp *Instr, |
+ CondARM32::Cond *CondIfTrue0, |
+ CondARM32::Cond *CondIfTrue1, |
+ CondARM32::Cond *CondIfFalse) { |
+ InstFcmp::FCond Condition = Instr->getCondition(); |
+ switch (Condition) { |
+ case InstFcmp::False: |
+ *CondIfFalse = CondARM32::AL; |
+ *CondIfTrue0 = *CondIfTrue1 = CondARM32::kNone; |
+ break; |
+ case InstFcmp::True: |
+ *CondIfFalse = *CondIfTrue1 = CondARM32::kNone; |
+ *CondIfTrue0 = CondARM32::AL; |
+ break; |
+ default: { |
+ Variable *Src0R = legalizeToReg(Instr->getSrc(0)); |
+ Variable *Src1R = legalizeToReg(Instr->getSrc(1)); |
+ _vcmp(Src0R, Src1R); |
+ _vmrs(); |
+ assert(Condition < llvm::array_lengthof(TableFcmp)); |
+ *CondIfTrue0 = TableFcmp[Condition].CC0; |
+ *CondIfTrue1 = TableFcmp[Condition].CC1; |
+ *CondIfFalse = (*CondIfTrue1 != CondARM32::kNone) |
+ ? CondARM32::AL |
+ : InstARM32::getOppositeCondition(*CondIfTrue0); |
+ } |
+ } |
+} |
+ |
+void TargetARM32::lowerFcmp(const InstFcmp *Instr) { |
+ Variable *Dest = Instr->getDest(); |
if (isVectorType(Dest->getType())) { |
Variable *T = makeReg(Dest->getType()); |
Context.insert(InstFakeDef::create(Func, T)); |
@@ -2407,48 +2502,43 @@ void TargetARM32::lowerFcmp(const InstFcmp *Inst) { |
return; |
} |
- Variable *Src0R = legalizeToReg(Inst->getSrc(0)); |
- Variable *Src1R = legalizeToReg(Inst->getSrc(1)); |
Variable *T = makeReg(IceType_i32); |
- _vcmp(Src0R, Src1R); |
- _mov(T, Ctx->getConstantZero(IceType_i32)); |
- _vmrs(); |
- Operand *One = Ctx->getConstantInt32(1); |
- InstFcmp::FCond Condition = Inst->getCondition(); |
- assert(Condition < llvm::array_lengthof(TableFcmp)); |
- CondARM32::Cond CC0 = TableFcmp[Condition].CC0; |
- CondARM32::Cond CC1 = TableFcmp[Condition].CC1; |
- if (CC0 != CondARM32::kNone) { |
- _mov(T, One, CC0); |
- // If this mov is not a maybe mov, but an actual mov (i.e., CC0 == AL), we |
- // don't want to _set_dest_redefined so that liveness + dead-code |
- // elimination will get rid of the previous assignment (i.e., T = 0) above. |
- // TODO(stichnot,jpp): We should be able to conditionally create the "T=0" |
- // instruction based on CC0, instead of relying on DCE to remove it. |
- if (CC0 != CondARM32::AL) |
- _set_dest_redefined(); |
+ Operand *_1 = Ctx->getConstantInt32(1); |
+ Operand *_0 = Ctx->getConstantZero(IceType_i32); |
+ |
+ CondARM32::Cond CondIfTrue0, CondIfTrue1, CondIfFalse; |
+ lowerFcmpCond(Instr, &CondIfTrue0, &CondIfTrue1, &CondIfFalse); |
+ |
+ bool RedefineT = false; |
+ if (CondIfFalse != CondARM32::kNone) { |
+ assert(!RedefineT); |
+ _mov(T, _0, CondIfFalse); |
+ RedefineT = true; |
} |
- if (CC1 != CondARM32::kNone) { |
- assert(CC0 != CondARM32::kNone); |
- assert(CC1 != CondARM32::AL); |
- _mov_redefined(T, One, CC1); |
+ |
+ if (CondIfTrue0 != CondARM32::kNone) { |
+ if (RedefineT) { |
+ _mov_redefined(T, _1, CondIfTrue0); |
+ } else { |
+ _mov(T, _1, CondIfTrue0); |
+ } |
+ RedefineT = true; |
} |
+ |
+ if (CondIfTrue1 != CondARM32::kNone) { |
+ assert(RedefineT); |
+ _mov_redefined(T, _1, CondIfTrue1); |
+ } |
+ |
_mov(Dest, T); |
} |
-void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
- Variable *Dest = Inst->getDest(); |
+void TargetARM32::lowerIcmpCond(const InstIcmp *Inst, |
+ CondARM32::Cond *CondIfTrue, |
+ CondARM32::Cond *CondIfFalse) { |
Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
- if (isVectorType(Dest->getType())) { |
- Variable *T = makeReg(Dest->getType()); |
- Context.insert(InstFakeDef::create(Func, T)); |
- _mov(Dest, T); |
- UnimplementedError(Func->getContext()->getFlags()); |
- return; |
- } |
- |
// a=icmp cond, b, c ==> |
// GCC does: |
// cmp b.hi, c.hi or cmp b.lo, c.lo |
@@ -2478,8 +2568,7 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
// |
// So, we are going with the GCC version since it's usually better (except |
// perhaps for eq/ne). We could revisit special-casing eq/ne later. |
- Constant *Zero = Ctx->getConstantZero(IceType_i32); |
- Constant *One = Ctx->getConstantInt32(1); |
+ |
if (Src0->getType() == IceType_i64) { |
InstIcmp::ICond Conditon = Inst->getCondition(); |
size_t Index = static_cast<size_t>(Conditon); |
@@ -2497,7 +2586,6 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); |
Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); |
} |
- Variable *T = makeReg(IceType_i32); |
if (TableIcmp64[Index].IsSigned) { |
Variable *ScratchReg = makeReg(IceType_i32); |
_cmp(Src0Lo, Src1LoRF); |
@@ -2509,9 +2597,8 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
_cmp(Src0Hi, Src1HiRF); |
_cmp(Src0Lo, Src1LoRF, CondARM32::EQ); |
} |
- _mov(T, One, TableIcmp64[Index].C1); |
- _mov_redefined(T, Zero, TableIcmp64[Index].C2); |
- _mov(Dest, T); |
+ *CondIfTrue = TableIcmp64[Index].C1; |
+ *CondIfFalse = TableIcmp64[Index].C2; |
return; |
} |
@@ -2548,7 +2635,6 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
assert(ShiftAmt >= 0); |
Constant *ShiftConst = nullptr; |
Variable *Src0R = nullptr; |
- Variable *T = makeReg(IceType_i32); |
if (ShiftAmt) { |
ShiftConst = Ctx->getConstantInt32(ShiftAmt); |
Src0R = makeReg(IceType_i32); |
@@ -2556,7 +2642,6 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
} else { |
Src0R = legalizeToReg(Src0); |
} |
- _mov(T, Zero); |
if (ShiftAmt) { |
Variable *Src1R = legalizeToReg(Src1); |
OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( |
@@ -2566,8 +2651,32 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); |
_cmp(Src0R, Src1RF); |
} |
- _mov_redefined(T, One, getIcmp32Mapping(Inst->getCondition())); |
+ *CondIfTrue = getIcmp32Mapping(Inst->getCondition()); |
+ *CondIfFalse = InstARM32::getOppositeCondition(*CondIfTrue); |
+} |
+ |
+void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
+ Variable *Dest = Inst->getDest(); |
+ |
+ if (isVectorType(Dest->getType())) { |
+ Variable *T = makeReg(Dest->getType()); |
+ Context.insert(InstFakeDef::create(Func, T)); |
+ _mov(Dest, T); |
+ UnimplementedError(Func->getContext()->getFlags()); |
+ return; |
+ } |
+ |
+ Constant *_0 = Ctx->getConstantZero(IceType_i32); |
+ Constant *_1 = Ctx->getConstantInt32(1); |
+ Variable *T = makeReg(IceType_i32); |
+ |
+ CondARM32::Cond CondIfTrue, CondIfFalse; |
+ lowerIcmpCond(Inst, &CondIfTrue, &CondIfFalse); |
+ |
+ _mov(T, _0, CondIfFalse); |
+ _mov_redefined(T, _1, CondIfTrue); |
_mov(Dest, T); |
+ |
return; |
} |
@@ -3329,56 +3438,119 @@ void TargetARM32::lowerSelect(const InstSelect *Inst) { |
UnimplementedError(Func->getContext()->getFlags()); |
return; |
} |
- // TODO(jvoung): handle folding opportunities. |
- // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t |
- Variable *CmpOpnd0 = legalizeToReg(Condition); |
- Type CmpOpnd0Ty = CmpOpnd0->getType(); |
- Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32); |
- assert(CmpOpnd0Ty == IceType_i1); |
- if (CmpOpnd0Ty != IceType_i32) |
- _uxt(CmpOpnd0, CmpOpnd0); |
- _cmp(CmpOpnd0, CmpOpnd1); |
- static constexpr CondARM32::Cond Cond = CondARM32::NE; |
+ |
+ CondARM32::Cond CondIfTrue0, CondIfTrue1, CondIfFalse; |
+ if (!_mov_i1_to_flags(Condition, &CondIfTrue0, &CondIfTrue1, &CondIfFalse)) { |
+ // "Condition" was not folded. |
+ // cmp cond, #0; mov t, SrcF; mov_cond t, SrcT; mov dest, t |
+ Variable *CmpOpnd0 = legalizeToReg(Condition); |
+ Type CmpOpnd0Ty = CmpOpnd0->getType(); |
+ Operand *CmpOpnd1 = Ctx->getConstantZero(IceType_i32); |
+ assert(CmpOpnd0Ty == IceType_i1); |
+ if (CmpOpnd0Ty != IceType_i32) |
+ _uxt(CmpOpnd0, CmpOpnd0); |
+ _cmp(CmpOpnd0, CmpOpnd1); |
+ CondIfTrue0 = CondARM32::NE; |
+ CondIfTrue1 = CondARM32::kNone; |
+ CondIfFalse = CondARM32::EQ; |
+ } |
+ |
if (DestTy == IceType_i64) { |
SrcT = legalizeUndef(SrcT); |
SrcF = legalizeUndef(SrcF); |
// Set the low portion. |
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
+ Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex); |
Operand *SrcFLo = legalize(loOperand(SrcF), Legal_Reg | Legal_Flex); |
Variable *TLo = makeReg(SrcFLo->getType()); |
- _mov(TLo, SrcFLo); |
- Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Flex); |
- _mov_redefined(TLo, SrcTLo, Cond); |
+ bool RedefineTLo = false; |
+ if (CondIfFalse != CondARM32::kNone) { |
+ _mov(TLo, SrcFLo, CondIfFalse); |
+ RedefineTLo = true; |
+ } |
+ if (CondIfTrue0 != CondARM32::kNone) { |
+ if (!RedefineTLo) |
+ _mov(TLo, SrcTLo, CondIfTrue0); |
+ else |
+ _mov_redefined(TLo, SrcTLo, CondIfTrue0); |
+ RedefineTLo = true; |
+ } |
+ if (CondIfTrue1 != CondARM32::kNone) { |
+ assert(RedefineTLo); |
+ _mov_redefined(TLo, SrcTLo, CondIfTrue1); |
+ } |
_mov(DestLo, TLo); |
+ |
// Set the high portion. |
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
+ Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex); |
Operand *SrcFHi = legalize(hiOperand(SrcF), Legal_Reg | Legal_Flex); |
Variable *THi = makeReg(SrcFHi->getType()); |
- _mov(THi, SrcFHi); |
- Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Flex); |
- _mov_redefined(THi, SrcTHi, Cond); |
+ bool RedefineTHi = false; |
+ if (CondIfFalse != CondARM32::kNone) { |
+ _mov(THi, SrcFHi, CondIfFalse); |
+ RedefineTHi = true; |
+ } |
+ if (CondIfTrue0 != CondARM32::kNone) { |
+ if (!RedefineTHi) |
+ _mov(THi, SrcTHi, CondIfTrue0); |
+ else |
+ _mov_redefined(THi, SrcTHi, CondIfTrue0); |
+ RedefineTHi = true; |
+ } |
+ if (CondIfTrue1 != CondARM32::kNone) { |
+ assert(RedefineTHi); |
+ _mov_redefined(THi, SrcTHi, CondIfTrue1); |
+ } |
_mov(DestHi, THi); |
return; |
} |
if (isFloatingType(DestTy)) { |
- Variable *T = makeReg(DestTy); |
+ SrcT = legalizeToReg(SrcT); |
SrcF = legalizeToReg(SrcF); |
+ Variable *T = makeReg(DestTy); |
assert(DestTy == SrcF->getType()); |
- _mov(T, SrcF); |
- SrcT = legalizeToReg(SrcT); |
+ bool RedefineT = false; |
+ if (CondIfFalse != CondARM32::kNone) { |
+ _mov(T, SrcF, CondIfFalse); |
+ RedefineT = true; |
+ } |
+ if (CondIfTrue0 != CondARM32::kNone) { |
+ if (!RedefineT) |
+ _mov(T, SrcT, CondIfTrue0); |
+ else |
+ _mov_redefined(T, SrcT, CondIfTrue0); |
+ RedefineT = true; |
+ } |
+ if (CondIfTrue1 != CondARM32::kNone) { |
+ assert(RedefineT); |
+ _mov_redefined(T, SrcT, CondIfTrue1); |
+ } |
assert(DestTy == SrcT->getType()); |
- _mov(T, SrcT, Cond); |
- _set_dest_redefined(); |
_mov(Dest, T); |
return; |
} |
- SrcF = legalize(SrcF, Legal_Reg | Legal_Flex); |
Variable *T = makeReg(SrcF->getType()); |
- _mov(T, SrcF); |
SrcT = legalize(SrcT, Legal_Reg | Legal_Flex); |
- _mov_redefined(T, SrcT, Cond); |
+ SrcF = legalize(SrcF, Legal_Reg | Legal_Flex); |
+ bool RedefineT = false; |
+ if (CondIfFalse != CondARM32::kNone) { |
+ _mov(T, SrcF, CondIfFalse); |
+ RedefineT = true; |
+ } |
+ if (CondIfTrue0 != CondARM32::kNone) { |
+ if (!RedefineT) |
+ _mov(T, SrcT, CondIfTrue0); |
+ else |
+ _mov_redefined(T, SrcT, CondIfTrue0); |
+ RedefineT = true; |
+ } |
+ if (CondIfTrue1 != CondARM32::kNone) { |
+ assert(RedefineT); |
+ _mov_redefined(T, SrcT, CondIfTrue1); |
+ } |
_mov(Dest, T); |
} |
@@ -3786,6 +3958,126 @@ void TargetARM32::emit(const ConstantUndef *) const { |
llvm::report_fatal_error("undef value encountered by emitter."); |
} |
+void TargetARM32::lowerTruncToFlags(Operand *Src, CondARM32::Cond *CondIfTrue, |
+ CondARM32::Cond *CondIfFalse) { |
+ Operand *_1 = Ctx->getConstantInt32(1); |
+ Variable *SrcR = |
+ legalizeToReg(Src->getType() == IceType_i64 ? loOperand(Src) : Src); |
+ _tst(SrcR, _1); |
+ *CondIfTrue = CondARM32::NE; // NE <-> APSR.Z == 0 |
+ *CondIfFalse = CondARM32::EQ; // EQ <-> APSR.Z == 1 |
+} |
+ |
+bool TargetARM32::_mov_i1_to_flags(Operand *Boolean, |
+ CondARM32::Cond *CondIfTrue0, |
+ CondARM32::Cond *CondIfTrue1, |
+ CondARM32::Cond *CondIfFalse) { |
+ *CondIfTrue0 = CondARM32::kNone; |
+ *CondIfTrue1 = CondARM32::kNone; |
+ *CondIfFalse = CondARM32::AL; |
+ bool FoldOK = false; |
+ if (const Inst *Producer = BoolComputations.getProducerOf(Boolean)) { |
+ if (const auto *IcmpProducer = llvm::dyn_cast<InstIcmp>(Producer)) { |
+ lowerIcmpCond(IcmpProducer, CondIfTrue0, CondIfFalse); |
+ FoldOK = true; |
+ } else if (const auto *FcmpProducer = llvm::dyn_cast<InstFcmp>(Producer)) { |
+ lowerFcmpCond(FcmpProducer, CondIfTrue0, CondIfTrue1, CondIfFalse); |
+ FoldOK = true; |
+ } else if (const auto *CastProducer = llvm::dyn_cast<InstCast>(Producer)) { |
+ assert(CastProducer->getCastKind() == InstCast::Trunc); |
+ lowerTruncToFlags(CastProducer->getSrc(0), CondIfTrue0, CondIfFalse); |
+ FoldOK = true; |
+ } |
+ } |
+ return FoldOK; |
+} |
+ |
+namespace { |
+namespace BoolFolding { |
+bool shouldTrackProducer(const Inst &Instr) { |
+ switch (static_cast<uint32_t>(Instr.getKind())) { |
+ case Inst::Icmp: |
+ return true; |
+ case Inst::Fcmp: |
+ return true; |
+ } |
+ if (const auto *Cast = llvm::dyn_cast<InstCast>(&Instr)) { |
+ switch (static_cast<uint32_t>(Cast->getCastKind())) { |
+ case InstCast::Trunc: |
+ return true; |
+ } |
+ } |
+ return false; |
+} |
+ |
+bool isValidConsumer(const Inst &Instr) { |
+ switch (static_cast<uint32_t>(Instr.getKind())) { |
+ case Inst::Br: |
+ return true; |
+ case Inst::Select: |
+ return !isVectorType(Instr.getDest()->getType()); |
+ } |
+ if (const auto *Cast = llvm::dyn_cast<InstCast>(&Instr)) { |
+ switch (static_cast<uint32_t>(Cast->getCastKind())) { |
+ case InstCast::Sext: |
+ return !isVectorType(Instr.getDest()->getType()); |
+ case InstCast::Zext: |
+ return !isVectorType(Instr.getDest()->getType()); |
+ } |
+ } |
+ return false; |
+} |
+} // end of namespace BoolFolding |
+} // end of anonymous namespace |
+ |
+void TargetARM32::BoolComputationTracker::recordProducers(CfgNode *Node) { |
+ for (Inst &Instr : Node->getInsts()) { |
+ // Check whether Instr is a valid producer. |
+ Variable *Dest = Instr.getDest(); |
+ if (!Instr.isDeleted() // only consider non-deleted instructions; and |
+ && Dest // only instructions with an actual dest var; and |
+ && Dest->getType() == IceType_i1 // only bool-type dest vars; and |
+ && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr. |
+ KnownComputations.emplace(Dest->getIndex(), BoolComputationEntry(&Instr)); |
+ } |
+ // Check each src variable against the map. |
+ FOREACH_VAR_IN_INST(Var, Instr) { |
+ SizeT VarNum = Var->getIndex(); |
+ auto ComputationIter = KnownComputations.find(VarNum); |
+ if (ComputationIter == KnownComputations.end()) { |
+ continue; |
+ } |
+ |
+ if (IndexOfVarOperandInInst(Var) != 0 || |
+ !BoolFolding::isValidConsumer(Instr)) { |
+ // All valid consumers use Var as the first source operand |
+ KnownComputations.erase(VarNum); |
+ continue; |
+ } |
+ |
+ if (Instr.isLastUse(Var)) { |
+ ComputationIter->second.IsLiveOut = false; |
+ } |
+ } |
+ } |
+ |
+ for (auto Iter = KnownComputations.begin(), End = KnownComputations.end(); |
+ Iter != End;) { |
+ // Disable the folding if its dest may be live beyond this block. |
+ if (Iter->second.IsLiveOut) { |
+ Iter = KnownComputations.erase(Iter); |
+ continue; |
+ } |
+ |
+ // Mark as "dead" rather than outright deleting. This is so that other |
+ // peephole style optimizations during or before lowering have access to |
+ // this instruction in undeleted form. See for example |
+ // tryOptimizedCmpxchgCmpBr(). |
+ Iter->second.Instr->setDead(); |
+ ++Iter; |
+ } |
+} |
+ |
TargetDataARM32::TargetDataARM32(GlobalContext *Ctx) |
: TargetDataLowering(Ctx) {} |