Index: src/IceTargetLoweringARM32.cpp |
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp |
index fcb865b4c0c54038b14070a8b259d22e781083d7..45efac20d2f8bef91cb33afce2d24f5441959a61 100644 |
--- a/src/IceTargetLoweringARM32.cpp |
+++ b/src/IceTargetLoweringARM32.cpp |
@@ -1297,29 +1297,26 @@ void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { |
Variable *SrcLoReg = legalizeToReg(SrcLo); |
switch (Ty) { |
default: |
- llvm_unreachable("Unexpected type"); |
- case IceType_i8: { |
- Operand *Mask = |
- legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex); |
- _tst(SrcLoReg, Mask); |
- break; |
- } |
+ llvm::report_fatal_error("Unexpected type"); |
+ case IceType_i8: |
case IceType_i16: { |
- Operand *Mask = |
- legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex); |
- _tst(SrcLoReg, Mask); |
- break; |
- } |
+ Operand *ShAmtF = |
+ legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)), |
+ Legal_Reg | Legal_Flex); |
+ Variable *T = makeReg(IceType_i32); |
+ _lsls(T, SrcLoReg, ShAmtF); |
+ Context.insert(InstFakeUse::create(Func, T)); |
+ } break; |
case IceType_i32: { |
_tst(SrcLoReg, SrcLoReg); |
break; |
} |
case IceType_i64: { |
- Variable *ScratchReg = makeReg(IceType_i32); |
- _orrs(ScratchReg, SrcLoReg, SrcHi); |
- // ScratchReg isn't going to be used, but we need the side-effect of |
- // setting flags from this operation. |
- Context.insert(InstFakeUse::create(Func, ScratchReg)); |
+ Variable *T = makeReg(IceType_i32); |
+ _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex)); |
+ // T isn't going to be used, but we need the side-effect of setting flags |
+ // from this operation. |
+ Context.insert(InstFakeUse::create(Func, T)); |
} |
} |
InstARM32Label *Label = InstARM32Label::create(Func, this); |
@@ -1404,289 +1401,575 @@ TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Inst) { |
return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; |
} |
-void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
- Variable *Dest = Inst->getDest(); |
- if (Dest->getType() == IceType_i1) { |
- lowerInt1Arithmetic(Inst); |
- return; |
+namespace { |
+ // NumericOperands is used during arithmetic/icmp lowering for constant folding. |
+ // It holds the two source operands, and maintains some state as to whether one |
+ // of them is a constant. If one of the operands is a constant, then it will |
+ // be stored as the operation's second source, with a bit indicating whether the |
+ // operands were swapped. |
+// |
+// The class is split into a base class with operand type-independent methods, |
+// and a derived, templated class, for each type of operand we want to fold |
+// constants for: |
+// |
+ // NumericOperandsBase --> NumericOperands<ConstantFloat> |
+ // --> NumericOperands<ConstantDouble> |
+ // --> NumericOperands<ConstantInteger32> --> Int32Operands |
+ // |
+ // Int32Operands also exposes helper methods for emitting inverted/negated |
+ // immediates. |
+ // Operand-type-independent half of NumericOperands. It orders a pair of |
+ // sources so that a constant, when there is one, ends up in Src1, and records |
+ // in Swapped whether that changed the order the caller passed them in. |
+ class NumericOperandsBase { |
+   NumericOperandsBase() = delete; |
+   NumericOperandsBase(const NumericOperandsBase &) = delete; |
+   NumericOperandsBase &operator=(const NumericOperandsBase &) = delete; |
+ |
+ public: |
+   // S0 and S1 are the operation's sources in their original order; neither |
+   // may be null. |
+   NumericOperandsBase(Operand *S0, Operand *S1) |
+       : Src0(shouldExchange(S0, S1) ? S1 : S0), |
+         Src1(shouldExchange(S0, S1) ? S0 : S1), |
+         Swapped(Src0 == S1 && S0 != S1) { |
+     assert(Src0 != nullptr); |
+     assert(Src1 != nullptr); |
+     assert(Src0 != Src1 || S0 == S1); |
} |
- // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to |
- // legalize Src0 to flex or Src1 to flex and there is a reversible |
- // instruction. E.g., reverse subtract with immediate, register vs register, |
- // immediate. |
- // Or it may be the case that the operands aren't swapped, but the bits can |
- // be flipped and a different operation applied. E.g., use BIC (bit clear) |
- // instead of AND for some masks. |
- Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
- Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
- if (Dest->getType() == IceType_i64) { |
- // These helper-call-involved instructions are lowered in this separate |
- // switch. This is because we would otherwise assume that we need to |
- // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with |
- // helper calls, and such unused/redundant instructions will fail liveness |
- // analysis under -Om1 setting. |
- switch (Inst->getOp()) { |
+   // True when Src1 is a constant other than a ConstantRelocatable. |
+   bool hasConstOperand() const { |
+     if (llvm::isa<ConstantRelocatable>(Src1)) |
+       return false; |
+     return llvm::isa<Constant>(Src1); |
+   } |
+ |
+   // True when the stored order differs from the order given to the |
+   // constructor. |
+   bool swappedOperands() const { return Swapped; } |
+ |
+   // Stored first source (after any reordering), legalized to a register. |
+   Variable *src0R(TargetARM32 *Target) const { |
+     return legalizeToReg(Target, Src0); |
+   } |
+ |
+   // The caller's original first source, legalized to a register. |
+   Variable *unswappedSrc0R(TargetARM32 *Target) const { |
+     return legalizeToReg(Target, callerSrc0()); |
+   } |
+ |
+   // Stored second source, legalized to a register or flex operand. |
+   Operand *src1RF(TargetARM32 *Target) const { |
+     return legalizeToRegOrFlex(Target, Src1); |
+   } |
+ |
+   // The caller's original second source, legalized to a register. |
+   Variable *unswappedSrc1R(TargetARM32 *Target) const { |
+     return legalizeToReg(Target, callerSrc1()); |
+   } |
+ |
+   // The caller's original second source, as a register or flex operand. |
+   Operand *unswappedSrc1RF(TargetARM32 *Target) const { |
+     return legalizeToRegOrFlex(Target, callerSrc1()); |
+   } |
+ |
+ protected: |
+   Operand *const Src0; |
+   Operand *const Src1; |
+   const bool Swapped; |
+ |
+   static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) { |
+     return Target->legalizeToReg(Src); |
+   } |
+ |
+   static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) { |
+     return Target->legalize(Src, |
+                             TargetARM32::Legal_Reg | TargetARM32::Legal_Flex); |
+   } |
+ |
+ private: |
+   // Sources in the order the constructor received them. |
+   Operand *callerSrc0() const { return Swapped ? Src1 : Src0; } |
+   Operand *callerSrc1() const { return Swapped ? Src0 : Src1; } |
+ |
+   // Decides whether S1 has to become the first stored source: S0 must be a |
+   // constant, and S1 must either be a non-constant or be the only |
+   // ConstantRelocatable of the two. |
+   static bool shouldExchange(Operand *S0, Operand *S1) { |
+     if (!llvm::isa<Constant>(S0)) |
+       return false; |
+     if (!llvm::isa<Constant>(S1)) |
+       return true; |
+     return llvm::isa<ConstantRelocatable>(S1) && |
+            !llvm::isa<ConstantRelocatable>(S0); |
+   } |
+ }; |
+ |
+ // Typed layer over NumericOperandsBase: C is the constant class whose value |
+ // getConstantValue() extracts from the stored second source. |
+ template <typename C> class NumericOperands : public NumericOperandsBase { |
+   NumericOperands() = delete; |
+   NumericOperands(const NumericOperands &) = delete; |
+   NumericOperands &operator=(const NumericOperands &) = delete; |
+ |
+ public: |
+   NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) { |
+     // A foldable constant, if present, has to be of the kind C. |
+     assert(llvm::isa<C>(this->Src1) || !hasConstOperand()); |
+   } |
+ |
+   // Value of the constant held in Src1. The cast is unconditional, so only |
+   // call this once hasConstOperand() has returned true. |
+   typename C::PrimType getConstantValue() const { |
+     auto *Folded = llvm::cast<C>(this->Src1); |
+     return Folded->getValue(); |
+   } |
+ }; |
+ |
+using FloatOperands = NumericOperands<ConstantFloat>; |
+using DoubleOperands = NumericOperands<ConstantDouble>; |
+ |
+ // 32-bit integer flavour of NumericOperands. On top of the typed constant |
+ // access it answers whether the constant, its negation, or its bitwise |
+ // inverse is accepted by OperandARM32FlexImm::canHoldImm, and builds the |
+ // negated/inverted operand. All of these read the constant, so they must |
+ // only be used after hasConstOperand() returned true. |
+ class Int32Operands : public NumericOperands<ConstantInteger32> { |
+   Int32Operands() = delete; |
+   Int32Operands(const Int32Operands &) = delete; |
+   Int32Operands &operator=(const Int32Operands &) = delete; |
+ |
+ public: |
+   Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {} |
+ |
+   // True when the constant itself passes canHoldImm. |
+   bool immediateIsFlexEncodable() const { |
+     uint32_t Rotate, Imm8; |
+     return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8); |
+   } |
+ |
+   // True when the two's-complement negation of the constant passes |
+   // canHoldImm. |
+   bool negatedImmediateIsFlexEncodable() const { |
+     uint32_t Rotate, Imm8; |
+     return OperandARM32FlexImm::canHoldImm(negatedImmediate(), &Rotate, &Imm8); |
+   } |
+ |
+   // The negated constant, legalized to a register or flex operand. |
+   Operand *negatedSrc1F(TargetARM32 *Target) const { |
+     return legalizeToRegOrFlex( |
+         Target, Target->getCtx()->getConstantInt32(negatedImmediate())); |
+   } |
+ |
+   // True when the bitwise inverse of the constant passes canHoldImm. |
+   bool invertedImmediateIsFlexEncodable() const { |
+     uint32_t Rotate, Imm8; |
+     return OperandARM32FlexImm::canHoldImm( |
+         ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8); |
+   } |
+ |
+   // The bitwise-inverted constant, legalized to a register or flex operand. |
+   Operand *invertedSrc1F(TargetARM32 *Target) const { |
+     return legalizeToRegOrFlex(Target, |
+                                Target->getCtx()->getConstantInt32( |
+                                    ~static_cast<uint32_t>(getConstantValue()))); |
+   } |
+ |
+ private: |
+   // Two's-complement negation of the constant. The subtraction is done in |
+   // unsigned arithmetic because negating INT32_MIN as a signed value is |
+   // undefined behaviour; the unsigned form wraps back to INT32_MIN. |
+   int32_t negatedImmediate() const { |
+     return static_cast<int32_t>(0u - |
+                                 static_cast<uint32_t>(getConstantValue())); |
+   } |
+ }; |
+} // end of anonymous namespace |
+ |
+void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op, |
+ Variable *Dest, Operand *Src0, |
+ Operand *Src1) { |
+ Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1)); |
+ Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1)); |
+ assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands()); |
+ assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand()); |
+ |
+ // These helper-call-involved instructions are lowered in this separate |
+ // switch. This is because we would otherwise assume that we need to |
+ // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with |
+ // helper calls, and such unused/redundant instructions will fail liveness |
+ // analysis under -Om1 setting. |
+ switch (Op) { |
+ default: |
+ break; |
+ case InstArithmetic::Udiv: |
+ case InstArithmetic::Sdiv: |
+ case InstArithmetic::Urem: |
+ case InstArithmetic::Srem: { |
+ // Check for divide by 0 (ARM normally doesn't trap, but we want it to |
+ // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a |
+ // register, which will hide a constant source operand. Instead, check |
+ // the not-yet-legalized Src1 to optimize-out a divide by 0 check. |
+ if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) { |
+ if (SrcsLo.getConstantValue() == 0 && SrcsHi.getConstantValue() == 0) { |
+ _trap(); |
+ return; |
+ } |
+ } else { |
+ Operand *Src1Lo = SrcsLo.unswappedSrc1R(this); |
+ Operand *Src1Hi = SrcsHi.unswappedSrc1R(this); |
+ div0Check(IceType_i64, Src1Lo, Src1Hi); |
+ } |
+ // Technically, ARM has its own aeabi routines, but we can use the |
+ // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses |
+ // the more standard __moddi3 for rem. |
+ const char *HelperName = ""; |
+ switch (Op) { |
default: |
+ llvm::report_fatal_error("Should have only matched div ops."); |
break; |
case InstArithmetic::Udiv: |
+ HelperName = H_udiv_i64; |
+ break; |
case InstArithmetic::Sdiv: |
+ HelperName = H_sdiv_i64; |
+ break; |
case InstArithmetic::Urem: |
- case InstArithmetic::Srem: { |
- // Check for divide by 0 (ARM normally doesn't trap, but we want it to |
- // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a |
- // register, which will hide a constant source operand. Instead, check |
- // the not-yet-legalized Src1 to optimize-out a divide by 0 check. |
- if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { |
- if (C64->getValue() == 0) { |
- _trap(); |
- return; |
- } |
- } else { |
- Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); |
- Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); |
- div0Check(IceType_i64, Src1Lo, Src1Hi); |
- } |
- // Technically, ARM has their own aeabi routines, but we can use the |
- // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses |
- // the more standard __moddi3 for rem. |
- const char *HelperName = ""; |
- switch (Inst->getOp()) { |
- default: |
- llvm_unreachable("Should have only matched div ops."); |
- break; |
- case InstArithmetic::Udiv: |
- HelperName = H_udiv_i64; |
- break; |
- case InstArithmetic::Sdiv: |
- HelperName = H_sdiv_i64; |
- break; |
- case InstArithmetic::Urem: |
- HelperName = H_urem_i64; |
- break; |
- case InstArithmetic::Srem: |
- HelperName = H_srem_i64; |
- break; |
- } |
- constexpr SizeT MaxSrcs = 2; |
- InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); |
- Call->addArg(Src0); |
- Call->addArg(Src1); |
- lowerCall(Call); |
- return; |
- } |
+ HelperName = H_urem_i64; |
+ break; |
+ case InstArithmetic::Srem: |
+ HelperName = H_srem_i64; |
+ break; |
} |
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
- Variable *Src0RLo = legalizeToReg(loOperand(Src0)); |
- Variable *Src0RHi = legalizeToReg(hiOperand(Src0)); |
- Operand *Src1Lo = loOperand(Src1); |
- Operand *Src1Hi = hiOperand(Src1); |
- Variable *T_Lo = makeReg(DestLo->getType()); |
- Variable *T_Hi = makeReg(DestHi->getType()); |
- switch (Inst->getOp()) { |
- case InstArithmetic::_num: |
- llvm_unreachable("Unknown arithmetic operator"); |
- return; |
- case InstArithmetic::Add: |
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
- _adds(T_Lo, Src0RLo, Src1Lo); |
- _mov(DestLo, T_Lo); |
- _adc(T_Hi, Src0RHi, Src1Hi); |
- _mov(DestHi, T_Hi); |
- return; |
- case InstArithmetic::And: |
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
- _and(T_Lo, Src0RLo, Src1Lo); |
- _mov(DestLo, T_Lo); |
- _and(T_Hi, Src0RHi, Src1Hi); |
- _mov(DestHi, T_Hi); |
- return; |
- case InstArithmetic::Or: |
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
- _orr(T_Lo, Src0RLo, Src1Lo); |
+ constexpr SizeT MaxSrcs = 2; |
+ InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); |
+ Call->addArg(Src0); |
+ Call->addArg(Src1); |
+ lowerCall(Call); |
+ return; |
+ } |
+ } |
+ |
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
+ Variable *T_Lo = makeReg(DestLo->getType()); |
+ Variable *T_Hi = makeReg(DestHi->getType()); |
+ |
+ switch (Op) { |
+ case InstArithmetic::_num: |
+ llvm::report_fatal_error("Unknown arithmetic operator"); |
+ return; |
+ case InstArithmetic::Add: { |
+ Variable *Src0LoR = SrcsLo.src0R(this); |
+ Operand *Src1LoRF = SrcsLo.src1RF(this); |
+ Variable *Src0HiR = SrcsHi.src0R(this); |
+ Operand *Src1HiRF = SrcsHi.src1RF(this); |
+ _adds(T_Lo, Src0LoR, Src1LoRF); |
+ _mov(DestLo, T_Lo); |
+ _adc(T_Hi, Src0HiR, Src1HiRF); |
+ _mov(DestHi, T_Hi); |
+ return; |
+ } |
+ case InstArithmetic::And: { |
+ Variable *Src0LoR = SrcsLo.src0R(this); |
+ Operand *Src1LoRF = SrcsLo.src1RF(this); |
+ Variable *Src0HiR = SrcsHi.src0R(this); |
+ Operand *Src1HiRF = SrcsHi.src1RF(this); |
+ _and(T_Lo, Src0LoR, Src1LoRF); |
+ _mov(DestLo, T_Lo); |
+ _and(T_Hi, Src0HiR, Src1HiRF); |
+ _mov(DestHi, T_Hi); |
+ return; |
+ } |
+ case InstArithmetic::Or: { |
+ Variable *Src0LoR = SrcsLo.src0R(this); |
+ Operand *Src1LoRF = SrcsLo.src1RF(this); |
+ Variable *Src0HiR = SrcsHi.src0R(this); |
+ Operand *Src1HiRF = SrcsHi.src1RF(this); |
+ _orr(T_Lo, Src0LoR, Src1LoRF); |
+ _mov(DestLo, T_Lo); |
+ _orr(T_Hi, Src0HiR, Src1HiRF); |
+ _mov(DestHi, T_Hi); |
+ return; |
+ } |
+ case InstArithmetic::Xor: { |
+ Variable *Src0LoR = SrcsLo.src0R(this); |
+ Operand *Src1LoRF = SrcsLo.src1RF(this); |
+ Variable *Src0HiR = SrcsHi.src0R(this); |
+ Operand *Src1HiRF = SrcsHi.src1RF(this); |
+ _eor(T_Lo, Src0LoR, Src1LoRF); |
+ _mov(DestLo, T_Lo); |
+ _eor(T_Hi, Src0HiR, Src1HiRF); |
+ _mov(DestHi, T_Hi); |
+ return; |
+ } |
+ case InstArithmetic::Sub: { |
+ Variable *Src0LoR = SrcsLo.src0R(this); |
+ Operand *Src1LoRF = SrcsLo.src1RF(this); |
+ Variable *Src0HiR = SrcsHi.src0R(this); |
+ Operand *Src1HiRF = SrcsHi.src1RF(this); |
+ if (SrcsLo.swappedOperands()) { |
+ _rsbs(T_Lo, Src0LoR, Src1LoRF); |
_mov(DestLo, T_Lo); |
- _orr(T_Hi, Src0RHi, Src1Hi); |
+ _rsc(T_Hi, Src0HiR, Src1HiRF); |
_mov(DestHi, T_Hi); |
- return; |
- case InstArithmetic::Xor: |
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
- _eor(T_Lo, Src0RLo, Src1Lo); |
+ } else { |
+ _subs(T_Lo, Src0LoR, Src1LoRF); |
_mov(DestLo, T_Lo); |
- _eor(T_Hi, Src0RHi, Src1Hi); |
+ _sbc(T_Hi, Src0HiR, Src1HiRF); |
_mov(DestHi, T_Hi); |
- return; |
- case InstArithmetic::Sub: |
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
- _subs(T_Lo, Src0RLo, Src1Lo); |
- _mov(DestLo, T_Lo); |
- _sbc(T_Hi, Src0RHi, Src1Hi); |
+ } |
+ return; |
+ } |
+ case InstArithmetic::Mul: { |
+ // GCC 4.8 does: |
+ // a=b*c ==> |
+ // t_acc =(mul) (b.lo * c.hi) |
+ // t_acc =(mla) (c.lo * b.hi) + t_acc |
+ // t.hi,t.lo =(umull) b.lo * c.lo |
+ // t.hi += t_acc |
+ // a.lo = t.lo |
+ // a.hi = t.hi |
+ // |
+ // LLVM does: |
+ // t.hi,t.lo =(umull) b.lo * c.lo |
+ // t.hi =(mla) (b.lo * c.hi) + t.hi |
+ // t.hi =(mla) (b.hi * c.lo) + t.hi |
+ // a.lo = t.lo |
+ // a.hi = t.hi |
+ // |
+ // LLVM's lowering has fewer instructions, but more register pressure: |
+ // t.lo is live from beginning to end, while GCC delays the two-dest |
+ // instruction till the end, and kills c.hi immediately. |
+ Variable *T_Acc = makeReg(IceType_i32); |
+ Variable *T_Acc1 = makeReg(IceType_i32); |
+ Variable *T_Hi1 = makeReg(IceType_i32); |
+ Variable *Src0RLo = SrcsLo.unswappedSrc0R(this); |
+ Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); |
+ Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); |
+ Variable *Src1RHi = SrcsHi.unswappedSrc1R(this); |
+ _mul(T_Acc, Src0RLo, Src1RHi); |
+ _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); |
+ _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); |
+ _add(T_Hi, T_Hi1, T_Acc1); |
+ _mov(DestLo, T_Lo); |
+ _mov(DestHi, T_Hi); |
+ return; |
+ } |
+ case InstArithmetic::Shl: { |
+ if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) { |
+ Variable *Src0RLo = SrcsLo.src0R(this); |
+ // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. |
+ const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F; |
+ if (ShAmtImm == 0) { |
+ _mov(DestLo, Src0RLo); |
+ _mov(DestHi, SrcsHi.src0R(this)); |
+ return; |
+ } |
+ |
+ if (ShAmtImm >= 32) { |
+ if (ShAmtImm == 32) { |
+ _mov(DestHi, Src0RLo); |
+ } else { |
+ Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32), |
+ Legal_Reg | Legal_Flex); |
+ _lsl(T_Hi, Src0RLo, ShAmtOp); |
+ _mov(DestHi, T_Hi); |
+ } |
+ |
+ Operand *_0 = |
+ legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); |
+ _mov(T_Lo, _0); |
+ _mov(DestLo, T_Lo); |
+ return; |
+ } |
+ |
+ Variable *Src0RHi = SrcsHi.src0R(this); |
+ Operand *ShAmtOp = |
+ legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex); |
+ Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm), |
+ Legal_Reg | Legal_Flex); |
+ _lsl(T_Hi, Src0RHi, ShAmtOp); |
+ _orr(T_Hi, T_Hi, |
+ OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
+ OperandARM32::LSR, ComplShAmtOp)); |
_mov(DestHi, T_Hi); |
- return; |
- case InstArithmetic::Mul: { |
- // GCC 4.8 does: |
- // a=b*c ==> |
- // t_acc =(mul) (b.lo * c.hi) |
- // t_acc =(mla) (c.lo * b.hi) + t_acc |
- // t.hi,t.lo =(umull) b.lo * c.lo |
- // t.hi += t_acc |
- // a.lo = t.lo |
- // a.hi = t.hi |
- // |
- // LLVM does: |
- // t.hi,t.lo =(umull) b.lo * c.lo |
- // t.hi =(mla) (b.lo * c.hi) + t.hi |
- // t.hi =(mla) (b.hi * c.lo) + t.hi |
- // a.lo = t.lo |
- // a.hi = t.hi |
- // |
- // LLVM's lowering has fewer instructions, but more register pressure: |
- // t.lo is live from beginning to end, while GCC delays the two-dest |
- // instruction till the end, and kills c.hi immediately. |
- Variable *T_Acc = makeReg(IceType_i32); |
- Variable *T_Acc1 = makeReg(IceType_i32); |
- Variable *T_Hi1 = makeReg(IceType_i32); |
- Variable *Src1RLo = legalizeToReg(Src1Lo); |
- Variable *Src1RHi = legalizeToReg(Src1Hi); |
- _mul(T_Acc, Src0RLo, Src1RHi); |
- _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); |
- _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); |
- _add(T_Hi, T_Hi1, T_Acc1); |
+ |
+ _lsl(T_Lo, Src0RLo, ShAmtOp); |
_mov(DestLo, T_Lo); |
- _mov(DestHi, T_Hi); |
- return; |
- } |
- case InstArithmetic::Shl: { |
- // a=b<<c ==> |
- // pnacl-llc does: |
- // mov t_b.lo, b.lo |
- // mov t_b.hi, b.hi |
- // mov t_c.lo, c.lo |
- // rsb T0, t_c.lo, #32 |
- // lsr T1, t_b.lo, T0 |
- // orr t_a.hi, T1, t_b.hi, lsl t_c.lo |
- // sub T2, t_c.lo, #32 |
- // cmp T2, #0 |
- // lslge t_a.hi, t_b.lo, T2 |
- // lsl t_a.lo, t_b.lo, t_c.lo |
- // mov a.lo, t_a.lo |
- // mov a.hi, t_a.hi |
- // |
- // GCC 4.8 does: |
- // sub t_c1, c.lo, #32 |
- // lsl t_hi, b.hi, c.lo |
- // orr t_hi, t_hi, b.lo, lsl t_c1 |
- // rsb t_c2, c.lo, #32 |
- // orr t_hi, t_hi, b.lo, lsr t_c2 |
- // lsl t_lo, b.lo, c.lo |
- // a.lo = t_lo |
- // a.hi = t_hi |
- // |
- // These are incompatible, therefore we mimic pnacl-llc. |
- // Can be strength-reduced for constant-shifts, but we don't do that for |
- // now. |
- // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On |
- // ARM, shifts only take the lower 8 bits of the shift register, and |
- // saturate to the range 0-32, so the negative value will saturate to 32. |
- Constant *_32 = Ctx->getConstantInt32(32); |
- Constant *_0 = Ctx->getConstantZero(IceType_i32); |
- Variable *Src1RLo = legalizeToReg(Src1Lo); |
- Variable *T0 = makeReg(IceType_i32); |
- Variable *T1 = makeReg(IceType_i32); |
- Variable *T2 = makeReg(IceType_i32); |
- Variable *TA_Hi = makeReg(IceType_i32); |
- Variable *TA_Lo = makeReg(IceType_i32); |
- _rsb(T0, Src1RLo, _32); |
- _lsr(T1, Src0RLo, T0); |
- _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
- OperandARM32::LSL, Src1RLo)); |
- _sub(T2, Src1RLo, _32); |
- _cmp(T2, _0); |
- _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); |
- _set_dest_redefined(); |
- _lsl(TA_Lo, Src0RLo, Src1RLo); |
- _mov(DestLo, TA_Lo); |
- _mov(DestHi, TA_Hi); |
return; |
} |
- case InstArithmetic::Lshr: |
- case InstArithmetic::Ashr: { |
- // a=b>>c |
- // pnacl-llc does: |
- // mov t_b.lo, b.lo |
- // mov t_b.hi, b.hi |
- // mov t_c.lo, c.lo |
- // lsr T0, t_b.lo, t_c.lo |
- // rsb T1, t_c.lo, #32 |
- // orr t_a.lo, T0, t_b.hi, lsl T1 |
- // sub T2, t_c.lo, #32 |
- // cmp T2, #0 |
- // [al]srge t_a.lo, t_b.hi, T2 |
- // [al]sr t_a.hi, t_b.hi, t_c.lo |
- // mov a.lo, t_a.lo |
- // mov a.hi, t_a.hi |
- // |
- // GCC 4.8 does (lsr): |
- // rsb t_c1, c.lo, #32 |
- // lsr t_lo, b.lo, c.lo |
- // orr t_lo, t_lo, b.hi, lsl t_c1 |
- // sub t_c2, c.lo, #32 |
- // orr t_lo, t_lo, b.hi, lsr t_c2 |
- // lsr t_hi, b.hi, c.lo |
- // mov a.lo, t_lo |
- // mov a.hi, t_hi |
- // |
- // These are incompatible, therefore we mimic pnacl-llc. |
- const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; |
- Constant *_32 = Ctx->getConstantInt32(32); |
- Constant *_0 = Ctx->getConstantZero(IceType_i32); |
- Variable *Src1RLo = legalizeToReg(Src1Lo); |
- Variable *T0 = makeReg(IceType_i32); |
- Variable *T1 = makeReg(IceType_i32); |
- Variable *T2 = makeReg(IceType_i32); |
- Variable *TA_Lo = makeReg(IceType_i32); |
- Variable *TA_Hi = makeReg(IceType_i32); |
- _lsr(T0, Src0RLo, Src1RLo); |
- _rsb(T1, Src1RLo, _32); |
- _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
- OperandARM32::LSL, T1)); |
- _sub(T2, Src1RLo, _32); |
- _cmp(T2, _0); |
- if (IsAshr) { |
- _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); |
- _set_dest_redefined(); |
- _asr(TA_Hi, Src0RHi, Src1RLo); |
+ |
+ // a=b<<c ==> |
+ // pnacl-llc does: |
+ // mov t_b.lo, b.lo |
+ // mov t_b.hi, b.hi |
+ // mov t_c.lo, c.lo |
+ // rsb T0, t_c.lo, #32 |
+ // lsr T1, t_b.lo, T0 |
+ // orr t_a.hi, T1, t_b.hi, lsl t_c.lo |
+ // sub T2, t_c.lo, #32 |
+ // cmp T2, #0 |
+ // lslge t_a.hi, t_b.lo, T2 |
+ // lsl t_a.lo, t_b.lo, t_c.lo |
+ // mov a.lo, t_a.lo |
+ // mov a.hi, t_a.hi |
+ // |
+ // GCC 4.8 does: |
+ // sub t_c1, c.lo, #32 |
+ // lsl t_hi, b.hi, c.lo |
+ // orr t_hi, t_hi, b.lo, lsl t_c1 |
+ // rsb t_c2, c.lo, #32 |
+ // orr t_hi, t_hi, b.lo, lsr t_c2 |
+ // lsl t_lo, b.lo, c.lo |
+ // a.lo = t_lo |
+ // a.hi = t_hi |
+ // |
+ // These are incompatible, therefore we mimic pnacl-llc. |
+     // Constant shift amounts were already strength-reduced above; this is the |
+     // generic sequence for a shift amount that is not a foldable constant. |
+     // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On |
+     // ARM, shifts only take the lower 8 bits of the shift register, and |
+     // saturate to the range 0-32, so the negative value will saturate to 32. |
+     Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); |
+     Operand *_0 = |
+         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); |
+     Variable *T0 = makeReg(IceType_i32); |
+     Variable *T1 = makeReg(IceType_i32); |
+     Variable *T2 = makeReg(IceType_i32); |
+     Variable *TA_Hi = makeReg(IceType_i32); |
+     Variable *TA_Lo = makeReg(IceType_i32); |
+     // A shift is not commutative, so every operand is taken in its original |
+     // (unswapped) order. When the shifted value is a constant the pair is |
+     // stored swapped, and src0R() would hand back the shift amount's low |
+     // word instead of the value being shifted. |
+     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this); |
+     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); |
+     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); |
+     _rsb(T0, Src1RLo, _32); |
+     _lsr(T1, Src0RLo, T0); |
+     _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
+                                                 OperandARM32::LSL, Src1RLo)); |
+     _sub(T2, Src1RLo, _32); |
+     _cmp(T2, _0); |
+     // For shift amounts of 32 or more the high word comes solely from b.lo. |
+     _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); |
+     _set_dest_redefined(); |
+     _lsl(TA_Lo, Src0RLo, Src1RLo); |
+     _mov(DestLo, TA_Lo); |
+     _mov(DestHi, TA_Hi); |
+ return; |
+ } |
+ case InstArithmetic::Lshr: |
+ case InstArithmetic::Ashr: { |
+ const bool ASR = Op == InstArithmetic::Ashr; |
+ if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) { |
+ Variable *Src0RHi = SrcsHi.src0R(this); |
+ // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. |
+ const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F; |
+ if (ShAmtImm == 0) { |
+ _mov(DestHi, Src0RHi); |
+ _mov(DestLo, SrcsLo.src0R(this)); |
+ return; |
+ } |
+ |
+       // Shift by 32..63: the low result word comes entirely from the source's |
+       // high word, and the high result word holds no source bits other than |
+       // (for Ashr) copies of the sign bit. |
+       if (ShAmtImm >= 32) { |
+         if (ShAmtImm == 32) { |
+           _mov(DestLo, Src0RHi); |
+         } else { |
+           Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32), |
+                                       Legal_Reg | Legal_Flex); |
+           if (ASR) { |
+             _asr(T_Lo, Src0RHi, ShAmtOp); |
+           } else { |
+             _lsr(T_Lo, Src0RHi, ShAmtOp); |
+           } |
+           _mov(DestLo, T_Lo); |
+         } |
+ |
+         if (ASR) { |
+           // Arithmetic shift right by 31 replicates the sign bit across the |
+           // whole high word. The amount must be 31, not 0. |
+           Operand *_31 = |
+               legalize(Ctx->getConstantInt32(31), Legal_Reg | Legal_Flex); |
+           _asr(T_Hi, Src0RHi, _31); |
+         } else { |
+           Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32), |
+                                  Legal_Reg | Legal_Flex); |
+           _mov(T_Hi, _0); |
+         } |
+         _mov(DestHi, T_Hi); |
+         return; |
+       } |
+ |
+ Variable *Src0RLo = SrcsLo.src0R(this); |
+ Operand *ShAmtOp = |
+ legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex); |
+ Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm), |
+ Legal_Reg | Legal_Flex); |
+ _lsr(T_Lo, Src0RLo, ShAmtOp); |
+ _orr(T_Lo, T_Lo, |
+ OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
+ OperandARM32::LSL, ComplShAmtOp)); |
+ _mov(DestLo, T_Lo); |
+ |
+ if (ASR) { |
+ _asr(T_Hi, Src0RHi, ShAmtOp); |
} else { |
- _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); |
- _set_dest_redefined(); |
- _lsr(TA_Hi, Src0RHi, Src1RLo); |
+ _lsr(T_Hi, Src0RHi, ShAmtOp); |
} |
- _mov(DestLo, TA_Lo); |
- _mov(DestHi, TA_Hi); |
+ _mov(DestHi, T_Hi); |
return; |
} |
- case InstArithmetic::Fadd: |
- case InstArithmetic::Fsub: |
- case InstArithmetic::Fmul: |
- case InstArithmetic::Fdiv: |
- case InstArithmetic::Frem: |
- llvm_unreachable("FP instruction with i64 type"); |
- return; |
- case InstArithmetic::Udiv: |
- case InstArithmetic::Sdiv: |
- case InstArithmetic::Urem: |
- case InstArithmetic::Srem: |
- llvm_unreachable("Call-helper-involved instruction for i64 type " |
- "should have already been handled before"); |
- return; |
+ |
+ // a=b>>c |
+ // pnacl-llc does: |
+ // mov t_b.lo, b.lo |
+ // mov t_b.hi, b.hi |
+ // mov t_c.lo, c.lo |
+ // lsr T0, t_b.lo, t_c.lo |
+ // rsb T1, t_c.lo, #32 |
+ // orr t_a.lo, T0, t_b.hi, lsl T1 |
+ // sub T2, t_c.lo, #32 |
+ // cmp T2, #0 |
+ // [al]srge t_a.lo, t_b.hi, T2 |
+ // [al]sr t_a.hi, t_b.hi, t_c.lo |
+ // mov a.lo, t_a.lo |
+ // mov a.hi, t_a.hi |
+ // |
+ // GCC 4.8 does (lsr): |
+ // rsb t_c1, c.lo, #32 |
+ // lsr t_lo, b.lo, c.lo |
+ // orr t_lo, t_lo, b.hi, lsl t_c1 |
+ // sub t_c2, c.lo, #32 |
+ // orr t_lo, t_lo, b.hi, lsr t_c2 |
+ // lsr t_hi, b.hi, c.lo |
+ // mov a.lo, t_lo |
+ // mov a.hi, t_hi |
+ // |
+ // These are incompatible, therefore we mimic pnacl-llc. |
+ Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); |
+ Operand *_0 = |
+ legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); |
+ Variable *T0 = makeReg(IceType_i32); |
+ Variable *T1 = makeReg(IceType_i32); |
+ Variable *T2 = makeReg(IceType_i32); |
+ Variable *TA_Lo = makeReg(IceType_i32); |
+ Variable *TA_Hi = makeReg(IceType_i32); |
+ Variable *Src0RLo = SrcsLo.unswappedSrc0R(this); |
+ Variable *Src0RHi = SrcsHi.unswappedSrc0R(this); |
+ Variable *Src1RLo = SrcsLo.unswappedSrc1R(this); |
+ _lsr(T0, Src0RLo, Src1RLo); |
+ _rsb(T1, Src1RLo, _32); |
+ _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
+ OperandARM32::LSL, T1)); |
+ _sub(T2, Src1RLo, _32); |
+ _cmp(T2, _0); |
+ if (ASR) { |
+ _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); |
+ _set_dest_redefined(); |
+ _asr(TA_Hi, Src0RHi, Src1RLo); |
+ } else { |
+ _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); |
+ _set_dest_redefined(); |
+ _lsr(TA_Hi, Src0RHi, Src1RLo); |
} |
+ _mov(DestLo, TA_Lo); |
+ _mov(DestHi, TA_Hi); |
return; |
- } else if (isVectorType(Dest->getType())) { |
+ } |
+ case InstArithmetic::Fadd: |
+ case InstArithmetic::Fsub: |
+ case InstArithmetic::Fmul: |
+ case InstArithmetic::Fdiv: |
+ case InstArithmetic::Frem: |
+ llvm::report_fatal_error("FP instruction with i64 type"); |
+ return; |
+ case InstArithmetic::Udiv: |
+ case InstArithmetic::Sdiv: |
+ case InstArithmetic::Urem: |
+ case InstArithmetic::Srem: |
+ llvm::report_fatal_error("Call-helper-involved instruction for i64 type " |
+ "should have already been handled before"); |
+ return; |
+ } |
+} |
+ |
+void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
+ Variable *Dest = Inst->getDest(); |
+ if (Dest->getType() == IceType_i1) { |
+ lowerInt1Arithmetic(Inst); |
+ return; |
+ } |
+ |
+ Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
+ Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
+ if (Dest->getType() == IceType_i64) { |
+ lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1); |
+ return; |
+ } |
+ |
+ if (isVectorType(Dest->getType())) { |
// Add a fake def to keep liveness consistent in the meantime. |
Variable *T = makeReg(Dest->getType()); |
Context.insert(InstFakeDef::create(Func, T)); |
@@ -1694,41 +1977,49 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
UnimplementedError(Func->getContext()->getFlags()); |
return; |
} |
+ |
// Dest->getType() is a non-i64 scalar. |
- Variable *Src0R = legalizeToReg(Src0); |
Variable *T = makeReg(Dest->getType()); |
- // Handle div/rem separately. They require a non-legalized Src1 to inspect |
+ |
+ // * Handle div/rem separately. They require a non-legalized Src1 to inspect |
// whether or not Src1 is a non-zero constant. Once legalized it is more |
// difficult to determine (constant may be moved to a register). |
+ // * Handle floating point arithmetic separately: they require Src1 to be |
+ // legalized to a register. |
switch (Inst->getOp()) { |
default: |
break; |
case InstArithmetic::Udiv: { |
constexpr bool NotRemainder = false; |
+ Variable *Src0R = legalizeToReg(Src0); |
lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, |
H_udiv_i32, NotRemainder); |
return; |
} |
case InstArithmetic::Sdiv: { |
constexpr bool NotRemainder = false; |
+ Variable *Src0R = legalizeToReg(Src0); |
lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, |
H_sdiv_i32, NotRemainder); |
return; |
} |
case InstArithmetic::Urem: { |
constexpr bool IsRemainder = true; |
+ Variable *Src0R = legalizeToReg(Src0); |
lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv, |
H_urem_i32, IsRemainder); |
return; |
} |
case InstArithmetic::Srem: { |
constexpr bool IsRemainder = true; |
+ Variable *Src0R = legalizeToReg(Src0); |
lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv, |
H_srem_i32, IsRemainder); |
return; |
} |
case InstArithmetic::Frem: { |
- const SizeT MaxSrcs = 2; |
+ constexpr SizeT MaxSrcs = 2; |
+ Variable *Src0R = legalizeToReg(Src0); |
Type Ty = Dest->getType(); |
InstCall *Call = makeHelperCall( |
isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); |
@@ -1737,32 +2028,29 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
lowerCall(Call); |
return; |
} |
- } |
- |
- // Handle floating point arithmetic separately: they require Src1 to be |
- // legalized to a register. |
- switch (Inst->getOp()) { |
- default: |
- break; |
case InstArithmetic::Fadd: { |
+ Variable *Src0R = legalizeToReg(Src0); |
Variable *Src1R = legalizeToReg(Src1); |
_vadd(T, Src0R, Src1R); |
_mov(Dest, T); |
return; |
} |
case InstArithmetic::Fsub: { |
+ Variable *Src0R = legalizeToReg(Src0); |
Variable *Src1R = legalizeToReg(Src1); |
_vsub(T, Src0R, Src1R); |
_mov(Dest, T); |
return; |
} |
case InstArithmetic::Fmul: { |
+ Variable *Src0R = legalizeToReg(Src0); |
Variable *Src1R = legalizeToReg(Src1); |
_vmul(T, Src0R, Src1R); |
_mov(Dest, T); |
return; |
} |
case InstArithmetic::Fdiv: { |
+ Variable *Src0R = legalizeToReg(Src0); |
Variable *Src1R = legalizeToReg(Src1); |
_vdiv(T, Src0R, Src1R); |
_mov(Dest, T); |
@@ -1770,67 +2058,136 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
} |
} |
- Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); |
+ // Handle everything else here. |
+ Int32Operands Srcs(Src0, Src1); |
switch (Inst->getOp()) { |
case InstArithmetic::_num: |
- llvm_unreachable("Unknown arithmetic operator"); |
+ llvm::report_fatal_error("Unknown arithmetic operator"); |
return; |
- case InstArithmetic::Add: |
+ case InstArithmetic::Add: { |
+ if (Srcs.hasConstOperand()) { |
+ if (!Srcs.immediateIsFlexEncodable() && |
+ Srcs.negatedImmediateIsFlexEncodable()) { |
+ Variable *Src0R = Srcs.src0R(this); |
+ Operand *Src1F = Srcs.negatedSrc1F(this); |
+ if (!Srcs.swappedOperands()) { |
+ _sub(T, Src0R, Src1F); |
+ } else { |
+ _rsb(T, Src0R, Src1F); |
+ } |
+ _mov(Dest, T); |
+ return; |
+ } |
+ } |
+ Variable *Src0R = Srcs.src0R(this); |
+ Operand *Src1RF = Srcs.src1RF(this); |
_add(T, Src0R, Src1RF); |
_mov(Dest, T); |
return; |
- case InstArithmetic::And: |
+ } |
+ case InstArithmetic::And: { |
+ if (Srcs.hasConstOperand()) { |
+ if (!Srcs.immediateIsFlexEncodable() && |
+ Srcs.invertedImmediateIsFlexEncodable()) { |
+ Variable *Src0R = Srcs.src0R(this); |
+ Operand *Src1F = Srcs.invertedSrc1F(this); |
+ _bic(T, Src0R, Src1F); |
+ _mov(Dest, T); |
+ return; |
+ } |
+ } |
+ Variable *Src0R = Srcs.src0R(this); |
+ Operand *Src1RF = Srcs.src1RF(this); |
_and(T, Src0R, Src1RF); |
_mov(Dest, T); |
return; |
- case InstArithmetic::Or: |
+ } |
+ case InstArithmetic::Or: { |
+ Variable *Src0R = Srcs.src0R(this); |
+ Operand *Src1RF = Srcs.src1RF(this); |
_orr(T, Src0R, Src1RF); |
_mov(Dest, T); |
return; |
- case InstArithmetic::Xor: |
+ } |
+ case InstArithmetic::Xor: { |
+ Variable *Src0R = Srcs.src0R(this); |
+ Operand *Src1RF = Srcs.src1RF(this); |
_eor(T, Src0R, Src1RF); |
_mov(Dest, T); |
return; |
- case InstArithmetic::Sub: |
- _sub(T, Src0R, Src1RF); |
+ } |
+ case InstArithmetic::Sub: { |
+ if (Srcs.hasConstOperand()) { |
+ Variable *Src0R = Srcs.src0R(this); |
+ if (Srcs.immediateIsFlexEncodable()) { |
+ Operand *Src1RF = Srcs.src1RF(this); |
+ if (Srcs.swappedOperands()) { |
+ _rsb(T, Src0R, Src1RF); |
+ } else { |
+ _sub(T, Src0R, Src1RF); |
+ } |
+ _mov(Dest, T); |
+ return; |
+ } |
+ if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) { |
+ Operand *Src1F = Srcs.negatedSrc1F(this); |
+ _add(T, Src0R, Src1F); |
+ _mov(Dest, T); |
+ return; |
+ } |
+ } |
+ Variable *Src0R = Srcs.unswappedSrc0R(this); |
+ Variable *Src1R = Srcs.unswappedSrc1R(this); |
+ _sub(T, Src0R, Src1R); |
_mov(Dest, T); |
return; |
+ } |
case InstArithmetic::Mul: { |
- Variable *Src1R = legalizeToReg(Src1RF); |
+ Variable *Src0R = Srcs.unswappedSrc0R(this); |
+ Variable *Src1R = Srcs.unswappedSrc1R(this); |
_mul(T, Src0R, Src1R); |
_mov(Dest, T); |
return; |
} |
- case InstArithmetic::Shl: |
- _lsl(T, Src0R, Src1RF); |
+ case InstArithmetic::Shl: { |
+ Variable *Src0R = Srcs.unswappedSrc0R(this); |
+ Operand *Src1R = Srcs.unswappedSrc1RF(this); |
+ _lsl(T, Src0R, Src1R); |
_mov(Dest, T); |
return; |
- case InstArithmetic::Lshr: |
+ } |
+ case InstArithmetic::Lshr: { |
+ Variable *Src0R = Srcs.unswappedSrc0R(this); |
if (Dest->getType() != IceType_i32) { |
_uxt(Src0R, Src0R); |
} |
- _lsr(T, Src0R, Src1RF); |
+ _lsr(T, Src0R, Srcs.unswappedSrc1RF(this)); |
_mov(Dest, T); |
return; |
- case InstArithmetic::Ashr: |
+ } |
+ case InstArithmetic::Ashr: { |
+ Variable *Src0R = Srcs.unswappedSrc0R(this); |
if (Dest->getType() != IceType_i32) { |
_sxt(Src0R, Src0R); |
} |
- _asr(T, Src0R, Src1RF); |
+ _asr(T, Src0R, Srcs.unswappedSrc1RF(this)); |
_mov(Dest, T); |
return; |
+ } |
case InstArithmetic::Udiv: |
case InstArithmetic::Sdiv: |
case InstArithmetic::Urem: |
case InstArithmetic::Srem: |
- llvm_unreachable("Integer div/rem should have been handled earlier."); |
+ llvm::report_fatal_error( |
+ "Integer div/rem should have been handled earlier."); |
return; |
case InstArithmetic::Fadd: |
case InstArithmetic::Fsub: |
case InstArithmetic::Fmul: |
case InstArithmetic::Fdiv: |
case InstArithmetic::Frem: |
- llvm_unreachable("Floating point arith should have been handled earlier."); |
+ llvm::report_fatal_error( |
+ "Floating point arith should have been handled earlier."); |
return; |
} |
} |
@@ -1841,40 +2198,39 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) { |
assert(Dest->getType() == Src0->getType()); |
if (Dest->getType() == IceType_i64) { |
Src0 = legalizeUndef(Src0); |
- Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
- Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
- Variable *T_Lo = makeReg(IceType_i32); |
- Variable *T_Hi = makeReg(IceType_i32); |
+ Variable *T_Lo = makeReg(IceType_i32); |
+ auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
+ Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
_mov(T_Lo, Src0Lo); |
_mov(DestLo, T_Lo); |
+ |
+ Variable *T_Hi = makeReg(IceType_i32); |
+ auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
+ Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
_mov(T_Hi, Src0Hi); |
_mov(DestHi, T_Hi); |
+ |
+ return; |
+ } |
+ |
+ Operand *NewSrc; |
+ if (Dest->hasReg()) { |
+ // If Dest already has a physical register, then legalize the Src operand |
+ // into a Variable with the same register assignment. This especially |
+ // helps allow the use of Flex operands. |
+ NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); |
} else { |
- Operand *NewSrc; |
- if (Dest->hasReg()) { |
- // If Dest already has a physical register, then legalize the Src operand |
- // into a Variable with the same register assignment. This especially |
- // helps allow the use of Flex operands. |
- NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); |
- } else { |
- // Dest could be a stack operand. Since we could potentially need to do a |
- // Store (and store can only have Register operands), legalize this to a |
- // register. |
- NewSrc = legalize(Src0, Legal_Reg); |
- } |
- if (isVectorType(Dest->getType())) { |
- Variable *SrcR = legalizeToReg(NewSrc); |
- _mov(Dest, SrcR); |
- } else if (isFloatingType(Dest->getType())) { |
- Variable *SrcR = legalizeToReg(NewSrc); |
- _mov(Dest, SrcR); |
- } else { |
- _mov(Dest, NewSrc); |
- } |
+ // Dest could be a stack operand. Since we could potentially need to do a |
+ // Store (and store can only have Register operands), legalize this to a |
+ // register. |
+ NewSrc = legalize(Src0, Legal_Reg); |
} |
+ |
+ if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) { |
+ NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem); |
+ } |
+ _mov(Dest, NewSrc); |
} |
TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( |
@@ -2580,6 +2936,18 @@ struct { |
FCMPARM32_TABLE |
#undef X |
}; |
+ |
+bool isFloatingPointZero(Operand *Src) { |
+ if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) { |
+ return Utils::isPositiveZero(F32->getValue()); |
+ } |
+ |
+ if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) { |
+ return Utils::isPositiveZero(F64->getValue()); |
+ } |
+ |
+ return false; |
+} |
} // end of anonymous namespace |
TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { |
@@ -2592,8 +2960,12 @@ TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { |
break; |
default: { |
Variable *Src0R = legalizeToReg(Instr->getSrc(0)); |
- Variable *Src1R = legalizeToReg(Instr->getSrc(1)); |
- _vcmp(Src0R, Src1R); |
+ Operand *Src1 = Instr->getSrc(1); |
+ if (isFloatingPointZero(Src1)) { |
+ _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType())); |
+ } else { |
+ _vcmp(Src0R, legalizeToReg(Src1)); |
+ } |
_vmrs(); |
assert(Condition < llvm::array_lengthof(TableFcmp)); |
return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); |
@@ -2642,12 +3014,87 @@ void TargetARM32::lowerFcmp(const InstFcmp *Instr) { |
_mov(Dest, T); |
} |
-TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { |
- assert(Inst->getSrc(0)->getType() != IceType_i1); |
- assert(Inst->getSrc(1)->getType() != IceType_i1); |
+TargetARM32::CondWhenTrue |
+TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0, |
+ Operand *Src1) { |
+ size_t Index = static_cast<size_t>(Condition); |
+ assert(Index < llvm::array_lengthof(TableIcmp64)); |
+ |
+ Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1)); |
+ Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1)); |
+ assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand()); |
+ assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands()); |
+ |
+ if (SrcsLo.hasConstOperand()) { |
+ const uint32_t ValueLo = SrcsLo.getConstantValue(); |
+ const uint32_t ValueHi = SrcsHi.getConstantValue(); |
+ const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo; |
+ if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && |
+ Value == 0) { |
+ Variable *T = makeReg(IceType_i32); |
+ Variable *Src0LoR = SrcsLo.src0R(this); |
+ Variable *Src0HiR = SrcsHi.src0R(this); |
+ _orrs(T, Src0LoR, Src0HiR); |
+ Context.insert(InstFakeUse::create(Func, T)); |
+ return CondWhenTrue(TableIcmp64[Index].C1); |
+ } |
- Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
- Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
+ Variable *Src0RLo = SrcsLo.src0R(this); |
+ Variable *Src0RHi = SrcsHi.src0R(this); |
+ Operand *Src1RFLo = SrcsLo.src1RF(this); |
+ Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this); |
+ |
+ const bool UseRsb = TableIcmp64[Index].Swapped != SrcsLo.swappedOperands(); |
+ |
+ if (UseRsb) { |
+ if (TableIcmp64[Index].IsSigned) { |
+ Variable *T = makeReg(IceType_i32); |
+ _rsbs(T, Src0RLo, Src1RFLo); |
+ Context.insert(InstFakeUse::create(Func, T)); |
+ |
+ T = makeReg(IceType_i32); |
+ _rscs(T, Src0RHi, Src1RFHi); |
+ // T is never read, so add a FakeUse; otherwise liveness analysis complains |
+ // about a Def without a Use. Note that flag-setting instructions are |
+ // considered to have side effects and, therefore, are not DCE'ed. |
+ Context.insert(InstFakeUse::create(Func, T)); |
+ } else { |
+ Variable *T = makeReg(IceType_i32); |
+ _rsbs(T, Src0RHi, Src1RFHi); |
+ Context.insert(InstFakeUse::create(Func, T)); |
+ |
+ T = makeReg(IceType_i32); |
+ _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ); |
+ Context.insert(InstFakeUse::create(Func, T)); |
+ } |
+ } else { |
+ if (TableIcmp64[Index].IsSigned) { |
+ _cmp(Src0RLo, Src1RFLo); |
+ Variable *T = makeReg(IceType_i32); |
+ _sbcs(T, Src0RHi, Src1RFHi); |
+ Context.insert(InstFakeUse::create(Func, T)); |
+ } else { |
+ _cmp(Src0RHi, Src1RFHi); |
+ _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); |
+ } |
+ } |
+ |
+ return CondWhenTrue(TableIcmp64[Index].C1); |
+ } |
+ |
+ Variable *Src0RLo, *Src0RHi; |
+ Operand *Src1RFLo, *Src1RFHi; |
+ if (TableIcmp64[Index].Swapped) { |
+ Src0RLo = legalizeToReg(loOperand(Src1)); |
+ Src0RHi = legalizeToReg(hiOperand(Src1)); |
+ Src1RFLo = legalizeToReg(loOperand(Src0)); |
+ Src1RFHi = legalizeToReg(hiOperand(Src0)); |
+ } else { |
+ Src0RLo = legalizeToReg(loOperand(Src0)); |
+ Src0RHi = legalizeToReg(hiOperand(Src0)); |
+ Src1RFLo = legalizeToReg(loOperand(Src1)); |
+ Src1RFHi = legalizeToReg(hiOperand(Src1)); |
+ } |
// a=icmp cond, b, c ==> |
// GCC does: |
@@ -2678,38 +3125,111 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { |
// |
// So, we are going with the GCC version since it's usually better (except |
// perhaps for eq/ne). We could revisit special-casing eq/ne later. |
+ if (TableIcmp64[Index].IsSigned) { |
+ Variable *ScratchReg = makeReg(IceType_i32); |
+ _cmp(Src0RLo, Src1RFLo); |
+ _sbcs(ScratchReg, Src0RHi, Src1RFHi); |
+ // ScratchReg isn't going to be used, but we need the side-effect of |
+ // setting flags from this operation. |
+ Context.insert(InstFakeUse::create(Func, ScratchReg)); |
+ } else { |
+ _cmp(Src0RHi, Src1RFHi); |
+ _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); |
+ } |
+ return CondWhenTrue(TableIcmp64[Index].C1); |
+} |
- if (Src0->getType() == IceType_i64) { |
- InstIcmp::ICond Conditon = Inst->getCondition(); |
- size_t Index = static_cast<size_t>(Conditon); |
- assert(Index < llvm::array_lengthof(TableIcmp64)); |
- Variable *Src0Lo, *Src0Hi; |
- Operand *Src1LoRF, *Src1HiRF; |
- if (TableIcmp64[Index].Swapped) { |
- Src0Lo = legalizeToReg(loOperand(Src1)); |
- Src0Hi = legalizeToReg(hiOperand(Src1)); |
- Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
- Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
- } else { |
- Src0Lo = legalizeToReg(loOperand(Src0)); |
- Src0Hi = legalizeToReg(hiOperand(Src0)); |
- Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); |
- Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); |
- } |
- if (TableIcmp64[Index].IsSigned) { |
- Variable *ScratchReg = makeReg(IceType_i32); |
- _cmp(Src0Lo, Src1LoRF); |
- _sbcs(ScratchReg, Src0Hi, Src1HiRF); |
- // ScratchReg isn't going to be used, but we need the side-effect of |
- // setting flags from this operation. |
- Context.insert(InstFakeUse::create(Func, ScratchReg)); |
- } else { |
- _cmp(Src0Hi, Src1HiRF); |
- _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); |
- } |
- return CondWhenTrue(TableIcmp64[Index].C1); |
+TargetARM32::CondWhenTrue |
+TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0, |
+ Operand *Src1) { |
+ Int32Operands Srcs(Src0, Src1); |
+ if (!Srcs.hasConstOperand()) { |
+ |
+ Variable *Src0R = Srcs.src0R(this); |
+ Operand *Src1RF = Srcs.src1RF(this); |
+ _cmp(Src0R, Src1RF); |
+ return CondWhenTrue(getIcmp32Mapping(Condition)); |
+ } |
+ |
+ Variable *Src0R = Srcs.src0R(this); |
+ const int32_t Value = Srcs.getConstantValue(); |
+ if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) { |
+ _tst(Src0R, Src0R); |
+ return CondWhenTrue(getIcmp32Mapping(Condition)); |
+ } |
+ |
+ if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() && |
+ Srcs.negatedImmediateIsFlexEncodable()) { |
+ Operand *Src1F = Srcs.negatedSrc1F(this); |
+ _cmn(Src0R, Src1F); |
+ return CondWhenTrue(getIcmp32Mapping(Condition)); |
+ } |
+ |
+ Operand *Src1RF = Srcs.src1RF(this); |
+ if (!Srcs.swappedOperands()) { |
+ _cmp(Src0R, Src1RF); |
+ } else { |
+ Variable *T = makeReg(IceType_i32); |
+ _rsbs(T, Src0R, Src1RF); |
+ Context.insert(InstFakeUse::create(Func, T)); |
+ } |
+ return CondWhenTrue(getIcmp32Mapping(Condition)); |
+} |
+ |
+TargetARM32::CondWhenTrue |
+TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0, |
+ Operand *Src1) { |
+ Int32Operands Srcs(Src0, Src1); |
+ const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType()); |
+ assert(ShAmt >= 0); |
+ |
+ if (!Srcs.hasConstOperand()) { |
+ Variable *Src0R = makeReg(IceType_i32); |
+ Operand *ShAmtF = |
+ legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex); |
+ _lsl(Src0R, legalizeToReg(Src0), ShAmtF); |
+ |
+ Variable *Src1R = legalizeToReg(Src1); |
+ OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create( |
+ Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF); |
+ _cmp(Src0R, Src1F); |
+ return CondWhenTrue(getIcmp32Mapping(Condition)); |
+ } |
+ |
+ const int32_t Value = Srcs.getConstantValue(); |
+ if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) { |
+ Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt); |
+ Variable *T = makeReg(IceType_i32); |
+ _lsls(T, Srcs.src0R(this), ShAmtOp); |
+ Context.insert(InstFakeUse::create(Func, T)); |
+ return CondWhenTrue(getIcmp32Mapping(Condition)); |
+ } |
+ |
+ Variable *ConstR = makeReg(IceType_i32); |
+ _mov(ConstR, |
+ legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex)); |
+ Operand *NonConstF = OperandARM32FlexReg::create( |
+ Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL, |
+ Ctx->getConstantInt32(ShAmt)); |
+ |
+ if (Srcs.swappedOperands()) { |
+ _cmp(ConstR, NonConstF); |
+ } else { |
+ Variable *T = makeReg(IceType_i32); |
+ _rsbs(T, ConstR, NonConstF); |
+ Context.insert(InstFakeUse::create(Func, T)); |
} |
+ return CondWhenTrue(getIcmp32Mapping(Condition)); |
+} |
+ |
+TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { |
+ assert(Inst->getSrc(0)->getType() != IceType_i1); |
+ assert(Inst->getSrc(1)->getType() != IceType_i1); |
+ |
+ Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
+ Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
+ const InstIcmp::ICond Condition = Inst->getCondition(); |
// a=icmp cond b, c ==> |
// GCC does: |
// <u/s>xtb tb, b |
@@ -2739,27 +3259,17 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { |
// |
// We'll go with the LLVM way for now, since it's shorter and has just as few |
// dependencies. |
- int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); |
- assert(ShiftAmt >= 0); |
- Constant *ShiftConst = nullptr; |
- Variable *Src0R = nullptr; |
- if (ShiftAmt) { |
- ShiftConst = Ctx->getConstantInt32(ShiftAmt); |
- Src0R = makeReg(IceType_i32); |
- _lsl(Src0R, legalizeToReg(Src0), ShiftConst); |
- } else { |
- Src0R = legalizeToReg(Src0); |
- } |
- if (ShiftAmt) { |
- Variable *Src1R = legalizeToReg(Src1); |
- OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( |
- Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst); |
- _cmp(Src0R, Src1RShifted); |
- } else { |
- Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); |
- _cmp(Src0R, Src1RF); |
+ switch (Src0->getType()) { |
+ default: |
+ llvm::report_fatal_error("Unhandled type in lowerIcmpCond"); |
+ case IceType_i8: |
+ case IceType_i16: |
+ return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1); |
+ case IceType_i32: |
+ return lowerInt32IcmpCond(Condition, Src0, Src1); |
+ case IceType_i64: |
+ return lowerInt64IcmpCond(Condition, Src0, Src1); |
} |
- return CondWhenTrue(getIcmp32Mapping(Inst->getCondition())); |
} |
void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
@@ -4254,13 +4764,24 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, |
return Reg; |
} else { |
assert(isScalarFloatingType(Ty)); |
+ uint32_t ModifiedImm; |
+ if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { |
+ Variable *T = makeReg(Ty, RegNum); |
+ _mov(T, |
+ OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); |
+ return T; |
+ } |
+ |
+ if (Ty == IceType_f64 && isFloatingPointZero(From)) { |
+ // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32 |
+ // because ARM does not have a veor instruction with S registers. |
+ Variable *T = makeReg(IceType_f64, RegNum); |
+ Context.insert(InstFakeDef::create(Func, T)); |
+ _veor(T, T, T); |
+ return T; |
+ } |
+ |
// Load floats/doubles from literal pool. |
- // TODO(jvoung): Allow certain immediates to be encoded directly in an |
- // operand. See Table A7-18 of the ARM manual: "Floating-point modified |
- // immediate constants". Or, for 32-bit floating point numbers, just |
- // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG |
- // instead of using a movw/movt pair to get the const-pool address then |
- // loading to SREG. |
std::string Buffer; |
llvm::raw_string_ostream StrBuf(Buffer); |
llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); |