Index: src/IceTargetLoweringARM32.cpp |
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp |
index d65b5469c18c7f6b175b9a916c9474ec7bc37e6b..73eb77c0f841b873fb542cbee4b895a3c82353d7 100644 |
--- a/src/IceTargetLoweringARM32.cpp |
+++ b/src/IceTargetLoweringARM32.cpp |
@@ -540,40 +540,135 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
// Or it may be the case that the operands aren't swapped, but the |
// bits can be flipped and a different operation applied. |
// E.g., use BIC (bit clear) instead of AND for some masks. |
- Variable *Src0 = legalizeToVar(Inst->getSrc(0)); |
- Operand *Src1 = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex); |
- (void)Src0; |
- (void)Src1; |
+ Operand *Src0 = Inst->getSrc(0); |
+ Operand *Src1 = Inst->getSrc(1); |
if (Dest->getType() == IceType_i64) { |
- UnimplementedError(Func->getContext()->getFlags()); |
- } else if (isVectorType(Dest->getType())) { |
- UnimplementedError(Func->getContext()->getFlags()); |
- } else { // Dest->getType() is non-i64 scalar |
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
+ Variable *Src0RLo = legalizeToVar(loOperand(Src0)); |
+ Variable *Src0RHi = legalizeToVar(hiOperand(Src0)); |
+ Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); |
+ Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); |
+ Variable *T_Lo = makeReg(DestLo->getType()); |
+ Variable *T_Hi = makeReg(DestHi->getType()); |
switch (Inst->getOp()) { |
case InstArithmetic::_num: |
llvm_unreachable("Unknown arithmetic operator"); |
break; |
- case InstArithmetic::Add: { |
- UnimplementedError(Func->getContext()->getFlags()); |
- // Variable *T = makeReg(Dest->getType()); |
- // _add(T, Src0, Src1); |
- // _mov(Dest, T); |
- } break; |
+ case InstArithmetic::Add: |
+ _adds(T_Lo, Src0RLo, Src1Lo); |
+ _mov(DestLo, T_Lo); |
+ _adc(T_Hi, Src0RHi, Src1Hi); |
+ _mov(DestHi, T_Hi); |
+ break; |
case InstArithmetic::And: |
- UnimplementedError(Func->getContext()->getFlags()); |
+ _and(T_Lo, Src0RLo, Src1Lo); |
+ _mov(DestLo, T_Lo); |
+ _and(T_Hi, Src0RHi, Src1Hi); |
+ _mov(DestHi, T_Hi); |
break; |
case InstArithmetic::Or: |
- UnimplementedError(Func->getContext()->getFlags()); |
+ _orr(T_Lo, Src0RLo, Src1Lo); |
+ _mov(DestLo, T_Lo); |
+ _orr(T_Hi, Src0RHi, Src1Hi); |
+ _mov(DestHi, T_Hi); |
break; |
case InstArithmetic::Xor: |
- UnimplementedError(Func->getContext()->getFlags()); |
+ _eor(T_Lo, Src0RLo, Src1Lo); |
+ _mov(DestLo, T_Lo); |
+ _eor(T_Hi, Src0RHi, Src1Hi); |
+ _mov(DestHi, T_Hi); |
break; |
case InstArithmetic::Sub: |
- UnimplementedError(Func->getContext()->getFlags()); |
+ _subs(T_Lo, Src0RLo, Src1Lo); |
+ _mov(DestLo, T_Lo); |
+ _sbc(T_Hi, Src0RHi, Src1Hi); |
+ _mov(DestHi, T_Hi); |
break; |
- case InstArithmetic::Mul: |
+ case InstArithmetic::Mul: { |
+ // GCC 4.8 does: |
+ // a=b*c ==> |
+ // t_acc =(mul) (b.lo * c.hi) |
+ // t_acc =(mla) (c.lo * b.hi) + t_acc |
+ // t.hi,t.lo =(umull) b.lo * c.lo |
+ // t.hi += t_acc |
+ // a.lo = t.lo |
+ // a.hi = t.hi |
+ // |
+ // LLVM does: |
+ // t.hi,t.lo =(umull) b.lo * c.lo |
+ // t.hi =(mla) (b.lo * c.hi) + t.hi |
+ // t.hi =(mla) (b.hi * c.lo) + t.hi |
+ // a.lo = t.lo |
+ // a.hi = t.hi |
+ // |
+ // LLVM's lowering has fewer instructions, but more register pressure: |
+ // t.lo is live from beginning to end, while GCC delays the two-dest |
+ // instruction till the end, and kills c.hi immediately. |
+ Variable *T_Acc = makeReg(IceType_i32); |
+ Variable *T_Acc1 = makeReg(IceType_i32); |
+ Variable *T_Hi1 = makeReg(IceType_i32); |
+ Variable *Src1RLo = legalizeToVar(Src1Lo); |
+ Variable *Src1RHi = legalizeToVar(Src1Hi); |
+ _mul(T_Acc, Src0RLo, Src1RHi); |
+ _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); |
+ _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); |
+ _add(T_Hi, T_Hi1, T_Acc1); |
+ _mov(DestLo, T_Lo); |
+ _mov(DestHi, T_Hi); |
+ } break; |
+ case InstArithmetic::Shl: |
+ case InstArithmetic::Lshr: |
+ case InstArithmetic::Ashr: |
+ case InstArithmetic::Udiv: |
+ case InstArithmetic::Sdiv: |
+ case InstArithmetic::Urem: |
+ case InstArithmetic::Srem: |
UnimplementedError(Func->getContext()->getFlags()); |
break; |
+ case InstArithmetic::Fadd: |
+ case InstArithmetic::Fsub: |
+ case InstArithmetic::Fmul: |
+ case InstArithmetic::Fdiv: |
+ case InstArithmetic::Frem: |
+ llvm_unreachable("FP instruction with i64 type"); |
+ break; |
+ } |
+ } else if (isVectorType(Dest->getType())) { |
+ UnimplementedError(Func->getContext()->getFlags()); |
+ } else { // Dest->getType() is non-i64 scalar |
+ Variable *Src0R = legalizeToVar(Inst->getSrc(0)); |
+ Src1 = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex); |
+ Variable *T = makeReg(Dest->getType()); |
+ switch (Inst->getOp()) { |
+ case InstArithmetic::_num: |
+ llvm_unreachable("Unknown arithmetic operator"); |
+ break; |
+ case InstArithmetic::Add: { |
+ _add(T, Src0R, Src1); |
+ _mov(Dest, T); |
+ } break; |
+ case InstArithmetic::And: { |
+ _and(T, Src0R, Src1); |
+ _mov(Dest, T); |
+ } break; |
+ case InstArithmetic::Or: { |
+ _orr(T, Src0R, Src1); |
+ _mov(Dest, T); |
+ } break; |
+ case InstArithmetic::Xor: { |
+ _eor(T, Src0R, Src1); |
+ _mov(Dest, T); |
+ } break; |
+ case InstArithmetic::Sub: { |
+ _sub(T, Src0R, Src1); |
+ _mov(Dest, T); |
+ } break; |
+ case InstArithmetic::Mul: { |
+ Variable *Src1R = legalizeToVar(Src1); |
+ _mul(T, Src0R, Src1R); |
+ _mov(Dest, T); |
+ } break; |
case InstArithmetic::Shl: |
UnimplementedError(Func->getContext()->getFlags()); |
break; |