Chromium Code Reviews| Index: src/IceTargetLoweringARM32.cpp |
| diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp |
| index 37f5e19ed65dee937cd8b01f31877211192f4541..228e32d0aaab41f82d2b9d8e76941e052e5f28f9 100644 |
| --- a/src/IceTargetLoweringARM32.cpp |
| +++ b/src/IceTargetLoweringARM32.cpp |
| @@ -1069,9 +1069,90 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| _mov(DestLo, T_Lo); |
| _mov(DestHi, T_Hi); |
| } break; |
| - case InstArithmetic::Shl: |
| + case InstArithmetic::Shl: { |
| + // a=b<<c ==> |
| + // GCC 4.8 does: |
| + // sub t_c1, c.lo, #32 |
| + // lsl t_hi, b.hi, c.lo |
| + // orr t_hi, t_hi, b.lo, lsl t_c1 |
| + // rsb t_c2, c.lo, #32 |
| + // orr t_hi, t_hi, b.lo, lsr t_c2 |
| + // lsl t_lo, b.lo, c.lo |
| + // a.lo = t_lo |
| + // a.hi = t_hi |
| + // Can be strength-reduced for constant-shifts, but we don't do |
| + // that for now. |
| + // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. |
| + // On ARM, shifts only take the lower 8 bits of the shift register, |
| + // and saturate to the range 0-32, so the negative value will |
| + // saturate to 32. |
| + Variable *T_Hi = makeReg(IceType_i32); |
| + Variable *Src1RLo = legalizeToVar(Src1Lo); |
| + Constant *ThirtyTwo = Ctx->getConstantInt32(32); |
| + Variable *T_C1 = makeReg(IceType_i32); |
| + Variable *T_C2 = makeReg(IceType_i32); |
| + _sub(T_C1, Src1RLo, ThirtyTwo); |
| + _lsl(T_Hi, Src0RHi, Src1RLo); |
| + _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
| + OperandARM32::LSL, T_C1)); |
| + _rsb(T_C2, Src1RLo, ThirtyTwo); |
| + _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
| + OperandARM32::LSR, T_C2)); |
| + _mov(DestHi, T_Hi); |
| + Variable *T_Lo = makeReg(IceType_i32); |
| + // _mov seems to sometimes have better register preferencing than lsl. |
| + // Otherwise mov w/ lsl shifted register is a pseudo-instruction |
| + // that maps to lsl. |
| + _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
| + OperandARM32::LSL, Src1RLo)); |
| + _mov(DestLo, T_Lo); |
| + } break; |
|
Jim Stichnoth
2015/06/04 22:10:59
I've been thinking (in the x86-32 version and also
jvoung (off chromium)
2015/06/04 23:15:14
That sounds good to me, but will leave for a separ
|
| case InstArithmetic::Lshr: |
| - case InstArithmetic::Ashr: |
| + // a=b>>c (unsigned) ==> |
| + // GCC 4.8 does: |
| + // rsb t_c1, c.lo, #32 |
| + // lsr t_lo, b.lo, c.lo |
| + // orr t_lo, t_lo, b.hi, lsl t_c1 |
| + // sub t_c2, c.lo, #32 |
| + // orr t_lo, t_lo, b.hi, lsr t_c2 |
| + // lsr t_hi, b.hi, c.lo |
| + // a.lo = t_lo |
| + // a.hi = t_hi |
| + case InstArithmetic::Ashr: { |
| + // a=b>>c (signed) ==> ... |
| + // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, |
| + // and the next orr should be conditioned on PLUS. The last two |
| + // right shifts should also be arithmetic. |
| + bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; |
| + Variable *T_Lo = makeReg(IceType_i32); |
| + Variable *Src1RLo = legalizeToVar(Src1Lo); |
| + Constant *ThirtyTwo = Ctx->getConstantInt32(32); |
| + Variable *T_C1 = makeReg(IceType_i32); |
| + Variable *T_C2 = makeReg(IceType_i32); |
| + _rsb(T_C1, Src1RLo, ThirtyTwo); |
| + _lsr(T_Lo, Src0RLo, Src1RLo); |
| + _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| + OperandARM32::LSL, T_C1)); |
| + OperandARM32::ShiftKind RShiftKind; |
| + CondARM32::Cond Pred; |
| + if (IsAshr) { |
| + _subs(T_C2, Src1RLo, ThirtyTwo); |
| + RShiftKind = OperandARM32::ASR; |
| + Pred = CondARM32::PL; |
| + } else { |
| + _sub(T_C2, Src1RLo, ThirtyTwo); |
| + RShiftKind = OperandARM32::LSR; |
| + Pred = CondARM32::AL; |
| + } |
| + _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| + RShiftKind, T_C2), |
| + Pred); |
| + _mov(DestLo, T_Lo); |
| + Variable *T_Hi = makeReg(IceType_i32); |
| + _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| + RShiftKind, Src1RLo)); |
| + _mov(DestHi, T_Hi); |
| + } break; |
| case InstArithmetic::Udiv: |
| case InstArithmetic::Sdiv: |
| case InstArithmetic::Urem: |
| @@ -1122,13 +1203,16 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| _mov(Dest, T); |
| } break; |
| case InstArithmetic::Shl: |
| - UnimplementedError(Func->getContext()->getFlags()); |
| + _lsl(T, Src0R, Src1); |
| + _mov(Dest, T); |
| break; |
| case InstArithmetic::Lshr: |
| - UnimplementedError(Func->getContext()->getFlags()); |
| + _lsr(T, Src0R, Src1); |
| + _mov(Dest, T); |
| break; |
| case InstArithmetic::Ashr: |
| - UnimplementedError(Func->getContext()->getFlags()); |
| + _asr(T, Src0R, Src1); |
| + _mov(Dest, T); |
| break; |
| case InstArithmetic::Udiv: |
| UnimplementedError(Func->getContext()->getFlags()); |
| @@ -1311,20 +1395,123 @@ void TargetARM32::lowerCall(const InstCall *Instr) { |
| void TargetARM32::lowerCast(const InstCast *Inst) { |
| InstCast::OpKind CastKind = Inst->getCastKind(); |
| + Variable *Dest = Inst->getDest(); |
| + Operand *Src0 = Inst->getSrc(0); |
| switch (CastKind) { |
| default: |
| Func->setError("Cast type not supported"); |
| return; |
| case InstCast::Sext: { |
| - UnimplementedError(Func->getContext()->getFlags()); |
| + if (isVectorType(Dest->getType())) { |
| + UnimplementedError(Func->getContext()->getFlags()); |
| + } else if (Dest->getType() == IceType_i64) { |
| + // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2 |
| + Constant *Shift = Ctx->getConstantInt32(31); |
| + Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| + Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| + Variable *T_Lo = makeReg(DestLo->getType()); |
| + if (Src0->getType() == IceType_i32) { |
| + Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
| + _mov(T_Lo, Src0RF); |
| + } else if (Src0->getType() == IceType_i1) { |
| + Variable *Src0R = legalizeToVar(Src0); |
| + _lsl(T_Lo, Src0R, Shift); |
| + _asr(T_Lo, T_Lo, Shift); |
| + } else { |
| + Variable *Src0R = legalizeToVar(Src0); |
| + _sxt(T_Lo, Src0R); |
| + } |
| + _mov(DestLo, T_Lo); |
| + Variable *T_Hi = makeReg(DestHi->getType()); |
| + if (Src0->getType() != IceType_i1) { |
| + _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo, |
| + OperandARM32::ASR, Shift)); |
| + } else { |
| + // For i1, the asr instruction is already done above. |
| + _mov(T_Hi, T_Lo); |
| + } |
| + _mov(DestHi, T_Hi); |
| + } else if (Src0->getType() == IceType_i1) { |
| + // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1. |
| + // lsl t1, src_reg, 31 |
| + // asr t1, t1, 31 |
| + // dst = t1 |
| + Variable *Src0R = legalizeToVar(Src0); |
| + Constant *ShiftAmount = Ctx->getConstantInt32(31); |
|
Jim Stichnoth
2015/06/04 22:10:59
Maybe use the same variable name for ShiftAmount a
jvoung (off chromium)
2015/06/04 23:15:14
Done -- picked middle ground of ShiftAmt =)
|
| + Variable *T = makeReg(Dest->getType()); |
| + _lsl(T, Src0R, ShiftAmount); |
| + _asr(T, T, ShiftAmount); |
| + _mov(Dest, T); |
| + } else { |
| + // t1 = sxt src; dst = t1 |
| + Variable *Src0R = legalizeToVar(Src0); |
| + Variable *T = makeReg(Dest->getType()); |
| + _sxt(T, Src0R); |
| + _mov(Dest, T); |
| + } |
| break; |
| } |
| case InstCast::Zext: { |
| - UnimplementedError(Func->getContext()->getFlags()); |
| + if (isVectorType(Dest->getType())) { |
| + UnimplementedError(Func->getContext()->getFlags()); |
| + } else if (Dest->getType() == IceType_i64) { |
| + // t1=uxtb src; dst.lo=t1; dst.hi=0 |
| + Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| + Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| + Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| + Variable *T_Lo = makeReg(DestLo->getType()); |
| + // i32 and i1 can just take up the whole register. |
| + // i32 doesn't need uxt, while i1 will have an and mask later anyway. |
| + if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { |
| + Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
| + _mov(T_Lo, Src0RF); |
| + } else { |
| + Variable *Src0R = legalizeToVar(Src0); |
| + _uxt(T_Lo, Src0R); |
| + } |
| + if (Src0->getType() == IceType_i1) { |
| + Constant *One = Ctx->getConstantInt32(1); |
| + _and(T_Lo, T_Lo, One); |
| + } |
| + _mov(DestLo, T_Lo); |
| + Variable *T_Hi = makeReg(DestLo->getType()); |
| + _mov(T_Hi, Zero); |
| + _mov(DestHi, T_Hi); |
| + } else if (Src0->getType() == IceType_i1) { |
| + // t = Src0; t &= 1; Dest = t |
| + Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
| + Constant *One = Ctx->getConstantInt32(1); |
| + Variable *T = makeReg(Dest->getType()); |
| + // Just use _mov instead of _uxt since all registers are 32-bit. |
| + // _uxt requires the source to be a register so could have required |
| + // a _mov from legalize anyway. |
| + _mov(T, Src0RF); |
| + _and(T, T, One); |
| + _mov(Dest, T); |
| + } else { |
| + // t1 = uxt src; dst = t1 |
| + Variable *Src0R = legalizeToVar(Src0); |
| + Variable *T = makeReg(Dest->getType()); |
| + _uxt(T, Src0R); |
| + _mov(Dest, T); |
| + } |
| break; |
| } |
| case InstCast::Trunc: { |
| - UnimplementedError(Func->getContext()->getFlags()); |
| + if (isVectorType(Dest->getType())) { |
| + UnimplementedError(Func->getContext()->getFlags()); |
| + } else { |
| + Operand *Src0 = Inst->getSrc(0); |
| + if (Src0->getType() == IceType_i64) |
| + Src0 = loOperand(Src0); |
| + Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
| + // t1 = trunc Src0RF; Dest = t1 |
| + Variable *T = makeReg(Dest->getType()); |
| + _mov(T, Src0RF); |
| + if (Dest->getType() == IceType_i1) |
| + _and(T, T, Ctx->getConstantInt1(1)); |
| + _mov(Dest, T); |
| + } |
| break; |
| } |
| case InstCast::Fptrunc: |
| @@ -1348,6 +1535,12 @@ void TargetARM32::lowerCast(const InstCast *Inst) { |
| break; |
| } |
| case InstCast::Bitcast: { |
| + Operand *Src0 = Inst->getSrc(0); |
| + if (Dest->getType() == Src0->getType()) { |
| + InstAssign *Assign = InstAssign::create(Func, Dest, Src0); |
| + lowerAssign(Assign); |
| + return; |
| + } |
| UnimplementedError(Func->getContext()->getFlags()); |
| break; |
| } |