| Index: src/IceTargetLoweringARM32.cpp
|
| diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
|
| index 37f5e19ed65dee937cd8b01f31877211192f4541..8f7c331584ac696cfdc9943cc0d6d08c0ca68298 100644
|
| --- a/src/IceTargetLoweringARM32.cpp
|
| +++ b/src/IceTargetLoweringARM32.cpp
|
| @@ -1069,9 +1069,90 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
|
| _mov(DestLo, T_Lo);
|
| _mov(DestHi, T_Hi);
|
| } break;
|
| - case InstArithmetic::Shl:
|
| + case InstArithmetic::Shl: {
|
| + // a=b<<c ==>
|
| + // GCC 4.8 does:
|
| + // sub t_c1, c.lo, #32
|
| + // lsl t_hi, b.hi, c.lo
|
| + // orr t_hi, t_hi, b.lo, lsl t_c1
|
| + // rsb t_c2, c.lo, #32
|
| + // orr t_hi, t_hi, b.lo, lsr t_c2
|
| + // lsl t_lo, b.lo, c.lo
|
| + // a.lo = t_lo
|
| + // a.hi = t_hi
|
| + // Can be strength-reduced for constant-shifts, but we don't do
|
| + // that for now.
|
| + // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.
|
| + // On ARM, shifts only take the lower 8 bits of the shift register,
|
| + // and saturate to the range 0-32, so the negative value will
|
| + // saturate to 32.
|
| + Variable *T_Hi = makeReg(IceType_i32);
|
| + Variable *Src1RLo = legalizeToVar(Src1Lo);
|
| + Constant *ThirtyTwo = Ctx->getConstantInt32(32);
|
| + Variable *T_C1 = makeReg(IceType_i32);
|
| + Variable *T_C2 = makeReg(IceType_i32);
|
| + _sub(T_C1, Src1RLo, ThirtyTwo);
|
| + _lsl(T_Hi, Src0RHi, Src1RLo);
|
| + _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
|
| + OperandARM32::LSL, T_C1));
|
| + _rsb(T_C2, Src1RLo, ThirtyTwo);
|
| + _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
|
| + OperandARM32::LSR, T_C2));
|
| + _mov(DestHi, T_Hi);
|
| + Variable *T_Lo = makeReg(IceType_i32);
|
| + // _mov seems to sometimes have better register preferencing than lsl.
|
| + // Otherwise mov w/ lsl shifted register is a pseudo-instruction
|
| + // that maps to lsl.
|
| + _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
|
| + OperandARM32::LSL, Src1RLo));
|
| + _mov(DestLo, T_Lo);
|
| + } break;
|
| case InstArithmetic::Lshr:
|
| - case InstArithmetic::Ashr:
|
| + // a=b>>c (unsigned) ==>
|
| + // GCC 4.8 does:
|
| + // rsb t_c1, c.lo, #32
|
| + // lsr t_lo, b.lo, c.lo
|
| + // orr t_lo, t_lo, b.hi, lsl t_c1
|
| + // sub t_c2, c.lo, #32
|
| + // orr t_lo, t_lo, b.hi, lsr t_c2
|
| + // lsr t_hi, b.hi, c.lo
|
| + // a.lo = t_lo
|
| + // a.hi = t_hi
|
| + case InstArithmetic::Ashr: {
|
| + // a=b>>c (signed) ==> ...
|
| + // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,
|
| + // and the next orr should be conditioned on PLUS. The last two
|
| + // right shifts should also be arithmetic.
|
| + bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
|
| + Variable *T_Lo = makeReg(IceType_i32);
|
| + Variable *Src1RLo = legalizeToVar(Src1Lo);
|
| + Constant *ThirtyTwo = Ctx->getConstantInt32(32);
|
| + Variable *T_C1 = makeReg(IceType_i32);
|
| + Variable *T_C2 = makeReg(IceType_i32);
|
| + _rsb(T_C1, Src1RLo, ThirtyTwo);
|
| + _lsr(T_Lo, Src0RLo, Src1RLo);
|
| + _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
|
| + OperandARM32::LSL, T_C1));
|
| + OperandARM32::ShiftKind RShiftKind;
|
| + CondARM32::Cond Pred;
|
| + if (IsAshr) {
|
| + _subs(T_C2, Src1RLo, ThirtyTwo);
|
| + RShiftKind = OperandARM32::ASR;
|
| + Pred = CondARM32::PL;
|
| + } else {
|
| + _sub(T_C2, Src1RLo, ThirtyTwo);
|
| + RShiftKind = OperandARM32::LSR;
|
| + Pred = CondARM32::AL;
|
| + }
|
| + _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
|
| + RShiftKind, T_C2),
|
| + Pred);
|
| + _mov(DestLo, T_Lo);
|
| + Variable *T_Hi = makeReg(IceType_i32);
|
| + _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
|
| + RShiftKind, Src1RLo));
|
| + _mov(DestHi, T_Hi);
|
| + } break;
|
| case InstArithmetic::Udiv:
|
| case InstArithmetic::Sdiv:
|
| case InstArithmetic::Urem:
|
| @@ -1122,13 +1203,16 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
|
| _mov(Dest, T);
|
| } break;
|
| case InstArithmetic::Shl:
|
| - UnimplementedError(Func->getContext()->getFlags());
|
| + _lsl(T, Src0R, Src1);
|
| + _mov(Dest, T);
|
| break;
|
| case InstArithmetic::Lshr:
|
| - UnimplementedError(Func->getContext()->getFlags());
|
| + _lsr(T, Src0R, Src1);
|
| + _mov(Dest, T);
|
| break;
|
| case InstArithmetic::Ashr:
|
| - UnimplementedError(Func->getContext()->getFlags());
|
| + _asr(T, Src0R, Src1);
|
| + _mov(Dest, T);
|
| break;
|
| case InstArithmetic::Udiv:
|
| UnimplementedError(Func->getContext()->getFlags());
|
| @@ -1311,20 +1395,123 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
|
|
|
| void TargetARM32::lowerCast(const InstCast *Inst) {
|
| InstCast::OpKind CastKind = Inst->getCastKind();
|
| + Variable *Dest = Inst->getDest();
|
| + Operand *Src0 = Inst->getSrc(0);
|
| switch (CastKind) {
|
| default:
|
| Func->setError("Cast type not supported");
|
| return;
|
| case InstCast::Sext: {
|
| - UnimplementedError(Func->getContext()->getFlags());
|
| + if (isVectorType(Dest->getType())) {
|
| + UnimplementedError(Func->getContext()->getFlags());
|
| + } else if (Dest->getType() == IceType_i64) {
|
| + // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
|
| + Constant *ShiftAmt = Ctx->getConstantInt32(31);
|
| + Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| + Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| + Variable *T_Lo = makeReg(DestLo->getType());
|
| + if (Src0->getType() == IceType_i32) {
|
| + Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
|
| + _mov(T_Lo, Src0RF);
|
| + } else if (Src0->getType() == IceType_i1) {
|
| + Variable *Src0R = legalizeToVar(Src0);
|
| + _lsl(T_Lo, Src0R, ShiftAmt);
|
| + _asr(T_Lo, T_Lo, ShiftAmt);
|
| + } else {
|
| + Variable *Src0R = legalizeToVar(Src0);
|
| + _sxt(T_Lo, Src0R);
|
| + }
|
| + _mov(DestLo, T_Lo);
|
| + Variable *T_Hi = makeReg(DestHi->getType());
|
| + if (Src0->getType() != IceType_i1) {
|
| + _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
|
| + OperandARM32::ASR, ShiftAmt));
|
| + } else {
|
| + // For i1, the asr instruction is already done above.
|
| + _mov(T_Hi, T_Lo);
|
| + }
|
| + _mov(DestHi, T_Hi);
|
| + } else if (Src0->getType() == IceType_i1) {
|
| + // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.
|
| + // lsl t1, src_reg, 31
|
| + // asr t1, t1, 31
|
| + // dst = t1
|
| + Variable *Src0R = legalizeToVar(Src0);
|
| + Constant *ShiftAmt = Ctx->getConstantInt32(31);
|
| + Variable *T = makeReg(Dest->getType());
|
| + _lsl(T, Src0R, ShiftAmt);
|
| + _asr(T, T, ShiftAmt);
|
| + _mov(Dest, T);
|
| + } else {
|
| + // t1 = sxt src; dst = t1
|
| + Variable *Src0R = legalizeToVar(Src0);
|
| + Variable *T = makeReg(Dest->getType());
|
| + _sxt(T, Src0R);
|
| + _mov(Dest, T);
|
| + }
|
| break;
|
| }
|
| case InstCast::Zext: {
|
| - UnimplementedError(Func->getContext()->getFlags());
|
| + if (isVectorType(Dest->getType())) {
|
| + UnimplementedError(Func->getContext()->getFlags());
|
| + } else if (Dest->getType() == IceType_i64) {
|
| + // t1=uxtb src; dst.lo=t1; dst.hi=0
|
| + Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| + Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| + Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| + Variable *T_Lo = makeReg(DestLo->getType());
|
| + // i32 and i1 can just take up the whole register.
|
| + // i32 doesn't need uxt, while i1 will have an and mask later anyway.
|
| + if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
|
| + Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
|
| + _mov(T_Lo, Src0RF);
|
| + } else {
|
| + Variable *Src0R = legalizeToVar(Src0);
|
| + _uxt(T_Lo, Src0R);
|
| + }
|
| + if (Src0->getType() == IceType_i1) {
|
| + Constant *One = Ctx->getConstantInt32(1);
|
| + _and(T_Lo, T_Lo, One);
|
| + }
|
| + _mov(DestLo, T_Lo);
|
| + Variable *T_Hi = makeReg(DestLo->getType());
|
| + _mov(T_Hi, Zero);
|
| + _mov(DestHi, T_Hi);
|
| + } else if (Src0->getType() == IceType_i1) {
|
| + // t = Src0; t &= 1; Dest = t
|
| + Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
|
| + Constant *One = Ctx->getConstantInt32(1);
|
| + Variable *T = makeReg(Dest->getType());
|
| + // Just use _mov instead of _uxt since all registers are 32-bit.
|
| + // _uxt requires the source to be a register so could have required
|
| + // a _mov from legalize anyway.
|
| + _mov(T, Src0RF);
|
| + _and(T, T, One);
|
| + _mov(Dest, T);
|
| + } else {
|
| + // t1 = uxt src; dst = t1
|
| + Variable *Src0R = legalizeToVar(Src0);
|
| + Variable *T = makeReg(Dest->getType());
|
| + _uxt(T, Src0R);
|
| + _mov(Dest, T);
|
| + }
|
| break;
|
| }
|
| case InstCast::Trunc: {
|
| - UnimplementedError(Func->getContext()->getFlags());
|
| + if (isVectorType(Dest->getType())) {
|
| + UnimplementedError(Func->getContext()->getFlags());
|
| + } else {
|
| + Operand *Src0 = Inst->getSrc(0);
|
| + if (Src0->getType() == IceType_i64)
|
| + Src0 = loOperand(Src0);
|
| + Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
|
| + // t1 = trunc Src0RF; Dest = t1
|
| + Variable *T = makeReg(Dest->getType());
|
| + _mov(T, Src0RF);
|
| + if (Dest->getType() == IceType_i1)
|
| + _and(T, T, Ctx->getConstantInt1(1));
|
| + _mov(Dest, T);
|
| + }
|
| break;
|
| }
|
| case InstCast::Fptrunc:
|
| @@ -1348,6 +1535,12 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
|
| break;
|
| }
|
| case InstCast::Bitcast: {
|
| + Operand *Src0 = Inst->getSrc(0);
|
| + if (Dest->getType() == Src0->getType()) {
|
| + InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
|
| + lowerAssign(Assign);
|
| + return;
|
| + }
|
| UnimplementedError(Func->getContext()->getFlags());
|
| break;
|
| }
|
| @@ -1469,20 +1662,20 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
|
| //
|
| // We'll go with the LLVM way for now, since it's shorter and has just as
|
| // few dependencies.
|
| - int32_t ShiftAmount = 32 - getScalarIntBitWidth(Src0->getType());
|
| - assert(ShiftAmount >= 0);
|
| + int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
|
| + assert(ShiftAmt >= 0);
|
| Constant *ShiftConst = nullptr;
|
| Variable *Src0R = nullptr;
|
| Variable *T = makeReg(IceType_i32);
|
| - if (ShiftAmount) {
|
| - ShiftConst = Ctx->getConstantInt32(ShiftAmount);
|
| + if (ShiftAmt) {
|
| + ShiftConst = Ctx->getConstantInt32(ShiftAmt);
|
| Src0R = makeReg(IceType_i32);
|
| _lsl(Src0R, legalizeToVar(Src0), ShiftConst);
|
| } else {
|
| Src0R = legalizeToVar(Src0);
|
| }
|
| _mov(T, Zero);
|
| - if (ShiftAmount) {
|
| + if (ShiftAmt) {
|
| Variable *Src1R = legalizeToVar(Src1);
|
| OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
|
| Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
|
|
|