Chromium Code Reviews| Index: src/IceTargetLoweringARM32.cpp |
| diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp |
| index fcb865b4c0c54038b14070a8b259d22e781083d7..83be39e8875b80ef03db1a066b49ead64765f7aa 100644 |
| --- a/src/IceTargetLoweringARM32.cpp |
| +++ b/src/IceTargetLoweringARM32.cpp |
| @@ -1297,29 +1297,26 @@ void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { |
| Variable *SrcLoReg = legalizeToReg(SrcLo); |
| switch (Ty) { |
| default: |
| - llvm_unreachable("Unexpected type"); |
| - case IceType_i8: { |
| - Operand *Mask = |
| - legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex); |
| - _tst(SrcLoReg, Mask); |
| - break; |
| - } |
| + llvm::report_fatal_error("Unexpected type"); |
| + case IceType_i8: |
| case IceType_i16: { |
| - Operand *Mask = |
| - legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex); |
| - _tst(SrcLoReg, Mask); |
| - break; |
| - } |
| + Operand *ShAmtF = |
| + legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)), |
| + Legal_Reg | Legal_Flex); |
| + Variable *T = makeReg(IceType_i32); |
| + _lsls(T, SrcLoReg, ShAmtF); |
| + Context.insert(InstFakeUse::create(Func, T)); |
| + } break; |
| case IceType_i32: { |
| _tst(SrcLoReg, SrcLoReg); |
| break; |
| } |
| case IceType_i64: { |
| - Variable *ScratchReg = makeReg(IceType_i32); |
| - _orrs(ScratchReg, SrcLoReg, SrcHi); |
| - // ScratchReg isn't going to be used, but we need the side-effect of |
| - // setting flags from this operation. |
| - Context.insert(InstFakeUse::create(Func, ScratchReg)); |
| + Variable *T = makeReg(IceType_i32); |
| + _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex)); |
| + // T isn't going to be used, but we need the side-effect of setting flags |
| + // from this operation. |
| + Context.insert(InstFakeUse::create(Func, T)); |
| } |
| } |
| InstARM32Label *Label = InstARM32Label::create(Func, this); |
| @@ -1404,6 +1401,389 @@ TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Inst) { |
| return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No; |
| } |
| +void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op, |
| + Variable *Dest, Operand *Src0, |
| + Operand *Src1) { |
| + // These helper-call-involved instructions are lowered in this separate |
| + // switch. This is because we would otherwise assume that we need to |
| + // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with |
| + // helper calls, and such unused/redundant instructions will fail liveness |
| + // analysis under -Om1 setting. |
| + switch (Op) { |
| + default: |
| + break; |
| + case InstArithmetic::Udiv: |
| + case InstArithmetic::Sdiv: |
| + case InstArithmetic::Urem: |
| + case InstArithmetic::Srem: { |
| + // Check for divide by 0 (ARM normally doesn't trap, but we want it to |
| + // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a |
| + // register, which will hide a constant source operand. Instead, check |
| + // the not-yet-legalized Src1 to optimize-out a divide by 0 check. |
| + if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { |
| + if (C64->getValue() == 0) { |
| + _trap(); |
| + return; |
| + } |
| + } else { |
| + Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); |
| + Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); |
| + div0Check(IceType_i64, Src1Lo, Src1Hi); |
| + } |
| + // Technically, ARM has their own aeabi routines, but we can use the |
|
sehr
2015/11/13 21:56:29
either "has its" or "have their".
John
2015/11/14 00:00:38
For a moment I thought this was Jim. :)
Done.
|
| + // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses |
| + // the more standard __moddi3 for rem. |
| + const char *HelperName = ""; |
| + switch (Op) { |
| + default: |
| + llvm::report_fatal_error("Should have only matched div ops."); |
| + break; |
| + case InstArithmetic::Udiv: |
| + HelperName = H_udiv_i64; |
| + break; |
| + case InstArithmetic::Sdiv: |
| + HelperName = H_sdiv_i64; |
| + break; |
| + case InstArithmetic::Urem: |
| + HelperName = H_urem_i64; |
| + break; |
| + case InstArithmetic::Srem: |
| + HelperName = H_srem_i64; |
| + break; |
| + } |
| + constexpr SizeT MaxSrcs = 2; |
| + InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); |
| + Call->addArg(Src0); |
| + Call->addArg(Src1); |
| + lowerCall(Call); |
| + return; |
| + } |
| + } |
| + |
| + Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| + Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| + Variable *Src0RLo = nullptr; |
| + Variable *Src0RHi = nullptr; |
| + // Src0Hi is not always used got Shl, and Src0Lo is not always used for Lhsr. |
|
Jim Stichnoth
2015/11/16 13:56:10
s/got/for/ ?
Lshr
|
| + if (Op != InstArithmetic::Ashr && Op != InstArithmetic::Lshr) { |
| + Src0RLo = legalizeToReg(loOperand(Src0)); |
| + } |
| + if (Op != InstArithmetic::Shl) { |
| + Src0RHi = legalizeToReg(hiOperand(Src0)); |
| + } |
| + Operand *Src1Lo = loOperand(Src1); |
| + Operand *Src1Hi = hiOperand(Src1); |
| + Variable *T_Lo = makeReg(DestLo->getType()); |
| + Variable *T_Hi = makeReg(DestHi->getType()); |
| + |
| + switch (Op) { |
| + case InstArithmetic::_num: |
| + llvm::report_fatal_error("Unknown arithmetic operator"); |
| + return; |
| + case InstArithmetic::Add: |
| + Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
| + Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
| + _adds(T_Lo, Src0RLo, Src1Lo); |
| + _mov(DestLo, T_Lo); |
| + _adc(T_Hi, Src0RHi, Src1Hi); |
| + _mov(DestHi, T_Hi); |
| + return; |
| + case InstArithmetic::And: |
| + Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
| + Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
| + _and(T_Lo, Src0RLo, Src1Lo); |
| + _mov(DestLo, T_Lo); |
| + _and(T_Hi, Src0RHi, Src1Hi); |
| + _mov(DestHi, T_Hi); |
| + return; |
| + case InstArithmetic::Or: |
| + Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
| + Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
| + _orr(T_Lo, Src0RLo, Src1Lo); |
| + _mov(DestLo, T_Lo); |
| + _orr(T_Hi, Src0RHi, Src1Hi); |
| + _mov(DestHi, T_Hi); |
| + return; |
| + case InstArithmetic::Xor: |
| + Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
| + Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
| + _eor(T_Lo, Src0RLo, Src1Lo); |
| + _mov(DestLo, T_Lo); |
| + _eor(T_Hi, Src0RHi, Src1Hi); |
| + _mov(DestHi, T_Hi); |
| + return; |
| + case InstArithmetic::Sub: |
| + Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
| + Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
| + _subs(T_Lo, Src0RLo, Src1Lo); |
| + _mov(DestLo, T_Lo); |
| + _sbc(T_Hi, Src0RHi, Src1Hi); |
| + _mov(DestHi, T_Hi); |
| + return; |
| + case InstArithmetic::Mul: { |
| + // GCC 4.8 does: |
| + // a=b*c ==> |
| + // t_acc =(mul) (b.lo * c.hi) |
| + // t_acc =(mla) (c.lo * b.hi) + t_acc |
| + // t.hi,t.lo =(umull) b.lo * c.lo |
| + // t.hi += t_acc |
| + // a.lo = t.lo |
| + // a.hi = t.hi |
| + // |
| + // LLVM does: |
| + // t.hi,t.lo =(umull) b.lo * c.lo |
| + // t.hi =(mla) (b.lo * c.hi) + t.hi |
| + // t.hi =(mla) (b.hi * c.lo) + t.hi |
| + // a.lo = t.lo |
| + // a.hi = t.hi |
| + // |
| + // LLVM's lowering has fewer instructions, but more register pressure: |
| + // t.lo is live from beginning to end, while GCC delays the two-dest |
| + // instruction till the end, and kills c.hi immediately. |
| + Variable *T_Acc = makeReg(IceType_i32); |
| + Variable *T_Acc1 = makeReg(IceType_i32); |
| + Variable *T_Hi1 = makeReg(IceType_i32); |
| + Variable *Src1RLo = legalizeToReg(Src1Lo); |
| + Variable *Src1RHi = legalizeToReg(Src1Hi); |
| + _mul(T_Acc, Src0RLo, Src1RHi); |
| + _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); |
| + _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); |
| + _add(T_Hi, T_Hi1, T_Acc1); |
| + _mov(DestLo, T_Lo); |
| + _mov(DestHi, T_Hi); |
| + return; |
| + } |
| + case InstArithmetic::Shl: { |
| + assert(Src0RLo != nullptr); |
| + if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
| + // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. |
| + const int32_t ShAmtImm = C->getValue() & 0x3F; |
| + if (ShAmtImm == 0) { |
| + Src0RHi = legalizeToReg(hiOperand(Src0)); |
| + _mov(DestLo, Src0RLo); |
| + _mov(DestHi, Src0RHi); |
| + return; |
| + } |
| + |
| + if (ShAmtImm >= 32) { |
| + if (ShAmtImm == 32) { |
| + _mov(DestHi, Src0RLo); |
| + } else { |
| + Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32), |
| + Legal_Reg | Legal_Flex); |
| + _lsl(T_Hi, Src0RLo, ShAmtOp); |
| + _mov(DestHi, T_Hi); |
| + } |
| + |
| + Operand *_0 = |
| + legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); |
| + _mov(T_Lo, _0); |
| + _mov(DestLo, T_Lo); |
| + return; |
| + } |
| + |
| + Src0RHi = legalizeToReg(hiOperand(Src0)); |
| + Operand *ShAmtOp = |
| + legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex); |
| + Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm), |
| + Legal_Reg | Legal_Flex); |
| + _lsl(T_Hi, Src0RHi, ShAmtOp); |
| + _orr(T_Hi, T_Hi, |
| + OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
| + OperandARM32::LSR, ComplShAmtOp)); |
| + _mov(DestHi, T_Hi); |
| + |
| + _lsl(T_Lo, Src0RLo, ShAmtOp); |
| + _mov(DestLo, T_Lo); |
| + return; |
| + } |
| + |
| + // a=b<<c ==> |
| + // pnacl-llc does: |
| + // mov t_b.lo, b.lo |
| + // mov t_b.hi, b.hi |
| + // mov t_c.lo, c.lo |
| + // rsb T0, t_c.lo, #32 |
| + // lsr T1, t_b.lo, T0 |
| + // orr t_a.hi, T1, t_b.hi, lsl t_c.lo |
| + // sub T2, t_c.lo, #32 |
| + // cmp T2, #0 |
| + // lslge t_a.hi, t_b.lo, T2 |
| + // lsl t_a.lo, t_b.lo, t_c.lo |
| + // mov a.lo, t_a.lo |
| + // mov a.hi, t_a.hi |
| + // |
| + // GCC 4.8 does: |
| + // sub t_c1, c.lo, #32 |
| + // lsl t_hi, b.hi, c.lo |
| + // orr t_hi, t_hi, b.lo, lsl t_c1 |
| + // rsb t_c2, c.lo, #32 |
| + // orr t_hi, t_hi, b.lo, lsr t_c2 |
| + // lsl t_lo, b.lo, c.lo |
| + // a.lo = t_lo |
| + // a.hi = t_hi |
| + // |
| + // These are incompatible, therefore we mimic pnacl-llc. |
| + // Can be strength-reduced for constant-shifts, but we don't do that for |
| + // now. |
| + // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On |
| + // ARM, shifts only take the lower 8 bits of the shift register, and |
| + // saturate to the range 0-32, so the negative value will saturate to 32. |
| + Constant *_32 = Ctx->getConstantInt32(32); |
| + Constant *_0 = Ctx->getConstantZero(IceType_i32); |
| + Src0RHi = legalizeToReg(hiOperand(Src0)); |
| + Variable *Src1RLo = legalizeToReg(Src1Lo); |
| + Variable *T0 = makeReg(IceType_i32); |
| + Variable *T1 = makeReg(IceType_i32); |
| + Variable *T2 = makeReg(IceType_i32); |
| + Variable *TA_Hi = makeReg(IceType_i32); |
| + Variable *TA_Lo = makeReg(IceType_i32); |
| + _rsb(T0, Src1RLo, _32); |
| + _lsr(T1, Src0RLo, T0); |
| + _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| + OperandARM32::LSL, Src1RLo)); |
| + _sub(T2, Src1RLo, _32); |
| + _cmp(T2, _0); |
| + _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); |
| + _set_dest_redefined(); |
| + _lsl(TA_Lo, Src0RLo, Src1RLo); |
| + _mov(DestLo, TA_Lo); |
| + _mov(DestHi, TA_Hi); |
| + return; |
| + } |
| + case InstArithmetic::Lshr: |
| + case InstArithmetic::Ashr: { |
| + assert(Src0RHi != nullptr); |
| + const bool ASR = Op == InstArithmetic::Ashr; |
| + if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
| + // Truncating the ShAmt to [0, 63] because that's what ARM does anyway. |
| + const int32_t ShAmtImm = C->getValue() & 0x3F; |
| + if (ShAmtImm == 0) { |
| + Src0RLo = legalizeToReg(loOperand(Src0)); |
| + _mov(DestLo, Src0RLo); |
| + _mov(DestHi, Src0RHi); |
| + return; |
| + } |
| + |
| + if (ShAmtImm >= 32) { |
| + if (ShAmtImm == 32) { |
| + _mov(DestLo, Src0RHi); |
| + } else { |
| + Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32), |
| + Legal_Reg | Legal_Flex); |
| + if (ASR) { |
| + _asr(T_Lo, Src0RHi, ShAmtOp); |
| + } else { |
| + _lsr(T_Lo, Src0RHi, ShAmtOp); |
| + } |
| + _mov(DestLo, T_Lo); |
| + } |
| + |
| + if (ASR) { |
| + Operand *_31 = legalize(Ctx->getConstantZero(IceType_i32), |
| + Legal_Reg | Legal_Flex); |
| + _asr(T_Hi, Src0RHi, _31); |
| + } else { |
| + Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32), |
| + Legal_Reg | Legal_Flex); |
| + _mov(T_Hi, _0); |
| + } |
| + _mov(DestHi, T_Hi); |
| + return; |
| + } |
| + |
| + Src0RLo = legalizeToReg(loOperand(Src0)); |
| + Operand *ShAmtOp = |
| + legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex); |
| + Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm), |
| + Legal_Reg | Legal_Flex); |
| + _lsr(T_Lo, Src0RLo, ShAmtOp); |
| + _orr(T_Lo, T_Lo, |
| + OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| + OperandARM32::LSL, ComplShAmtOp)); |
| + _mov(DestLo, T_Lo); |
| + |
| + if (ASR) { |
| + _asr(T_Hi, Src0RHi, ShAmtOp); |
| + } else { |
| + _lsr(T_Hi, Src0RHi, ShAmtOp); |
| + } |
| + _mov(DestHi, T_Hi); |
| + return; |
| + } |
| + |
| + // a=b>>c |
| + // pnacl-llc does: |
| + // mov t_b.lo, b.lo |
| + // mov t_b.hi, b.hi |
| + // mov t_c.lo, c.lo |
| + // lsr T0, t_b.lo, t_c.lo |
| + // rsb T1, t_c.lo, #32 |
| + // orr t_a.lo, T0, t_b.hi, lsl T1 |
| + // sub T2, t_c.lo, #32 |
| + // cmp T2, #0 |
| + // [al]srge t_a.lo, t_b.hi, T2 |
| + // [al]sr t_a.hi, t_b.hi, t_c.lo |
| + // mov a.lo, t_a.lo |
| + // mov a.hi, t_a.hi |
| + // |
| + // GCC 4.8 does (lsr): |
| + // rsb t_c1, c.lo, #32 |
| + // lsr t_lo, b.lo, c.lo |
| + // orr t_lo, t_lo, b.hi, lsl t_c1 |
| + // sub t_c2, c.lo, #32 |
| + // orr t_lo, t_lo, b.hi, lsr t_c2 |
| + // lsr t_hi, b.hi, c.lo |
| + // mov a.lo, t_lo |
| + // mov a.hi, t_hi |
| + // |
| + // These are incompatible, therefore we mimic pnacl-llc. |
| + const bool IsAshr = Op == InstArithmetic::Ashr; |
| + Constant *_32 = Ctx->getConstantInt32(32); |
| + Constant *_0 = Ctx->getConstantZero(IceType_i32); |
| + Src0RLo = legalizeToReg(loOperand(Src0)); |
| + Variable *Src1RLo = legalizeToReg(Src1Lo); |
| + Variable *T0 = makeReg(IceType_i32); |
| + Variable *T1 = makeReg(IceType_i32); |
| + Variable *T2 = makeReg(IceType_i32); |
| + Variable *TA_Lo = makeReg(IceType_i32); |
| + Variable *TA_Hi = makeReg(IceType_i32); |
| + _lsr(T0, Src0RLo, Src1RLo); |
| + _rsb(T1, Src1RLo, _32); |
| + _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| + OperandARM32::LSL, T1)); |
| + _sub(T2, Src1RLo, _32); |
| + _cmp(T2, _0); |
| + if (IsAshr) { |
| + _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); |
| + _set_dest_redefined(); |
| + _asr(TA_Hi, Src0RHi, Src1RLo); |
| + } else { |
| + _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); |
| + _set_dest_redefined(); |
| + _lsr(TA_Hi, Src0RHi, Src1RLo); |
| + } |
| + _mov(DestLo, TA_Lo); |
| + _mov(DestHi, TA_Hi); |
| + return; |
| + } |
| + case InstArithmetic::Fadd: |
| + case InstArithmetic::Fsub: |
| + case InstArithmetic::Fmul: |
| + case InstArithmetic::Fdiv: |
| + case InstArithmetic::Frem: |
| + llvm::report_fatal_error("FP instruction with i64 type"); |
| + return; |
| + case InstArithmetic::Udiv: |
| + case InstArithmetic::Sdiv: |
| + case InstArithmetic::Urem: |
| + case InstArithmetic::Srem: |
| + llvm::report_fatal_error("Call-helper-involved instruction for i64 type " |
| + "should have already been handled before"); |
| + return; |
| + } |
| +} |
| + |
| void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| Variable *Dest = Inst->getDest(); |
| if (Dest->getType() == IceType_i1) { |
| @@ -1421,272 +1801,11 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
| Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
| if (Dest->getType() == IceType_i64) { |
| - // These helper-call-involved instructions are lowered in this separate |
| - // switch. This is because we would otherwise assume that we need to |
| - // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with |
| - // helper calls, and such unused/redundant instructions will fail liveness |
| - // analysis under -Om1 setting. |
| - switch (Inst->getOp()) { |
| - default: |
| - break; |
| - case InstArithmetic::Udiv: |
| - case InstArithmetic::Sdiv: |
| - case InstArithmetic::Urem: |
| - case InstArithmetic::Srem: { |
| - // Check for divide by 0 (ARM normally doesn't trap, but we want it to |
| - // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a |
| - // register, which will hide a constant source operand. Instead, check |
| - // the not-yet-legalized Src1 to optimize-out a divide by 0 check. |
| - if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { |
| - if (C64->getValue() == 0) { |
| - _trap(); |
| - return; |
| - } |
| - } else { |
| - Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); |
| - Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); |
| - div0Check(IceType_i64, Src1Lo, Src1Hi); |
| - } |
| - // Technically, ARM has their own aeabi routines, but we can use the |
| - // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses |
| - // the more standard __moddi3 for rem. |
| - const char *HelperName = ""; |
| - switch (Inst->getOp()) { |
| - default: |
| - llvm_unreachable("Should have only matched div ops."); |
| - break; |
| - case InstArithmetic::Udiv: |
| - HelperName = H_udiv_i64; |
| - break; |
| - case InstArithmetic::Sdiv: |
| - HelperName = H_sdiv_i64; |
| - break; |
| - case InstArithmetic::Urem: |
| - HelperName = H_urem_i64; |
| - break; |
| - case InstArithmetic::Srem: |
| - HelperName = H_srem_i64; |
| - break; |
| - } |
| - constexpr SizeT MaxSrcs = 2; |
| - InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); |
| - Call->addArg(Src0); |
| - Call->addArg(Src1); |
| - lowerCall(Call); |
| - return; |
| - } |
| - } |
| - Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| - Variable *Src0RLo = legalizeToReg(loOperand(Src0)); |
| - Variable *Src0RHi = legalizeToReg(hiOperand(Src0)); |
| - Operand *Src1Lo = loOperand(Src1); |
| - Operand *Src1Hi = hiOperand(Src1); |
| - Variable *T_Lo = makeReg(DestLo->getType()); |
| - Variable *T_Hi = makeReg(DestHi->getType()); |
| - switch (Inst->getOp()) { |
| - case InstArithmetic::_num: |
| - llvm_unreachable("Unknown arithmetic operator"); |
| - return; |
| - case InstArithmetic::Add: |
| - Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
| - Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
| - _adds(T_Lo, Src0RLo, Src1Lo); |
| - _mov(DestLo, T_Lo); |
| - _adc(T_Hi, Src0RHi, Src1Hi); |
| - _mov(DestHi, T_Hi); |
| - return; |
| - case InstArithmetic::And: |
| - Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
| - Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
| - _and(T_Lo, Src0RLo, Src1Lo); |
| - _mov(DestLo, T_Lo); |
| - _and(T_Hi, Src0RHi, Src1Hi); |
| - _mov(DestHi, T_Hi); |
| - return; |
| - case InstArithmetic::Or: |
| - Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
| - Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
| - _orr(T_Lo, Src0RLo, Src1Lo); |
| - _mov(DestLo, T_Lo); |
| - _orr(T_Hi, Src0RHi, Src1Hi); |
| - _mov(DestHi, T_Hi); |
| - return; |
| - case InstArithmetic::Xor: |
| - Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
| - Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
| - _eor(T_Lo, Src0RLo, Src1Lo); |
| - _mov(DestLo, T_Lo); |
| - _eor(T_Hi, Src0RHi, Src1Hi); |
| - _mov(DestHi, T_Hi); |
| - return; |
| - case InstArithmetic::Sub: |
| - Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex); |
| - Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex); |
| - _subs(T_Lo, Src0RLo, Src1Lo); |
| - _mov(DestLo, T_Lo); |
| - _sbc(T_Hi, Src0RHi, Src1Hi); |
| - _mov(DestHi, T_Hi); |
| - return; |
| - case InstArithmetic::Mul: { |
| - // GCC 4.8 does: |
| - // a=b*c ==> |
| - // t_acc =(mul) (b.lo * c.hi) |
| - // t_acc =(mla) (c.lo * b.hi) + t_acc |
| - // t.hi,t.lo =(umull) b.lo * c.lo |
| - // t.hi += t_acc |
| - // a.lo = t.lo |
| - // a.hi = t.hi |
| - // |
| - // LLVM does: |
| - // t.hi,t.lo =(umull) b.lo * c.lo |
| - // t.hi =(mla) (b.lo * c.hi) + t.hi |
| - // t.hi =(mla) (b.hi * c.lo) + t.hi |
| - // a.lo = t.lo |
| - // a.hi = t.hi |
| - // |
| - // LLVM's lowering has fewer instructions, but more register pressure: |
| - // t.lo is live from beginning to end, while GCC delays the two-dest |
| - // instruction till the end, and kills c.hi immediately. |
| - Variable *T_Acc = makeReg(IceType_i32); |
| - Variable *T_Acc1 = makeReg(IceType_i32); |
| - Variable *T_Hi1 = makeReg(IceType_i32); |
| - Variable *Src1RLo = legalizeToReg(Src1Lo); |
| - Variable *Src1RHi = legalizeToReg(Src1Hi); |
| - _mul(T_Acc, Src0RLo, Src1RHi); |
| - _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc); |
| - _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo); |
| - _add(T_Hi, T_Hi1, T_Acc1); |
| - _mov(DestLo, T_Lo); |
| - _mov(DestHi, T_Hi); |
| - return; |
| - } |
| - case InstArithmetic::Shl: { |
| - // a=b<<c ==> |
| - // pnacl-llc does: |
| - // mov t_b.lo, b.lo |
| - // mov t_b.hi, b.hi |
| - // mov t_c.lo, c.lo |
| - // rsb T0, t_c.lo, #32 |
| - // lsr T1, t_b.lo, T0 |
| - // orr t_a.hi, T1, t_b.hi, lsl t_c.lo |
| - // sub T2, t_c.lo, #32 |
| - // cmp T2, #0 |
| - // lslge t_a.hi, t_b.lo, T2 |
| - // lsl t_a.lo, t_b.lo, t_c.lo |
| - // mov a.lo, t_a.lo |
| - // mov a.hi, t_a.hi |
| - // |
| - // GCC 4.8 does: |
| - // sub t_c1, c.lo, #32 |
| - // lsl t_hi, b.hi, c.lo |
| - // orr t_hi, t_hi, b.lo, lsl t_c1 |
| - // rsb t_c2, c.lo, #32 |
| - // orr t_hi, t_hi, b.lo, lsr t_c2 |
| - // lsl t_lo, b.lo, c.lo |
| - // a.lo = t_lo |
| - // a.hi = t_hi |
| - // |
| - // These are incompatible, therefore we mimic pnacl-llc. |
| - // Can be strength-reduced for constant-shifts, but we don't do that for |
| - // now. |
| - // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On |
| - // ARM, shifts only take the lower 8 bits of the shift register, and |
| - // saturate to the range 0-32, so the negative value will saturate to 32. |
| - Constant *_32 = Ctx->getConstantInt32(32); |
| - Constant *_0 = Ctx->getConstantZero(IceType_i32); |
| - Variable *Src1RLo = legalizeToReg(Src1Lo); |
| - Variable *T0 = makeReg(IceType_i32); |
| - Variable *T1 = makeReg(IceType_i32); |
| - Variable *T2 = makeReg(IceType_i32); |
| - Variable *TA_Hi = makeReg(IceType_i32); |
| - Variable *TA_Lo = makeReg(IceType_i32); |
| - _rsb(T0, Src1RLo, _32); |
| - _lsr(T1, Src0RLo, T0); |
| - _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| - OperandARM32::LSL, Src1RLo)); |
| - _sub(T2, Src1RLo, _32); |
| - _cmp(T2, _0); |
| - _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE); |
| - _set_dest_redefined(); |
| - _lsl(TA_Lo, Src0RLo, Src1RLo); |
| - _mov(DestLo, TA_Lo); |
| - _mov(DestHi, TA_Hi); |
| - return; |
| - } |
| - case InstArithmetic::Lshr: |
| - case InstArithmetic::Ashr: { |
| - // a=b>>c |
| - // pnacl-llc does: |
| - // mov t_b.lo, b.lo |
| - // mov t_b.hi, b.hi |
| - // mov t_c.lo, c.lo |
| - // lsr T0, t_b.lo, t_c.lo |
| - // rsb T1, t_c.lo, #32 |
| - // orr t_a.lo, T0, t_b.hi, lsl T1 |
| - // sub T2, t_c.lo, #32 |
| - // cmp T2, #0 |
| - // [al]srge t_a.lo, t_b.hi, T2 |
| - // [al]sr t_a.hi, t_b.hi, t_c.lo |
| - // mov a.lo, t_a.lo |
| - // mov a.hi, t_a.hi |
| - // |
| - // GCC 4.8 does (lsr): |
| - // rsb t_c1, c.lo, #32 |
| - // lsr t_lo, b.lo, c.lo |
| - // orr t_lo, t_lo, b.hi, lsl t_c1 |
| - // sub t_c2, c.lo, #32 |
| - // orr t_lo, t_lo, b.hi, lsr t_c2 |
| - // lsr t_hi, b.hi, c.lo |
| - // mov a.lo, t_lo |
| - // mov a.hi, t_hi |
| - // |
| - // These are incompatible, therefore we mimic pnacl-llc. |
| - const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; |
| - Constant *_32 = Ctx->getConstantInt32(32); |
| - Constant *_0 = Ctx->getConstantZero(IceType_i32); |
| - Variable *Src1RLo = legalizeToReg(Src1Lo); |
| - Variable *T0 = makeReg(IceType_i32); |
| - Variable *T1 = makeReg(IceType_i32); |
| - Variable *T2 = makeReg(IceType_i32); |
| - Variable *TA_Lo = makeReg(IceType_i32); |
| - Variable *TA_Hi = makeReg(IceType_i32); |
| - _lsr(T0, Src0RLo, Src1RLo); |
| - _rsb(T1, Src1RLo, _32); |
| - _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
| - OperandARM32::LSL, T1)); |
| - _sub(T2, Src1RLo, _32); |
| - _cmp(T2, _0); |
| - if (IsAshr) { |
| - _asr(TA_Lo, Src0RHi, T2, CondARM32::GE); |
| - _set_dest_redefined(); |
| - _asr(TA_Hi, Src0RHi, Src1RLo); |
| - } else { |
| - _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE); |
| - _set_dest_redefined(); |
| - _lsr(TA_Hi, Src0RHi, Src1RLo); |
| - } |
| - _mov(DestLo, TA_Lo); |
| - _mov(DestHi, TA_Hi); |
| - return; |
| - } |
| - case InstArithmetic::Fadd: |
| - case InstArithmetic::Fsub: |
| - case InstArithmetic::Fmul: |
| - case InstArithmetic::Fdiv: |
| - case InstArithmetic::Frem: |
| - llvm_unreachable("FP instruction with i64 type"); |
| - return; |
| - case InstArithmetic::Udiv: |
| - case InstArithmetic::Sdiv: |
| - case InstArithmetic::Urem: |
| - case InstArithmetic::Srem: |
| - llvm_unreachable("Call-helper-involved instruction for i64 type " |
| - "should have already been handled before"); |
| - return; |
| - } |
| + lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1); |
| return; |
| - } else if (isVectorType(Dest->getType())) { |
| + } |
| + |
| + if (isVectorType(Dest->getType())) { |
| // Add a fake def to keep liveness consistent in the meantime. |
| Variable *T = makeReg(Dest->getType()); |
| Context.insert(InstFakeDef::create(Func, T)); |
| @@ -1694,9 +1813,11 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| UnimplementedError(Func->getContext()->getFlags()); |
| return; |
| } |
| + |
| // Dest->getType() is a non-i64 scalar. |
| Variable *Src0R = legalizeToReg(Src0); |
| Variable *T = makeReg(Dest->getType()); |
| + |
| // Handle div/rem separately. They require a non-legalized Src1 to inspect |
| // whether or not Src1 is a non-zero constant. Once legalized it is more |
| // difficult to determine (constant may be moved to a register). |
| @@ -1773,7 +1894,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); |
| switch (Inst->getOp()) { |
| case InstArithmetic::_num: |
| - llvm_unreachable("Unknown arithmetic operator"); |
| + llvm::report_fatal_error("Unknown arithmetic operator"); |
| return; |
| case InstArithmetic::Add: |
| _add(T, Src0R, Src1RF); |
| @@ -1823,14 +1944,16 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| case InstArithmetic::Sdiv: |
| case InstArithmetic::Urem: |
| case InstArithmetic::Srem: |
| - llvm_unreachable("Integer div/rem should have been handled earlier."); |
| + llvm::report_fatal_error( |
| + "Integer div/rem should have been handled earlier."); |
| return; |
| case InstArithmetic::Fadd: |
| case InstArithmetic::Fsub: |
| case InstArithmetic::Fmul: |
| case InstArithmetic::Fdiv: |
| case InstArithmetic::Frem: |
| - llvm_unreachable("Floating point arith should have been handled earlier."); |
| + llvm::report_fatal_error( |
| + "Floating point arith should have been handled earlier."); |
| return; |
| } |
| } |
| @@ -1841,40 +1964,39 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) { |
| assert(Dest->getType() == Src0->getType()); |
| if (Dest->getType() == IceType_i64) { |
| Src0 = legalizeUndef(Src0); |
| - Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
| - Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
| - Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| - Variable *T_Lo = makeReg(IceType_i32); |
| - Variable *T_Hi = makeReg(IceType_i32); |
| + Variable *T_Lo = makeReg(IceType_i32); |
| + auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| + Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
| _mov(T_Lo, Src0Lo); |
| _mov(DestLo, T_Lo); |
| + |
| + Variable *T_Hi = makeReg(IceType_i32); |
| + auto *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| + Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
| _mov(T_Hi, Src0Hi); |
| _mov(DestHi, T_Hi); |
| + |
| + return; |
| + } |
| + |
| + Operand *NewSrc; |
| + if (Dest->hasReg()) { |
| + // If Dest already has a physical register, then legalize the Src operand |
| + // into a Variable with the same register assignment. This especially |
| + // helps allow the use of Flex operands. |
| + NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); |
| } else { |
| - Operand *NewSrc; |
| - if (Dest->hasReg()) { |
| - // If Dest already has a physical register, then legalize the Src operand |
| - // into a Variable with the same register assignment. This especially |
| - // helps allow the use of Flex operands. |
| - NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); |
| - } else { |
| - // Dest could be a stack operand. Since we could potentially need to do a |
| - // Store (and store can only have Register operands), legalize this to a |
| - // register. |
| - NewSrc = legalize(Src0, Legal_Reg); |
| - } |
| - if (isVectorType(Dest->getType())) { |
| - Variable *SrcR = legalizeToReg(NewSrc); |
| - _mov(Dest, SrcR); |
| - } else if (isFloatingType(Dest->getType())) { |
| - Variable *SrcR = legalizeToReg(NewSrc); |
| - _mov(Dest, SrcR); |
| - } else { |
| - _mov(Dest, NewSrc); |
| - } |
| + // Dest could be a stack operand. Since we could potentially need to do a |
| + // Store (and store can only have Register operands), legalize this to a |
| + // register. |
| + NewSrc = legalize(Src0, Legal_Reg); |
| + } |
| + |
| + if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) { |
| + NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem); |
| } |
| + _mov(Dest, NewSrc); |
| } |
| TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch( |
| @@ -2580,6 +2702,18 @@ struct { |
| FCMPARM32_TABLE |
| #undef X |
| }; |
| + |
|
sehr
2015/11/13 21:56:29
Is there a more common place for this sort of func
John
2015/11/13 22:00:41
Maybe. If you think it's useful, you could add fro
John
2015/11/14 00:00:38
Oh, I thought this was Jim. He had the same routin
|
| +bool isFloatingPointZero(Operand *Src) { |
| + if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) { |
| + return F32->getValue() == 0.0f; |
| + } |
| + |
| + if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) { |
| + return F64->getValue() == 0.0; |
| + } |
| + |
| + return false; |
| +} |
| } // end of anonymous namespace |
| TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { |
| @@ -2592,8 +2726,12 @@ TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { |
| break; |
| default: { |
| Variable *Src0R = legalizeToReg(Instr->getSrc(0)); |
| - Variable *Src1R = legalizeToReg(Instr->getSrc(1)); |
| - _vcmp(Src0R, Src1R); |
| + Operand *Src1 = Instr->getSrc(1); |
| + if (isFloatingPointZero(Src1)) { |
| + _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType())); |
| + } else { |
| + _vcmp(Src0R, legalizeToReg(Src1)); |
| + } |
| _vmrs(); |
| assert(Condition < llvm::array_lengthof(TableFcmp)); |
| return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1); |
| @@ -2641,13 +2779,102 @@ void TargetARM32::lowerFcmp(const InstFcmp *Instr) { |
| _mov(Dest, T); |
| } |
| +TargetARM32::CondWhenTrue |
| +TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0, |
| + Operand *Src1) { |
| + size_t Index = static_cast<size_t>(Condition); |
| + assert(Index < llvm::array_lengthof(TableIcmp64)); |
| + |
| + Operand *NonConstOp = nullptr; |
| + uint64_t Value; |
| + if (const auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) { |
| + Value = C->getValue(); |
| + NonConstOp = Src0; |
| + } else if (const auto *C = llvm::dyn_cast<ConstantInteger64>(Src0)) { |
| + Value = C->getValue(); |
| + NonConstOp = Src1; |
| + } |
| + |
| + Variable *Src0RLo, *Src0RHi; |
| + Operand *Src1RFLo, *Src1RFHi; |
| + |
| + if (NonConstOp != nullptr) { |
| + if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && |
| + Value == 0) { |
| + Variable *T = makeReg(IceType_i32); |
| + _orrs(T, legalizeToReg(loOperand(NonConstOp)), |
| + legalize(hiOperand(NonConstOp), Legal_Reg | Legal_Flex)); |
| + Context.insert(InstFakeUse::create(Func, T)); |
| + return CondWhenTrue(TableIcmp64[Index].C1); |
| + } |
| -TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { |
| - assert(Inst->getSrc(0)->getType() != IceType_i1); |
| - assert(Inst->getSrc(1)->getType() != IceType_i1); |
| + Src0RLo = legalizeToReg(loOperand(NonConstOp)); |
| + Src0RHi = legalizeToReg(hiOperand(NonConstOp)); |
| + if ((Value >> 32) == (Value & 0xFFFFFFFF)) { |
| + Src1RFLo = legalize(Ctx->getConstantInt32(Value & 0xFFFFFFFF), |
| + Legal_Reg | Legal_Flex); |
| + Src1RFHi = Src1RFLo; |
| + } else { |
| + Src1RFLo = legalize(Ctx->getConstantInt32(Value & 0xFFFFFFFF), |
| + Legal_Reg | Legal_Flex); |
| + Src1RFHi = legalize(Ctx->getConstantInt32((Value >> 32) & 0xFFFFFFFF), |
| + Legal_Reg | Legal_Flex); |
| + } |
| - Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
| - Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
| + bool UseRsb = false; |
| + if (TableIcmp64[Index].Swapped) { |
| + UseRsb = NonConstOp == Src0; |
| + } else { |
| + UseRsb = NonConstOp == Src1; |
| + } |
| + |
| + if (UseRsb) { |
| + if (TableIcmp64[Index].IsSigned) { |
| + Variable *T = makeReg(IceType_i32); |
| + _rsbs(T, Src0RLo, Src1RFLo); |
| + Context.insert(InstFakeUse::create(Func, T)); |
| + |
| + T = makeReg(IceType_i32); |
| + _rscs(T, Src0RHi, Src1RFHi); |
| + // We need to add a FakeUse here because liveness gets mad at us (Def |
| + // without Use.) Note that flag-setting instructions are considered to |
| + // have side effects and, therefore, are not DCE'ed. |
| + Context.insert(InstFakeUse::create(Func, T)); |
| + } else { |
| + Variable *T = makeReg(IceType_i32); |
| + _rsbs(T, Src0RHi, Src1RFHi); |
| + Context.insert(InstFakeUse::create(Func, T)); |
| + |
| + T = makeReg(IceType_i32); |
| + _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ); |
| + Context.insert(InstFakeUse::create(Func, T)); |
| + } |
| + } else { |
| + if (TableIcmp64[Index].IsSigned) { |
| + _cmp(Src0RLo, Src1RFLo); |
| + Variable *T = makeReg(IceType_i32); |
| + _sbcs(T, Src0RHi, Src1RFHi); |
| + Context.insert(InstFakeUse::create(Func, T)); |
| + } else { |
| + _cmp(Src0RHi, Src1RFHi); |
| + _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); |
| + } |
| + } |
| + |
| + return CondWhenTrue(TableIcmp64[Index].C1); |
| + } |
| + |
| + if (TableIcmp64[Index].Swapped) { |
| + Src0RLo = legalizeToReg(loOperand(Src1)); |
| + Src0RHi = legalizeToReg(hiOperand(Src1)); |
| + Src1RFLo = legalizeToReg(loOperand(Src0)); |
| + Src1RFHi = legalizeToReg(hiOperand(Src0)); |
| + } else { |
| + Src0RLo = legalizeToReg(loOperand(Src0)); |
| + Src0RHi = legalizeToReg(hiOperand(Src0)); |
| + Src1RFLo = legalizeToReg(loOperand(Src1)); |
| + Src1RFHi = legalizeToReg(hiOperand(Src1)); |
| + } |
| // a=icmp cond, b, c ==> |
| // GCC does: |
| @@ -2678,38 +2905,28 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { |
| // |
| // So, we are going with the GCC version since it's usually better (except |
| // perhaps for eq/ne). We could revisit special-casing eq/ne later. |
| - |
| - if (Src0->getType() == IceType_i64) { |
| - InstIcmp::ICond Conditon = Inst->getCondition(); |
| - size_t Index = static_cast<size_t>(Conditon); |
| - assert(Index < llvm::array_lengthof(TableIcmp64)); |
| - Variable *Src0Lo, *Src0Hi; |
| - Operand *Src1LoRF, *Src1HiRF; |
| - if (TableIcmp64[Index].Swapped) { |
| - Src0Lo = legalizeToReg(loOperand(Src1)); |
| - Src0Hi = legalizeToReg(hiOperand(Src1)); |
| - Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
| - Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
| - } else { |
| - Src0Lo = legalizeToReg(loOperand(Src0)); |
| - Src0Hi = legalizeToReg(hiOperand(Src0)); |
| - Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); |
| - Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); |
| - } |
| - if (TableIcmp64[Index].IsSigned) { |
| - Variable *ScratchReg = makeReg(IceType_i32); |
| - _cmp(Src0Lo, Src1LoRF); |
| - _sbcs(ScratchReg, Src0Hi, Src1HiRF); |
| - // ScratchReg isn't going to be used, but we need the side-effect of |
| - // setting flags from this operation. |
| - Context.insert(InstFakeUse::create(Func, ScratchReg)); |
| - } else { |
| - _cmp(Src0Hi, Src1HiRF); |
| - _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); |
| - } |
| - return CondWhenTrue(TableIcmp64[Index].C1); |
| + if (TableIcmp64[Index].IsSigned) { |
| + Variable *ScratchReg = makeReg(IceType_i32); |
| + _cmp(Src0RLo, Src1RFLo); |
| + _sbcs(ScratchReg, Src0RHi, Src1RFHi); |
| + // ScratchReg isn't going to be used, but we need the side-effect of |
| + // setting flags from this operation. |
| + Context.insert(InstFakeUse::create(Func, ScratchReg)); |
| + } else { |
| + _cmp(Src0RHi, Src1RFHi); |
| + _cmp(Src0RLo, Src1RFLo, CondARM32::EQ); |
| } |
| + return CondWhenTrue(TableIcmp64[Index].C1); |
| +} |
| + |
| +TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { |
| + assert(Inst->getSrc(0)->getType() != IceType_i1); |
| + assert(Inst->getSrc(1)->getType() != IceType_i1); |
| + Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
| + Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
| + |
| + InstIcmp::ICond Condition = Inst->getCondition(); |
| // a=icmp cond b, c ==> |
| // GCC does: |
| // <u/s>xtb tb, b |
| @@ -2739,27 +2956,94 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) { |
| // |
| // We'll go with the LLVM way for now, since it's shorter and has just as few |
| // dependencies. |
| - int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); |
| - assert(ShiftAmt >= 0); |
| - Constant *ShiftConst = nullptr; |
| - Variable *Src0R = nullptr; |
| - if (ShiftAmt) { |
| - ShiftConst = Ctx->getConstantInt32(ShiftAmt); |
| - Src0R = makeReg(IceType_i32); |
| - _lsl(Src0R, legalizeToReg(Src0), ShiftConst); |
| - } else { |
| - Src0R = legalizeToReg(Src0); |
| + Operand *NonConstOp = nullptr; |
| + int32_t Value; |
| + if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| + Value = C->getValue(); |
| + NonConstOp = Src0; |
| + } else if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src0)) { |
| + Value = C->getValue(); |
| + NonConstOp = Src1; |
| + } |
| + |
| + switch (Src0->getType()) { |
| + default: |
| + llvm::report_fatal_error("Unhandled type in lowerIcmpCond"); |
| + case IceType_i64: |
| + return lowerInt64IcmpCond(Condition, Src0, Src1); |
| + case IceType_i8: |
| + case IceType_i16: { |
| + int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType()); |
| + assert(ShAmt >= 0); |
| + |
| + if (NonConstOp != nullptr) { |
| + if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && |
| + Value == 0) { |
| + Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt); |
| + Variable *T = makeReg(IceType_i32); |
| + _lsls(T, legalizeToReg(NonConstOp), ShAmtOp); |
| + Context.insert(InstFakeUse::create(Func, T)); |
| + return CondWhenTrue(getIcmp32Mapping(Condition)); |
| + } |
| + Variable *ConstR = makeReg(IceType_i32); |
| + _mov(ConstR, legalize(Ctx->getConstantInt32(Value << ShAmt), |
| + Legal_Reg | Legal_Flex)); |
| + Operand *NonConstF = OperandARM32FlexReg::create( |
| + Func, IceType_i32, legalizeToReg(NonConstOp), OperandARM32::LSL, |
| + Ctx->getConstantInt32(ShAmt)); |
| + |
| + if (Src1 == NonConstOp) { |
| + _cmp(ConstR, NonConstF); |
| + } else { |
| + Variable *T = makeReg(IceType_i32); |
| + _rsbs(T, ConstR, NonConstF); |
| + Context.insert(InstFakeUse::create(Func, T)); |
| + } |
| + return CondWhenTrue(getIcmp32Mapping(Condition)); |
| + } |
| + |
| + Variable *Src0R = makeReg(IceType_i32); |
| + Operand *ShAmtF = |
| + legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex); |
| + _lsl(Src0R, legalizeToReg(Src0), ShAmtF); |
| + |
| + Variable *Src1R = legalizeToReg(Src1); |
| + OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create( |
| + Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF); |
| + _cmp(Src0R, Src1F); |
| + return CondWhenTrue(getIcmp32Mapping(Condition)); |
| } |
| - if (ShiftAmt) { |
| + case IceType_i32: { |
| + if (NonConstOp != nullptr) { |
| + if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && |
| + Value == 0) { |
| + Variable *T = makeReg(IceType_i32); |
| + Variable *OpR = legalizeToReg(NonConstOp); |
| + _orrs(T, OpR, OpR); |
| + Context.insert(InstFakeUse::create(Func, T)); |
| + return CondWhenTrue(getIcmp32Mapping(Condition)); |
| + } |
| + |
| + Operand *ConstRF = |
| + legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex); |
| + Variable *NonConstR = legalizeToReg(NonConstOp); |
| + |
| + if (Src0 == NonConstOp) { |
| + _cmp(NonConstR, ConstRF); |
| + } else { |
| + Variable *T = makeReg(IceType_i32); |
| + _rsbs(T, NonConstR, ConstRF); |
| + Context.insert(InstFakeUse::create(Func, T)); |
| + } |
| + return CondWhenTrue(getIcmp32Mapping(Condition)); |
| + } |
| + |
| + Variable *Src0R = legalizeToReg(Src0); |
| Variable *Src1R = legalizeToReg(Src1); |
| - OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( |
| - Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst); |
| - _cmp(Src0R, Src1RShifted); |
| - } else { |
| - Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); |
| - _cmp(Src0R, Src1RF); |
| + _cmp(Src0R, Src1R); |
| + return CondWhenTrue(getIcmp32Mapping(Condition)); |
| + } |
| } |
| - return CondWhenTrue(getIcmp32Mapping(Inst->getCondition())); |
| } |
| void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
| @@ -4254,13 +4538,15 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, |
| return Reg; |
| } else { |
| assert(isScalarFloatingType(Ty)); |
| + uint32_t ModifiedImm; |
| + if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { |
| + Variable *T = makeReg(Ty, RegNum); |
| + _mov(T, |
| + OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); |
| + return T; |
| + } |
| + |
| // Load floats/doubles from literal pool. |
| - // TODO(jvoung): Allow certain immediates to be encoded directly in an |
| - // operand. See Table A7-18 of the ARM manual: "Floating-point modified |
| - // immediate constants". Or, for 32-bit floating point numbers, just |
| - // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG |
| - // instead of using a movw/movt pair to get the const-pool address then |
| - // loading to SREG. |
| std::string Buffer; |
| llvm::raw_string_ostream StrBuf(Buffer); |
| llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); |