Chromium Code Reviews

Unified Diff: src/IceTargetLoweringARM32.cpp

Issue 1438773004: Subzero. ARM32. Improve constant lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fixes the lit tests. Double is too precise. Created 5 years, 1 month ago
Index: src/IceTargetLoweringARM32.cpp
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index fcb865b4c0c54038b14070a8b259d22e781083d7..83be39e8875b80ef03db1a066b49ead64765f7aa 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -1297,29 +1297,26 @@ void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
Variable *SrcLoReg = legalizeToReg(SrcLo);
switch (Ty) {
default:
- llvm_unreachable("Unexpected type");
- case IceType_i8: {
- Operand *Mask =
- legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
- _tst(SrcLoReg, Mask);
- break;
- }
+ llvm::report_fatal_error("Unexpected type");
+ case IceType_i8:
case IceType_i16: {
- Operand *Mask =
- legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex);
- _tst(SrcLoReg, Mask);
- break;
- }
+ Operand *ShAmtF =
+ legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)),
+ Legal_Reg | Legal_Flex);
+ Variable *T = makeReg(IceType_i32);
+ _lsls(T, SrcLoReg, ShAmtF);
+ Context.insert(InstFakeUse::create(Func, T));
+ } break;
case IceType_i32: {
_tst(SrcLoReg, SrcLoReg);
break;
}
case IceType_i64: {
- Variable *ScratchReg = makeReg(IceType_i32);
- _orrs(ScratchReg, SrcLoReg, SrcHi);
- // ScratchReg isn't going to be used, but we need the side-effect of
- // setting flags from this operation.
- Context.insert(InstFakeUse::create(Func, ScratchReg));
+ Variable *T = makeReg(IceType_i32);
+ _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
+ // T isn't going to be used, but we need the side-effect of setting flags
+ // from this operation.
+ Context.insert(InstFakeUse::create(Func, T));
}
}
InstARM32Label *Label = InstARM32Label::create(Func, this);
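A minimal C++ model of the zero checks this hunk leaves in the Z flag (an illustrative sketch, not Subzero code): the new lsls form shifts the i8/i16 payload into the top bits, so no 0xFF/0xFFFF mask constant needs to be materialized, and the orrs form sets Z exactly when both halves of the i64 are zero.

    #include <cstdint>

    // i8/i16: lsls T, Src, #(32 - BitWidth) sets Z iff the low BitWidth
    // bits of Src are zero.
    bool isZeroNarrow(uint32_t Src, uint32_t BitWidth) {
      return (Src << (32 - BitWidth)) == 0;
    }

    // i64: orrs T, Lo, Hi sets Z iff every bit of the value is zero.
    bool isZero64(uint32_t Lo, uint32_t Hi) { return (Lo | Hi) == 0; }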
@@ -1404,6 +1401,389 @@ TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Inst) {
return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
}
+void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
+ Variable *Dest, Operand *Src0,
+ Operand *Src1) {
+ // These helper-call-involved instructions are lowered in this separate
+ // switch. This is because we would otherwise assume that we need to
+ // legalize Src0 to Src0RLo and Src0RHi. However, those go unused with
+ // helper calls, and such unused/redundant instructions will fail liveness
+ // analysis under -Om1 setting.
+ switch (Op) {
+ default:
+ break;
+ case InstArithmetic::Udiv:
+ case InstArithmetic::Sdiv:
+ case InstArithmetic::Urem:
+ case InstArithmetic::Srem: {
+ // Check for divide by 0 (ARM normally doesn't trap, but we want it to
+ // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
+ // register, which will hide a constant source operand. Instead, check
+ // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
+ if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
+ if (C64->getValue() == 0) {
+ _trap();
+ return;
+ }
+ } else {
+ Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
+ Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
+ div0Check(IceType_i64, Src1Lo, Src1Hi);
+ }
+ // Technically, ARM has their own aeabi routines, but we can use the
sehr 2015/11/13 21:56:29 either "has its" or "have their".
John 2015/11/14 00:00:38 For a moment I thought this was Jim. :) Done.
+ // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
+ // the more standard __moddi3 for rem.
+ const char *HelperName = "";
+ switch (Op) {
+ default:
+ llvm::report_fatal_error("Should have only matched div ops.");
+ break;
+ case InstArithmetic::Udiv:
+ HelperName = H_udiv_i64;
+ break;
+ case InstArithmetic::Sdiv:
+ HelperName = H_sdiv_i64;
+ break;
+ case InstArithmetic::Urem:
+ HelperName = H_urem_i64;
+ break;
+ case InstArithmetic::Srem:
+ HelperName = H_srem_i64;
+ break;
+ }
+ constexpr SizeT MaxSrcs = 2;
+ InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
+ Call->addArg(Src0);
+ Call->addArg(Src1);
+ lowerCall(Call);
+ return;
+ }
+ }
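For reference, these are the standard compiler-rt/libgcc entry points for 64-bit division and remainder; the comment above confirms __moddi3 is used for rem, while mapping H_udiv_i64 and friends onto the other three names is an assumption of this sketch.

    extern "C" {
    long long __divdi3(long long a, long long b);       // signed divide
    unsigned long long __udivdi3(unsigned long long a,
                                 unsigned long long b); // unsigned divide
    long long __moddi3(long long a, long long b);       // signed remainder
    unsigned long long __umoddi3(unsigned long long a,
                                 unsigned long long b); // unsigned remainder
    }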
+
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Variable *Src0RLo = nullptr;
+ Variable *Src0RHi = nullptr;
+ // Src0Hi is not always used got Shl, and Src0Lo is not always used for Lhsr.
Jim Stichnoth 2015/11/16 13:56:10 s/got/for/ ? Lshr
+ if (Op != InstArithmetic::Ashr && Op != InstArithmetic::Lshr) {
+ Src0RLo = legalizeToReg(loOperand(Src0));
+ }
+ if (Op != InstArithmetic::Shl) {
+ Src0RHi = legalizeToReg(hiOperand(Src0));
+ }
+ Operand *Src1Lo = loOperand(Src1);
+ Operand *Src1Hi = hiOperand(Src1);
+ Variable *T_Lo = makeReg(DestLo->getType());
+ Variable *T_Hi = makeReg(DestHi->getType());
+
+ switch (Op) {
+ case InstArithmetic::_num:
+ llvm::report_fatal_error("Unknown arithmetic operator");
+ return;
+ case InstArithmetic::Add:
+ Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
+ Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
+ _adds(T_Lo, Src0RLo, Src1Lo);
+ _mov(DestLo, T_Lo);
+ _adc(T_Hi, Src0RHi, Src1Hi);
+ _mov(DestHi, T_Hi);
+ return;
+ case InstArithmetic::And:
+ Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
+ Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
+ _and(T_Lo, Src0RLo, Src1Lo);
+ _mov(DestLo, T_Lo);
+ _and(T_Hi, Src0RHi, Src1Hi);
+ _mov(DestHi, T_Hi);
+ return;
+ case InstArithmetic::Or:
+ Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
+ Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
+ _orr(T_Lo, Src0RLo, Src1Lo);
+ _mov(DestLo, T_Lo);
+ _orr(T_Hi, Src0RHi, Src1Hi);
+ _mov(DestHi, T_Hi);
+ return;
+ case InstArithmetic::Xor:
+ Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
+ Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
+ _eor(T_Lo, Src0RLo, Src1Lo);
+ _mov(DestLo, T_Lo);
+ _eor(T_Hi, Src0RHi, Src1Hi);
+ _mov(DestHi, T_Hi);
+ return;
+ case InstArithmetic::Sub:
+ Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
+ Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
+ _subs(T_Lo, Src0RLo, Src1Lo);
+ _mov(DestLo, T_Lo);
+ _sbc(T_Hi, Src0RHi, Src1Hi);
+ _mov(DestHi, T_Hi);
+ return;
+ case InstArithmetic::Mul: {
+ // GCC 4.8 does:
+ // a=b*c ==>
+ // t_acc =(mul) (b.lo * c.hi)
+ // t_acc =(mla) (c.lo * b.hi) + t_acc
+ // t.hi,t.lo =(umull) b.lo * c.lo
+ // t.hi += t_acc
+ // a.lo = t.lo
+ // a.hi = t.hi
+ //
+ // LLVM does:
+ // t.hi,t.lo =(umull) b.lo * c.lo
+ // t.hi =(mla) (b.lo * c.hi) + t.hi
+ // t.hi =(mla) (b.hi * c.lo) + t.hi
+ // a.lo = t.lo
+ // a.hi = t.hi
+ //
+ // LLVM's lowering has fewer instructions, but more register pressure:
+ // t.lo is live from beginning to end, while GCC delays the two-dest
+ // instruction till the end, and kills c.hi immediately.
+ Variable *T_Acc = makeReg(IceType_i32);
+ Variable *T_Acc1 = makeReg(IceType_i32);
+ Variable *T_Hi1 = makeReg(IceType_i32);
+ Variable *Src1RLo = legalizeToReg(Src1Lo);
+ Variable *Src1RHi = legalizeToReg(Src1Hi);
+ _mul(T_Acc, Src0RLo, Src1RHi);
+ _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
+ _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
+ _add(T_Hi, T_Hi1, T_Acc1);
+ _mov(DestLo, T_Lo);
+ _mov(DestHi, T_Hi);
+ return;
+ }
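As a sanity check on the decomposition above, the same computation in plain C++ (an illustrative sketch only): just the low 32x32 product needs a full 64-bit result, and the two cross products only contribute to the high word modulo 2^32.

    #include <cstdint>

    uint64_t mul64(uint32_t BLo, uint32_t BHi, uint32_t CLo, uint32_t CHi) {
      uint32_t Acc = BLo * CHi + CLo * BHi;       // mul + mla
      uint64_t Prod = (uint64_t)BLo * CLo;        // umull
      uint32_t Hi = (uint32_t)(Prod >> 32) + Acc; // add
      return ((uint64_t)Hi << 32) | (uint32_t)Prod;
    }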
+ case InstArithmetic::Shl: {
+ assert(Src0RLo != nullptr);
+ if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
+ // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
+ const int32_t ShAmtImm = C->getValue() & 0x3F;
+ if (ShAmtImm == 0) {
+ Src0RHi = legalizeToReg(hiOperand(Src0));
+ _mov(DestLo, Src0RLo);
+ _mov(DestHi, Src0RHi);
+ return;
+ }
+
+ if (ShAmtImm >= 32) {
+ if (ShAmtImm == 32) {
+ _mov(DestHi, Src0RLo);
+ } else {
+ Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
+ Legal_Reg | Legal_Flex);
+ _lsl(T_Hi, Src0RLo, ShAmtOp);
+ _mov(DestHi, T_Hi);
+ }
+
+ Operand *_0 =
+ legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
+ _mov(T_Lo, _0);
+ _mov(DestLo, T_Lo);
+ return;
+ }
+
+ Src0RHi = legalizeToReg(hiOperand(Src0));
+ Operand *ShAmtOp =
+ legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
+ Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
+ Legal_Reg | Legal_Flex);
+ _lsl(T_Hi, Src0RHi, ShAmtOp);
+ _orr(T_Hi, T_Hi,
+ OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
+ OperandARM32::LSR, ComplShAmtOp));
+ _mov(DestHi, T_Hi);
+
+ _lsl(T_Lo, Src0RLo, ShAmtOp);
+ _mov(DestLo, T_Lo);
+ return;
+ }
+
+ // a=b<<c ==>
+ // pnacl-llc does:
+ // mov t_b.lo, b.lo
+ // mov t_b.hi, b.hi
+ // mov t_c.lo, c.lo
+ // rsb T0, t_c.lo, #32
+ // lsr T1, t_b.lo, T0
+ // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
+ // sub T2, t_c.lo, #32
+ // cmp T2, #0
+ // lslge t_a.hi, t_b.lo, T2
+ // lsl t_a.lo, t_b.lo, t_c.lo
+ // mov a.lo, t_a.lo
+ // mov a.hi, t_a.hi
+ //
+ // GCC 4.8 does:
+ // sub t_c1, c.lo, #32
+ // lsl t_hi, b.hi, c.lo
+ // orr t_hi, t_hi, b.lo, lsl t_c1
+ // rsb t_c2, c.lo, #32
+ // orr t_hi, t_hi, b.lo, lsr t_c2
+ // lsl t_lo, b.lo, c.lo
+ // a.lo = t_lo
+ // a.hi = t_hi
+ //
+ // These are incompatible, therefore we mimic pnacl-llc.
+ // Can be strength-reduced for constant-shifts, but we don't do that for
+ // now.
+ // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
+ // ARM, shifts only take the lower 8 bits of the shift register, and
+ // saturate to the range 0-32, so the negative value will saturate to 32.
+ Constant *_32 = Ctx->getConstantInt32(32);
+ Constant *_0 = Ctx->getConstantZero(IceType_i32);
+ Src0RHi = legalizeToReg(hiOperand(Src0));
+ Variable *Src1RLo = legalizeToReg(Src1Lo);
+ Variable *T0 = makeReg(IceType_i32);
+ Variable *T1 = makeReg(IceType_i32);
+ Variable *T2 = makeReg(IceType_i32);
+ Variable *TA_Hi = makeReg(IceType_i32);
+ Variable *TA_Lo = makeReg(IceType_i32);
+ _rsb(T0, Src1RLo, _32);
+ _lsr(T1, Src0RLo, T0);
+ _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
+ OperandARM32::LSL, Src1RLo));
+ _sub(T2, Src1RLo, _32);
+ _cmp(T2, _0);
+ _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
+ _set_dest_redefined();
+ _lsl(TA_Lo, Src0RLo, Src1RLo);
+ _mov(DestLo, TA_Lo);
+ _mov(DestHi, TA_Hi);
+ return;
+ }
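A plain C++ reference model of the whole shift-left case, constant and variable amounts alike (an illustrative sketch; the amount is masked to [0, 63] just as the lowering truncates it):

    #include <cstdint>

    uint64_t shl64(uint32_t Lo, uint32_t Hi, uint32_t C) {
      C &= 0x3F;
      uint32_t THi = (C < 32) ? (Hi << C) | (C ? Lo >> (32 - C) : 0)
                              : Lo << (C - 32); // the lslge path
      uint32_t TLo = (C < 32) ? Lo << C : 0;    // amounts >= 32 clear a.lo
      return ((uint64_t)THi << 32) | TLo;
    }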
+ case InstArithmetic::Lshr:
+ case InstArithmetic::Ashr: {
+ assert(Src0RHi != nullptr);
+ const bool ASR = Op == InstArithmetic::Ashr;
+ if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
+ // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
+ const int32_t ShAmtImm = C->getValue() & 0x3F;
+ if (ShAmtImm == 0) {
+ Src0RLo = legalizeToReg(loOperand(Src0));
+ _mov(DestLo, Src0RLo);
+ _mov(DestHi, Src0RHi);
+ return;
+ }
+
+ if (ShAmtImm >= 32) {
+ if (ShAmtImm == 32) {
+ _mov(DestLo, Src0RHi);
+ } else {
+ Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
+ Legal_Reg | Legal_Flex);
+ if (ASR) {
+ _asr(T_Lo, Src0RHi, ShAmtOp);
+ } else {
+ _lsr(T_Lo, Src0RHi, ShAmtOp);
+ }
+ _mov(DestLo, T_Lo);
+ }
+
+ if (ASR) {
+ Operand *_31 = legalize(Ctx->getConstantInt32(31),
+ Legal_Reg | Legal_Flex);
+ _asr(T_Hi, Src0RHi, _31);
+ } else {
+ Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
+ Legal_Reg | Legal_Flex);
+ _mov(T_Hi, _0);
+ }
+ _mov(DestHi, T_Hi);
+ return;
+ }
+
+ Src0RLo = legalizeToReg(loOperand(Src0));
+ Operand *ShAmtOp =
+ legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
+ Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
+ Legal_Reg | Legal_Flex);
+ _lsr(T_Lo, Src0RLo, ShAmtOp);
+ _orr(T_Lo, T_Lo,
+ OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
+ OperandARM32::LSL, ComplShAmtOp));
+ _mov(DestLo, T_Lo);
+
+ if (ASR) {
+ _asr(T_Hi, Src0RHi, ShAmtOp);
+ } else {
+ _lsr(T_Hi, Src0RHi, ShAmtOp);
+ }
+ _mov(DestHi, T_Hi);
+ return;
+ }
+
+ // a=b>>c
+ // pnacl-llc does:
+ // mov t_b.lo, b.lo
+ // mov t_b.hi, b.hi
+ // mov t_c.lo, c.lo
+ // lsr T0, t_b.lo, t_c.lo
+ // rsb T1, t_c.lo, #32
+ // orr t_a.lo, T0, t_b.hi, lsl T1
+ // sub T2, t_c.lo, #32
+ // cmp T2, #0
+ // [al]srge t_a.lo, t_b.hi, T2
+ // [al]sr t_a.hi, t_b.hi, t_c.lo
+ // mov a.lo, t_a.lo
+ // mov a.hi, t_a.hi
+ //
+ // GCC 4.8 does (lsr):
+ // rsb t_c1, c.lo, #32
+ // lsr t_lo, b.lo, c.lo
+ // orr t_lo, t_lo, b.hi, lsl t_c1
+ // sub t_c2, c.lo, #32
+ // orr t_lo, t_lo, b.hi, lsr t_c2
+ // lsr t_hi, b.hi, c.lo
+ // mov a.lo, t_lo
+ // mov a.hi, t_hi
+ //
+ // These are incompatible, therefore we mimic pnacl-llc.
+ const bool IsAshr = Op == InstArithmetic::Ashr;
+ Constant *_32 = Ctx->getConstantInt32(32);
+ Constant *_0 = Ctx->getConstantZero(IceType_i32);
+ Src0RLo = legalizeToReg(loOperand(Src0));
+ Variable *Src1RLo = legalizeToReg(Src1Lo);
+ Variable *T0 = makeReg(IceType_i32);
+ Variable *T1 = makeReg(IceType_i32);
+ Variable *T2 = makeReg(IceType_i32);
+ Variable *TA_Lo = makeReg(IceType_i32);
+ Variable *TA_Hi = makeReg(IceType_i32);
+ _lsr(T0, Src0RLo, Src1RLo);
+ _rsb(T1, Src1RLo, _32);
+ _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
+ OperandARM32::LSL, T1));
+ _sub(T2, Src1RLo, _32);
+ _cmp(T2, _0);
+ if (IsAshr) {
+ _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
+ _set_dest_redefined();
+ _asr(TA_Hi, Src0RHi, Src1RLo);
+ } else {
+ _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
+ _set_dest_redefined();
+ _lsr(TA_Hi, Src0RHi, Src1RLo);
+ }
+ _mov(DestLo, TA_Lo);
+ _mov(DestHi, TA_Hi);
+ return;
+ }
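The matching right-shift model (again an illustrative sketch): IsAshr selects sign-fill versus zero-fill of the high word, exactly as the [al]sr selection above does.

    #include <cstdint>

    uint64_t shr64(uint32_t Lo, uint32_t Hi, uint32_t C, bool IsAshr) {
      C &= 0x3F;
      uint32_t TLo =
          (C < 32) ? (Lo >> C) | (C ? Hi << (32 - C) : 0)
                   : (IsAshr ? (uint32_t)((int32_t)Hi >> (C - 32))
                             : Hi >> (C - 32));
      uint32_t THi = IsAshr ? (uint32_t)((int32_t)Hi >> (C < 32 ? C : 31))
                            : (C < 32 ? Hi >> C : 0);
      return ((uint64_t)THi << 32) | TLo;
    }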
+ case InstArithmetic::Fadd:
+ case InstArithmetic::Fsub:
+ case InstArithmetic::Fmul:
+ case InstArithmetic::Fdiv:
+ case InstArithmetic::Frem:
+ llvm::report_fatal_error("FP instruction with i64 type");
+ return;
+ case InstArithmetic::Udiv:
+ case InstArithmetic::Sdiv:
+ case InstArithmetic::Urem:
+ case InstArithmetic::Srem:
+ llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
+ "should have already been handled before");
+ return;
+ }
+}
+
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
if (Dest->getType() == IceType_i1) {
@@ -1421,272 +1801,11 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Operand *Src0 = legalizeUndef(Inst->getSrc(0));
Operand *Src1 = legalizeUndef(Inst->getSrc(1));
if (Dest->getType() == IceType_i64) {
- // These helper-call-involved instructions are lowered in this separate
- // switch. This is because we would otherwise assume that we need to
- // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
- // helper calls, and such unused/redundant instructions will fail liveness
- // analysis under -Om1 setting.
- switch (Inst->getOp()) {
- default:
- break;
- case InstArithmetic::Udiv:
- case InstArithmetic::Sdiv:
- case InstArithmetic::Urem:
- case InstArithmetic::Srem: {
- // Check for divide by 0 (ARM normally doesn't trap, but we want it to
- // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
- // register, which will hide a constant source operand. Instead, check
- // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
- if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
- if (C64->getValue() == 0) {
- _trap();
- return;
- }
- } else {
- Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
- Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
- div0Check(IceType_i64, Src1Lo, Src1Hi);
- }
- // Technically, ARM has their own aeabi routines, but we can use the
- // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
- // the more standard __moddi3 for rem.
- const char *HelperName = "";
- switch (Inst->getOp()) {
- default:
- llvm_unreachable("Should have only matched div ops.");
- break;
- case InstArithmetic::Udiv:
- HelperName = H_udiv_i64;
- break;
- case InstArithmetic::Sdiv:
- HelperName = H_sdiv_i64;
- break;
- case InstArithmetic::Urem:
- HelperName = H_urem_i64;
- break;
- case InstArithmetic::Srem:
- HelperName = H_srem_i64;
- break;
- }
- constexpr SizeT MaxSrcs = 2;
- InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
- Call->addArg(Src0);
- Call->addArg(Src1);
- lowerCall(Call);
- return;
- }
- }
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Variable *Src0RLo = legalizeToReg(loOperand(Src0));
- Variable *Src0RHi = legalizeToReg(hiOperand(Src0));
- Operand *Src1Lo = loOperand(Src1);
- Operand *Src1Hi = hiOperand(Src1);
- Variable *T_Lo = makeReg(DestLo->getType());
- Variable *T_Hi = makeReg(DestHi->getType());
- switch (Inst->getOp()) {
- case InstArithmetic::_num:
- llvm_unreachable("Unknown arithmetic operator");
- return;
- case InstArithmetic::Add:
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
- _adds(T_Lo, Src0RLo, Src1Lo);
- _mov(DestLo, T_Lo);
- _adc(T_Hi, Src0RHi, Src1Hi);
- _mov(DestHi, T_Hi);
- return;
- case InstArithmetic::And:
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
- _and(T_Lo, Src0RLo, Src1Lo);
- _mov(DestLo, T_Lo);
- _and(T_Hi, Src0RHi, Src1Hi);
- _mov(DestHi, T_Hi);
- return;
- case InstArithmetic::Or:
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
- _orr(T_Lo, Src0RLo, Src1Lo);
- _mov(DestLo, T_Lo);
- _orr(T_Hi, Src0RHi, Src1Hi);
- _mov(DestHi, T_Hi);
- return;
- case InstArithmetic::Xor:
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
- _eor(T_Lo, Src0RLo, Src1Lo);
- _mov(DestLo, T_Lo);
- _eor(T_Hi, Src0RHi, Src1Hi);
- _mov(DestHi, T_Hi);
- return;
- case InstArithmetic::Sub:
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
- _subs(T_Lo, Src0RLo, Src1Lo);
- _mov(DestLo, T_Lo);
- _sbc(T_Hi, Src0RHi, Src1Hi);
- _mov(DestHi, T_Hi);
- return;
- case InstArithmetic::Mul: {
- // GCC 4.8 does:
- // a=b*c ==>
- // t_acc =(mul) (b.lo * c.hi)
- // t_acc =(mla) (c.lo * b.hi) + t_acc
- // t.hi,t.lo =(umull) b.lo * c.lo
- // t.hi += t_acc
- // a.lo = t.lo
- // a.hi = t.hi
- //
- // LLVM does:
- // t.hi,t.lo =(umull) b.lo * c.lo
- // t.hi =(mla) (b.lo * c.hi) + t.hi
- // t.hi =(mla) (b.hi * c.lo) + t.hi
- // a.lo = t.lo
- // a.hi = t.hi
- //
- // LLVM's lowering has fewer instructions, but more register pressure:
- // t.lo is live from beginning to end, while GCC delays the two-dest
- // instruction till the end, and kills c.hi immediately.
- Variable *T_Acc = makeReg(IceType_i32);
- Variable *T_Acc1 = makeReg(IceType_i32);
- Variable *T_Hi1 = makeReg(IceType_i32);
- Variable *Src1RLo = legalizeToReg(Src1Lo);
- Variable *Src1RHi = legalizeToReg(Src1Hi);
- _mul(T_Acc, Src0RLo, Src1RHi);
- _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
- _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
- _add(T_Hi, T_Hi1, T_Acc1);
- _mov(DestLo, T_Lo);
- _mov(DestHi, T_Hi);
- return;
- }
- case InstArithmetic::Shl: {
- // a=b<<c ==>
- // pnacl-llc does:
- // mov t_b.lo, b.lo
- // mov t_b.hi, b.hi
- // mov t_c.lo, c.lo
- // rsb T0, t_c.lo, #32
- // lsr T1, t_b.lo, T0
- // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
- // sub T2, t_c.lo, #32
- // cmp T2, #0
- // lslge t_a.hi, t_b.lo, T2
- // lsl t_a.lo, t_b.lo, t_c.lo
- // mov a.lo, t_a.lo
- // mov a.hi, t_a.hi
- //
- // GCC 4.8 does:
- // sub t_c1, c.lo, #32
- // lsl t_hi, b.hi, c.lo
- // orr t_hi, t_hi, b.lo, lsl t_c1
- // rsb t_c2, c.lo, #32
- // orr t_hi, t_hi, b.lo, lsr t_c2
- // lsl t_lo, b.lo, c.lo
- // a.lo = t_lo
- // a.hi = t_hi
- //
- // These are incompatible, therefore we mimic pnacl-llc.
- // Can be strength-reduced for constant-shifts, but we don't do that for
- // now.
- // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
- // ARM, shifts only take the lower 8 bits of the shift register, and
- // saturate to the range 0-32, so the negative value will saturate to 32.
- Constant *_32 = Ctx->getConstantInt32(32);
- Constant *_0 = Ctx->getConstantZero(IceType_i32);
- Variable *Src1RLo = legalizeToReg(Src1Lo);
- Variable *T0 = makeReg(IceType_i32);
- Variable *T1 = makeReg(IceType_i32);
- Variable *T2 = makeReg(IceType_i32);
- Variable *TA_Hi = makeReg(IceType_i32);
- Variable *TA_Lo = makeReg(IceType_i32);
- _rsb(T0, Src1RLo, _32);
- _lsr(T1, Src0RLo, T0);
- _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
- OperandARM32::LSL, Src1RLo));
- _sub(T2, Src1RLo, _32);
- _cmp(T2, _0);
- _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
- _set_dest_redefined();
- _lsl(TA_Lo, Src0RLo, Src1RLo);
- _mov(DestLo, TA_Lo);
- _mov(DestHi, TA_Hi);
- return;
- }
- case InstArithmetic::Lshr:
- case InstArithmetic::Ashr: {
- // a=b>>c
- // pnacl-llc does:
- // mov t_b.lo, b.lo
- // mov t_b.hi, b.hi
- // mov t_c.lo, c.lo
- // lsr T0, t_b.lo, t_c.lo
- // rsb T1, t_c.lo, #32
- // orr t_a.lo, T0, t_b.hi, lsl T1
- // sub T2, t_c.lo, #32
- // cmp T2, #0
- // [al]srge t_a.lo, t_b.hi, T2
- // [al]sr t_a.hi, t_b.hi, t_c.lo
- // mov a.lo, t_a.lo
- // mov a.hi, t_a.hi
- //
- // GCC 4.8 does (lsr):
- // rsb t_c1, c.lo, #32
- // lsr t_lo, b.lo, c.lo
- // orr t_lo, t_lo, b.hi, lsl t_c1
- // sub t_c2, c.lo, #32
- // orr t_lo, t_lo, b.hi, lsr t_c2
- // lsr t_hi, b.hi, c.lo
- // mov a.lo, t_lo
- // mov a.hi, t_hi
- //
- // These are incompatible, therefore we mimic pnacl-llc.
- const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
- Constant *_32 = Ctx->getConstantInt32(32);
- Constant *_0 = Ctx->getConstantZero(IceType_i32);
- Variable *Src1RLo = legalizeToReg(Src1Lo);
- Variable *T0 = makeReg(IceType_i32);
- Variable *T1 = makeReg(IceType_i32);
- Variable *T2 = makeReg(IceType_i32);
- Variable *TA_Lo = makeReg(IceType_i32);
- Variable *TA_Hi = makeReg(IceType_i32);
- _lsr(T0, Src0RLo, Src1RLo);
- _rsb(T1, Src1RLo, _32);
- _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
- OperandARM32::LSL, T1));
- _sub(T2, Src1RLo, _32);
- _cmp(T2, _0);
- if (IsAshr) {
- _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
- _set_dest_redefined();
- _asr(TA_Hi, Src0RHi, Src1RLo);
- } else {
- _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
- _set_dest_redefined();
- _lsr(TA_Hi, Src0RHi, Src1RLo);
- }
- _mov(DestLo, TA_Lo);
- _mov(DestHi, TA_Hi);
- return;
- }
- case InstArithmetic::Fadd:
- case InstArithmetic::Fsub:
- case InstArithmetic::Fmul:
- case InstArithmetic::Fdiv:
- case InstArithmetic::Frem:
- llvm_unreachable("FP instruction with i64 type");
- return;
- case InstArithmetic::Udiv:
- case InstArithmetic::Sdiv:
- case InstArithmetic::Urem:
- case InstArithmetic::Srem:
- llvm_unreachable("Call-helper-involved instruction for i64 type "
- "should have already been handled before");
- return;
- }
+ lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1);
return;
- } else if (isVectorType(Dest->getType())) {
+ }
+
+ if (isVectorType(Dest->getType())) {
// Add a fake def to keep liveness consistent in the meantime.
Variable *T = makeReg(Dest->getType());
Context.insert(InstFakeDef::create(Func, T));
@@ -1694,9 +1813,11 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
UnimplementedError(Func->getContext()->getFlags());
return;
}
+
// Dest->getType() is a non-i64 scalar.
Variable *Src0R = legalizeToReg(Src0);
Variable *T = makeReg(Dest->getType());
+
// Handle div/rem separately. They require a non-legalized Src1 to inspect
// whether or not Src1 is a non-zero constant. Once legalized it is more
// difficult to determine (constant may be moved to a register).
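A hedged sketch of the inspection that comment describes; the helper name below is hypothetical, but the dyn_cast pattern mirrors how the i64 path above tests Src1 before legalization.

    // Hypothetical helper, for illustration only: once Src1 is legalized
    // into a register, a constant divisor can no longer be recognized.
    bool isDivisorKnownNonZero(const Operand *Src1) {
      if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1))
        return C->getValue() != 0;
      return false; // not provably non-zero
    }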
@@ -1773,7 +1894,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
switch (Inst->getOp()) {
case InstArithmetic::_num:
- llvm_unreachable("Unknown arithmetic operator");
+ llvm::report_fatal_error("Unknown arithmetic operator");
return;
case InstArithmetic::Add:
_add(T, Src0R, Src1RF);
@@ -1823,14 +1944,16 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
case InstArithmetic::Sdiv:
case InstArithmetic::Urem:
case InstArithmetic::Srem:
- llvm_unreachable("Integer div/rem should have been handled earlier.");
+ llvm::report_fatal_error(
+ "Integer div/rem should have been handled earlier.");
return;
case InstArithmetic::Fadd:
case InstArithmetic::Fsub:
case InstArithmetic::Fmul:
case InstArithmetic::Fdiv:
case InstArithmetic::Frem:
- llvm_unreachable("Floating point arith should have been handled earlier.");
+ llvm::report_fatal_error(
+ "Floating point arith should have been handled earlier.");
return;
}
}
@@ -1841,40 +1964,39 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) {
assert(Dest->getType() == Src0->getType());
if (Dest->getType() == IceType_i64) {
Src0 = legalizeUndef(Src0);
- Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
- Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Variable *T_Lo = makeReg(IceType_i32);
- Variable *T_Hi = makeReg(IceType_i32);
+ Variable *T_Lo = makeReg(IceType_i32);
+ auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
+ Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
_mov(T_Lo, Src0Lo);
_mov(DestLo, T_Lo);
+
+ Variable *T_Hi = makeReg(IceType_i32);
+ auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
_mov(T_Hi, Src0Hi);
_mov(DestHi, T_Hi);
+
+ return;
+ }
+
+ Operand *NewSrc;
+ if (Dest->hasReg()) {
+ // If Dest already has a physical register, then legalize the Src operand
+ // into a Variable with the same register assignment. This especially
+ // helps allow the use of Flex operands.
+ NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
} else {
- Operand *NewSrc;
- if (Dest->hasReg()) {
- // If Dest already has a physical register, then legalize the Src operand
- // into a Variable with the same register assignment. This especially
- // helps allow the use of Flex operands.
- NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
- } else {
- // Dest could be a stack operand. Since we could potentially need to do a
- // Store (and store can only have Register operands), legalize this to a
- // register.
- NewSrc = legalize(Src0, Legal_Reg);
- }
- if (isVectorType(Dest->getType())) {
- Variable *SrcR = legalizeToReg(NewSrc);
- _mov(Dest, SrcR);
- } else if (isFloatingType(Dest->getType())) {
- Variable *SrcR = legalizeToReg(NewSrc);
- _mov(Dest, SrcR);
- } else {
- _mov(Dest, NewSrc);
- }
+ // Dest could be a stack operand. Since we could potentially need to do a
+ // Store (and store can only have Register operands), legalize this to a
+ // register.
+ NewSrc = legalize(Src0, Legal_Reg);
+ }
+
+ if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
+ NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
}
+ _mov(Dest, NewSrc);
}
TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
@@ -2580,6 +2702,18 @@ struct {
FCMPARM32_TABLE
#undef X
};
+
sehr 2015/11/13 21:56:29 Is there a more common place for this sort of func
John 2015/11/13 22:00:41 Maybe. If you think it's useful, you could add fro
John 2015/11/14 00:00:38 Oh, I thought this was Jim. He had the same routin
+bool isFloatingPointZero(Operand *Src) {
+ if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) {
+ return F32->getValue() == 0.0f;
+ }
+
+ if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) {
+ return F64->getValue() == 0.0;
+ }
+
+ return false;
+}
} // end of anonymous namespace
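One subtlety worth noting: these are IEEE comparisons, so isFloatingPointZero also returns true for -0.0. That is still sound for the vcmp-against-#0.0 path below, because vcmp performs the same IEEE comparison. A quick illustrative check:

    #include <cassert>

    void checkNegativeZero() {
      assert(0.0f == -0.0f); // a ConstantFloat(-0.0f) takes the #0.0 path
      assert(0.0 == -0.0);   // likewise for ConstantDouble
    }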
TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
@@ -2592,8 +2726,12 @@ TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
break;
default: {
Variable *Src0R = legalizeToReg(Instr->getSrc(0));
- Variable *Src1R = legalizeToReg(Instr->getSrc(1));
- _vcmp(Src0R, Src1R);
+ Operand *Src1 = Instr->getSrc(1);
+ if (isFloatingPointZero(Src1)) {
+ _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
+ } else {
+ _vcmp(Src0R, legalizeToReg(Src1));
+ }
_vmrs();
assert(Condition < llvm::array_lengthof(TableFcmp));
return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
@@ -2641,13 +2779,102 @@ void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
_mov(Dest, T);
}
+TargetARM32::CondWhenTrue
+TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
+ Operand *Src1) {
+ size_t Index = static_cast<size_t>(Condition);
+ assert(Index < llvm::array_lengthof(TableIcmp64));
+
+ Operand *NonConstOp = nullptr;
+ uint64_t Value;
+ if (const auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) {
+ Value = C->getValue();
+ NonConstOp = Src0;
+ } else if (const auto *C = llvm::dyn_cast<ConstantInteger64>(Src0)) {
+ Value = C->getValue();
+ NonConstOp = Src1;
+ }
+
+ Variable *Src0RLo, *Src0RHi;
+ Operand *Src1RFLo, *Src1RFHi;
+
+ if (NonConstOp != nullptr) {
+ if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
+ Value == 0) {
+ Variable *T = makeReg(IceType_i32);
+ _orrs(T, legalizeToReg(loOperand(NonConstOp)),
+ legalize(hiOperand(NonConstOp), Legal_Reg | Legal_Flex));
+ Context.insert(InstFakeUse::create(Func, T));
+ return CondWhenTrue(TableIcmp64[Index].C1);
+ }
-TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
- assert(Inst->getSrc(0)->getType() != IceType_i1);
- assert(Inst->getSrc(1)->getType() != IceType_i1);
+ Src0RLo = legalizeToReg(loOperand(NonConstOp));
+ Src0RHi = legalizeToReg(hiOperand(NonConstOp));
+ if ((Value >> 32) == (Value & 0xFFFFFFFF)) {
+ Src1RFLo = legalize(Ctx->getConstantInt32(Value & 0xFFFFFFFF),
+ Legal_Reg | Legal_Flex);
+ Src1RFHi = Src1RFLo;
+ } else {
+ Src1RFLo = legalize(Ctx->getConstantInt32(Value & 0xFFFFFFFF),
+ Legal_Reg | Legal_Flex);
+ Src1RFHi = legalize(Ctx->getConstantInt32((Value >> 32) & 0xFFFFFFFF),
+ Legal_Reg | Legal_Flex);
+ }
- Operand *Src0 = legalizeUndef(Inst->getSrc(0));
- Operand *Src1 = legalizeUndef(Inst->getSrc(1));
+ bool UseRsb = false;
+ if (TableIcmp64[Index].Swapped) {
+ UseRsb = NonConstOp == Src0;
+ } else {
+ UseRsb = NonConstOp == Src1;
+ }
+
+ if (UseRsb) {
+ if (TableIcmp64[Index].IsSigned) {
+ Variable *T = makeReg(IceType_i32);
+ _rsbs(T, Src0RLo, Src1RFLo);
+ Context.insert(InstFakeUse::create(Func, T));
+
+ T = makeReg(IceType_i32);
+ _rscs(T, Src0RHi, Src1RFHi);
+ // We need to add a FakeUse here because liveness gets mad at us (Def
+ // without Use.) Note that flag-setting instructions are considered to
+ // have side effects and, therefore, are not DCE'ed.
+ Context.insert(InstFakeUse::create(Func, T));
+ } else {
+ Variable *T = makeReg(IceType_i32);
+ _rsbs(T, Src0RHi, Src1RFHi);
+ Context.insert(InstFakeUse::create(Func, T));
+
+ T = makeReg(IceType_i32);
+ _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
+ Context.insert(InstFakeUse::create(Func, T));
+ }
+ } else {
+ if (TableIcmp64[Index].IsSigned) {
+ _cmp(Src0RLo, Src1RFLo);
+ Variable *T = makeReg(IceType_i32);
+ _sbcs(T, Src0RHi, Src1RFHi);
+ Context.insert(InstFakeUse::create(Func, T));
+ } else {
+ _cmp(Src0RHi, Src1RFHi);
+ _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
+ }
+ }
+
+ return CondWhenTrue(TableIcmp64[Index].C1);
+ }
+
+ if (TableIcmp64[Index].Swapped) {
+ Src0RLo = legalizeToReg(loOperand(Src1));
+ Src0RHi = legalizeToReg(hiOperand(Src1));
+ Src1RFLo = legalizeToReg(loOperand(Src0));
+ Src1RFHi = legalizeToReg(hiOperand(Src0));
+ } else {
+ Src0RLo = legalizeToReg(loOperand(Src0));
+ Src0RHi = legalizeToReg(hiOperand(Src0));
+ Src1RFLo = legalizeToReg(loOperand(Src1));
+ Src1RFHi = legalizeToReg(hiOperand(Src1));
+ }
// a=icmp cond, b, c ==>
// GCC does:
@@ -2678,38 +2905,28 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
//
// So, we are going with the GCC version since it's usually better (except
// perhaps for eq/ne). We could revisit special-casing eq/ne later.
-
- if (Src0->getType() == IceType_i64) {
- InstIcmp::ICond Conditon = Inst->getCondition();
- size_t Index = static_cast<size_t>(Conditon);
- assert(Index < llvm::array_lengthof(TableIcmp64));
- Variable *Src0Lo, *Src0Hi;
- Operand *Src1LoRF, *Src1HiRF;
- if (TableIcmp64[Index].Swapped) {
- Src0Lo = legalizeToReg(loOperand(Src1));
- Src0Hi = legalizeToReg(hiOperand(Src1));
- Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
- Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
- } else {
- Src0Lo = legalizeToReg(loOperand(Src0));
- Src0Hi = legalizeToReg(hiOperand(Src0));
- Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
- Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
- }
- if (TableIcmp64[Index].IsSigned) {
- Variable *ScratchReg = makeReg(IceType_i32);
- _cmp(Src0Lo, Src1LoRF);
- _sbcs(ScratchReg, Src0Hi, Src1HiRF);
- // ScratchReg isn't going to be used, but we need the side-effect of
- // setting flags from this operation.
- Context.insert(InstFakeUse::create(Func, ScratchReg));
- } else {
- _cmp(Src0Hi, Src1HiRF);
- _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
- }
- return CondWhenTrue(TableIcmp64[Index].C1);
+ if (TableIcmp64[Index].IsSigned) {
+ Variable *ScratchReg = makeReg(IceType_i32);
+ _cmp(Src0RLo, Src1RFLo);
+ _sbcs(ScratchReg, Src0RHi, Src1RFHi);
+ // ScratchReg isn't going to be used, but we need the side-effect of
+ // setting flags from this operation.
+ Context.insert(InstFakeUse::create(Func, ScratchReg));
+ } else {
+ _cmp(Src0RHi, Src1RFHi);
+ _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
}
+ return CondWhenTrue(TableIcmp64[Index].C1);
+}
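The two flag strategies in this function summarize neatly in plain C++ (an illustrative sketch only): unsigned orderings compare the high words and only let the low-word cmp, executed under EQ, decide a tie; signed orderings chain a borrow through sbcs.

    #include <cstdint>

    bool ult64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi) {
      // cmp AHi, BHi ; cmpeq ALo, BLo
      return (AHi != BHi) ? AHi < BHi : ALo < BLo;
    }

    bool slt64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi) {
      bool Borrow = ALo < BLo; // cmp ALo, BLo
      // sbcs T, AHi, BHi: signed less-than falls out of the final N/V flags.
      return (int64_t)(int32_t)AHi - (int32_t)BHi - (Borrow ? 1 : 0) < 0;
    }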
+
+TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
+ assert(Inst->getSrc(0)->getType() != IceType_i1);
+ assert(Inst->getSrc(1)->getType() != IceType_i1);
+ Operand *Src0 = legalizeUndef(Inst->getSrc(0));
+ Operand *Src1 = legalizeUndef(Inst->getSrc(1));
+
+ InstIcmp::ICond Condition = Inst->getCondition();
// a=icmp cond b, c ==>
// GCC does:
// <u/s>xtb tb, b
@@ -2739,27 +2956,94 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
//
// We'll go with the LLVM way for now, since it's shorter and has just as few
// dependencies.
- int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
- assert(ShiftAmt >= 0);
- Constant *ShiftConst = nullptr;
- Variable *Src0R = nullptr;
- if (ShiftAmt) {
- ShiftConst = Ctx->getConstantInt32(ShiftAmt);
- Src0R = makeReg(IceType_i32);
- _lsl(Src0R, legalizeToReg(Src0), ShiftConst);
- } else {
- Src0R = legalizeToReg(Src0);
+ Operand *NonConstOp = nullptr;
+ int32_t Value;
+ if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
+ Value = C->getValue();
+ NonConstOp = Src0;
+ } else if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src0)) {
+ Value = C->getValue();
+ NonConstOp = Src1;
+ }
+
+ switch (Src0->getType()) {
+ default:
+ llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
+ case IceType_i64:
+ return lowerInt64IcmpCond(Condition, Src0, Src1);
+ case IceType_i8:
+ case IceType_i16: {
+ int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
+ assert(ShAmt >= 0);
+
+ if (NonConstOp != nullptr) {
+ if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
+ Value == 0) {
+ Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt);
+ Variable *T = makeReg(IceType_i32);
+ _lsls(T, legalizeToReg(NonConstOp), ShAmtOp);
+ Context.insert(InstFakeUse::create(Func, T));
+ return CondWhenTrue(getIcmp32Mapping(Condition));
+ }
+ Variable *ConstR = makeReg(IceType_i32);
+ _mov(ConstR, legalize(Ctx->getConstantInt32(Value << ShAmt),
+ Legal_Reg | Legal_Flex));
+ Operand *NonConstF = OperandARM32FlexReg::create(
+ Func, IceType_i32, legalizeToReg(NonConstOp), OperandARM32::LSL,
+ Ctx->getConstantInt32(ShAmt));
+
+ if (Src1 == NonConstOp) {
+ _cmp(ConstR, NonConstF);
+ } else {
+ Variable *T = makeReg(IceType_i32);
+ _rsbs(T, ConstR, NonConstF);
+ Context.insert(InstFakeUse::create(Func, T));
+ }
+ return CondWhenTrue(getIcmp32Mapping(Condition));
+ }
+
+ Variable *Src0R = makeReg(IceType_i32);
+ Operand *ShAmtF =
+ legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex);
+ _lsl(Src0R, legalizeToReg(Src0), ShAmtF);
+
+ Variable *Src1R = legalizeToReg(Src1);
+ OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create(
+ Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF);
+ _cmp(Src0R, Src1F);
+ return CondWhenTrue(getIcmp32Mapping(Condition));
}
- if (ShiftAmt) {
+ case IceType_i32: {
+ if (NonConstOp != nullptr) {
+ if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
+ Value == 0) {
+ Variable *T = makeReg(IceType_i32);
+ Variable *OpR = legalizeToReg(NonConstOp);
+ _orrs(T, OpR, OpR);
+ Context.insert(InstFakeUse::create(Func, T));
+ return CondWhenTrue(getIcmp32Mapping(Condition));
+ }
+
+ Operand *ConstRF =
+ legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
+ Variable *NonConstR = legalizeToReg(NonConstOp);
+
+ if (Src0 == NonConstOp) {
+ _cmp(NonConstR, ConstRF);
+ } else {
+ Variable *T = makeReg(IceType_i32);
+ _rsbs(T, NonConstR, ConstRF);
+ Context.insert(InstFakeUse::create(Func, T));
+ }
+ return CondWhenTrue(getIcmp32Mapping(Condition));
+ }
+
+ Variable *Src0R = legalizeToReg(Src0);
Variable *Src1R = legalizeToReg(Src1);
- OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
- Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
- _cmp(Src0R, Src1RShifted);
- } else {
- Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
- _cmp(Src0R, Src1RF);
+ _cmp(Src0R, Src1R);
+ return CondWhenTrue(getIcmp32Mapping(Condition));
+ }
}
- return CondWhenTrue(getIcmp32Mapping(Inst->getCondition()));
}
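The i8/i16 path leans on a neat trick: left-shifting both operands so the payload fills the top bits lets a single 32-bit cmp honor both the signed and the unsigned ordering, with no sxtb/uxtb needed. A plain C++ model (illustrative sketch, shown for the unsigned ordering):

    #include <cstdint>

    bool ultNarrow(uint32_t A, uint32_t B, uint32_t BitWidth) {
      const uint32_t Sh = 32 - BitWidth; // 24 for i8, 16 for i16
      return (A << Sh) < (B << Sh);      // cmp Src0R, Src1R, lsl #Sh
    }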
void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
@@ -4254,13 +4538,15 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
return Reg;
} else {
assert(isScalarFloatingType(Ty));
+ uint32_t ModifiedImm;
+ if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
+ Variable *T = makeReg(Ty, RegNum);
+ _mov(T,
+ OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
+ return T;
+ }
+
// Load floats/doubles from literal pool.
- // TODO(jvoung): Allow certain immediates to be encoded directly in an
- // operand. See Table A7-18 of the ARM manual: "Floating-point modified
- // immediate constants". Or, for 32-bit floating point numbers, just
- // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG
- // instead of using a movw/movt pair to get the const-pool address then
- // loading to SREG.
std::string Buffer;
llvm::raw_string_ostream StrBuf(Buffer);
llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
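For context on canHoldImm: VFPv3's floating-point modified immediate can encode a single-precision value only when the low 19 mantissa bits are zero and the exponent sits in a narrow window. The following encodability test is a self-contained sketch based on the ARM ARM's VFPExpandImm, not Subzero's actual implementation:

    #include <cstdint>
    #include <cstring>

    // True iff F is representable as a VFP modified immediate, whose 32-bit
    // pattern is a : NOT(b) : bbbbb : cdefgh : 0^19.
    bool canBeVFPImm(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits));
      if (Bits & 0x7FFFF)                // low 19 bits must be zero
        return false;
      const uint32_t B = (Bits >> 25) & 0x1F;
      if (B != 0 && B != 0x1F)           // bits 29..25 must agree
        return false;
      return ((Bits >> 30) & 1) != ((Bits >> 29) & 1); // bit 30 = NOT(bit 29)
    }

Note that 0.0 itself fails this test, which is why the fcmp lowering earlier needed the separate OperandARM32FlexFpZero form for vcmp.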
