Chromium Code Reviews

Unified Diff: src/IceTargetLoweringARM32.cpp

Issue 1438773004: Subzero. ARM32. Improve constant lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fixes the lit tests. Double is too precise. Created 5 years, 1 month ago
Index: src/IceTargetLoweringARM32.cpp
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index fcb865b4c0c54038b14070a8b259d22e781083d7..83be39e8875b80ef03db1a066b49ead64765f7aa 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -1297,29 +1297,26 @@ void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
Variable *SrcLoReg = legalizeToReg(SrcLo);
switch (Ty) {
default:
- llvm_unreachable("Unexpected type");
- case IceType_i8: {
- Operand *Mask =
- legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex);
- _tst(SrcLoReg, Mask);
- break;
- }
+ llvm::report_fatal_error("Unexpected type");
+ case IceType_i8:
case IceType_i16: {
- Operand *Mask =
- legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex);
- _tst(SrcLoReg, Mask);
- break;
- }
+ Operand *ShAmtF =
+ legalize(Ctx->getConstantInt32(32 - getScalarIntBitWidth(Ty)),
+ Legal_Reg | Legal_Flex);
+ Variable *T = makeReg(IceType_i32);
+ _lsls(T, SrcLoReg, ShAmtF);
+ Context.insert(InstFakeUse::create(Func, T));
+ } break;
case IceType_i32: {
_tst(SrcLoReg, SrcLoReg);
break;
}
case IceType_i64: {
- Variable *ScratchReg = makeReg(IceType_i32);
- _orrs(ScratchReg, SrcLoReg, SrcHi);
- // ScratchReg isn't going to be used, but we need the side-effect of
- // setting flags from this operation.
- Context.insert(InstFakeUse::create(Func, ScratchReg));
+ Variable *T = makeReg(IceType_i32);
+ _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
+ // T isn't going to be used, but we need the side-effect of setting flags
+ // from this operation.
+ Context.insert(InstFakeUse::create(Func, T));
}
}
InstARM32Label *Label = InstARM32Label::create(Func, this);
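A minimal C++ model of the zero checks this hunk leaves in the Z flag (an illustrative sketch, not Subzero code): the new lsls form shifts the i8/i16 payload into the top bits, so no 0xFF/0xFFFF mask constant needs to be materialized, and the orrs form sets Z exactly when both halves of the i64 are zero.

    #include <cstdint>

    // i8/i16: lsls T, Src, #(32 - BitWidth) sets Z iff the low BitWidth
    // bits of Src are zero.
    bool isZeroNarrow(uint32_t Src, uint32_t BitWidth) {
      return (Src << (32 - BitWidth)) == 0;
    }

    // i64: orrs T, Lo, Hi sets Z iff every bit of the value is zero.
    bool isZero64(uint32_t Lo, uint32_t Hi) { return (Lo | Hi) == 0; }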
@@ -1404,6 +1401,389 @@ TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Inst) {
return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
}
+void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
+ Variable *Dest, Operand *Src0,
+ Operand *Src1) {
+ // These helper-call-involved instructions are lowered in this separate
+ // switch. This is because we would otherwise assume that we need to
+ // legalize Src0 to Src0RLo and Src0RHi. However, those go unused with
+ // helper calls, and such unused/redundant instructions will fail liveness
+ // analysis under -Om1 setting.
+ switch (Op) {
+ default:
+ break;
+ case InstArithmetic::Udiv:
+ case InstArithmetic::Sdiv:
+ case InstArithmetic::Urem:
+ case InstArithmetic::Srem: {
+ // Check for divide by 0 (ARM normally doesn't trap, but we want it to
+ // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
+ // register, which will hide a constant source operand. Instead, check
+ // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
+ if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
+ if (C64->getValue() == 0) {
+ _trap();
+ return;
+ }
+ } else {
+ Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
+ Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
+ div0Check(IceType_i64, Src1Lo, Src1Hi);
+ }
+ // Technically, ARM has their own aeabi routines, but we can use the
sehr 2015/11/13 21:56:29 either "has its" or "have their".
John 2015/11/14 00:00:38 For a moment I thought this was Jim. :) Done.
+ // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
+ // the more standard __moddi3 for rem.
+ const char *HelperName = "";
+ switch (Op) {
+ default:
+ llvm::report_fatal_error("Should have only matched div ops.");
+ break;
+ case InstArithmetic::Udiv:
+ HelperName = H_udiv_i64;
+ break;
+ case InstArithmetic::Sdiv:
+ HelperName = H_sdiv_i64;
+ break;
+ case InstArithmetic::Urem:
+ HelperName = H_urem_i64;
+ break;
+ case InstArithmetic::Srem:
+ HelperName = H_srem_i64;
+ break;
+ }
+ constexpr SizeT MaxSrcs = 2;
+ InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
+ Call->addArg(Src0);
+ Call->addArg(Src1);
+ lowerCall(Call);
+ return;
+ }
+ }
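For reference, these are the standard compiler-rt/libgcc entry points for 64-bit division and remainder; the comment above confirms __moddi3 is used for rem, while mapping H_udiv_i64 and friends onto the other three names is an assumption of this sketch.

    extern "C" {
    long long __divdi3(long long a, long long b);       // signed divide
    unsigned long long __udivdi3(unsigned long long a,
                                 unsigned long long b); // unsigned divide
    long long __moddi3(long long a, long long b);       // signed remainder
    unsigned long long __umoddi3(unsigned long long a,
                                 unsigned long long b); // unsigned remainder
    }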
+
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Variable *Src0RLo = nullptr;
+ Variable *Src0RHi = nullptr;
+ // Src0Hi is not always used got Shl, and Src0Lo is not always used for Lhsr.
Jim Stichnoth 2015/11/16 13:56:10 s/got/for/ ? Lshr
+ if (Op != InstArithmetic::Ashr && Op != InstArithmetic::Lshr) {
+ Src0RLo = legalizeToReg(loOperand(Src0));
+ }
+ if (Op != InstArithmetic::Shl) {
+ Src0RHi = legalizeToReg(hiOperand(Src0));
+ }
+ Operand *Src1Lo = loOperand(Src1);
+ Operand *Src1Hi = hiOperand(Src1);
+ Variable *T_Lo = makeReg(DestLo->getType());
+ Variable *T_Hi = makeReg(DestHi->getType());
+
+ switch (Op) {
+ case InstArithmetic::_num:
+ llvm::report_fatal_error("Unknown arithmetic operator");
+ return;
+ case InstArithmetic::Add:
+ Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
+ Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
+ _adds(T_Lo, Src0RLo, Src1Lo);
+ _mov(DestLo, T_Lo);
+ _adc(T_Hi, Src0RHi, Src1Hi);
+ _mov(DestHi, T_Hi);
+ return;
+ case InstArithmetic::And:
+ Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
+ Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
+ _and(T_Lo, Src0RLo, Src1Lo);
+ _mov(DestLo, T_Lo);
+ _and(T_Hi, Src0RHi, Src1Hi);
+ _mov(DestHi, T_Hi);
+ return;
+ case InstArithmetic::Or:
+ Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
+ Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
+ _orr(T_Lo, Src0RLo, Src1Lo);
+ _mov(DestLo, T_Lo);
+ _orr(T_Hi, Src0RHi, Src1Hi);
+ _mov(DestHi, T_Hi);
+ return;
+ case InstArithmetic::Xor:
+ Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
+ Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
+ _eor(T_Lo, Src0RLo, Src1Lo);
+ _mov(DestLo, T_Lo);
+ _eor(T_Hi, Src0RHi, Src1Hi);
+ _mov(DestHi, T_Hi);
+ return;
+ case InstArithmetic::Sub:
+ Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
+ Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
+ _subs(T_Lo, Src0RLo, Src1Lo);
+ _mov(DestLo, T_Lo);
+ _sbc(T_Hi, Src0RHi, Src1Hi);
+ _mov(DestHi, T_Hi);
+ return;
+ case InstArithmetic::Mul: {
+ // GCC 4.8 does:
+ // a=b*c ==>
+ // t_acc =(mul) (b.lo * c.hi)
+ // t_acc =(mla) (c.lo * b.hi) + t_acc
+ // t.hi,t.lo =(umull) b.lo * c.lo
+ // t.hi += t_acc
+ // a.lo = t.lo
+ // a.hi = t.hi
+ //
+ // LLVM does:
+ // t.hi,t.lo =(umull) b.lo * c.lo
+ // t.hi =(mla) (b.lo * c.hi) + t.hi
+ // t.hi =(mla) (b.hi * c.lo) + t.hi
+ // a.lo = t.lo
+ // a.hi = t.hi
+ //
+ // LLVM's lowering has fewer instructions, but more register pressure:
+ // t.lo is live from beginning to end, while GCC delays the two-dest
+ // instruction till the end, and kills c.hi immediately.
+ Variable *T_Acc = makeReg(IceType_i32);
+ Variable *T_Acc1 = makeReg(IceType_i32);
+ Variable *T_Hi1 = makeReg(IceType_i32);
+ Variable *Src1RLo = legalizeToReg(Src1Lo);
+ Variable *Src1RHi = legalizeToReg(Src1Hi);
+ _mul(T_Acc, Src0RLo, Src1RHi);
+ _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
+ _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
+ _add(T_Hi, T_Hi1, T_Acc1);
+ _mov(DestLo, T_Lo);
+ _mov(DestHi, T_Hi);
+ return;
+ }
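As a sanity check on the decomposition above, the same computation in plain C++ (an illustrative sketch only): just the low 32x32 product needs a full 64-bit result, and the two cross products only contribute to the high word modulo 2^32.

    #include <cstdint>

    uint64_t mul64(uint32_t BLo, uint32_t BHi, uint32_t CLo, uint32_t CHi) {
      uint32_t Acc = BLo * CHi + CLo * BHi;       // mul + mla
      uint64_t Prod = (uint64_t)BLo * CLo;        // umull
      uint32_t Hi = (uint32_t)(Prod >> 32) + Acc; // add
      return ((uint64_t)Hi << 32) | (uint32_t)Prod;
    }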
+ case InstArithmetic::Shl: {
+ assert(Src0RLo != nullptr);
+ if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
+ // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
+ const int32_t ShAmtImm = C->getValue() & 0x3F;
+ if (ShAmtImm == 0) {
+ Src0RHi = legalizeToReg(hiOperand(Src0));
+ _mov(DestLo, Src0RLo);
+ _mov(DestHi, Src0RHi);
+ return;
+ }
+
+ if (ShAmtImm >= 32) {
+ if (ShAmtImm == 32) {
+ _mov(DestHi, Src0RLo);
+ } else {
+ Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
+ Legal_Reg | Legal_Flex);
+ _lsl(T_Hi, Src0RLo, ShAmtOp);
+ _mov(DestHi, T_Hi);
+ }
+
+ Operand *_0 =
+ legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
+ _mov(T_Lo, _0);
+ _mov(DestLo, T_Lo);
+ return;
+ }
+
+ Src0RHi = legalizeToReg(hiOperand(Src0));
+ Operand *ShAmtOp =
+ legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
+ Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
+ Legal_Reg | Legal_Flex);
+ _lsl(T_Hi, Src0RHi, ShAmtOp);
+ _orr(T_Hi, T_Hi,
+ OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
+ OperandARM32::LSR, ComplShAmtOp));
+ _mov(DestHi, T_Hi);
+
+ _lsl(T_Lo, Src0RLo, ShAmtOp);
+ _mov(DestLo, T_Lo);
+ return;
+ }
+
+ // a=b<<c ==>
+ // pnacl-llc does:
+ // mov t_b.lo, b.lo
+ // mov t_b.hi, b.hi
+ // mov t_c.lo, c.lo
+ // rsb T0, t_c.lo, #32
+ // lsr T1, t_b.lo, T0
+ // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
+ // sub T2, t_c.lo, #32
+ // cmp T2, #0
+ // lslge t_a.hi, t_b.lo, T2
+ // lsl t_a.lo, t_b.lo, t_c.lo
+ // mov a.lo, t_a.lo
+ // mov a.hi, t_a.hi
+ //
+ // GCC 4.8 does:
+ // sub t_c1, c.lo, #32
+ // lsl t_hi, b.hi, c.lo
+ // orr t_hi, t_hi, b.lo, lsl t_c1
+ // rsb t_c2, c.lo, #32
+ // orr t_hi, t_hi, b.lo, lsr t_c2
+ // lsl t_lo, b.lo, c.lo
+ // a.lo = t_lo
+ // a.hi = t_hi
+ //
+ // These are incompatible, therefore we mimic pnacl-llc.
+ // Can be strength-reduced for constant-shifts, but we don't do that for
+ // now.
+ // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
+ // ARM, shifts only take the lower 8 bits of the shift register, and
+ // saturate to the range 0-32, so the negative value will saturate to 32.
+ Constant *_32 = Ctx->getConstantInt32(32);
+ Constant *_0 = Ctx->getConstantZero(IceType_i32);
+ Src0RHi = legalizeToReg(hiOperand(Src0));
+ Variable *Src1RLo = legalizeToReg(Src1Lo);
+ Variable *T0 = makeReg(IceType_i32);
+ Variable *T1 = makeReg(IceType_i32);
+ Variable *T2 = makeReg(IceType_i32);
+ Variable *TA_Hi = makeReg(IceType_i32);
+ Variable *TA_Lo = makeReg(IceType_i32);
+ _rsb(T0, Src1RLo, _32);
+ _lsr(T1, Src0RLo, T0);
+ _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
+ OperandARM32::LSL, Src1RLo));
+ _sub(T2, Src1RLo, _32);
+ _cmp(T2, _0);
+ _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
+ _set_dest_redefined();
+ _lsl(TA_Lo, Src0RLo, Src1RLo);
+ _mov(DestLo, TA_Lo);
+ _mov(DestHi, TA_Hi);
+ return;
+ }
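A plain C++ reference model of the whole shift-left case, constant and variable amounts alike (an illustrative sketch; the amount is masked to [0, 63] just as the lowering truncates it):

    #include <cstdint>

    uint64_t shl64(uint32_t Lo, uint32_t Hi, uint32_t C) {
      C &= 0x3F;
      uint32_t THi = (C < 32) ? (Hi << C) | (C ? Lo >> (32 - C) : 0)
                              : Lo << (C - 32); // the lslge path
      uint32_t TLo = (C < 32) ? Lo << C : 0;    // amounts >= 32 clear a.lo
      return ((uint64_t)THi << 32) | TLo;
    }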
+ case InstArithmetic::Lshr:
+ case InstArithmetic::Ashr: {
+ assert(Src0RHi != nullptr);
+ const bool ASR = Op == InstArithmetic::Ashr;
+ if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
+ // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
+ const int32_t ShAmtImm = C->getValue() & 0x3F;
+ if (ShAmtImm == 0) {
+ Src0RLo = legalizeToReg(loOperand(Src0));
+ _mov(DestLo, Src0RLo);
+ _mov(DestHi, Src0RHi);
+ return;
+ }
+
+ if (ShAmtImm >= 32) {
+ if (ShAmtImm == 32) {
+ _mov(DestLo, Src0RHi);
+ } else {
+ Operand *ShAmtOp = legalize(Ctx->getConstantInt32(ShAmtImm - 32),
+ Legal_Reg | Legal_Flex);
+ if (ASR) {
+ _asr(T_Lo, Src0RHi, ShAmtOp);
+ } else {
+ _lsr(T_Lo, Src0RHi, ShAmtOp);
+ }
+ _mov(DestLo, T_Lo);
+ }
+
+ if (ASR) {
+ Operand *_31 = legalize(Ctx->getConstantInt32(31),
+ Legal_Reg | Legal_Flex);
+ _asr(T_Hi, Src0RHi, _31);
+ } else {
+ Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
+ Legal_Reg | Legal_Flex);
+ _mov(T_Hi, _0);
+ }
+ _mov(DestHi, T_Hi);
+ return;
+ }
+
+ Src0RLo = legalizeToReg(loOperand(Src0));
+ Operand *ShAmtOp =
+ legalize(Ctx->getConstantInt32(ShAmtImm), Legal_Reg | Legal_Flex);
+ Operand *ComplShAmtOp = legalize(Ctx->getConstantInt32(32 - ShAmtImm),
+ Legal_Reg | Legal_Flex);
+ _lsr(T_Lo, Src0RLo, ShAmtOp);
+ _orr(T_Lo, T_Lo,
+ OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
+ OperandARM32::LSL, ComplShAmtOp));
+ _mov(DestLo, T_Lo);
+
+ if (ASR) {
+ _asr(T_Hi, Src0RHi, ShAmtOp);
+ } else {
+ _lsr(T_Hi, Src0RHi, ShAmtOp);
+ }
+ _mov(DestHi, T_Hi);
+ return;
+ }
+
+ // a=b>>c
+ // pnacl-llc does:
+ // mov t_b.lo, b.lo
+ // mov t_b.hi, b.hi
+ // mov t_c.lo, c.lo
+ // lsr T0, t_b.lo, t_c.lo
+ // rsb T1, t_c.lo, #32
+ // orr t_a.lo, T0, t_b.hi, lsl T1
+ // sub T2, t_c.lo, #32
+ // cmp T2, #0
+ // [al]srge t_a.lo, t_b.hi, T2
+ // [al]sr t_a.hi, t_b.hi, t_c.lo
+ // mov a.lo, t_a.lo
+ // mov a.hi, t_a.hi
+ //
+ // GCC 4.8 does (lsr):
+ // rsb t_c1, c.lo, #32
+ // lsr t_lo, b.lo, c.lo
+ // orr t_lo, t_lo, b.hi, lsl t_c1
+ // sub t_c2, c.lo, #32
+ // orr t_lo, t_lo, b.hi, lsr t_c2
+ // lsr t_hi, b.hi, c.lo
+ // mov a.lo, t_lo
+ // mov a.hi, t_hi
+ //
+ // These are incompatible, therefore we mimic pnacl-llc.
+ const bool IsAshr = Op == InstArithmetic::Ashr;
+ Constant *_32 = Ctx->getConstantInt32(32);
+ Constant *_0 = Ctx->getConstantZero(IceType_i32);
+ Src0RLo = legalizeToReg(loOperand(Src0));
+ Variable *Src1RLo = legalizeToReg(Src1Lo);
+ Variable *T0 = makeReg(IceType_i32);
+ Variable *T1 = makeReg(IceType_i32);
+ Variable *T2 = makeReg(IceType_i32);
+ Variable *TA_Lo = makeReg(IceType_i32);
+ Variable *TA_Hi = makeReg(IceType_i32);
+ _lsr(T0, Src0RLo, Src1RLo);
+ _rsb(T1, Src1RLo, _32);
+ _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
+ OperandARM32::LSL, T1));
+ _sub(T2, Src1RLo, _32);
+ _cmp(T2, _0);
+ if (IsAshr) {
+ _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
+ _set_dest_redefined();
+ _asr(TA_Hi, Src0RHi, Src1RLo);
+ } else {
+ _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
+ _set_dest_redefined();
+ _lsr(TA_Hi, Src0RHi, Src1RLo);
+ }
+ _mov(DestLo, TA_Lo);
+ _mov(DestHi, TA_Hi);
+ return;
+ }
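The matching right-shift model (again an illustrative sketch): IsAshr selects sign-fill versus zero-fill of the high word, exactly as the [al]sr selection above does.

    #include <cstdint>

    uint64_t shr64(uint32_t Lo, uint32_t Hi, uint32_t C, bool IsAshr) {
      C &= 0x3F;
      uint32_t TLo =
          (C < 32) ? (Lo >> C) | (C ? Hi << (32 - C) : 0)
                   : (IsAshr ? (uint32_t)((int32_t)Hi >> (C - 32))
                             : Hi >> (C - 32));
      uint32_t THi = IsAshr ? (uint32_t)((int32_t)Hi >> (C < 32 ? C : 31))
                            : (C < 32 ? Hi >> C : 0);
      return ((uint64_t)THi << 32) | TLo;
    }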
+ case InstArithmetic::Fadd:
+ case InstArithmetic::Fsub:
+ case InstArithmetic::Fmul:
+ case InstArithmetic::Fdiv:
+ case InstArithmetic::Frem:
+ llvm::report_fatal_error("FP instruction with i64 type");
+ return;
+ case InstArithmetic::Udiv:
+ case InstArithmetic::Sdiv:
+ case InstArithmetic::Urem:
+ case InstArithmetic::Srem:
+ llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
+ "should have already been handled before");
+ return;
+ }
+}
+
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
if (Dest->getType() == IceType_i1) {
@@ -1421,272 +1801,11 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Operand *Src0 = legalizeUndef(Inst->getSrc(0));
Operand *Src1 = legalizeUndef(Inst->getSrc(1));
if (Dest->getType() == IceType_i64) {
- // These helper-call-involved instructions are lowered in this separate
- // switch. This is because we would otherwise assume that we need to
- // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
- // helper calls, and such unused/redundant instructions will fail liveness
- // analysis under -Om1 setting.
- switch (Inst->getOp()) {
- default:
- break;
- case InstArithmetic::Udiv:
- case InstArithmetic::Sdiv:
- case InstArithmetic::Urem:
- case InstArithmetic::Srem: {
- // Check for divide by 0 (ARM normally doesn't trap, but we want it to
- // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
- // register, which will hide a constant source operand. Instead, check
- // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
- if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
- if (C64->getValue() == 0) {
- _trap();
- return;
- }
- } else {
- Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
- Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
- div0Check(IceType_i64, Src1Lo, Src1Hi);
- }
- // Technically, ARM has their own aeabi routines, but we can use the
- // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
- // the more standard __moddi3 for rem.
- const char *HelperName = "";
- switch (Inst->getOp()) {
- default:
- llvm_unreachable("Should have only matched div ops.");
- break;
- case InstArithmetic::Udiv:
- HelperName = H_udiv_i64;
- break;
- case InstArithmetic::Sdiv:
- HelperName = H_sdiv_i64;
- break;
- case InstArithmetic::Urem:
- HelperName = H_urem_i64;
- break;
- case InstArithmetic::Srem:
- HelperName = H_srem_i64;
- break;
- }
- constexpr SizeT MaxSrcs = 2;
- InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs);
- Call->addArg(Src0);
- Call->addArg(Src1);
- lowerCall(Call);
- return;
- }
- }
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Variable *Src0RLo = legalizeToReg(loOperand(Src0));
- Variable *Src0RHi = legalizeToReg(hiOperand(Src0));
- Operand *Src1Lo = loOperand(Src1);
- Operand *Src1Hi = hiOperand(Src1);
- Variable *T_Lo = makeReg(DestLo->getType());
- Variable *T_Hi = makeReg(DestHi->getType());
- switch (Inst->getOp()) {
- case InstArithmetic::_num:
- llvm_unreachable("Unknown arithmetic operator");
- return;
- case InstArithmetic::Add:
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
- _adds(T_Lo, Src0RLo, Src1Lo);
- _mov(DestLo, T_Lo);
- _adc(T_Hi, Src0RHi, Src1Hi);
- _mov(DestHi, T_Hi);
- return;
- case InstArithmetic::And:
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
- _and(T_Lo, Src0RLo, Src1Lo);
- _mov(DestLo, T_Lo);
- _and(T_Hi, Src0RHi, Src1Hi);
- _mov(DestHi, T_Hi);
- return;
- case InstArithmetic::Or:
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
- _orr(T_Lo, Src0RLo, Src1Lo);
- _mov(DestLo, T_Lo);
- _orr(T_Hi, Src0RHi, Src1Hi);
- _mov(DestHi, T_Hi);
- return;
- case InstArithmetic::Xor:
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
- _eor(T_Lo, Src0RLo, Src1Lo);
- _mov(DestLo, T_Lo);
- _eor(T_Hi, Src0RHi, Src1Hi);
- _mov(DestHi, T_Hi);
- return;
- case InstArithmetic::Sub:
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Flex);
- Src1Hi = legalize(Src1Hi, Legal_Reg | Legal_Flex);
- _subs(T_Lo, Src0RLo, Src1Lo);
- _mov(DestLo, T_Lo);
- _sbc(T_Hi, Src0RHi, Src1Hi);
- _mov(DestHi, T_Hi);
- return;
- case InstArithmetic::Mul: {
- // GCC 4.8 does:
- // a=b*c ==>
- // t_acc =(mul) (b.lo * c.hi)
- // t_acc =(mla) (c.lo * b.hi) + t_acc
- // t.hi,t.lo =(umull) b.lo * c.lo
- // t.hi += t_acc
- // a.lo = t.lo
- // a.hi = t.hi
- //
- // LLVM does:
- // t.hi,t.lo =(umull) b.lo * c.lo
- // t.hi =(mla) (b.lo * c.hi) + t.hi
- // t.hi =(mla) (b.hi * c.lo) + t.hi
- // a.lo = t.lo
- // a.hi = t.hi
- //
- // LLVM's lowering has fewer instructions, but more register pressure:
- // t.lo is live from beginning to end, while GCC delays the two-dest
- // instruction till the end, and kills c.hi immediately.
- Variable *T_Acc = makeReg(IceType_i32);
- Variable *T_Acc1 = makeReg(IceType_i32);
- Variable *T_Hi1 = makeReg(IceType_i32);
- Variable *Src1RLo = legalizeToReg(Src1Lo);
- Variable *Src1RHi = legalizeToReg(Src1Hi);
- _mul(T_Acc, Src0RLo, Src1RHi);
- _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
- _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
- _add(T_Hi, T_Hi1, T_Acc1);
- _mov(DestLo, T_Lo);
- _mov(DestHi, T_Hi);
- return;
- }
- case InstArithmetic::Shl: {
- // a=b<<c ==>
- // pnacl-llc does:
- // mov t_b.lo, b.lo
- // mov t_b.hi, b.hi
- // mov t_c.lo, c.lo
- // rsb T0, t_c.lo, #32
- // lsr T1, t_b.lo, T0
- // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
- // sub T2, t_c.lo, #32
- // cmp T2, #0
- // lslge t_a.hi, t_b.lo, T2
- // lsl t_a.lo, t_b.lo, t_c.lo
- // mov a.lo, t_a.lo
- // mov a.hi, t_a.hi
- //
- // GCC 4.8 does:
- // sub t_c1, c.lo, #32
- // lsl t_hi, b.hi, c.lo
- // orr t_hi, t_hi, b.lo, lsl t_c1
- // rsb t_c2, c.lo, #32
- // orr t_hi, t_hi, b.lo, lsr t_c2
- // lsl t_lo, b.lo, c.lo
- // a.lo = t_lo
- // a.hi = t_hi
- //
- // These are incompatible, therefore we mimic pnacl-llc.
- // Can be strength-reduced for constant-shifts, but we don't do that for
- // now.
- // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
- // ARM, shifts only take the lower 8 bits of the shift register, and
- // saturate to the range 0-32, so the negative value will saturate to 32.
- Constant *_32 = Ctx->getConstantInt32(32);
- Constant *_0 = Ctx->getConstantZero(IceType_i32);
- Variable *Src1RLo = legalizeToReg(Src1Lo);
- Variable *T0 = makeReg(IceType_i32);
- Variable *T1 = makeReg(IceType_i32);
- Variable *T2 = makeReg(IceType_i32);
- Variable *TA_Hi = makeReg(IceType_i32);
- Variable *TA_Lo = makeReg(IceType_i32);
- _rsb(T0, Src1RLo, _32);
- _lsr(T1, Src0RLo, T0);
- _orr(TA_Hi, T1, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
- OperandARM32::LSL, Src1RLo));
- _sub(T2, Src1RLo, _32);
- _cmp(T2, _0);
- _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
- _set_dest_redefined();
- _lsl(TA_Lo, Src0RLo, Src1RLo);
- _mov(DestLo, TA_Lo);
- _mov(DestHi, TA_Hi);
- return;
- }
- case InstArithmetic::Lshr:
- case InstArithmetic::Ashr: {
- // a=b>>c
- // pnacl-llc does:
- // mov t_b.lo, b.lo
- // mov t_b.hi, b.hi
- // mov t_c.lo, c.lo
- // lsr T0, t_b.lo, t_c.lo
- // rsb T1, t_c.lo, #32
- // orr t_a.lo, T0, t_b.hi, lsl T1
- // sub T2, t_c.lo, #32
- // cmp T2, #0
- // [al]srge t_a.lo, t_b.hi, T2
- // [al]sr t_a.hi, t_b.hi, t_c.lo
- // mov a.lo, t_a.lo
- // mov a.hi, t_a.hi
- //
- // GCC 4.8 does (lsr):
- // rsb t_c1, c.lo, #32
- // lsr t_lo, b.lo, c.lo
- // orr t_lo, t_lo, b.hi, lsl t_c1
- // sub t_c2, c.lo, #32
- // orr t_lo, t_lo, b.hi, lsr t_c2
- // lsr t_hi, b.hi, c.lo
- // mov a.lo, t_lo
- // mov a.hi, t_hi
- //
- // These are incompatible, therefore we mimic pnacl-llc.
- const bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
- Constant *_32 = Ctx->getConstantInt32(32);
- Constant *_0 = Ctx->getConstantZero(IceType_i32);
- Variable *Src1RLo = legalizeToReg(Src1Lo);
- Variable *T0 = makeReg(IceType_i32);
- Variable *T1 = makeReg(IceType_i32);
- Variable *T2 = makeReg(IceType_i32);
- Variable *TA_Lo = makeReg(IceType_i32);
- Variable *TA_Hi = makeReg(IceType_i32);
- _lsr(T0, Src0RLo, Src1RLo);
- _rsb(T1, Src1RLo, _32);
- _orr(TA_Lo, T0, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
- OperandARM32::LSL, T1));
- _sub(T2, Src1RLo, _32);
- _cmp(T2, _0);
- if (IsAshr) {
- _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
- _set_dest_redefined();
- _asr(TA_Hi, Src0RHi, Src1RLo);
- } else {
- _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
- _set_dest_redefined();
- _lsr(TA_Hi, Src0RHi, Src1RLo);
- }
- _mov(DestLo, TA_Lo);
- _mov(DestHi, TA_Hi);
- return;
- }
- case InstArithmetic::Fadd:
- case InstArithmetic::Fsub:
- case InstArithmetic::Fmul:
- case InstArithmetic::Fdiv:
- case InstArithmetic::Frem:
- llvm_unreachable("FP instruction with i64 type");
- return;
- case InstArithmetic::Udiv:
- case InstArithmetic::Sdiv:
- case InstArithmetic::Urem:
- case InstArithmetic::Srem:
- llvm_unreachable("Call-helper-involved instruction for i64 type "
- "should have already been handled before");
- return;
- }
+ lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1);
return;
- } else if (isVectorType(Dest->getType())) {
+ }
+
+ if (isVectorType(Dest->getType())) {
// Add a fake def to keep liveness consistent in the meantime.
Variable *T = makeReg(Dest->getType());
Context.insert(InstFakeDef::create(Func, T));
@@ -1694,9 +1813,11 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
UnimplementedError(Func->getContext()->getFlags());
return;
}
+
// Dest->getType() is a non-i64 scalar.
Variable *Src0R = legalizeToReg(Src0);
Variable *T = makeReg(Dest->getType());
+
// Handle div/rem separately. They require a non-legalized Src1 to inspect
// whether or not Src1 is a non-zero constant. Once legalized it is more
// difficult to determine (constant may be moved to a register).
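A hedged sketch of the inspection that comment describes; the helper name below is hypothetical, but the dyn_cast pattern mirrors how the i64 path above tests Src1 before legalization.

    // Hypothetical helper, for illustration only: once Src1 is legalized
    // into a register, a constant divisor can no longer be recognized.
    bool isDivisorKnownNonZero(const Operand *Src1) {
      if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1))
        return C->getValue() != 0;
      return false; // not provably non-zero
    }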
@@ -1773,7 +1894,7 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
switch (Inst->getOp()) {
case InstArithmetic::_num:
- llvm_unreachable("Unknown arithmetic operator");
+ llvm::report_fatal_error("Unknown arithmetic operator");
return;
case InstArithmetic::Add:
_add(T, Src0R, Src1RF);
@@ -1823,14 +1944,16 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
case InstArithmetic::Sdiv:
case InstArithmetic::Urem:
case InstArithmetic::Srem:
- llvm_unreachable("Integer div/rem should have been handled earlier.");
+ llvm::report_fatal_error(
+ "Integer div/rem should have been handled earlier.");
return;
case InstArithmetic::Fadd:
case InstArithmetic::Fsub:
case InstArithmetic::Fmul:
case InstArithmetic::Fdiv:
case InstArithmetic::Frem:
- llvm_unreachable("Floating point arith should have been handled earlier.");
+ llvm::report_fatal_error(
+ "Floating point arith should have been handled earlier.");
return;
}
}
@@ -1841,40 +1964,39 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) {
assert(Dest->getType() == Src0->getType());
if (Dest->getType() == IceType_i64) {
Src0 = legalizeUndef(Src0);
- Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
- Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Variable *T_Lo = makeReg(IceType_i32);
- Variable *T_Hi = makeReg(IceType_i32);
+ Variable *T_Lo = makeReg(IceType_i32);
+ auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
+ Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
_mov(T_Lo, Src0Lo);
_mov(DestLo, T_Lo);
+
+ Variable *T_Hi = makeReg(IceType_i32);
+ auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
_mov(T_Hi, Src0Hi);
_mov(DestHi, T_Hi);
+
+ return;
+ }
+
+ Operand *NewSrc;
+ if (Dest->hasReg()) {
+ // If Dest already has a physical register, then legalize the Src operand
+ // into a Variable with the same register assignment. This especially
+ // helps allow the use of Flex operands.
+ NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
} else {
- Operand *NewSrc;
- if (Dest->hasReg()) {
- // If Dest already has a physical register, then legalize the Src operand
- // into a Variable with the same register assignment. This especially
- // helps allow the use of Flex operands.
- NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
- } else {
- // Dest could be a stack operand. Since we could potentially need to do a
- // Store (and store can only have Register operands), legalize this to a
- // register.
- NewSrc = legalize(Src0, Legal_Reg);
- }
- if (isVectorType(Dest->getType())) {
- Variable *SrcR = legalizeToReg(NewSrc);
- _mov(Dest, SrcR);
- } else if (isFloatingType(Dest->getType())) {
- Variable *SrcR = legalizeToReg(NewSrc);
- _mov(Dest, SrcR);
- } else {
- _mov(Dest, NewSrc);
- }
+ // Dest could be a stack operand. Since we could potentially need to do a
+ // Store (and store can only have Register operands), legalize this to a
+ // register.
+ NewSrc = legalize(Src0, Legal_Reg);
+ }
+
+ if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
+ NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
}
+ _mov(Dest, NewSrc);
}
TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
@@ -2580,6 +2702,18 @@ struct {
FCMPARM32_TABLE
#undef X
};
+
sehr 2015/11/13 21:56:29 Is there a more common place for this sort of func
John 2015/11/13 22:00:41 Maybe. If you think it's useful, you could add fro
John 2015/11/14 00:00:38 Oh, I thought this was Jim. He had the same routin
+bool isFloatingPointZero(Operand *Src) {
+ if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) {
+ return F32->getValue() == 0.0f;
+ }
+
+ if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) {
+ return F64->getValue() == 0.0;
+ }
+
+ return false;
+}
} // end of anonymous namespace
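One subtlety worth noting: these are IEEE comparisons, so isFloatingPointZero also returns true for -0.0. That is still sound for the vcmp-against-#0.0 path below, because vcmp performs the same IEEE comparison. A quick illustrative check:

    #include <cassert>

    void checkNegativeZero() {
      assert(0.0f == -0.0f); // a ConstantFloat(-0.0f) takes the #0.0 path
      assert(0.0 == -0.0);   // likewise for ConstantDouble
    }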
TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
@@ -2592,8 +2726,12 @@ TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
break;
default: {
Variable *Src0R = legalizeToReg(Instr->getSrc(0));
- Variable *Src1R = legalizeToReg(Instr->getSrc(1));
- _vcmp(Src0R, Src1R);
+ Operand *Src1 = Instr->getSrc(1);
+ if (isFloatingPointZero(Src1)) {
+ _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
+ } else {
+ _vcmp(Src0R, legalizeToReg(Src1));
+ }
_vmrs();
assert(Condition < llvm::array_lengthof(TableFcmp));
return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
@@ -2641,13 +2779,102 @@ void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
_mov(Dest, T);
}
+TargetARM32::CondWhenTrue
+TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
+ Operand *Src1) {
+ size_t Index = static_cast<size_t>(Condition);
+ assert(Index < llvm::array_lengthof(TableIcmp64));
+
+ Operand *NonConstOp = nullptr;
+ uint64_t Value;
+ if (const auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) {
+ Value = C->getValue();
+ NonConstOp = Src0;
+ } else if (const auto *C = llvm::dyn_cast<ConstantInteger64>(Src0)) {
+ Value = C->getValue();
+ NonConstOp = Src1;
+ }
+
+ Variable *Src0RLo, *Src0RHi;
+ Operand *Src1RFLo, *Src1RFHi;
+
+ if (NonConstOp != nullptr) {
+ if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
+ Value == 0) {
+ Variable *T = makeReg(IceType_i32);
+ _orrs(T, legalizeToReg(loOperand(NonConstOp)),
+ legalize(hiOperand(NonConstOp), Legal_Reg | Legal_Flex));
+ Context.insert(InstFakeUse::create(Func, T));
+ return CondWhenTrue(TableIcmp64[Index].C1);
+ }
-TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
- assert(Inst->getSrc(0)->getType() != IceType_i1);
- assert(Inst->getSrc(1)->getType() != IceType_i1);
+ Src0RLo = legalizeToReg(loOperand(NonConstOp));
+ Src0RHi = legalizeToReg(hiOperand(NonConstOp));
+ if ((Value >> 32) == (Value & 0xFFFFFFFF)) {
+ Src1RFLo = legalize(Ctx->getConstantInt32(Value & 0xFFFFFFFF),
+ Legal_Reg | Legal_Flex);
+ Src1RFHi = Src1RFLo;
+ } else {
+ Src1RFLo = legalize(Ctx->getConstantInt32(Value & 0xFFFFFFFF),
+ Legal_Reg | Legal_Flex);
+ Src1RFHi = legalize(Ctx->getConstantInt32((Value >> 32) & 0xFFFFFFFF),
+ Legal_Reg | Legal_Flex);
+ }
- Operand *Src0 = legalizeUndef(Inst->getSrc(0));
- Operand *Src1 = legalizeUndef(Inst->getSrc(1));
+ bool UseRsb = false;
+ if (TableIcmp64[Index].Swapped) {
+ UseRsb = NonConstOp == Src0;
+ } else {
+ UseRsb = NonConstOp == Src1;
+ }
+
+ if (UseRsb) {
+ if (TableIcmp64[Index].IsSigned) {
+ Variable *T = makeReg(IceType_i32);
+ _rsbs(T, Src0RLo, Src1RFLo);
+ Context.insert(InstFakeUse::create(Func, T));
+
+ T = makeReg(IceType_i32);
+ _rscs(T, Src0RHi, Src1RFHi);
+ // We need to add a FakeUse here because liveness gets mad at us (Def
+ // without Use.) Note that flag-setting instructions are considered to
+ // have side effects and, therefore, are not DCE'ed.
+ Context.insert(InstFakeUse::create(Func, T));
+ } else {
+ Variable *T = makeReg(IceType_i32);
+ _rsbs(T, Src0RHi, Src1RFHi);
+ Context.insert(InstFakeUse::create(Func, T));
+
+ T = makeReg(IceType_i32);
+ _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
+ Context.insert(InstFakeUse::create(Func, T));
+ }
+ } else {
+ if (TableIcmp64[Index].IsSigned) {
+ _cmp(Src0RLo, Src1RFLo);
+ Variable *T = makeReg(IceType_i32);
+ _sbcs(T, Src0RHi, Src1RFHi);
+ Context.insert(InstFakeUse::create(Func, T));
+ } else {
+ _cmp(Src0RHi, Src1RFHi);
+ _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
+ }
+ }
+
+ return CondWhenTrue(TableIcmp64[Index].C1);
+ }
+
+ if (TableIcmp64[Index].Swapped) {
+ Src0RLo = legalizeToReg(loOperand(Src1));
+ Src0RHi = legalizeToReg(hiOperand(Src1));
+ Src1RFLo = legalizeToReg(loOperand(Src0));
+ Src1RFHi = legalizeToReg(hiOperand(Src0));
+ } else {
+ Src0RLo = legalizeToReg(loOperand(Src0));
+ Src0RHi = legalizeToReg(hiOperand(Src0));
+ Src1RFLo = legalizeToReg(loOperand(Src1));
+ Src1RFHi = legalizeToReg(hiOperand(Src1));
+ }
// a=icmp cond, b, c ==>
// GCC does:
@@ -2678,38 +2905,28 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
//
// So, we are going with the GCC version since it's usually better (except
// perhaps for eq/ne). We could revisit special-casing eq/ne later.
-
- if (Src0->getType() == IceType_i64) {
- InstIcmp::ICond Conditon = Inst->getCondition();
- size_t Index = static_cast<size_t>(Conditon);
- assert(Index < llvm::array_lengthof(TableIcmp64));
- Variable *Src0Lo, *Src0Hi;
- Operand *Src1LoRF, *Src1HiRF;
- if (TableIcmp64[Index].Swapped) {
- Src0Lo = legalizeToReg(loOperand(Src1));
- Src0Hi = legalizeToReg(hiOperand(Src1));
- Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
- Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
- } else {
- Src0Lo = legalizeToReg(loOperand(Src0));
- Src0Hi = legalizeToReg(hiOperand(Src0));
- Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
- Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
- }
- if (TableIcmp64[Index].IsSigned) {
- Variable *ScratchReg = makeReg(IceType_i32);
- _cmp(Src0Lo, Src1LoRF);
- _sbcs(ScratchReg, Src0Hi, Src1HiRF);
- // ScratchReg isn't going to be used, but we need the side-effect of
- // setting flags from this operation.
- Context.insert(InstFakeUse::create(Func, ScratchReg));
- } else {
- _cmp(Src0Hi, Src1HiRF);
- _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
- }
- return CondWhenTrue(TableIcmp64[Index].C1);
+ if (TableIcmp64[Index].IsSigned) {
+ Variable *ScratchReg = makeReg(IceType_i32);
+ _cmp(Src0RLo, Src1RFLo);
+ _sbcs(ScratchReg, Src0RHi, Src1RFHi);
+ // ScratchReg isn't going to be used, but we need the side-effect of
+ // setting flags from this operation.
+ Context.insert(InstFakeUse::create(Func, ScratchReg));
+ } else {
+ _cmp(Src0RHi, Src1RFHi);
+ _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
}
+ return CondWhenTrue(TableIcmp64[Index].C1);
+}
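The two flag strategies in this function summarize neatly in plain C++ (an illustrative sketch only): unsigned orderings compare the high words and only let the low-word cmp, executed under EQ, decide a tie; signed orderings chain a borrow through sbcs.

    #include <cstdint>

    bool ult64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi) {
      // cmp AHi, BHi ; cmpeq ALo, BLo
      return (AHi != BHi) ? AHi < BHi : ALo < BLo;
    }

    bool slt64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi) {
      bool Borrow = ALo < BLo; // cmp ALo, BLo
      // sbcs T, AHi, BHi: signed less-than falls out of the final N/V flags.
      return (int64_t)(int32_t)AHi - (int32_t)BHi - (Borrow ? 1 : 0) < 0;
    }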
+
+TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
+ assert(Inst->getSrc(0)->getType() != IceType_i1);
+ assert(Inst->getSrc(1)->getType() != IceType_i1);
+ Operand *Src0 = legalizeUndef(Inst->getSrc(0));
+ Operand *Src1 = legalizeUndef(Inst->getSrc(1));
+
+ InstIcmp::ICond Condition = Inst->getCondition();
// a=icmp cond b, c ==>
// GCC does:
// <u/s>xtb tb, b
@@ -2739,27 +2956,94 @@ TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Inst) {
//
// We'll go with the LLVM way for now, since it's shorter and has just as few
// dependencies.
- int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
- assert(ShiftAmt >= 0);
- Constant *ShiftConst = nullptr;
- Variable *Src0R = nullptr;
- if (ShiftAmt) {
- ShiftConst = Ctx->getConstantInt32(ShiftAmt);
- Src0R = makeReg(IceType_i32);
- _lsl(Src0R, legalizeToReg(Src0), ShiftConst);
- } else {
- Src0R = legalizeToReg(Src0);
+ Operand *NonConstOp = nullptr;
+ int32_t Value;
+ if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
+ Value = C->getValue();
+ NonConstOp = Src0;
+ } else if (const auto *C = llvm::dyn_cast<ConstantInteger32>(Src0)) {
+ Value = C->getValue();
+ NonConstOp = Src1;
+ }
+
+ switch (Src0->getType()) {
+ default:
+ llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
+ case IceType_i64:
+ return lowerInt64IcmpCond(Condition, Src0, Src1);
+ case IceType_i8:
+ case IceType_i16: {
+ int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
+ assert(ShAmt >= 0);
+
+ if (NonConstOp != nullptr) {
+ if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
+ Value == 0) {
+ Operand *ShAmtOp = Ctx->getConstantInt32(ShAmt);
+ Variable *T = makeReg(IceType_i32);
+ _lsls(T, legalizeToReg(NonConstOp), ShAmtOp);
+ Context.insert(InstFakeUse::create(Func, T));
+ return CondWhenTrue(getIcmp32Mapping(Condition));
+ }
+ Variable *ConstR = makeReg(IceType_i32);
+ _mov(ConstR, legalize(Ctx->getConstantInt32(Value << ShAmt),
+ Legal_Reg | Legal_Flex));
+ Operand *NonConstF = OperandARM32FlexReg::create(
+ Func, IceType_i32, legalizeToReg(NonConstOp), OperandARM32::LSL,
+ Ctx->getConstantInt32(ShAmt));
+
+ if (Src1 == NonConstOp) {
+ _cmp(ConstR, NonConstF);
+ } else {
+ Variable *T = makeReg(IceType_i32);
+ _rsbs(T, ConstR, NonConstF);
+ Context.insert(InstFakeUse::create(Func, T));
+ }
+ return CondWhenTrue(getIcmp32Mapping(Condition));
+ }
+
+ Variable *Src0R = makeReg(IceType_i32);
+ Operand *ShAmtF =
+ legalize(Ctx->getConstantInt32(ShAmt), Legal_Reg | Legal_Flex);
+ _lsl(Src0R, legalizeToReg(Src0), ShAmtF);
+
+ Variable *Src1R = legalizeToReg(Src1);
+ OperandARM32FlexReg *Src1F = OperandARM32FlexReg::create(
+ Func, IceType_i32, Src1R, OperandARM32::LSL, ShAmtF);
+ _cmp(Src0R, Src1F);
+ return CondWhenTrue(getIcmp32Mapping(Condition));
}
- if (ShiftAmt) {
+ case IceType_i32: {
+ if (NonConstOp != nullptr) {
+ if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
+ Value == 0) {
+ Variable *T = makeReg(IceType_i32);
+ Variable *OpR = legalizeToReg(NonConstOp);
+ _orrs(T, OpR, OpR);
+ Context.insert(InstFakeUse::create(Func, T));
+ return CondWhenTrue(getIcmp32Mapping(Condition));
+ }
+
+ Operand *ConstRF =
+ legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
+ Variable *NonConstR = legalizeToReg(NonConstOp);
+
+ if (Src0 == NonConstOp) {
+ _cmp(NonConstR, ConstRF);
+ } else {
+ Variable *T = makeReg(IceType_i32);
+ _rsbs(T, NonConstR, ConstRF);
+ Context.insert(InstFakeUse::create(Func, T));
+ }
+ return CondWhenTrue(getIcmp32Mapping(Condition));
+ }
+
+ Variable *Src0R = legalizeToReg(Src0);
Variable *Src1R = legalizeToReg(Src1);
- OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create(
- Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst);
- _cmp(Src0R, Src1RShifted);
- } else {
- Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
- _cmp(Src0R, Src1RF);
+ _cmp(Src0R, Src1R);
+ return CondWhenTrue(getIcmp32Mapping(Condition));
+ }
}
- return CondWhenTrue(getIcmp32Mapping(Inst->getCondition()));
}
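The i8/i16 path leans on a neat trick: left-shifting both operands so the payload fills the top bits lets a single 32-bit cmp honor both the signed and the unsigned ordering, with no sxtb/uxtb needed. A plain C++ model (illustrative sketch, shown for the unsigned ordering):

    #include <cstdint>

    bool ultNarrow(uint32_t A, uint32_t B, uint32_t BitWidth) {
      const uint32_t Sh = 32 - BitWidth; // 24 for i8, 16 for i16
      return (A << Sh) < (B << Sh);      // cmp Src0R, Src1R, lsl #Sh
    }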
void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
@@ -4254,13 +4538,15 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
return Reg;
} else {
assert(isScalarFloatingType(Ty));
+ uint32_t ModifiedImm;
+ if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
+ Variable *T = makeReg(Ty, RegNum);
+ _mov(T,
+ OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
+ return T;
+ }
+
// Load floats/doubles from literal pool.
- // TODO(jvoung): Allow certain immediates to be encoded directly in an
- // operand. See Table A7-18 of the ARM manual: "Floating-point modified
- // immediate constants". Or, for 32-bit floating point numbers, just
- // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG
- // instead of using a movw/movt pair to get the const-pool address then
- // loading to SREG.
std::string Buffer;
llvm::raw_string_ostream StrBuf(Buffer);
llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
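For context on canHoldImm: VFPv3's floating-point modified immediate can encode a single-precision value only when the low 19 mantissa bits are zero and the exponent sits in a narrow window. The following encodability test is a self-contained sketch based on the ARM ARM's VFPExpandImm, not Subzero's actual implementation:

    #include <cstdint>
    #include <cstring>

    // True iff F is representable as a VFP modified immediate, whose 32-bit
    // pattern is a : NOT(b) : bbbbb : cdefgh : 0^19.
    bool canBeVFPImm(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits));
      if (Bits & 0x7FFFF)                // low 19 bits must be zero
        return false;
      const uint32_t B = (Bits >> 25) & 0x1F;
      if (B != 0 && B != 0x1F)           // bits 29..25 must agree
        return false;
      return ((Bits >> 30) & 1) != ((Bits >> 29) & 1); // bit 30 = NOT(bit 29)
    }

Note that 0.0 itself fails this test, which is why the fcmp lowering earlier needed the separate OperandARM32FlexFpZero form for vcmp.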
