src/IceTargetLoweringARM32.cpp - Issue 1143323013: Subzero ARM32: Lower shift and zext, sext, and trunc.

Unified Diff: src/IceTargetLoweringARM32.cpp

Issue 1143323013: Subzero ARM32: Lower shift and zext, sext, and trunc. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: clean up a comment Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/IceTargetLoweringARM32.cpp

diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp

index 37f5e19ed65dee937cd8b01f31877211192f4541..228e32d0aaab41f82d2b9d8e76941e052e5f28f9 100644

--- a/src/IceTargetLoweringARM32.cpp

+++ b/src/IceTargetLoweringARM32.cpp

@@ -1069,9 +1069,90 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {

_mov(DestLo, T_Lo);

_mov(DestHi, T_Hi);

} break;

- case InstArithmetic::Shl:

+ case InstArithmetic::Shl: {

+ // a=b<<c ==>

+ // GCC 4.8 does:

+ // sub t_c1, c.lo, #32

+ // lsl t_hi, b.hi, c.lo

+ // orr t_hi, t_hi, b.lo, lsl t_c1

+ // rsb t_c2, c.lo, #32

+ // orr t_hi, t_hi, b.lo, lsr t_c2

+ // lsl t_lo, b.lo, c.lo

+ // a.lo = t_lo

+ // a.hi = t_hi

+ // Can be strength-reduced for constant-shifts, but we don't do

+ // that for now.

+ // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.

+ // On ARM, shifts only take the lower 8 bits of the shift register,

+ // and saturate to the range 0-32, so the negative value will

+ // saturate to 32.

+ Variable *T_Hi = makeReg(IceType_i32);

+ Variable *Src1RLo = legalizeToVar(Src1Lo);

+ Constant *ThirtyTwo = Ctx->getConstantInt32(32);

+ Variable *T_C1 = makeReg(IceType_i32);

+ Variable *T_C2 = makeReg(IceType_i32);

+ _sub(T_C1, Src1RLo, ThirtyTwo);

+ _lsl(T_Hi, Src0RHi, Src1RLo);

+ _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,

+ OperandARM32::LSL, T_C1));

+ _rsb(T_C2, Src1RLo, ThirtyTwo);

+ _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,

+ OperandARM32::LSR, T_C2));

+ _mov(DestHi, T_Hi);

+ Variable *T_Lo = makeReg(IceType_i32);

+ // _mov seems to sometimes have better register preferencing than lsl.

+ // Otherwise mov w/ lsl shifted register is a pseudo-instruction

+ // that maps to lsl.

+ _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,

+ OperandARM32::LSL, Src1RLo));

+ _mov(DestLo, T_Lo);

+ } break;

Jim Stichnoth 2015/06/04 22:10:59 I've been thinking (in the x86-32 version and also

jvoung (off chromium) 2015/06/04 23:15:14 That sounds good to me, but will leave for a separ

case InstArithmetic::Lshr:

- case InstArithmetic::Ashr:

+ // a=b>>c (unsigned) ==>

+ // GCC 4.8 does:

+ // rsb t_c1, c.lo, #32

+ // lsr t_lo, b.lo, c.lo

+ // orr t_lo, t_lo, b.hi, lsl t_c1

+ // sub t_c2, c.lo, #32

+ // orr t_lo, t_lo, b.hi, lsr t_c2

+ // lsr t_hi, b.hi, c.lo

+ // a.lo = t_lo

+ // a.hi = t_hi

+ case InstArithmetic::Ashr: {

+ // a=b>>c (signed) ==> ...

+ // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,

+ // and the next orr should be conditioned on PLUS. The last two

+ // right shifts should also be arithmetic.

+ bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;

+ Variable *T_Lo = makeReg(IceType_i32);

+ Variable *Src1RLo = legalizeToVar(Src1Lo);

+ Constant *ThirtyTwo = Ctx->getConstantInt32(32);

+ Variable *T_C1 = makeReg(IceType_i32);

+ Variable *T_C2 = makeReg(IceType_i32);

+ _rsb(T_C1, Src1RLo, ThirtyTwo);

+ _lsr(T_Lo, Src0RLo, Src1RLo);

+ _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,

+ OperandARM32::LSL, T_C1));

+ OperandARM32::ShiftKind RShiftKind;

+ CondARM32::Cond Pred;

+ if (IsAshr) {

+ _subs(T_C2, Src1RLo, ThirtyTwo);

+ RShiftKind = OperandARM32::ASR;

+ Pred = CondARM32::PL;

+ } else {

+ _sub(T_C2, Src1RLo, ThirtyTwo);

+ RShiftKind = OperandARM32::LSR;

+ Pred = CondARM32::AL;

+ }

+ _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,

+ RShiftKind, T_C2),

+ Pred);

+ _mov(DestLo, T_Lo);

+ Variable *T_Hi = makeReg(IceType_i32);

+ _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,

+ RShiftKind, Src1RLo));

+ _mov(DestHi, T_Hi);

+ } break;

case InstArithmetic::Udiv:

case InstArithmetic::Sdiv:

case InstArithmetic::Urem:

@@ -1122,13 +1203,16 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {

_mov(Dest, T);

} break;

case InstArithmetic::Shl:

- UnimplementedError(Func->getContext()->getFlags());

+ _lsl(T, Src0R, Src1);

+ _mov(Dest, T);

break;

case InstArithmetic::Lshr:

- UnimplementedError(Func->getContext()->getFlags());

+ _lsr(T, Src0R, Src1);

+ _mov(Dest, T);

break;

case InstArithmetic::Ashr:

- UnimplementedError(Func->getContext()->getFlags());

+ _asr(T, Src0R, Src1);

+ _mov(Dest, T);

break;

case InstArithmetic::Udiv:

UnimplementedError(Func->getContext()->getFlags());

@@ -1311,20 +1395,123 @@ void TargetARM32::lowerCall(const InstCall *Instr) {

void TargetARM32::lowerCast(const InstCast *Inst) {

InstCast::OpKind CastKind = Inst->getCastKind();

+ Variable *Dest = Inst->getDest();

+ Operand *Src0 = Inst->getSrc(0);

switch (CastKind) {

default:

Func->setError("Cast type not supported");

return;

case InstCast::Sext: {

- UnimplementedError(Func->getContext()->getFlags());

+ if (isVectorType(Dest->getType())) {

+ UnimplementedError(Func->getContext()->getFlags());

+ } else if (Dest->getType() == IceType_i64) {

+ // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2

+ Constant *Shift = Ctx->getConstantInt32(31);

+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

+ Variable *T_Lo = makeReg(DestLo->getType());

+ if (Src0->getType() == IceType_i32) {

+ Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);

+ _mov(T_Lo, Src0RF);

+ } else if (Src0->getType() == IceType_i1) {

+ Variable *Src0R = legalizeToVar(Src0);

+ _lsl(T_Lo, Src0R, Shift);

+ _asr(T_Lo, T_Lo, Shift);

+ } else {

+ Variable *Src0R = legalizeToVar(Src0);

+ _sxt(T_Lo, Src0R);

+ }

+ _mov(DestLo, T_Lo);

+ Variable *T_Hi = makeReg(DestHi->getType());

+ if (Src0->getType() != IceType_i1) {

+ _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,

+ OperandARM32::ASR, Shift));

+ } else {

+ // For i1, the asr instruction is already done above.

+ _mov(T_Hi, T_Lo);

+ }

+ _mov(DestHi, T_Hi);

+ } else if (Src0->getType() == IceType_i1) {

+ // GPR registers are 32-bit, so just use 31 as dst_bitwidth - 1.

+ // lsl t1, src_reg, 31

+ // asr t1, t1, 31

+ // dst = t1

+ Variable *Src0R = legalizeToVar(Src0);

+ Constant *ShiftAmount = Ctx->getConstantInt32(31);

Jim Stichnoth 2015/06/04 22:10:59 Maybe use the same variable name for ShiftAmount a

jvoung (off chromium) 2015/06/04 23:15:14 Done -- picked middle ground of ShiftAmt =)

+ Variable *T = makeReg(Dest->getType());

+ _lsl(T, Src0R, ShiftAmount);

+ _asr(T, T, ShiftAmount);

+ _mov(Dest, T);

+ } else {

+ // t1 = sxt src; dst = t1

+ Variable *Src0R = legalizeToVar(Src0);

+ Variable *T = makeReg(Dest->getType());

+ _sxt(T, Src0R);

+ _mov(Dest, T);

+ }

break;

}

case InstCast::Zext: {

- UnimplementedError(Func->getContext()->getFlags());

+ if (isVectorType(Dest->getType())) {

+ UnimplementedError(Func->getContext()->getFlags());

+ } else if (Dest->getType() == IceType_i64) {

+ // t1=uxtb src; dst.lo=t1; dst.hi=0

+ Constant *Zero = Ctx->getConstantZero(IceType_i32);

+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

+ Variable *T_Lo = makeReg(DestLo->getType());

+ // i32 and i1 can just take up the whole register.

+ // i32 doesn't need uxt, while i1 will have an and mask later anyway.

+ if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {

+ Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);

+ _mov(T_Lo, Src0RF);

+ } else {

+ Variable *Src0R = legalizeToVar(Src0);

+ _uxt(T_Lo, Src0R);

+ }

+ if (Src0->getType() == IceType_i1) {

+ Constant *One = Ctx->getConstantInt32(1);

+ _and(T_Lo, T_Lo, One);

+ }

+ _mov(DestLo, T_Lo);

+ Variable *T_Hi = makeReg(DestLo->getType());

+ _mov(T_Hi, Zero);

+ _mov(DestHi, T_Hi);

+ } else if (Src0->getType() == IceType_i1) {

+ // t = Src0; t &= 1; Dest = t

+ Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);

+ Constant *One = Ctx->getConstantInt32(1);

+ Variable *T = makeReg(Dest->getType());

+ // Just use _mov instead of _uxt since all registers are 32-bit.

+ // _uxt requires the source to be a register so could have required

+ // a _mov from legalize anyway.

+ _mov(T, Src0RF);

+ _and(T, T, One);

+ _mov(Dest, T);

+ } else {

+ // t1 = uxt src; dst = t1

+ Variable *Src0R = legalizeToVar(Src0);

+ Variable *T = makeReg(Dest->getType());

+ _uxt(T, Src0R);

+ _mov(Dest, T);

+ }

break;

}

case InstCast::Trunc: {

- UnimplementedError(Func->getContext()->getFlags());

+ if (isVectorType(Dest->getType())) {

+ UnimplementedError(Func->getContext()->getFlags());

+ } else {

+ Operand *Src0 = Inst->getSrc(0);

+ if (Src0->getType() == IceType_i64)

+ Src0 = loOperand(Src0);

+ Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);

+ // t1 = trunc Src0RF; Dest = t1

+ Variable *T = makeReg(Dest->getType());

+ _mov(T, Src0RF);

+ if (Dest->getType() == IceType_i1)

+ _and(T, T, Ctx->getConstantInt1(1));

+ _mov(Dest, T);

+ }

break;

}

case InstCast::Fptrunc:

@@ -1348,6 +1535,12 @@ void TargetARM32::lowerCast(const InstCast *Inst) {

break;

}

case InstCast::Bitcast: {

+ Operand *Src0 = Inst->getSrc(0);

+ if (Dest->getType() == Src0->getType()) {

+ InstAssign *Assign = InstAssign::create(Func, Dest, Src0);

+ lowerAssign(Assign);

+ return;

+ }

UnimplementedError(Func->getContext()->getFlags());

break;

}

« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | tests_lit/llvm2ice_tests/64bit.pnacl.ll » ('j') | tests_lit/llvm2ice_tests/64bit.pnacl.ll » ('J')