| Index: src/IceTargetLoweringARM32.cpp |
| diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp |
| index 73eb77c0f841b873fb542cbee4b895a3c82353d7..d6384aeec6a78d372a9ad12e729861bf9b751899 100644 |
| --- a/src/IceTargetLoweringARM32.cpp |
| +++ b/src/IceTargetLoweringARM32.cpp |
| @@ -40,6 +40,85 @@ void UnimplementedError(const ClFlags &Flags) { |
| } |
| } |
| +// The following table summarizes the logic for lowering the icmp instruction |
| +// for i32 and narrower types. Each icmp condition has a clear mapping to an |
| +// ARM32 conditional move instruction. |
| + |
| +const struct TableIcmp32_ { |
| + CondARM32::Cond Mapping; |
| +} TableIcmp32[] = { |
| +#define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \ |
| + { CondARM32::C_32 } \ |
| + , |
| + ICMPARM32_TABLE |
| +#undef X |
| +}; |
| +const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32); |
| + |
| +// The following table summarizes the logic for lowering the icmp instruction |
| +// for the i64 type. Two conditional moves are needed for setting to 1 or 0. |
| +// The operands may need to be swapped, and there is a slight difference |
| +// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc). |
| +const struct TableIcmp64_ { |
| + bool IsSigned; |
| + bool Swapped; |
| + CondARM32::Cond C1, C2; |
| +} TableIcmp64[] = { |
| +#define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \ |
| + { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 } \ |
| + , |
| + ICMPARM32_TABLE |
| +#undef X |
| +}; |
| +const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); |
| + |
| +CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) { |
| + size_t Index = static_cast<size_t>(Cond); |
| + assert(Index < TableIcmp32Size); |
| + return TableIcmp32[Index].Mapping; |
| +} |
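    A minimal, self-contained miniature of the x-macro pattern behind these
    tables. The real ICMPARM32_TABLE lives in a .def header with more columns
    and entries than shown here; every name below (MINI_ICMP_TABLE, MiniICond,
    getMiniMapping) is invented for illustration.

        #include <cassert>
        #include <cstddef>
        #include <cstdio>

        // Each X(tag, cond) row pairs an icmp condition with a condition-code
        // mnemonic. The enum and the lookup array both expand from this one
        // macro, so their row order cannot drift apart.
        #define MINI_ICMP_TABLE \
          X(Eq, "eq")           \
          X(Ne, "ne")           \
          X(Ult, "lo")

        enum class MiniICond {
        #define X(tag, cond) tag,
          MINI_ICMP_TABLE
        #undef X
        };

        const char *const MiniMapping[] = {
        #define X(tag, cond) cond,
            MINI_ICMP_TABLE
        #undef X
        };
        const size_t MiniMappingSize =
            sizeof(MiniMapping) / sizeof(MiniMapping[0]);

        // Mirrors getIcmp32Mapping: index by the enum value, guarded by assert.
        const char *getMiniMapping(MiniICond C) {
          size_t Index = static_cast<size_t>(C);
          assert(Index < MiniMappingSize);
          return MiniMapping[Index];
        }

        int main() {
          std::printf("Ult maps to '%s'\n", getMiniMapping(MiniICond::Ult));
          return 0;
        }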
| + |
| +// In some cases, there are x-macros tables for both high-level and |
| +// low-level instructions/operands that use the same enum key value. |
| +// The tables are kept separate to maintain a proper separation |
| +// between abstraction layers. There is a risk that the tables could |
| +// get out of sync if enum values are reordered or if entries are |
| +// added or deleted. The following dummy namespaces use |
| +// static_asserts to ensure everything is kept in sync. |
| + |
| +// Validate the enum values in ICMPARM32_TABLE. |
| +namespace dummy1 { |
| +// Define a temporary set of enum values based on low-level table |
| +// entries. |
| +enum _tmp_enum { |
| +#define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val, |
| + ICMPARM32_TABLE |
| +#undef X |
| + _num |
| +}; |
| +// Define a set of constants based on high-level table entries. |
| +#define X(tag, str) static const int _table1_##tag = InstIcmp::tag; |
| +ICEINSTICMP_TABLE |
| +#undef X |
| +// Define a set of constants based on low-level table entries, and |
| +// ensure the table entry keys are consistent. |
| +#define X(val, signed, swapped64, C_32, C1_64, C2_64) \ |
| + static const int _table2_##val = _tmp_##val; \ |
| + static_assert( \ |
| + _table1_##val == _table2_##val, \ |
| + "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); |
| +ICMPARM32_TABLE |
| +#undef X |
| +// Repeat the static asserts with respect to the high-level table |
| +// entries in case the high-level table has extra entries. |
| +#define X(tag, str) \ |
| + static_assert( \ |
| + _table1_##tag == _table2_##tag, \ |
| + "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); |
| +ICEINSTICMP_TABLE |
| +#undef X |
| +} // end of namespace dummy1 |
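    The same compile-time check in a stripped-down, compilable form. The toy
    tables below are invented stand-ins for ICEINSTICMP_TABLE and
    ICMPARM32_TABLE; the real check has the added wrinkle that the two tables
    come from unrelated headers, which is why the dummy namespace exists.

        // Two x-macro tables that must stay keyed identically.
        #define HIGH_LEVEL_TABLE \
          X(Eq)                  \
          X(Ne)

        #define LOW_LEVEL_TABLE \
          X(Eq)                 \
          X(Ne)

        namespace check {
        // Positions of keys in the low-level table...
        enum _tmp_enum {
        #define X(tag) _tmp_##tag,
          LOW_LEVEL_TABLE
        #undef X
        };
        // ...and in the high-level table.
        enum _hl_enum {
        #define X(tag) _hl_##tag,
          HIGH_LEVEL_TABLE
        #undef X
        };
        // If either table reorders, adds, or drops a key, the corresponding
        // static_assert (or the name lookup itself) fails at compile time.
        #define X(tag)                                                        \
          static_assert(static_cast<int>(_tmp_##tag) ==                       \
                            static_cast<int>(_hl_##tag),                      \
                        "HIGH_LEVEL_TABLE and LOW_LEVEL_TABLE are out of sync");
        LOW_LEVEL_TABLE
        #undef X
        } // end of namespace check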
| + |
| // The maximum number of arguments to pass in GPR registers. |
| const uint32_t ARM32_MAX_GPR_ARG = 4; |
| @@ -218,9 +297,9 @@ void TargetARM32::translateOm1() { |
| } |
| bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
| - (void)I; |
| - (void)NextNode; |
| - UnimplementedError(Func->getContext()->getFlags()); |
| + if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) { |
| + return Br->optimizeBranch(NextNode); |
| + } |
| return false; |
| } |
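    InstARM32Br::optimizeBranch itself is outside this hunk, so the following
    is only a sketch of the usual shape of such a hook, with invented stand-in
    types (Node, Branch); the real class may differ.

        struct Node { int Number; };

        struct Branch {
          bool IsConditional;
          const Node *TargetTrue;  // taken target (sole target if unconditional)
          const Node *TargetFalse; // fallthrough target of a conditional branch
          bool Dead = false;

          // Returns true if the branch was changed, matching doBranchOpt's
          // contract of reporting whether an optimization fired.
          bool optimizeBranch(const Node *NextNode) {
            // An unconditional branch to the very next node is a no-op.
            if (!IsConditional && TargetTrue == NextNode) {
              Dead = true;
              return true;
            }
            // If the false target is the next node, the explicit unconditional
            // arm is redundant; plain fallthrough covers it.
            if (IsConditional && TargetFalse == NextNode) {
              TargetFalse = nullptr; // keep only the conditional arm
              return true;
            }
            return false;
          }
        };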
| @@ -750,13 +829,109 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) { |
| } |
| void TargetARM32::lowerBr(const InstBr *Inst) { |
| - (void)Inst; |
| - UnimplementedError(Func->getContext()->getFlags()); |
| + if (Inst->isUnconditional()) { |
| + _br(Inst->getTargetUnconditional()); |
| + return; |
| + } |
| + Operand *Cond = Inst->getCondition(); |
| + // TODO(jvoung): Handle folding opportunities. |
| + |
| + Variable *Src0R = legalizeToVar(Cond); |
| + Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| + _cmp(Src0R, Zero); |
| + _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse()); |
| } |
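    Assuming the condition variable is legalized into r0 (the actual register
    is the allocator's choice), the lowering above emits roughly the following;
    the trailing unconditional arm is exactly what doBranchOpt can later drop
    when the false target is the next emitted node.

        // Illustrative emission for "br i1 %cond, label %t, label %f":
        //   cmp r0, #0     ; set flags from the boolean condition
        //   bne .L_t       ; taken when %cond was nonzero
        //   b   .L_f       ; unconditional arm; removable when %f falls through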
| -void TargetARM32::lowerCall(const InstCall *Inst) { |
| - (void)Inst; |
| - UnimplementedError(Func->getContext()->getFlags()); |
| +void TargetARM32::lowerCall(const InstCall *Instr) { |
| + // TODO(jvoung): assign arguments to registers and stack. Also reserve stack. |
| + if (Instr->getNumArgs()) { |
| + UnimplementedError(Func->getContext()->getFlags()); |
| + } |
| + |
| + // Generate the call instruction. Assign its result to a temporary |
| + // with high register allocation weight. |
| + Variable *Dest = Instr->getDest(); |
| + // ReturnReg doubles as ReturnRegLo as necessary. |
| + Variable *ReturnReg = nullptr; |
| + Variable *ReturnRegHi = nullptr; |
| + if (Dest) { |
| + switch (Dest->getType()) { |
| + case IceType_NUM: |
| + llvm_unreachable("Invalid Call dest type"); |
| + break; |
| + case IceType_void: |
| + break; |
| + case IceType_i1: |
| + case IceType_i8: |
| + case IceType_i16: |
| + case IceType_i32: |
| + ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0); |
| + break; |
| + case IceType_i64: |
| + ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0); |
| + ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1); |
| + break; |
| + case IceType_f32: |
| + case IceType_f64: |
| + // Use S and D regs. |
| + UnimplementedError(Func->getContext()->getFlags()); |
| + break; |
| + case IceType_v4i1: |
| + case IceType_v8i1: |
| + case IceType_v16i1: |
| + case IceType_v16i8: |
| + case IceType_v8i16: |
| + case IceType_v4i32: |
| + case IceType_v4f32: |
| + // Use Q regs. |
| + UnimplementedError(Func->getContext()->getFlags()); |
| + break; |
| + } |
| + } |
| + Operand *CallTarget = Instr->getCallTarget(); |
| + // Allow ConstantRelocatable to be left alone as a direct call, |
| + // but force other constants like ConstantInteger32 to be in |
| + // a register and make it an indirect call. |
| + if (!llvm::isa<ConstantRelocatable>(CallTarget)) { |
| + CallTarget = legalize(CallTarget, Legal_Reg); |
| + } |
| + Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); |
| + Context.insert(NewCall); |
| + if (ReturnRegHi) |
| + Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| + |
| + // Insert a register-kill pseudo instruction. |
| + Context.insert(InstFakeKill::create(Func, NewCall)); |
| + |
| + // Generate a FakeUse to keep the call live if necessary. |
| + if (Instr->hasSideEffects() && ReturnReg) { |
| + Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
| + Context.insert(FakeUse); |
| + } |
| + |
| + if (!Dest) |
| + return; |
| + |
| + // Assign the result of the call to Dest. |
| + if (ReturnReg) { |
| + if (ReturnRegHi) { |
| + assert(Dest->getType() == IceType_i64); |
| + split64(Dest); |
| + Variable *DestLo = Dest->getLo(); |
| + Variable *DestHi = Dest->getHi(); |
| + _mov(DestLo, ReturnReg); |
| + _mov(DestHi, ReturnRegHi); |
| + } else { |
| + assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || |
| + Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || |
| + isVectorType(Dest->getType())); |
| + if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) { |
| + UnimplementedError(Func->getContext()->getFlags()); |
| + } else { |
| + _mov(Dest, ReturnReg); |
| + } |
| + } |
| + } |
| } |
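    A hedged distillation of the return-register choice in the switch above:
    under the 32-bit ARM calling convention, scalar integer results come back
    in r0, i64 results in the r0:r1 pair, and FP/vector results (S/D and Q
    registers) are left unimplemented by this patch. All names below are
    invented.

        #include <cassert>
        #include <cstdint>

        enum class RetClass { None, R0, R0R1, Unimplemented };

        // BitWidth == 0 stands in for a void call.
        RetClass classifyReturn(uint32_t BitWidth, bool IsFloatOrVector) {
          if (IsFloatOrVector)
            return RetClass::Unimplemented; // would use S/D/Q registers
          if (BitWidth == 0)
            return RetClass::None;
          if (BitWidth <= 32)
            return RetClass::R0;            // i1/i8/i16/i32 -> r0
          return RetClass::R0R1;            // i64 -> r0 (lo) and r1 (hi)
        }

        int main() {
          assert(classifyReturn(32, false) == RetClass::R0);
          assert(classifyReturn(64, false) == RetClass::R0R1);
          assert(classifyReturn(64, true) == RetClass::Unimplemented);
          return 0;
        }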
| void TargetARM32::lowerCast(const InstCast *Inst) { |
| @@ -815,8 +990,132 @@ void TargetARM32::lowerFcmp(const InstFcmp *Inst) { |
| } |
| void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
| - (void)Inst; |
| - UnimplementedError(Func->getContext()->getFlags()); |
| + Variable *Dest = Inst->getDest(); |
| + Operand *Src0 = Inst->getSrc(0); |
| + Operand *Src1 = Inst->getSrc(1); |
| + |
| + if (isVectorType(Dest->getType())) { |
| + UnimplementedError(Func->getContext()->getFlags()); |
| + return; |
| + } |
| + |
| + // a=icmp cond, b, c ==> |
| + // GCC does: |
| + // cmp b.hi, c.hi or cmp b.lo, c.lo |
| + // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi |
| + // mov.<C1> t, #1 mov.<C1> t, #1 |
| + // mov.<C2> t, #0 mov.<C2> t, #0 |
| + // mov a, t mov a, t |
| + // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, b.hi, c.hi" |
| + // is used for signed compares. In some cases, b and c need to be swapped |
| + // as well. |
| + // |
| + // LLVM does: |
| + // for EQ and NE: |
| + // eor t1, b.hi, c.hi |
| + // eor t2, b.lo, c.lo |
| + // orrs t, t1, t2 |
| + // mov.<C> t, #1 |
| + // mov a, t |
| + // |
| + // That sequence is just as short but has fewer dependencies, giving |
| + // better ILP at the cost of more registers. |
| + // |
| + // Otherwise, for signed/unsigned <, <=, etc., LLVM uses a sequence with |
| + // two unconditional mov #0, two cmps, two conditional mov #1, |
| + // and one conditional reg mov. That has few dependencies for good ILP, |
| + // but is a longer sequence. |
| + // |
| + // So, we are going with the GCC version since it's usually better (except |
| + // perhaps for eq/ne). We could revisit special-casing eq/ne later. |
| + Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| + Constant *One = Ctx->getConstantInt32(1); |
| + if (Src0->getType() == IceType_i64) { |
| + InstIcmp::ICond Condition = Inst->getCondition(); |
| + size_t Index = static_cast<size_t>(Condition); |
| + assert(Index < TableIcmp64Size); |
| + Variable *Src0Lo, *Src0Hi; |
| + Operand *Src1LoRF, *Src1HiRF; |
| + if (TableIcmp64[Index].Swapped) { |
| + Src0Lo = legalizeToVar(loOperand(Src1)); |
| + Src0Hi = legalizeToVar(hiOperand(Src1)); |
| + Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
| + Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
| + } else { |
| + Src0Lo = legalizeToVar(loOperand(Src0)); |
| + Src0Hi = legalizeToVar(hiOperand(Src0)); |
| + Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); |
| + Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); |
| + } |
| + Variable *T = makeReg(IceType_i32); |
| + if (TableIcmp64[Index].IsSigned) { |
| + Variable *ScratchReg = makeReg(IceType_i32); |
| + _cmp(Src0Lo, Src1LoRF); |
| + _sbcs(ScratchReg, Src0Hi, Src1HiRF); |
| + } else { |
| + _cmp(Src0Hi, Src1HiRF); |
| + _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); |
| + } |
| + _mov(T, One, TableIcmp64[Index].C1); |
| + _mov_nonkillable(T, Zero, TableIcmp64[Index].C2); |
| + _mov(Dest, T); |
| + return; |
| + } |
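    A worked illustration of the unsigned path for "a = icmp ult i64 b, c".
    The actual condition codes are read from ICMPARM32_TABLE, which this
    review does not show, so LO/HS below are an assumption (the natural
    complementary pair for unsigned less-than).

        // Assumed emission (unsigned, operands not swapped):
        //   cmp    b.hi, c.hi   ; compare high words, setting flags
        //   cmpeq  b.lo, c.lo   ; executed only when the high words tie
        //   movlo  t, #1        ; C1: unsigned lower -> 1
        //   movhs  t, #0        ; C2: unsigned higher-or-same -> 0
        //   mov    a, t
        // The two cmps form a lexicographic 64-bit unsigned compare: the low
        // words decide the result only when the high words are equal.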
| + |
| + // a=icmp cond, b, c ==> |
| + // GCC does: |
| + // <u/s>xtb tb, b |
| + // <u/s>xtb tc, c |
| + // cmp tb, tc |
| + // mov.<C1> t, #0 |
| + // mov.<C2> t, #1 |
| + // mov a, t |
| + // where the unsigned/sign extension is not needed for 32-bit. |
| + // They also have special cases for EQ and NE. E.g., for NE: |
| + // <extend to tb, tc> |
| + // subs t, tb, tc |
| + // movne t, #1 |
| + // mov a, t |
| + // |
| + // LLVM does: |
| + // lsl tb, b, #<N> |
| + // mov t, #0 |
| + // cmp tb, c, lsl #<N> |
| + // mov.<C> t, #1 |
| + // mov a, t |
| + // |
| + // The left shift is by 0, 16, or 24, which allows the comparison to focus |
| + // on the bits that actually matter (for 16-bit or 8-bit signed/unsigned). |
| + // For the unsigned case, for some reason LLVM does something similar to |
| + // GCC and emits a uxtb first. It's not clear to me why that |
| + // special-casing is needed. |
    jvoung (off chromium), 2015/05/21 22:52:03 (inline comment):
    I omitted the uxtb -- haven't run any cross tests
| + // |
| + // We'll go with the LLVM way for now, since it's shorter and has just as |
| + // few dependencies. |
| + int32_t ShiftAmount = 32 - getScalarIntBitWidth(Src0->getType()); |
| + assert(ShiftAmount >= 0); |
| + Constant *ShiftConst = nullptr; |
| + Variable *Src0R = nullptr; |
| + Variable *T = makeReg(IceType_i32); |
| + if (ShiftAmount) { |
| + ShiftConst = Ctx->getConstantInt32(ShiftAmount); |
| + Src0R = makeReg(IceType_i32); |
| + _lsl(Src0R, legalizeToVar(Src0), ShiftConst); |
| + } else { |
| + Src0R = legalizeToVar(Src0); |
| + } |
| + _mov(T, Zero); |
| + if (ShiftAmount) { |
| + Variable *Src1R = legalizeToVar(Src1); |
| + OperandARM32FlexReg *Src1RShifted = OperandARM32FlexReg::create( |
| + Func, IceType_i32, Src1R, OperandARM32::LSL, ShiftConst); |
| + _cmp(Src0R, Src1RShifted); |
| + } else { |
| + Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex); |
| + _cmp(Src0R, Src1RF); |
| + } |
| + _mov_nonkillable(T, One, getIcmp32Mapping(Inst->getCondition())); |
| + _mov(Dest, T); |
| + return; |
| } |
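    The shift trick is easy to sanity-check in plain C++: shifting both
    operands so the narrow value occupies the top bits makes any stale low
    bits fall out of a full-width compare. A minimal sketch for the unsigned
    i8 case (the function name is invented); the signed case would compare
    the same shifted values as int32_t instead.

        #include <cassert>
        #include <cstdint>

        // Unsigned i8 compare via the lsl trick: ShiftAmount = 32 - 8 = 24.
        // The upper 24 bits of B and C may hold garbage; the shifts discard
        // them and align the 8 meaningful bits at the top of the register.
        bool icmpUlt8ViaShift(uint32_t B, uint32_t C) {
          const int ShiftAmount = 32 - 8;
          return (B << ShiftAmount) < (C << ShiftAmount);
        }

        int main() {
          // 2 u< 3 holds despite garbage in the upper bits of each input.
          assert(icmpUlt8ViaShift(0xFFFFFF02u, 0x00000003u));
          assert(!icmpUlt8ViaShift(0x00000003u, 0xEEEEEE02u)); // !(3 u< 2)
          return 0;
        }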
| void TargetARM32::lowerInsertElement(const InstInsertElement *Inst) { |
| @@ -986,7 +1285,7 @@ void TargetARM32::lowerRet(const InstRet *Inst) { |
| UnimplementedError(Func->getContext()->getFlags()); |
| } else { |
| Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex); |
| - _mov(Reg, Src0F, RegARM32::Reg_r0); |
| + _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0); |
| } |
| } |
| // Add a ret instruction even if sandboxing is enabled, because |