Chromium Code Reviews| Index: src/IceTargetLoweringARM32.cpp |
| diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp |
| index b1081b3f1ef80e1f278788880ba81e7743f5d1e7..80cad94643ebabe6751099d5de5215734c8dfba0 100644 |
| --- a/src/IceTargetLoweringARM32.cpp |
| +++ b/src/IceTargetLoweringARM32.cpp |
| @@ -141,21 +141,34 @@ uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) { |
| return Utils::applyAlignment(Value, typeAlignInBytes); |
| } |
| +// Conservatively check if at compile time, we know that the operand is |
| +// definitely non-zero. |
| +bool operandMustNotBeZero(const Operand *Op) { |
|
Jim Stichnoth
2015/06/30 14:14:09
May want to tweak the function name to express the
jvoung (off chromium)
2015/06/30 16:58:08
Done.
|
| + if (auto *Const = llvm::dyn_cast<ConstantInteger32>(Op)) { |
| + return Const->getValue() != 0; |
| + } |
| + return false; |
| +} |
| + |
| } // end of anonymous namespace |
| -TargetARM32::TargetARM32(Cfg *Func) : TargetLowering(Func) { |
| +TargetARM32Features::TargetARM32Features(const ClFlags &Flags) { |
| static_assert( |
| (ARM32InstructionSet::End - ARM32InstructionSet::Begin) == |
| (TargetInstructionSet::ARM32InstructionSet_End - |
| TargetInstructionSet::ARM32InstructionSet_Begin), |
| "ARM32InstructionSet range different from TargetInstructionSet"); |
| - if (Func->getContext()->getFlags().getTargetInstructionSet() != |
| + if (Flags.getTargetInstructionSet() != |
| TargetInstructionSet::BaseInstructionSet) { |
| InstructionSet = static_cast<ARM32InstructionSet>( |
| - (Func->getContext()->getFlags().getTargetInstructionSet() - |
| + (Flags.getTargetInstructionSet() - |
| TargetInstructionSet::ARM32InstructionSet_Begin) + |
| ARM32InstructionSet::Begin); |
| } |
| +} |
| + |
| +TargetARM32::TargetARM32(Cfg *Func) |
| + : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) { |
| // TODO: Don't initialize IntegerRegisters and friends every time. |
| // Instead, initialize in some sort of static initializer for the |
| // class. |
| @@ -1009,6 +1022,87 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) { |
| _mov(Dest, SP); |
| } |
| +InstARM32Label *TargetARM32::beginDiv0Check(Type Ty, Operand *SrcLo, |
| + Operand *SrcHi) { |
| + bool ElideCheck = operandMustNotBeZero(SrcLo); |
|
Jim Stichnoth
2015/06/30 14:14:09
What do you think about starting the function like
jvoung (off chromium)
2015/06/30 16:58:08
Done.
|
| + if (Ty == IceType_i64) { |
| + ElideCheck = ElideCheck || operandMustNotBeZero(SrcHi); |
| + } |
| + if (ElideCheck) |
| + return nullptr; |
| + InstARM32Label *Label = InstARM32Label::create(Func, this); |
| + Variable *SrcLoReg = legalizeToVar(SrcLo); |
| + switch (Ty) { |
| + default: |
| + llvm_unreachable("Unexpected type"); |
| + case IceType_i8: { |
| + Operand *Mask = |
| + legalize(Ctx->getConstantInt32(0xFF), Legal_Reg | Legal_Flex); |
| + _tst(SrcLoReg, Mask); |
| + break; |
| + } |
| + case IceType_i16: { |
| + Operand *Mask = |
| + legalize(Ctx->getConstantInt32(0xFFFF), Legal_Reg | Legal_Flex); |
| + _tst(SrcLoReg, Mask); |
| + break; |
| + } |
| + case IceType_i32: { |
| + _tst(SrcLoReg, SrcLoReg); |
| + break; |
| + } |
| + case IceType_i64: { |
| + Variable *ScratchReg = makeReg(IceType_i32); |
| + _orrs(ScratchReg, SrcLoReg, SrcHi); |
| + // ScratchReg isn't going to be used, but we need the |
| + // side-effect of setting flags from this operation. |
| + Context.insert(InstFakeUse::create(Func, ScratchReg)); |
| + } |
| + } |
| + _br(Label, CondARM32::EQ); |
| + return Label; |
| +} |
| + |
| +void TargetARM32::endDiv0Check(InstARM32Label *CheckLabel) { |
| + if (CheckLabel) { |
| + Context.insert(CheckLabel); |
|
jvoung (off chromium)
2015/06/30 16:58:08
Oops, also realized that I need to do the trap, an
Jim Stichnoth
2015/06/30 17:06:51
Heh... I think those cross tests explicitly avoid
|
| + _trap(); |
| + } |
| +} |
| + |
| +void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, |
| + Operand *Src1, ExtInstr ExtFunc, |
| + DivInstr DivFunc, const char *DivHelperName, |
| + bool IsRemainder) { |
| + InstARM32Label *Label = beginDiv0Check(Dest->getType(), Src1, nullptr); |
| + Variable *Src1R = legalizeToVar(Src1); |
| + Variable *T0R = Src0R; |
| + Variable *T1R = Src1R; |
| + if (Dest->getType() != IceType_i32) { |
| + T0R = makeReg(IceType_i32); |
| + (this->*ExtFunc)(T0R, Src0R, CondARM32::AL); |
| + T1R = makeReg(IceType_i32); |
| + (this->*ExtFunc)(T1R, Src1R, CondARM32::AL); |
| + } |
| + if (hasCPUFeature(TargetARM32Features::HWDivArm)) { |
| + (this->*DivFunc)(T, T0R, T1R, CondARM32::AL); |
| + if (IsRemainder) { |
| + Variable *T2 = makeReg(IceType_i32); |
| + _mls(T2, T, T1R, T0R); |
| + T = T2; |
| + } |
| + _mov(Dest, T); |
| + } else { |
| + const SizeT MaxSrcs = 2; |
| + InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs); |
| + Call->addArg(T0R); |
| + Call->addArg(T1R); |
| + lowerCall(Call); |
| + } |
| + endDiv0Check(Label); |
| + return; |
| +} |
| + |
| void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| Variable *Dest = Inst->getDest(); |
| // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier |
| @@ -1182,9 +1276,49 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| case InstArithmetic::Udiv: |
| case InstArithmetic::Sdiv: |
| case InstArithmetic::Urem: |
| - case InstArithmetic::Srem: |
| - UnimplementedError(Func->getContext()->getFlags()); |
| - break; |
| + case InstArithmetic::Srem: { |
| + // Check for divide by 0 (ARM normally doesn't trap, but we want it |
| + // to trap for NaCl). |
| + InstARM32Label *Label = nullptr; |
| + // Src1Lo and Src1Hi may have already been legalized to a register, |
| + // which will hide a constant source operand. Instead, check the |
| + // not-yet-legalized Src1 to optimize-out a divide by 0 check. |
| + if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { |
| + if (C64->getValue() == 0) { |
| + Label = beginDiv0Check(IceType_i64, Src1Lo, Src1Hi); |
| + } |
| + } else { |
| + Label = beginDiv0Check(IceType_i64, Src1Lo, Src1Hi); |
| + } |
| + // Technically, ARM has their own aeabi routines, but we can use the |
| + // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, |
| + // but uses the more standard __moddi3 for rem. |
| + const char *HelperName = ""; |
| + switch (Inst->getOp()) { |
| + case InstArithmetic::Udiv: |
| + HelperName = H_udiv_i64; |
| + break; |
| + case InstArithmetic::Sdiv: |
| + HelperName = H_sdiv_i64; |
| + break; |
| + case InstArithmetic::Urem: |
| + HelperName = H_urem_i64; |
| + break; |
| + case InstArithmetic::Srem: |
| + HelperName = H_srem_i64; |
| + break; |
| + default: |
| + llvm_unreachable("Should have only matched div ops."); |
| + break; |
| + } |
| + const SizeT MaxSrcs = 2; |
|
Jim Stichnoth
2015/06/30 14:14:09
constexpr?
jvoung (off chromium)
2015/06/30 16:58:08
Done.
|
| + InstCall *Call = makeHelperCall(HelperName, Dest, MaxSrcs); |
| + Call->addArg(Inst->getSrc(0)); |
| + Call->addArg(Inst->getSrc(1)); |
| + lowerCall(Call); |
| + endDiv0Check(Label); |
| + return; |
| + } |
| case InstArithmetic::Fadd: |
| case InstArithmetic::Fsub: |
| case InstArithmetic::Fmul: |
| @@ -1197,61 +1331,73 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| UnimplementedError(Func->getContext()->getFlags()); |
| } else { // Dest->getType() is non-i64 scalar |
| Variable *Src0R = legalizeToVar(Inst->getSrc(0)); |
| - Src1 = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex); |
| + Operand *Src1RF = legalize(Inst->getSrc(1), Legal_Reg | Legal_Flex); |
| Variable *T = makeReg(Dest->getType()); |
| switch (Inst->getOp()) { |
| case InstArithmetic::_num: |
| llvm_unreachable("Unknown arithmetic operator"); |
| break; |
| case InstArithmetic::Add: { |
| - _add(T, Src0R, Src1); |
| + _add(T, Src0R, Src1RF); |
| _mov(Dest, T); |
| } break; |
| case InstArithmetic::And: { |
| - _and(T, Src0R, Src1); |
| + _and(T, Src0R, Src1RF); |
| _mov(Dest, T); |
| } break; |
| case InstArithmetic::Or: { |
| - _orr(T, Src0R, Src1); |
| + _orr(T, Src0R, Src1RF); |
| _mov(Dest, T); |
| } break; |
| case InstArithmetic::Xor: { |
| - _eor(T, Src0R, Src1); |
| + _eor(T, Src0R, Src1RF); |
| _mov(Dest, T); |
| } break; |
| case InstArithmetic::Sub: { |
| - _sub(T, Src0R, Src1); |
| + _sub(T, Src0R, Src1RF); |
| _mov(Dest, T); |
| } break; |
| case InstArithmetic::Mul: { |
| - Variable *Src1R = legalizeToVar(Src1); |
| + Variable *Src1R = legalizeToVar(Src1RF); |
| _mul(T, Src0R, Src1R); |
| _mov(Dest, T); |
| } break; |
| case InstArithmetic::Shl: |
| - _lsl(T, Src0R, Src1); |
| + _lsl(T, Src0R, Src1RF); |
| _mov(Dest, T); |
| break; |
| case InstArithmetic::Lshr: |
| - _lsr(T, Src0R, Src1); |
| + _lsr(T, Src0R, Src1RF); |
| _mov(Dest, T); |
| break; |
| case InstArithmetic::Ashr: |
| - _asr(T, Src0R, Src1); |
| + _asr(T, Src0R, Src1RF); |
| _mov(Dest, T); |
| break; |
| - case InstArithmetic::Udiv: |
| - UnimplementedError(Func->getContext()->getFlags()); |
| - break; |
| - case InstArithmetic::Sdiv: |
| - UnimplementedError(Func->getContext()->getFlags()); |
| - break; |
| - case InstArithmetic::Urem: |
| - UnimplementedError(Func->getContext()->getFlags()); |
| - break; |
| - case InstArithmetic::Srem: |
| - UnimplementedError(Func->getContext()->getFlags()); |
| - break; |
| + case InstArithmetic::Udiv: { |
| + constexpr bool IsRemainder = false; |
| + lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, |
| + &TargetARM32::_udiv, H_udiv_i32, IsRemainder); |
| + return; |
| + } |
| + case InstArithmetic::Sdiv: { |
| + constexpr bool IsRemainder = false; |
| + lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, |
| + &TargetARM32::_sdiv, H_sdiv_i32, IsRemainder); |
| + return; |
| + } |
| + case InstArithmetic::Urem: { |
| + constexpr bool IsRemainder = true; |
| + lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, |
| + &TargetARM32::_udiv, H_urem_i32, IsRemainder); |
| + return; |
| + } |
| + case InstArithmetic::Srem: { |
| + constexpr bool IsRemainder = true; |
| + lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, |
| + &TargetARM32::_sdiv, H_srem_i32, IsRemainder); |
| + return; |
| + } |
| case InstArithmetic::Fadd: |
| UnimplementedError(Func->getContext()->getFlags()); |
| break; |
| @@ -1322,7 +1468,7 @@ void TargetARM32::lowerBr(const InstBr *Inst) { |
| Variable *Src0R = legalizeToVar(Cond); |
| Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| _cmp(Src0R, Zero); |
| - _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse()); |
| + _br(Inst->getTargetTrue(), Inst->getTargetFalse(), CondARM32::NE); |
| } |
| void TargetARM32::lowerCall(const InstCall *Instr) { |
| @@ -2057,7 +2203,7 @@ void TargetARM32::lowerSwitch(const InstSwitch *Inst) { |
| _br(Inst->getLabelDefault()); |
| return; |
| } |
| - |
| + |
| // 32 bit integer |
| Variable *Src0Var = legalizeToVar(Src0); |
| for (SizeT I = 0; I < NumCases; ++I) { |
| @@ -2070,7 +2216,7 @@ void TargetARM32::lowerSwitch(const InstSwitch *Inst) { |
| } |
| void TargetARM32::lowerUnreachable(const InstUnreachable * /*Inst*/) { |
| - UnimplementedError(Func->getContext()->getFlags()); |
| + _trap(); |
| } |
| // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
| @@ -2374,7 +2520,7 @@ void TargetDataARM32::lowerConstants() { |
| } |
| TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx) |
| - : TargetHeaderLowering(Ctx) {} |
| + : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {} |
| void TargetHeaderARM32::lower() { |
| OstreamLocker L(Ctx); |
| @@ -2388,12 +2534,18 @@ void TargetHeaderARM32::lower() { |
| // sub-subsection of the first public subsection of the attributes. |
| Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n"; |
| // Chromebooks are at least A15, but do A9 for higher compat. |
| - Str << ".cpu cortex-a9\n" |
| - << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n" |
| + // For some reason, the LLVM ARM asm parser has the .cpu directive override |
| + // the mattr specified on the commandline. So to test hwdiv, we need to set |
| + // the .cpu directive higher (can't just rely on --mattr=...). |
| + if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
| + Str << ".cpu cortex-a15\n"; |
| + } else { |
| + Str << ".cpu cortex-a9\n"; |
| + } |
| + Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n" |
| << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n"; |
| Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n" |
| << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n"; |
| - // TODO(jvoung): check other CPU features like HW div. |
| Str << ".fpu neon\n" |
| << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n" |
| << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n" |
| @@ -2407,6 +2559,9 @@ void TargetHeaderARM32::lower() { |
| << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n" |
| << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n" |
| << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; |
| + if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
| + Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; |
| + } |
| // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
| // However, for compatibility with current NaCl LLVM, don't claim that. |
| Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |