Index: src/IceTargetLoweringARM32.cpp |
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp |
index 6639da855c97822e11cd531bab1a4b637af61442..10fdfe12a1b4a6f7c1b9a6bb334caa0e5d8530bb 100644 |
--- a/src/IceTargetLoweringARM32.cpp |
+++ b/src/IceTargetLoweringARM32.cpp |
@@ -1575,7 +1575,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) { |
// Copy arguments that are passed on the stack to the appropriate |
// stack locations. |
- Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp); |
+ Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
for (auto &StackArg : StackArgs) { |
ConstantInteger32 *Loc = |
llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second)); |
@@ -1662,7 +1662,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) { |
if (ParameterAreaSizeBytes) { |
Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), |
Legal_Reg | Legal_Flex); |
- Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp); |
+ Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
_add(SP, SP, AddAmount); |
} |
@@ -2032,19 +2032,91 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
return; |
} |
case Intrinsics::Bswap: { |
- UnimplementedError(Func->getContext()->getFlags()); |
+ Variable *Dest = Instr->getDest(); |
+ Operand *Val = Instr->getArg(0); |
+ Type Ty = Val->getType(); |
+ if (Ty == IceType_i64) { |
+ Variable *Val_Lo = legalizeToVar(loOperand(Val)); |
+ Variable *Val_Hi = legalizeToVar(hiOperand(Val)); |
+ Variable *T_Lo = makeReg(IceType_i32); |
+ Variable *T_Hi = makeReg(IceType_i32); |
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
+ _rev(T_Lo, Val_Lo); |
+ _rev(T_Hi, Val_Hi); |
+ _mov(DestLo, T_Hi); |
+ _mov(DestHi, T_Lo); |
+ } else { |
+ assert(Ty == IceType_i32 || Ty == IceType_i16); |
+ Variable *ValR = legalizeToVar(Val); |
+ Variable *T = makeReg(Ty); |
+ _rev(T, ValR); |
+ if (Val->getType() == IceType_i16) { |
+ Operand *Sixteen = |
+ legalize(Ctx->getConstantInt32(16), Legal_Reg | Legal_Flex); |
+ _lsr(T, T, Sixteen); |
+ } |
+ _mov(Dest, T); |
+ } |
return; |
} |
case Intrinsics::Ctpop: { |
- UnimplementedError(Func->getContext()->getFlags()); |
+ Variable *Dest = Instr->getDest(); |
+ Operand *Val = Instr->getArg(0); |
+ InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) |
+ ? H_call_ctpop_i32 |
+ : H_call_ctpop_i64, |
+ Dest, 1); |
+ Call->addArg(Val); |
+ lowerCall(Call); |
+ // The popcount helpers always return 32-bit values, while the intrinsic's |
+ // signature matches some 64-bit platform's native instructions and |
+ // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest |
+ // just in case the user doesn't do that in the IR or doesn't toss the bits |
+ // via truncate. |
+ if (Val->getType() == IceType_i64) { |
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
+ Constant *Zero = Ctx->getConstantZero(IceType_i32); |
+ _mov(DestHi, Zero); |
+ } |
return; |
} |
case Intrinsics::Ctlz: { |
- UnimplementedError(Func->getContext()->getFlags()); |
+ // The "is zero undef" parameter is ignored and we always return |
+ // a well-defined value. |
+ Operand *Val = Instr->getArg(0); |
+ Variable *ValLoR; |
+ Variable *ValHiR = nullptr; |
+ if (Val->getType() == IceType_i64) { |
+ ValLoR = legalizeToVar(loOperand(Val)); |
+ ValHiR = legalizeToVar(hiOperand(Val)); |
+ } else { |
+ ValLoR = legalizeToVar(Val); |
+ } |
+ lowerCLZ(Instr->getDest(), ValLoR, ValHiR); |
return; |
} |
case Intrinsics::Cttz: { |
- UnimplementedError(Func->getContext()->getFlags()); |
+ // Essentially like Clz, but reverse the bits first. |
+ Operand *Val = Instr->getArg(0); |
+ Variable *ValLoR; |
+ Variable *ValHiR = nullptr; |
+ if (Val->getType() == IceType_i64) { |
+ ValLoR = legalizeToVar(loOperand(Val)); |
+ ValHiR = legalizeToVar(hiOperand(Val)); |
+ Variable *TLo = makeReg(IceType_i32); |
+ Variable *THi = makeReg(IceType_i32); |
+ _rbit(TLo, ValLoR); |
+ _rbit(THi, ValHiR); |
+ ValLoR = THi; |
+ ValHiR = TLo; |
+ } else { |
+ ValLoR = legalizeToVar(Val); |
+ Variable *T = makeReg(IceType_i32); |
+ _rbit(T, ValLoR); |
+ ValLoR = T; |
+ } |
+ lowerCLZ(Instr->getDest(), ValLoR, ValHiR); |
return; |
} |
case Intrinsics::Fabs: { |
@@ -2077,13 +2149,15 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
return; |
} |
case Intrinsics::Memset: { |
- // The value operand needs to be extended to a stack slot size |
- // because the PNaCl ABI requires arguments to be at least 32 bits |
- // wide. |
+ // The value operand needs to be extended to a stack slot size because the |
+ // PNaCl ABI requires arguments to be at least 32 bits wide. |
Operand *ValOp = Instr->getArg(1); |
assert(ValOp->getType() == IceType_i8); |
Variable *ValExt = Func->makeVariable(stackSlotType()); |
lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp)); |
+ // Technically, ARM has their own __aeabi_memset, but we can use plain |
+ // memset too. The value and size argument need to be flipped if we ever |
+ // decide to use __aeabi_memset. |
InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); |
Call->addArg(Instr->getArg(0)); |
Call->addArg(ValExt); |
@@ -2111,15 +2185,19 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
return; |
} |
case Intrinsics::Stacksave: { |
- UnimplementedError(Func->getContext()->getFlags()); |
+ Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
+ Variable *Dest = Instr->getDest(); |
+ _mov(Dest, SP); |
return; |
} |
case Intrinsics::Stackrestore: { |
- UnimplementedError(Func->getContext()->getFlags()); |
+ Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
+ Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex); |
+ _mov_nonkillable(SP, Val); |
return; |
} |
case Intrinsics::Trap: |
- UnimplementedError(Func->getContext()->getFlags()); |
+ _trap(); |
return; |
case Intrinsics::UnknownIntrinsic: |
Func->setError("Should not be lowering UnknownIntrinsic"); |
@@ -2128,6 +2206,34 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
return; |
} |
+void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) { |
+ Type Ty = Dest->getType(); |
+ assert(Ty == IceType_i32 || Ty == IceType_i64); |
+ Variable *T = makeReg(IceType_i32); |
+ _clz(T, ValLoR); |
+ if (Ty == IceType_i64) { |
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
+ Operand *Zero = |
+ legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); |
+ Operand *ThirtyTwo = |
+ legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); |
+ _cmp(ValHiR, Zero); |
+ Variable *T2 = makeReg(IceType_i32); |
+ _add(T2, T, ThirtyTwo); |
+ _clz(T2, ValHiR, CondARM32::NE); |
+ // T2 is actually a source as well when the predicate is not AL |
+ // (since it may leave T2 alone). We use set_dest_nonkillable to |
+ // prolong the liveness of T2 as if it was used as a source. |
+ _set_dest_nonkillable(); |
+ _mov(DestLo, T2); |
+ _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
+ return; |
+ } |
+ _mov(Dest, T); |
+ return; |
+} |
+ |
void TargetARM32::lowerLoad(const InstLoad *Load) { |
// A Load instruction can be treated the same as an Assign |
// instruction, after the source operand is transformed into an |
@@ -2186,7 +2292,7 @@ void TargetARM32::lowerRet(const InstRet *Inst) { |
// eliminated. TODO: Are there more places where the fake use |
// should be inserted? E.g. "void f(int n){while(1) g(n);}" may not |
// have a ret instruction. |
- Variable *SP = Func->getTarget()->getPhysicalRegister(RegARM32::Reg_sp); |
+ Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
Context.insert(InstFakeUse::create(Func, SP)); |
} |