Index: src/IceTargetLoweringARM32.cpp
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index ec6a98b20fbec9cdeeedbb3f590fda8a7b3d2675..14fa072ee0cec5386201c29c44d376d7a7826229 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -174,16 +174,19 @@ TargetARM32::TargetARM32(Cfg *Func)
// TODO: Don't initialize IntegerRegisters and friends every time.
// Instead, initialize in some sort of static initializer for the
// class.
+ // TODO: Limit this size, or do all the bitsets need to be the same width?
llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
- llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM);
+ llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);
+ llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM);
llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
ScratchRegs.resize(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
- isFP) \
+ isFP32, isFP64, isVec128) \
IntegerRegisters[RegARM32::val] = isInt; \
- FloatRegisters[RegARM32::val] = isFP; \
- VectorRegisters[RegARM32::val] = isFP; \
+ Float32Registers[RegARM32::val] = isFP32; \
+ Float64Registers[RegARM32::val] = isFP64; \
+ VectorRegisters[RegARM32::val] = isVec128; \
ScratchRegs[RegARM32::val] = scratch;
REGARM32_TABLE;
#undef X
@@ -193,8 +196,8 @@ TargetARM32::TargetARM32(Cfg *Func)
TypeToRegisterSet[IceType_i16] = IntegerRegisters;
TypeToRegisterSet[IceType_i32] = IntegerRegisters;
TypeToRegisterSet[IceType_i64] = IntegerRegisters;
- TypeToRegisterSet[IceType_f32] = FloatRegisters;
- TypeToRegisterSet[IceType_f64] = FloatRegisters;
+ TypeToRegisterSet[IceType_f32] = Float32Registers;
+ TypeToRegisterSet[IceType_f64] = Float64Registers;
TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
@@ -363,7 +366,7 @@ IceString TargetARM32::getRegName(SizeT RegNum, Type Ty) const {
(void)Ty;
static const char *RegNames[] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
- isFP) \
+ isFP32, isFP64, isVec128) \
name,
REGARM32_TABLE
#undef X
@@ -435,9 +438,7 @@ bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
int32_t RegLo, RegHi;
// Always start i64 registers at an even register, so this may end
// up padding away a register.
- if (NumGPRRegsUsed % 2 != 0) {
- ++NumGPRRegsUsed;
- }
+ NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2);
RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
++NumGPRRegsUsed;
RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
@@ -459,6 +460,33 @@ bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
return true;
}
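+// Helper that decides whether a float, double, or vector argument is passed
+// in a VFP/NEON register. Register usage is tracked in units of S registers:
+// one unit per S register, two per D register, four per Q register.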
+bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
+ if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS)
+ return false;
+ if (isVectorType(Ty)) {
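+ // A Q register overlaps four consecutive S registers (q0 covers s0..s3),
+ // so round the consumed unit count up to a multiple of 4.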
+ NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4);
+ *Reg = RegARM32::Reg_q0 + (NumFPRegUnits / 4);
+ NumFPRegUnits += 4;
+ // If this bumps us past the boundary, don't allocate to a register, and
+ // leave the speculatively consumed register units marked as consumed.
+ if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
+ return false;
+ } else if (Ty == IceType_f64) {
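+ // A D register overlaps two consecutive S registers (d0 covers s0 and s1),
+ // so round the consumed unit count up to a multiple of 2.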
+ NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2);
+ *Reg = RegARM32::Reg_d0 + (NumFPRegUnits / 2);
+ NumFPRegUnits += 2;
+ // If this bumps us past the boundary, don't allocate to a register, and
+ // leave the speculatively consumed register units marked as consumed.
+ if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
+ return false;
+ } else {
+ assert(Ty == IceType_f32);
+ *Reg = RegARM32::Reg_s0 + NumFPRegUnits;
+ ++NumFPRegUnits;
+ }
+ return true;
+}
+
void TargetARM32::lowerArguments() {
VarList &Args = Func->getArgs();
TargetARM32::CallingConv CC;
@@ -472,14 +500,7 @@ void TargetARM32::lowerArguments() {
for (SizeT I = 0, E = Args.size(); I < E; ++I) {
Variable *Arg = Args[I];
Type Ty = Arg->getType();
- // TODO(jvoung): handle float/vector types.
- if (isVectorType(Ty)) {
- UnimplementedError(Func->getContext()->getFlags());
- continue;
- } else if (isFloatingType(Ty)) {
- UnimplementedError(Func->getContext()->getFlags());
- continue;
- } else if (Ty == IceType_i64) {
+ if (Ty == IceType_i64) {
std::pair<int32_t, int32_t> RegPair;
if (!CC.I64InRegs(&RegPair))
continue;
@@ -503,10 +524,15 @@ void TargetARM32::lowerArguments() {
Context.insert(InstAssign::create(Func, Arg, RegisterArg));
continue;
} else {
- assert(Ty == IceType_i32);
int32_t RegNum;
- if (!CC.I32InReg(&RegNum))
- continue;
+ if (isVectorType(Ty) || isFloatingType(Ty)) {
+ if (!CC.FPInReg(Ty, &RegNum))
+ continue;
+ } else {
+ assert(Ty == IceType_i32);
+ if (!CC.I32InReg(&RegNum))
+ continue;
+ }
Variable *RegisterArg = Func->makeVariable(Ty);
if (BuildDefs::dump()) {
RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
@@ -517,6 +543,7 @@ void TargetARM32::lowerArguments() {
Args[I] = RegisterArg;
Context.insert(InstAssign::create(Func, Arg, RegisterArg));
+ continue;
}
}
}
@@ -554,7 +581,10 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
Ctx->getConstantInt32(Arg->getStackOffset())));
if (isVectorType(Arg->getType())) {
+ // TODO: Use a vector load such as vld1 here once vector arguments are supported.
UnimplementedError(Func->getContext()->getFlags());
+ } else if (isFloatingType(Arg->getType())) {
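+ // vldr can load either an S or a D register, so it covers both f32 and f64.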
+ _vldr(Arg, Mem);
} else {
_ldr(Arg, Mem);
}
@@ -725,12 +755,9 @@ void TargetARM32::addProlog(CfgNode *Node) {
Type Ty = Arg->getType();
bool InRegs = false;
// Skip arguments passed in registers.
- if (isVectorType(Ty)) {
- UnimplementedError(Func->getContext()->getFlags());
- continue;
- } else if (isFloatingType(Ty)) {
- UnimplementedError(Func->getContext()->getFlags());
- continue;
+ if (isVectorType(Ty) || isFloatingType(Ty)) {
+ int32_t DummyReg;
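+ // Only the in-register decision matters here; the dummy register number
+ // is discarded.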
+ InRegs = CC.FPInReg(Ty, &DummyReg);
} else if (Ty == IceType_i64) {
std::pair<int32_t, int32_t> DummyRegs;
InRegs = CC.I64InRegs(&DummyRegs);
@@ -858,6 +885,8 @@ void TargetARM32::addEpilog(CfgNode *Node) {
bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const {
constexpr bool SignExt = false;
+ // TODO(jvoung): vldr of FP stack slots has a different limit from the
+ // plain stackSlotType().
return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset);
}
@@ -1121,7 +1150,7 @@ llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
llvm::SmallBitVector Registers(RegARM32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
- isFP) \
+ isFP32, isFP64, isVec128) \
if (scratch && (Include & RegSet_CallerSave)) \
Registers[RegARM32::val] = true; \
if (preserved && (Include & RegSet_CalleeSave)) \
@@ -1518,6 +1547,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
return;
} else if (isVectorType(Dest->getType())) {
UnimplementedError(Func->getContext()->getFlags());
+ // Add a fake def to keep liveness consistent in the meantime.
+ Context.insert(InstFakeDef::create(Func, Dest));
return;
}
// Dest->getType() is a non-i64 scalar.
@@ -1553,6 +1584,47 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
H_srem_i32, IsRemainder);
return;
}
+ case InstArithmetic::Frem: {
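+ // There is no ARM instruction for frem, so lower it to a runtime helper call.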
+ const SizeT MaxSrcs = 2;
+ Type Ty = Dest->getType();
+ InstCall *Call = makeHelperCall(
+ isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
+ Call->addArg(Src0R);
+ Call->addArg(Src1);
+ lowerCall(Call);
+ return;
+ }
+ }
+
+ // Handle floating-point arithmetic separately: these instructions require
+ // Src1 to be legalized to a register.
+ switch (Inst->getOp()) {
+ default:
+ break;
+ case InstArithmetic::Fadd: {
+ Variable *Src1R = legalizeToReg(Src1);
+ _vadd(T, Src0R, Src1R);
+ _vmov(Dest, T);
+ return;
+ }
+ case InstArithmetic::Fsub: {
+ Variable *Src1R = legalizeToReg(Src1);
+ _vsub(T, Src0R, Src1R);
+ _vmov(Dest, T);
+ return;
+ }
+ case InstArithmetic::Fmul: {
+ Variable *Src1R = legalizeToReg(Src1);
+ _vmul(T, Src0R, Src1R);
+ _vmov(Dest, T);
+ return;
+ }
+ case InstArithmetic::Fdiv: {
+ Variable *Src1R = legalizeToReg(Src1);
+ _vdiv(T, Src0R, Src1R);
+ _vmov(Dest, T);
+ return;
+ }
}
Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
@@ -1605,19 +1677,11 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
llvm_unreachable("Integer div/rem should have been handled earlier.");
return;
case InstArithmetic::Fadd:
- UnimplementedError(Func->getContext()->getFlags());
- return;
case InstArithmetic::Fsub:
- UnimplementedError(Func->getContext()->getFlags());
- return;
case InstArithmetic::Fmul:
- UnimplementedError(Func->getContext()->getFlags());
- return;
case InstArithmetic::Fdiv:
- UnimplementedError(Func->getContext()->getFlags());
- return;
case InstArithmetic::Frem:
- UnimplementedError(Func->getContext()->getFlags());
+ llvm_unreachable("Floating point arith should have been handled earlier.");
return;
}
}
@@ -1652,6 +1716,9 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) {
}
if (isVectorType(Dest->getType())) {
UnimplementedError(Func->getContext()->getFlags());
+ } else if (isFloatingType(Dest->getType())) {
+ Variable *SrcR = legalizeToReg(NewSrc);
+ _vmov(Dest, SrcR);
} else {
_mov(Dest, NewSrc);
}
@@ -1681,6 +1748,8 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
// Pair of Arg Operand -> GPR number assignments.
llvm::SmallVector<std::pair<Operand *, int32_t>,
TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
+ llvm::SmallVector<std::pair<Operand *, int32_t>,
+ TargetARM32::CallingConv::ARM32_MAX_FP_REG_UNITS> FPArgs;
// Pair of Arg Operand -> stack offset.
llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
int32_t ParameterAreaSizeBytes = 0;
@@ -1691,11 +1760,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
Operand *Arg = legalizeUndef(Instr->getArg(i));
Type Ty = Arg->getType();
bool InRegs = false;
- if (isVectorType(Ty)) {
- UnimplementedError(Func->getContext()->getFlags());
- } else if (isFloatingType(Ty)) {
- UnimplementedError(Func->getContext()->getFlags());
- } else if (Ty == IceType_i64) {
+ if (Ty == IceType_i64) {
std::pair<int32_t, int32_t> Regs;
if (CC.I64InRegs(&Regs)) {
InRegs = true;
@@ -1704,6 +1769,12 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
GPRArgs.push_back(std::make_pair(Lo, Regs.first));
GPRArgs.push_back(std::make_pair(Hi, Regs.second));
}
+ } else if (isVectorType(Ty) || isFloatingType(Ty)) {
+ int32_t Reg;
+ if (CC.FPInReg(Ty, &Reg)) {
+ InRegs = true;
+ FPArgs.push_back(std::make_pair(Arg, Reg));
+ }
} else {
assert(Ty == IceType_i32);
int32_t Reg;
@@ -1766,6 +1837,10 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
// registers after the call.
Context.insert(InstFakeUse::create(Func, Reg));
}
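+ // Do the same for FP and vector arguments: move each into its assigned
+ // register and add a fake use so it stays live up to the call.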
+ for (auto &FPArg : FPArgs) {
+ Variable *Reg = legalizeToReg(FPArg.first, FPArg.second);
+ Context.insert(InstFakeUse::create(Func, Reg));
+ }
// Generate the call instruction. Assign its result to a temporary
// with high register allocation weight.
@@ -1791,9 +1866,10 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
break;
case IceType_f32:
+ ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0);
+ break;
case IceType_f64:
- // Use S and D regs.
- UnimplementedError(Func->getContext()->getFlags());
+ ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0);
break;
case IceType_v4i1:
case IceType_v8i1:
@@ -1802,8 +1878,7 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
case IceType_v8i16:
case IceType_v4i32:
case IceType_v4f32:
- // Use Q regs.
- UnimplementedError(Func->getContext()->getFlags());
+ ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
break;
}
}
@@ -1853,12 +1928,11 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
_mov(DestLo, ReturnReg);
_mov(DestHi, ReturnRegHi);
} else {
- assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
- Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
- isVectorType(Dest->getType()));
if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
- UnimplementedError(Func->getContext()->getFlags());
+ _vmov(Dest, ReturnReg);
} else {
+ assert(isIntegerType(Dest->getType()) &&
+ typeWidthInBytes(Dest->getType()) <= 4);
_mov(Dest, ReturnReg);
}
}
@@ -2291,6 +2365,8 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return;
}
case Intrinsics::Fabs: {
+ // Add a fake def to keep liveness consistent in the meantime.
+ Context.insert(InstFakeDef::create(Func, Instr->getDest()));
UnimplementedError(Func->getContext()->getFlags());
return;
}
@@ -2352,7 +2428,11 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return;
}
case Intrinsics::Sqrt: {
- UnimplementedError(Func->getContext()->getFlags());
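+ // vsqrt computes the square root directly for both f32 and f64 operands.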
+ Variable *Src = legalizeToReg(Instr->getArg(0));
+ Variable *Dest = Instr->getDest();
+ Variable *T = makeReg(Dest->getType());
+ _vsqrt(T, Src);
+ _vmov(Dest, T);
return;
}
case Intrinsics::Stacksave: {
@@ -2440,16 +2520,22 @@ void TargetARM32::lowerRet(const InstRet *Inst) {
Variable *Reg = nullptr;
if (Inst->hasRetValue()) {
Operand *Src0 = Inst->getRetValue();
- if (Src0->getType() == IceType_i64) {
+ Type Ty = Src0->getType();
+ if (Ty == IceType_i64) {
Src0 = legalizeUndef(Src0);
Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
Reg = R0;
Context.insert(InstFakeUse::create(Func, R1));
- } else if (isScalarFloatingType(Src0->getType())) {
- UnimplementedError(Func->getContext()->getFlags());
+ } else if (Ty == IceType_f32) {
+ Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
+ Reg = S0;
+ } else if (Ty == IceType_f64) {
+ Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
+ Reg = D0;
} else if (isVectorType(Src0->getType())) {
- UnimplementedError(Func->getContext()->getFlags());
+ Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
+ Reg = Q0;
} else {
Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
_mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
@@ -2596,8 +2682,8 @@ Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
Type Ty = Src->getType();
Variable *Reg = makeReg(Ty, RegNum);
- if (isVectorType(Ty)) {
- UnimplementedError(Func->getContext()->getFlags());
+ if (isVectorType(Ty) || isFloatingType(Ty)) {
+ _vmov(Reg, Src);
} else {
// Mov's Src operand can really only be the flexible second operand type
// or a register. Users should guarantee that.
@@ -2646,7 +2732,13 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
}
if (!(Allowed & Legal_Mem)) {
Variable *Reg = makeReg(Ty, RegNum);
- _ldr(Reg, Mem);
+ if (isVectorType(Ty)) {
+ UnimplementedError(Func->getContext()->getFlags());
+ } else if (isFloatingType(Ty)) {
+ _vldr(Reg, Mem);
+ } else {
+ _ldr(Reg, Mem);
+ }
From = Reg;
} else {
From = Mem;
@@ -2716,11 +2808,25 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
_movt(Reg, C);
return Reg;
} else {
+ assert(isScalarFloatingType(Ty));
// Load floats/doubles from literal pool.
- UnimplementedError(Func->getContext()->getFlags());
- From = copyToReg(From, RegNum);
+ // TODO(jvoung): Allow certain immediates to be encoded directly in
+ // an operand. See Table A7-18 of the ARM manual:
+ // "Floating-point modified immediate constants".
+ // Or, for 32-bit floating point numbers, just encode the raw bits into a
+ // movw/movt pair to a GPR and vmov to an SREG, instead of using a movw/movt
+ // pair to get the const-pool address and then loading into an SREG.
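+ // Materialize the address of the constant's pool entry with movw/movt on
+ // the pool label, then load the constant through a memory operand based on
+ // that address.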
+ std::string Buffer;
+ llvm::raw_string_ostream StrBuf(Buffer);
+ llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
+ llvm::cast<Constant>(From)->setShouldBePooled(true);
+ Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
+ Variable *BaseReg = makeReg(getPointerType());
+ _movw(BaseReg, Offset);
+ _movt(BaseReg, Offset);
+ From = formMemoryOperand(BaseReg, Ty);
+ return copyToReg(From, RegNum);
}
- return From;
}
if (auto Var = llvm::dyn_cast<Variable>(From)) {