Index: src/IceTargetLoweringMIPS32.cpp |
diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp |
index 6d6c6b9316f43f8e48f4e759d7493ea7a7eea595..6acbac31bb8d31ac7b5c2949b0fdc14b3d0f12b6 100644 |
--- a/src/IceTargetLoweringMIPS32.cpp |
+++ b/src/IceTargetLoweringMIPS32.cpp |
@@ -90,8 +90,9 @@ constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 16; |
// stack alignment required for the given type. |
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) { |
size_t typeAlignInBytes = typeWidthInBytes(Ty); |
+ // Vectors are stored on stack with the same alignment as that of int type |
if (isVectorType(Ty)) |
- UnimplementedError(getFlags()); |
+ typeAlignInBytes = typeWidthInBytes(IceType_i32); |
return Utils::applyAlignment(Value, typeAlignInBytes); |
} |
@@ -228,19 +229,9 @@ void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) { |
const InstArithmetic::OpKind Op = |
llvm::cast<InstArithmetic>(Instr)->getOp(); |
if (isVectorType(DestTy)) { |
- switch (Op) { |
- default: |
- break; |
- case InstArithmetic::Fdiv: |
- case InstArithmetic::Frem: |
- case InstArithmetic::Sdiv: |
- case InstArithmetic::Srem: |
- case InstArithmetic::Udiv: |
- case InstArithmetic::Urem: |
- scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1)); |
- Instr->setDeleted(); |
- return; |
- } |
+ scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1)); |
+ Instr->setDeleted(); |
+ return; |
} |
switch (DestTy) { |
default: |
@@ -303,7 +294,6 @@ void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) { |
const Type SrcTy = Src0->getType(); |
auto *CastInstr = llvm::cast<InstCast>(Instr); |
const InstCast::OpKind CastKind = CastInstr->getCastKind(); |
- |
switch (CastKind) { |
default: |
return; |
@@ -424,6 +414,39 @@ void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) { |
Variable *Dest = Instr->getDest(); |
auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr); |
Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID; |
+ if (Dest && isVectorType(Dest->getType()) && ID == Intrinsics::Fabs) { |
+ Operand *Src0 = IntrinsicCall->getArg(0); |
+ GlobalString FabsFloat = Ctx->getGlobalString("llvm.fabs.f32"); |
+ Operand *CallTarget = Ctx->getConstantExternSym(FabsFloat); |
+ GlobalString FabsVec = Ctx->getGlobalString("llvm.fabs.v4f32"); |
+ bool BadIntrinsic = false; |
+ const Intrinsics::FullIntrinsicInfo *FullInfo = |
+ Ctx->getIntrinsicsInfo().find(FabsVec, BadIntrinsic); |
+ Intrinsics::IntrinsicInfo Info = FullInfo->Info; |
+ |
+ Variable *T = Func->makeVariable(IceType_v4f32); |
+ auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(T); |
+ VarVecOn32->initVecElement(Func); |
+ Context.insert<InstFakeDef>(T); |
+ |
+ for (SizeT i = 0; i < VarVecOn32->ElementsPerContainer; ++i) { |
+ auto *Index = Ctx->getConstantInt32(i); |
+ auto *Op = Func->makeVariable(IceType_f32); |
+ Context.insert<InstExtractElement>(Op, Src0, Index); |
+ auto *Res = Func->makeVariable(IceType_f32); |
+ Variable *DestT = Func->makeVariable(IceType_v4f32); |
+ auto *Call = |
+ Context.insert<InstIntrinsicCall>(1, Res, CallTarget, Info); |
+ Call->addArg(Op); |
+ Context.insert<InstInsertElement>(DestT, T, Res, Index); |
+ T = DestT; |
+ } |
+ |
+ Context.insert<InstAssign>(Dest, T); |
+ |
+ Instr->setDeleted(); |
+ return; |
+ } |
switch (ID) { |
default: |
return; |
@@ -788,8 +811,17 @@ Operand *TargetMIPS32::legalizeUndef(Operand *From, RegNumT RegNum) { |
// overestimated. If the constant being lowered is a 64 bit value, |
// then the result should be split and the lo and hi components will |
// need to go in uninitialized registers. |
- if (isVectorType(Ty)) |
- UnimplementedError(getFlags()); |
+ if (isVectorType(Ty)) { |
+ Variable *Var = makeReg(Ty, RegNum); |
+ auto *Reg = llvm::cast<VariableVecOn32>(Var); |
+ Reg->initVecElement(Func); |
+ auto *Zero = getZero(); |
+ Context.insert<InstFakeDef>(Zero); |
+ for (Variable *Var : Reg->getContainers()) { |
+ _mov(Var, Zero); |
+ } |
+ return Reg; |
+ } |
return Ctx->getConstantZero(Ty); |
} |
return From; |
@@ -859,7 +891,7 @@ TargetMIPS32::CallingConv::CallingConv() |
// number to make register allocation decisions. |
bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo, |
RegNumT *Reg) { |
- if (isScalarIntegerType(Ty)) |
+ if (isScalarIntegerType(Ty) || isVectorType(Ty)) |
return argInGPR(Ty, Reg); |
if (isScalarFloatingType(Ty)) { |
if (ArgNo == 0) { |
@@ -884,6 +916,13 @@ bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) { |
UnimplementedError(getFlags()); |
return false; |
} break; |
+ case IceType_v4i1: |
+ case IceType_v8i1: |
+ case IceType_v16i1: |
+ case IceType_v16i8: |
+ case IceType_v8i16: |
+ case IceType_v4i32: |
+ case IceType_v4f32: |
case IceType_i32: |
case IceType_f32: { |
Source = &GPRArgs; |
@@ -896,6 +935,12 @@ bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) { |
discardUnavailableGPRsAndTheirAliases(Source); |
+  // If $4 is used for any scalar type (or returning v4f32) then the next |
+  // vector type is passed in $6:$7:stack:stack |
+ if (isVectorType(Ty)) { |
+ alignGPR(Source); |
+ } |
+ |
if (Source->empty()) { |
GPRegsUsed.set(); |
return false; |
@@ -907,6 +952,21 @@ bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) { |
// Source->back() is marked as unavailable, and it is thus implicitly popped |
// from the stack. |
GPRegsUsed |= RegisterAliases[*Reg]; |
+ |
+ // All vector arguments irrespective of their base type are passed in GP |
+ // registers. First vector argument is passed in $4:$5:$6:$7 and 2nd |
+ // is passed in $6:$7:stack:stack. If it is 1st argument then discard |
+ // $4:$5:$6:$7 otherwise discard $6:$7 only. |
+ if (isVectorType(Ty)) { |
+ if (((unsigned)*Reg) == RegMIPS32::Reg_A0) { |
+ GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A1]; |
+ GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A2]; |
+ GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3]; |
+ } else { |
+ GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3]; |
+ } |
+ } |
+ |
return true; |
} |
@@ -997,11 +1057,32 @@ void TargetMIPS32::lowerArguments() { |
Context.init(Func->getEntryNode()); |
Context.setInsertPoint(Context.getCur()); |
- for (SizeT I = 0, E = Args.size(); I < E; ++I) { |
- Variable *Arg = Args[I]; |
+ // v4f32 is returned through stack. $4 is setup by the caller and passed as |
+ // first argument implicitly. Callee then copies the return vector at $4. |
+ if (isVectorFloatingType(Func->getReturnType())) { |
+ Variable *ImplicitRetVec = Func->makeVariable(IceType_i32); |
+ ImplicitRetVec->setName(Func, "ImplicitRet_v4f32"); |
+ ImplicitRetVec->setIsArg(); |
+ Args.insert(Args.begin(), ImplicitRetVec); |
+ setImplicitRet(ImplicitRetVec); |
+ Context.insert<InstFakeDef>(ImplicitRetVec); |
+ for (CfgNode *Node : Func->getNodes()) { |
+ for (Inst &Instr : Node->getInsts()) { |
+ if (llvm::isa<InstRet>(&Instr)) { |
+ Context.setInsertPoint(Instr); |
+ Context.insert<InstFakeUse>(ImplicitRetVec); |
+ break; |
+ } |
+ } |
+ } |
+ Context.setInsertPoint(Context.getCur()); |
+ } |
+ |
+ for (SizeT i = 0, E = Args.size(); i < E; ++i) { |
+ Variable *Arg = Args[i]; |
Type Ty = Arg->getType(); |
RegNumT RegNum; |
- if (!CC.argInReg(Ty, I, &RegNum)) { |
+ if (!CC.argInReg(Ty, i, &RegNum)) { |
continue; |
} |
Variable *RegisterArg = Func->makeVariable(Ty); |
@@ -1010,17 +1091,41 @@ void TargetMIPS32::lowerArguments() { |
} |
RegisterArg->setIsArg(); |
Arg->setIsArg(false); |
- Args[I] = RegisterArg; |
- switch (Ty) { |
- default: { RegisterArg->setRegNum(RegNum); } break; |
- case IceType_i64: { |
- auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg); |
- RegisterArg64->initHiLo(Func); |
- RegisterArg64->getLo()->setRegNum( |
- RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum))); |
- RegisterArg64->getHi()->setRegNum( |
- RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum))); |
- } break; |
+ Args[i] = RegisterArg; |
+ |
+ if (isVectorType(Ty)) { |
+ auto *RegisterArgVec = llvm::cast<VariableVecOn32>(RegisterArg); |
+ RegisterArgVec->initVecElement(Func); |
+ RegisterArgVec->getContainers()[0]->setRegNum( |
+ RegNumT::fixme((unsigned)RegNum + 0)); |
+ RegisterArgVec->getContainers()[1]->setRegNum( |
+ RegNumT::fixme((unsigned)RegNum + 1)); |
+ // First two elements of second vector argument are passed |
+ // in $6:$7 and remaining two on stack. Do not assign register |
+      // to them if this is the second vector argument. |
+ if (i == 0) { |
+ RegisterArgVec->getContainers()[2]->setRegNum( |
+ RegNumT::fixme((unsigned)RegNum + 2)); |
+ RegisterArgVec->getContainers()[3]->setRegNum( |
+ RegNumT::fixme((unsigned)RegNum + 3)); |
+ } else { |
+ RegisterArgVec->getContainers()[2]->setRegNum( |
+ RegNumT::fixme(RegNumT())); |
+ RegisterArgVec->getContainers()[3]->setRegNum( |
+ RegNumT::fixme(RegNumT())); |
+ } |
+ } else { |
+ switch (Ty) { |
+ default: { RegisterArg->setRegNum(RegNum); } break; |
+ case IceType_i64: { |
+ auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg); |
+ RegisterArg64->initHiLo(Func); |
+ RegisterArg64->getLo()->setRegNum( |
+ RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum))); |
+ RegisterArg64->getHi()->setRegNum( |
+ RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum))); |
+ } break; |
+ } |
} |
Context.insert<InstAssign>(Arg, RegisterArg); |
} |
@@ -1036,20 +1141,46 @@ Type TargetMIPS32::stackSlotType() { return IceType_i32; } |
// recursively on the components, taking care to handle Lo first because of the |
// little-endian architecture. Lastly, this function generates an instruction |
// to copy Arg into its assigned register if applicable. |
-void TargetMIPS32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
+void TargetMIPS32::finishArgumentLowering(Variable *Arg, bool PartialOnStack, |
+ Variable *FramePtr, |
size_t BasicFrameOffset, |
size_t *InArgsSizeBytes) { |
const Type Ty = Arg->getType(); |
*InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty); |
+  // If $4 is used for any scalar type (or returning v4f32) then the next |
+  // vector type is passed in $6:$7:stack:stack. Load the 3rd and 4th |
+  // elements from the argument stack. |
+ if (auto *ArgVecOn32 = llvm::dyn_cast<VariableVecOn32>(Arg)) { |
+ if (PartialOnStack == false) { |
+ auto *Elem0 = ArgVecOn32->getContainers()[0]; |
+ auto *Elem1 = ArgVecOn32->getContainers()[1]; |
+ finishArgumentLowering(Elem0, PartialOnStack, FramePtr, BasicFrameOffset, |
+ InArgsSizeBytes); |
+ finishArgumentLowering(Elem1, PartialOnStack, FramePtr, BasicFrameOffset, |
+ InArgsSizeBytes); |
+ } |
+ auto *Elem2 = ArgVecOn32->getContainers()[2]; |
+ auto *Elem3 = ArgVecOn32->getContainers()[3]; |
+ finishArgumentLowering(Elem2, PartialOnStack, FramePtr, BasicFrameOffset, |
+ InArgsSizeBytes); |
+ finishArgumentLowering(Elem3, PartialOnStack, FramePtr, BasicFrameOffset, |
+ InArgsSizeBytes); |
+ return; |
+ } |
+ |
if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { |
Variable *const Lo = Arg64On32->getLo(); |
Variable *const Hi = Arg64On32->getHi(); |
- finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
- finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
+ finishArgumentLowering(Lo, PartialOnStack, FramePtr, BasicFrameOffset, |
+ InArgsSizeBytes); |
+ finishArgumentLowering(Hi, PartialOnStack, FramePtr, BasicFrameOffset, |
+ InArgsSizeBytes); |
return; |
} |
+ |
assert(Ty != IceType_i64); |
+ assert(!isVectorType(Ty)); |
const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes; |
*InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
@@ -1262,13 +1393,25 @@ void TargetMIPS32::addProlog(CfgNode *Node) { |
for (Variable *Arg : Args) { |
RegNumT DummyReg; |
const Type Ty = Arg->getType(); |
+ bool PartialOnStack; |
// Skip arguments passed in registers. |
if (CC.argInReg(Ty, ArgNo, &DummyReg)) { |
- ArgNo++; |
- continue; |
+ // Load argument from stack: |
+ // 1. If this is first vector argument and return type is v4f32. |
+ // In this case $4 is used to pass stack address implicitly. |
+      // 3rd and 4th elements of the vector argument are passed through stack. |
+ // 2. If this is second vector argument. |
+ if (ArgNo != 0 && isVectorType(Ty)) { |
+ PartialOnStack = true; |
+ finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes, |
+ &InArgsSizeBytes); |
+ } |
} else { |
- finishArgumentLowering(Arg, FP, TotalStackSizeBytes, &InArgsSizeBytes); |
+ PartialOnStack = false; |
+ finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes, |
+ &InArgsSizeBytes); |
} |
+ ++ArgNo; |
} |
// Fill in stack offsets for locals. |
@@ -1571,6 +1714,42 @@ Operand *TargetMIPS32::loOperand(Operand *Operand) { |
return nullptr; |
} |
+Operand *TargetMIPS32::getOperandAtIndex(Operand *Operand, Type BaseType, |
+ uint32_t Index) { |
+ if (!isVectorType(Operand->getType())) { |
+ llvm::report_fatal_error("getOperandAtIndex: Operand is not vector"); |
+ return nullptr; |
+ } |
+ |
+ if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) { |
+ assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset); |
+ Variable *Base = Mem->getBase(); |
+ auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset()); |
+ assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4)); |
+ int32_t NextOffsetVal = |
+ Offset->getValue() + (Index * typeWidthInBytes(BaseType)); |
+ constexpr bool NoSignExt = false; |
+ if (!OperandMIPS32Mem::canHoldOffset(BaseType, NoSignExt, NextOffsetVal)) { |
+ Constant *_4 = Ctx->getConstantInt32(4); |
+ Variable *NewBase = Func->makeVariable(Base->getType()); |
+ lowerArithmetic( |
+ InstArithmetic::create(Func, InstArithmetic::Add, NewBase, Base, _4)); |
+ Base = NewBase; |
+ } else { |
+ Offset = |
+ llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal)); |
+ } |
+ return OperandMIPS32Mem::create(Func, BaseType, Base, Offset, |
+ Mem->getAddrMode()); |
+ } |
+ |
+ if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Operand)) |
+ return VarVecOn32->getContainers()[Index]; |
+ |
+ llvm_unreachable("Unsupported operand type"); |
+ return nullptr; |
+} |
+ |
Operand *TargetMIPS32::hiOperand(Operand *Operand) { |
assert(Operand->getType() == IceType_i64); |
if (Operand->getType() != IceType_i64) |
@@ -2005,25 +2184,33 @@ void TargetMIPS32::lowerAssign(const InstAssign *Instr) { |
_mov(DestLo, T_Lo); |
_mov(T_Hi, Src0Hi); |
_mov(DestHi, T_Hi); |
- } else { |
- Operand *SrcR; |
- if (Dest->hasReg()) { |
- // If Dest already has a physical register, then legalize the Src operand |
- // into a Variable with the same register assignment. This especially |
- // helps allow the use of Flex operands. |
- SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum()); |
- } else { |
- // Dest could be a stack operand. Since we could potentially need |
- // to do a Store (and store can only have Register operands), |
- // legalize this to a register. |
- SrcR = legalize(Src0, Legal_Reg); |
- } |
- if (isVectorType(Dest->getType())) { |
- UnimplementedLoweringError(this, Instr); |
- } else { |
- _mov(Dest, SrcR); |
+ return; |
+ } |
+ if (isVectorType(Dest->getType())) { |
+ auto *DstVec = llvm::dyn_cast<VariableVecOn32>(Dest); |
+ for (SizeT i = 0; i < DstVec->ElementsPerContainer; ++i) { |
+ auto *DCont = DstVec->getContainers()[i]; |
+ auto *SCont = |
+ legalize(getOperandAtIndex(Src0, IceType_i32, i), Legal_Reg); |
+ auto *TReg = makeReg(IceType_i32); |
+ _mov(TReg, SCont); |
+ _mov(DCont, TReg); |
} |
+ return; |
} |
+ Operand *SrcR; |
+ if (Dest->hasReg()) { |
+ // If Dest already has a physical register, then legalize the Src operand |
+ // into a Variable with the same register assignment. This especially |
+ // helps allow the use of Flex operands. |
+ SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum()); |
+ } else { |
+ // Dest could be a stack operand. Since we could potentially need |
+ // to do a Store (and store can only have Register operands), |
+ // legalize this to a register. |
+ SrcR = legalize(Src0, Legal_Reg); |
+ } |
+ _mov(Dest, SrcR); |
} |
void TargetMIPS32::lowerBr(const InstBr *Instr) { |
@@ -2112,6 +2299,7 @@ void TargetMIPS32::lowerBr(const InstBr *Instr) { |
} |
void TargetMIPS32::lowerCall(const InstCall *Instr) { |
+ CfgVector<Variable *> RegArgs; |
NeedsStackAlignment = true; |
// Assign arguments to registers and stack. Also reserve stack. |
@@ -2127,6 +2315,22 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
// Classify each argument operand according to the location where the |
// argument is passed. |
+ // v4f32 is returned through stack. $4 is setup by the caller and passed as |
+ // first argument implicitly. Callee then copies the return vector at $4. |
+ SizeT ArgNum = 0; |
+ Variable *Dest = Instr->getDest(); |
+ Variable *RetVecFloat = nullptr; |
+ if (Dest && isVectorFloatingType(Dest->getType())) { |
+ ArgNum = 1; |
+ CC.discardReg(RegMIPS32::Reg_A0); |
+ RetVecFloat = Func->makeVariable(IceType_i32); |
+ auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, 16); |
+ constexpr SizeT Alignment = 4; |
+ lowerAlloca(InstAlloca::create(Func, RetVecFloat, ByteCount, Alignment)); |
+ RegArgs.emplace_back( |
+ legalizeToReg(RetVecFloat, RegNumT::fixme(RegMIPS32::Reg_A0))); |
+ } |
+ |
for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
Operand *Arg = legalizeUndef(Instr->getArg(i)); |
const Type Ty = Arg->getType(); |
@@ -2136,14 +2340,52 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
InReg = CC.argInReg(Ty, i, &Reg); |
if (!InReg) { |
- ParameterAreaSizeBytes = |
- applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); |
- StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); |
- ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty); |
+ if (isVectorType(Ty)) { |
+ auto *ArgVec = llvm::cast<VariableVecOn32>(Arg); |
+ for (Variable *Elem : ArgVec->getContainers()) { |
+ ParameterAreaSizeBytes = |
+ applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i32); |
+ StackArgs.push_back(std::make_pair(Elem, ParameterAreaSizeBytes)); |
+ ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32); |
+ } |
+ } else { |
+ ParameterAreaSizeBytes = |
+ applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); |
+ StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); |
+ ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty); |
+ } |
+ ++ArgNum; |
continue; |
} |
- if (Ty == IceType_i64) { |
+ if (isVectorType(Ty)) { |
+ auto *ArgVec = llvm::cast<VariableVecOn32>(Arg); |
+ Operand *Elem0 = ArgVec->getContainers()[0]; |
+ Operand *Elem1 = ArgVec->getContainers()[1]; |
+ GPRArgs.push_back( |
+ std::make_pair(Elem0, RegNumT::fixme((unsigned)Reg + 0))); |
+ GPRArgs.push_back( |
+ std::make_pair(Elem1, RegNumT::fixme((unsigned)Reg + 1))); |
+ Operand *Elem2 = ArgVec->getContainers()[2]; |
+ Operand *Elem3 = ArgVec->getContainers()[3]; |
+ // First argument is passed in $4:$5:$6:$7 |
+ // Second and rest arguments are passed in $6:$7:stack:stack |
+ if (ArgNum == 0) { |
+ GPRArgs.push_back( |
+ std::make_pair(Elem2, RegNumT::fixme((unsigned)Reg + 2))); |
+ GPRArgs.push_back( |
+ std::make_pair(Elem3, RegNumT::fixme((unsigned)Reg + 3))); |
+ } else { |
+ ParameterAreaSizeBytes = |
+ applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i32); |
+ StackArgs.push_back(std::make_pair(Elem2, ParameterAreaSizeBytes)); |
+ ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32); |
+ ParameterAreaSizeBytes = |
+ applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i32); |
+ StackArgs.push_back(std::make_pair(Elem3, ParameterAreaSizeBytes)); |
+ ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32); |
+ } |
+ } else if (Ty == IceType_i64) { |
Operand *Lo = loOperand(Arg); |
Operand *Hi = hiOperand(Arg); |
GPRArgs.push_back( |
@@ -2155,6 +2397,7 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
} else { |
FPArgs.push_back(std::make_pair(Arg, Reg)); |
} |
+ ArgNum++; |
[Review comment by Jim Stichnoth, 2016/10/03 14:52:22: prefer "++ArgNum" over "ArgNum++".] |
} |
// Adjust the parameter area so that the stack is aligned. It is assumed that |
@@ -2183,7 +2426,7 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
// Generate the call instruction. Assign its result to a temporary with high |
// register allocation weight. |
- Variable *Dest = Instr->getDest(); |
+ |
// ReturnReg doubles as ReturnRegLo as necessary. |
Variable *ReturnReg = nullptr; |
Variable *ReturnRegHi = nullptr; |
@@ -2215,10 +2458,19 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
case IceType_v16i1: |
case IceType_v16i8: |
case IceType_v8i16: |
- case IceType_v4i32: |
+ case IceType_v4i32: { |
+ ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0); |
+ auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg); |
+ RetVec->initVecElement(Func); |
+ for (SizeT i = 0; i < RetVec->ElementsPerContainer; ++i) { |
+ auto *Var = RetVec->getContainers()[i]; |
+ Var->setRegNum(RegNumT::fixme(RegMIPS32::Reg_V0 + i)); |
+ } |
+ break; |
+ } |
case IceType_v4f32: |
- UnimplementedLoweringError(this, Instr); |
- return; |
+ ReturnReg = makeReg(IceType_i32, RegMIPS32::Reg_V0); |
+ break; |
} |
} |
Operand *CallTarget = Instr->getCallTarget(); |
@@ -2230,7 +2482,6 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
} |
// Copy arguments to be passed in registers to the appropriate registers. |
- CfgVector<Variable *> RegArgs; |
for (auto &FPArg : FPArgs) { |
RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second)); |
} |
@@ -2251,7 +2502,16 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
if (VariableAllocaUsed) |
_addiu(SP, SP, -MaxOutArgsSizeBytes); |
- Inst *NewCall = InstMIPS32Call::create(Func, ReturnReg, CallTarget); |
+ Inst *NewCall; |
+ |
+ // We don't need to define the return register if it is a vector. |
+ // We have inserted fake defs of it just after the call. |
+ if (ReturnReg && isVectorIntegerType(ReturnReg->getType())) { |
+ Variable *RetReg = nullptr; |
+ NewCall = InstMIPS32Call::create(Func, RetReg, CallTarget); |
+ } else { |
+ NewCall = InstMIPS32Call::create(Func, ReturnReg, CallTarget); |
+ } |
Context.insert(NewCall); |
if (VariableAllocaUsed) |
@@ -2263,18 +2523,49 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
if (ReturnRegHi) |
Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
+ |
+ if (ReturnReg) { |
+ if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) { |
+ for (Variable *Var : RetVec->getContainers()) { |
+ Context.insert(InstFakeDef::create(Func, Var)); |
+ } |
+ } |
+ } |
+ |
// Insert a register-kill pseudo instruction. |
Context.insert(InstFakeKill::create(Func, NewCall)); |
+ |
// Generate a FakeUse to keep the call live if necessary. |
if (Instr->hasSideEffects() && ReturnReg) { |
- Context.insert<InstFakeUse>(ReturnReg); |
+ if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) { |
+ for (Variable *Var : RetVec->getContainers()) { |
+ Context.insert<InstFakeUse>(Var); |
+ } |
+ } else { |
+ Context.insert<InstFakeUse>(ReturnReg); |
+ } |
} |
+ |
if (Dest == nullptr) |
return; |
// Assign the result of the call to Dest. |
if (ReturnReg) { |
- if (ReturnRegHi) { |
+ if (RetVecFloat) { |
+ auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest); |
+ for (SizeT i = 0; i < DestVecOn32->ElementsPerContainer; ++i) { |
+ auto *Var = DestVecOn32->getContainers()[i]; |
+ OperandMIPS32Mem *Mem = OperandMIPS32Mem::create( |
+ Func, IceType_i32, RetVecFloat, |
+ llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4))); |
+ _lw(Var, Mem); |
+ } |
+ } else if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) { |
+ auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest); |
+ for (SizeT i = 0; i < DestVecOn32->ElementsPerContainer; ++i) { |
+ _mov(DestVecOn32->getContainers()[i], RetVec->getContainers()[i]); |
+ } |
+ } else if (ReturnRegHi) { |
assert(Dest->getType() == IceType_i64); |
auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); |
Variable *DestLo = Dest64On32->getLo(); |
@@ -2286,12 +2577,7 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || |
isScalarFloatingType(Dest->getType()) || |
isVectorType(Dest->getType())); |
- if (isVectorType(Dest->getType())) { |
- UnimplementedLoweringError(this, Instr); |
- return; |
- } else { |
- _mov(Dest, ReturnReg); |
- } |
+ _mov(Dest, ReturnReg); |
} |
} |
} |
@@ -2453,7 +2739,65 @@ void TargetMIPS32::lowerCast(const InstCast *Instr) { |
} |
void TargetMIPS32::lowerExtractElement(const InstExtractElement *Instr) { |
- UnimplementedLoweringError(this, Instr); |
+ Variable *Dest = Instr->getDest(); |
+ const Type DestTy = Dest->getType(); |
+ Operand *Src1 = Instr->getSrc(1); |
+ if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
+ const uint32_t Index = Imm->getValue(); |
+ Variable *TDest = makeReg(DestTy); |
+ Variable *TReg = makeReg(DestTy); |
+ auto *Src0 = legalizeUndef(Instr->getSrc(0)); |
+ auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0); |
+ // Number of elements in each container |
+ uint32_t ElemPerCont = |
+ typeNumElements(Src0->getType()) / Src0R->ElementsPerContainer; |
+ auto *SrcE = Src0R->getContainers()[Index / ElemPerCont]; |
+ // Position of the element in the container |
+ uint32_t PosInCont = Index % ElemPerCont; |
+ if (ElemPerCont == 1) { |
+ _mov(TDest, SrcE); |
+ } else if (ElemPerCont == 2) { |
+ switch (PosInCont) { |
+ case 0: |
+ _andi(TDest, SrcE, 0xffff); |
+ break; |
+ case 1: |
+ _srl(TDest, SrcE, 16); |
+ break; |
+ default: |
+ llvm::report_fatal_error("ExtractElement: Invalid PosInCont"); |
+ break; |
+ } |
+ } else if (ElemPerCont == 4) { |
+ switch (PosInCont) { |
+ case 0: |
+ _andi(TDest, SrcE, 0xff); |
+ break; |
+ case 1: |
+ _srl(TReg, SrcE, 8); |
+ _andi(TDest, TReg, 0xff); |
+ break; |
+ case 2: |
+ _srl(TReg, SrcE, 16); |
+ _andi(TDest, TReg, 0xff); |
+ break; |
+ case 3: |
+ _srl(TDest, SrcE, 24); |
+ break; |
+ default: |
+ llvm::report_fatal_error("ExtractElement: Invalid PosInCont"); |
+ break; |
+ } |
+ } |
+ if (typeElementType(Src0R->getType()) == IceType_i1) { |
+ _andi(TReg, TDest, 0x1); |
+ _mov(Dest, TReg); |
+ } else { |
+ _mov(Dest, TDest); |
+ } |
+ return; |
+ } |
+ llvm::report_fatal_error("ExtractElement requires a constant index"); |
} |
void TargetMIPS32::lowerFcmp(const InstFcmp *Instr) { |
@@ -2765,7 +3109,111 @@ void TargetMIPS32::lowerIcmp(const InstIcmp *Instr) { |
} |
void TargetMIPS32::lowerInsertElement(const InstInsertElement *Instr) { |
- UnimplementedLoweringError(this, Instr); |
+ Variable *Dest = Instr->getDest(); |
+ const Type DestTy = Dest->getType(); |
+ Operand *Src2 = Instr->getSrc(2); |
+ if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) { |
+ const uint32_t Index = Imm->getValue(); |
+ // Vector to insert in |
+ auto *Src0 = Instr->getSrc(0); |
+ auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0); |
+ // Number of elements in each container |
+ uint32_t ElemPerCont = |
+ typeNumElements(Src0->getType()) / Src0R->ElementsPerContainer; |
+ // Source Element |
+ auto *SrcE = Src0R->getContainers()[Index / ElemPerCont]; |
+ Context.insert<InstFakeDef>(SrcE); |
+ // Dest is a vector |
+ auto *VDest = llvm::dyn_cast<VariableVecOn32>(Dest); |
+ VDest->initVecElement(Func); |
+ // Temp vector variable |
+ auto *TDest = makeReg(DestTy); |
+ auto *TVDest = llvm::dyn_cast<VariableVecOn32>(TDest); |
+ TVDest->initVecElement(Func); |
+ // Destination element |
+ auto *DstE = TVDest->getContainers()[Index / ElemPerCont]; |
+ // Element to insert |
+ auto *Src1R = legalizeToReg(Instr->getSrc(1)); |
+ auto *TReg1 = makeReg(Src1R->getType()); |
+ auto *TReg2 = makeReg(Src1R->getType()); |
+ auto *TReg3 = makeReg(Src1R->getType()); |
+ auto *TReg4 = makeReg(Src1R->getType()); |
+ auto *TReg5 = makeReg(Src1R->getType()); |
+ // Position of the element in the container |
+ uint32_t PosInCont = Index % ElemPerCont; |
+ // Load source vector in a temporary vector |
+ for (SizeT i = 0; i < TVDest->ElementsPerContainer; ++i) { |
+ auto *DCont = TVDest->getContainers()[i]; |
+ // Do not define DstE as we are going to redefine it |
+ if (DCont == DstE) |
+ continue; |
+ auto *SCont = Src0R->getContainers()[i]; |
+ auto *TReg = makeReg(IceType_i32); |
+ _mov(TReg, SCont); |
+ _mov(DCont, TReg); |
+ } |
+ // Insert the element |
+ if (ElemPerCont == 1) { |
+ _mov(DstE, Src1R); |
+ } else if (ElemPerCont == 2) { |
+ switch (PosInCont) { |
+ case 0: |
+ _andi(TReg1, Src1R, 0xffff); // Clear upper 16-bits of source |
+ _srl(TReg2, SrcE, 16); |
+ _sll(TReg3, TReg2, 16); // Clear lower 16-bits of element |
+ _or(DstE, TReg1, TReg3); |
+ break; |
+ case 1: |
+ _sll(TReg1, Src1R, 16); // Clear lower 16-bits of source |
+ _sll(TReg2, SrcE, 16); |
+ _srl(TReg3, TReg2, 16); // Clear upper 16-bits of element |
+ _or(DstE, TReg1, TReg3); |
+ break; |
+ default: |
+ llvm::report_fatal_error("InsertElement: Invalid PosInCont"); |
+ break; |
+ } |
+ } else if (ElemPerCont == 4) { |
+ switch (PosInCont) { |
+ case 0: |
+ _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source |
+ _srl(TReg2, SrcE, 8); |
+ _sll(TReg3, TReg2, 8); // Clear bits[7:0] of element |
+ _or(DstE, TReg1, TReg3); |
+ break; |
+ case 1: |
+ _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source |
+ _sll(TReg5, TReg1, 8); // Position in the destination |
+ _lui(TReg2, Ctx->getConstantInt32(0xffff)); |
+ _ori(TReg3, TReg2, 0x00ff); |
+ _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element |
+ _or(DstE, TReg5, TReg4); |
+ break; |
+ case 2: |
+ _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source |
+ _sll(TReg5, TReg1, 16); // Position in the destination |
+ _lui(TReg2, Ctx->getConstantInt32(0xff00)); |
+ _ori(TReg3, TReg2, 0xffff); |
+ _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element |
+ _or(DstE, TReg5, TReg4); |
+ break; |
+ case 3: |
+ _srl(TReg1, Src1R, 24); // Position in the destination |
+ _sll(TReg2, SrcE, 8); |
+ _srl(TReg3, TReg2, 8); // Clear bits[31:24] of element |
+ _or(DstE, TReg1, TReg3); |
+ break; |
+ default: |
+ llvm::report_fatal_error("InsertElement: Invalid PosInCont"); |
+ break; |
+ } |
+ } |
+ // Write back temporary vector to the destination |
+ auto *Assign = InstAssign::create(Func, Dest, TDest); |
+ lowerAssign(Assign); |
+ return; |
+ } |
+ llvm::report_fatal_error("InsertElement requires a constant index"); |
} |
void TargetMIPS32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
@@ -3201,8 +3649,48 @@ void TargetMIPS32::lowerRet(const InstRet *Instr) { |
Context.insert<InstFakeUse>(R1); |
break; |
} |
+ case IceType_v4i1: |
+ case IceType_v8i1: |
+ case IceType_v16i1: |
+ case IceType_v16i8: |
+ case IceType_v8i16: |
+ case IceType_v4i32: { |
+ auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(Src0); |
+ Variable *V0 = |
+ legalizeToReg(SrcVec->getContainers()[0], RegMIPS32::Reg_V0); |
+ Variable *V1 = |
+ legalizeToReg(SrcVec->getContainers()[1], RegMIPS32::Reg_V1); |
+ Variable *A0 = |
+ legalizeToReg(SrcVec->getContainers()[2], RegMIPS32::Reg_A0); |
+ Variable *A1 = |
+ legalizeToReg(SrcVec->getContainers()[3], RegMIPS32::Reg_A1); |
+ Reg = V0; |
+ Context.insert<InstFakeUse>(V1); |
+ Context.insert<InstFakeUse>(A0); |
+ Context.insert<InstFakeUse>(A1); |
+ break; |
+ } |
+ case IceType_v4f32: { |
+ auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(Src0); |
+ Reg = getImplicitRet(); |
+ auto *RegT = legalizeToReg(Reg); |
+ // Return the vector through buffer in implicit argument a0 |
+ for (SizeT i = 0; i < SrcVec->ElementsPerContainer; ++i) { |
+ OperandMIPS32Mem *Mem = OperandMIPS32Mem::create( |
+ Func, IceType_f32, RegT, |
+ llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4))); |
+ Variable *Var = legalizeToReg(SrcVec->getContainers()[i]); |
+ _sw(Var, Mem); |
+ } |
+ Variable *V0 = makeReg(IceType_i32, RegMIPS32::Reg_V0); |
+ _mov(V0, Reg); // move v0,a0 |
+ Context.insert<InstFakeUse>(Reg); |
+ Context.insert<InstFakeUse>(V0); |
+ break; |
+ } |
default: |
- UnimplementedLoweringError(this, Instr); |
+ llvm::report_fatal_error("Ret: Invalid type."); |
+ break; |
} |
} |
_ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg); |
@@ -3265,6 +3753,14 @@ void TargetMIPS32::lowerStore(const InstStore *Instr) { |
Variable *ValueLo = legalizeToReg(loOperand(Value)); |
_sw(ValueHi, llvm::cast<OperandMIPS32Mem>(hiOperand(NewAddr))); |
_sw(ValueLo, llvm::cast<OperandMIPS32Mem>(loOperand(NewAddr))); |
+ } else if (isVectorType(Value->getType())) { |
+ auto *DataVec = llvm::dyn_cast<VariableVecOn32>(Value); |
+ for (SizeT i = 0; i < DataVec->ElementsPerContainer; ++i) { |
+ auto *DCont = legalizeToReg(DataVec->getContainers()[i]); |
+ auto *MCont = llvm::cast<OperandMIPS32Mem>( |
+ getOperandAtIndex(NewAddr, IceType_i32, i)); |
+ _sw(DCont, MCont); |
+ } |
} else { |
Variable *ValueR = legalizeToReg(Value); |
_sw(ValueR, NewAddr); |
@@ -3496,7 +3992,7 @@ Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) { |
Type Ty = Src->getType(); |
Variable *Reg = makeReg(Ty, RegNum); |
if (isVectorType(Ty)) { |
- UnimplementedError(getFlags()); |
+ llvm::report_fatal_error("Invalid copy from vector type."); |
} else { |
if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Src)) { |
_lw(Reg, Mem); |
@@ -3568,6 +4064,11 @@ Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed, |
} |
if (llvm::isa<Constant>(From)) { |
+ if (llvm::isa<ConstantUndef>(From)) { |
+ From = legalizeUndef(From, RegNum); |
+ if (isVectorType(Ty)) |
+ return From; |
+ } |
if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { |
(void)C; |
// TODO(reed kotler): complete this case for proper implementation |
@@ -3576,23 +4077,15 @@ Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed, |
return Reg; |
} else if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { |
const uint32_t Value = C32->getValue(); |
- // Check if the immediate will fit in a Flexible second operand, |
- // if a Flexible second operand is allowed. We need to know the exact |
- // value, so that rules out relocatable constants. |
- // Also try the inverse and use MVN if possible. |
- // Do a movw/movt to a register. |
- Variable *Reg; |
- if (RegNum.hasValue()) |
- Reg = getPhysicalRegister(RegNum); |
- else |
- Reg = makeReg(Ty, RegNum); |
+ // Use addiu if the immediate is a 16bit value. Otherwise load it |
+ // using a lui-ori instructions. |
+ Variable *Reg = makeReg(Ty, RegNum); |
if (isInt<16>(int32_t(Value))) { |
Variable *Zero = getPhysicalRegister(RegMIPS32::Reg_ZERO, Ty); |
Context.insert<InstFakeDef>(Zero); |
_addiu(Reg, Zero, Value); |
} else { |
uint32_t UpperBits = (Value >> 16) & 0xFFFF; |
- (void)UpperBits; |
uint32_t LowerBits = Value & 0xFFFF; |
Variable *TReg = makeReg(Ty, RegNum); |
if (LowerBits) { |