| Index: src/IceTargetLoweringMIPS32.cpp
|
| diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp
|
| index 6d6c6b9316f43f8e48f4e759d7493ea7a7eea595..daf7f97556f609a9cecf2fad14e6020003c5ead1 100644
|
| --- a/src/IceTargetLoweringMIPS32.cpp
|
| +++ b/src/IceTargetLoweringMIPS32.cpp
|
| @@ -90,8 +90,9 @@ constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 16;
|
| // stack alignment required for the given type.
|
| uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
|
| size_t typeAlignInBytes = typeWidthInBytes(Ty);
|
| + // Vectors are stored on the stack with the same alignment as the int type.
|
| if (isVectorType(Ty))
|
| - UnimplementedError(getFlags());
|
| + typeAlignInBytes = typeWidthInBytes(IceType_i32);
|
| return Utils::applyAlignment(Value, typeAlignInBytes);
|
| }
|
|
|
| @@ -228,19 +229,9 @@ void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) {
|
| const InstArithmetic::OpKind Op =
|
| llvm::cast<InstArithmetic>(Instr)->getOp();
|
| if (isVectorType(DestTy)) {
|
| - switch (Op) {
|
| - default:
|
| - break;
|
| - case InstArithmetic::Fdiv:
|
| - case InstArithmetic::Frem:
|
| - case InstArithmetic::Sdiv:
|
| - case InstArithmetic::Srem:
|
| - case InstArithmetic::Udiv:
|
| - case InstArithmetic::Urem:
|
| - scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
|
| - Instr->setDeleted();
|
| - return;
|
| - }
|
| + scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
|
| + Instr->setDeleted();
|
| + return;
|
| }
|
| switch (DestTy) {
|
| default:
|
| @@ -303,7 +294,6 @@ void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) {
|
| const Type SrcTy = Src0->getType();
|
| auto *CastInstr = llvm::cast<InstCast>(Instr);
|
| const InstCast::OpKind CastKind = CastInstr->getCastKind();
|
| -
|
| switch (CastKind) {
|
| default:
|
| return;
|
| @@ -424,6 +414,39 @@ void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) {
|
| Variable *Dest = Instr->getDest();
|
| auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr);
|
| Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID;
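|
| + // Scalarize a vector fabs: extract each f32 lane, call the scalar
|
| + // llvm.fabs.f32 helper on it, and insert the result back lane by lane.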
|
| + if (Dest && isVectorType(Dest->getType()) && ID == Intrinsics::Fabs) {
|
| + Operand *Src0 = IntrinsicCall->getArg(0);
|
| + GlobalString FabsFloat = Ctx->getGlobalString("llvm.fabs.f32");
|
| + Operand *CallTarget = Ctx->getConstantExternSym(FabsFloat);
|
| + GlobalString FabsVec = Ctx->getGlobalString("llvm.fabs.v4f32");
|
| + bool BadIntrinsic = false;
|
| + const Intrinsics::FullIntrinsicInfo *FullInfo =
|
| + Ctx->getIntrinsicsInfo().find(FabsVec, BadIntrinsic);
|
| + Intrinsics::IntrinsicInfo Info = FullInfo->Info;
|
| +
|
| + Variable *T = Func->makeVariable(IceType_v4f32);
|
| + auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(T);
|
| + VarVecOn32->initVecElement(Func, IceType_v4f32);
|
| + Context.insert<InstFakeDef>(T);
|
| +
|
| + for (SizeT I = 0; I < 4; ++I) {
|
| + auto *Index = Ctx->getConstantInt32(I);
|
| + auto *Op = Func->makeVariable(IceType_f32);
|
| + Context.insert<InstExtractElement>(Op, Src0, Index);
|
| + auto *Res = Func->makeVariable(IceType_f32);
|
| + Variable *DestT = Func->makeVariable(IceType_v4f32);
|
| + auto *Call =
|
| + Context.insert<InstIntrinsicCall>(1, Res, CallTarget, Info);
|
| + Call->addArg(Op);
|
| + Context.insert<InstInsertElement>(DestT, T, Res, Index);
|
| + T = DestT;
|
| + }
|
| +
|
| + Context.insert<InstAssign>(Dest, T);
|
| +
|
| + Instr->setDeleted();
|
| + return;
|
| + }
|
| switch (ID) {
|
| default:
|
| return;
|
| @@ -788,8 +811,17 @@ Operand *TargetMIPS32::legalizeUndef(Operand *From, RegNumT RegNum) {
|
| // overestimated. If the constant being lowered is a 64 bit value,
|
| // then the result should be split and the lo and hi components will
|
| // need to go in uninitialized registers.
|
| - if (isVectorType(Ty))
|
| - UnimplementedError(getFlags());
|
| + if (isVectorType(Ty)) {
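|
| + // An undef vector is materialized as a register whose i32 containers are
|
| + // all set to zero.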
|
| + Variable *Var = makeReg(Ty, RegNum);
|
| + auto *Reg = llvm::cast<VariableVecOn32>(Var);
|
| + Reg->initVecElement(Func, Ty);
|
| + auto *Zero = getZero();
|
| + Context.insert<InstFakeDef>(Zero);
|
| + for (SizeT I = 0; I < Reg->getNumContainers(); I++) {
|
| + _mov(Reg->getContainerAtIndex(I), Zero);
|
| + }
|
| + return Reg;
|
| + }
|
| return Ctx->getConstantZero(Ty);
|
| }
|
| return From;
|
| @@ -859,7 +891,7 @@ TargetMIPS32::CallingConv::CallingConv()
|
| // number to make register allocation decisions.
|
| bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo,
|
| RegNumT *Reg) {
|
| - if (isScalarIntegerType(Ty))
|
| + if (isScalarIntegerType(Ty) || isVectorType(Ty))
|
| return argInGPR(Ty, Reg);
|
| if (isScalarFloatingType(Ty)) {
|
| if (ArgNo == 0) {
|
| @@ -884,6 +916,13 @@ bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
|
| UnimplementedError(getFlags());
|
| return false;
|
| } break;
|
| + case IceType_v4i1:
|
| + case IceType_v8i1:
|
| + case IceType_v16i1:
|
| + case IceType_v16i8:
|
| + case IceType_v8i16:
|
| + case IceType_v4i32:
|
| + case IceType_v4f32:
|
| case IceType_i32:
|
| case IceType_f32: {
|
| Source = &GPRArgs;
|
| @@ -896,6 +935,12 @@ bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
|
|
|
| discardUnavailableGPRsAndTheirAliases(Source);
|
|
|
| + // If $4 is used for any scalar type (or for returning v4f32) then the next
|
| + // vector argument is passed in $6:$7:stack:stack.
|
| + if (isVectorType(Ty)) {
|
| + alignGPR(Source);
|
| + }
|
| +
|
| if (Source->empty()) {
|
| GPRegsUsed.set();
|
| return false;
|
| @@ -907,6 +952,21 @@ bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
|
| // Source->back() is marked as unavailable, and it is thus implicitly popped
|
| // from the stack.
|
| GPRegsUsed |= RegisterAliases[*Reg];
|
| +
|
| + // All vector arguments, irrespective of their base type, are passed in GP
|
| + // registers. The first vector argument is passed in $4:$5:$6:$7 and the
|
| + // second in $6:$7:stack:stack. If this is the first argument, discard
|
| + // $4:$5:$6:$7; otherwise discard only $6:$7.
|
| + if (isVectorType(Ty)) {
|
| + if (((unsigned)*Reg) == RegMIPS32::Reg_A0) {
|
| + GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A1];
|
| + GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A2];
|
| + GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
|
| + } else {
|
| + GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
|
| + }
|
| + }
|
| +
|
| return true;
|
| }
|
|
|
| @@ -997,6 +1057,27 @@ void TargetMIPS32::lowerArguments() {
|
| Context.init(Func->getEntryNode());
|
| Context.setInsertPoint(Context.getCur());
|
|
|
| + // v4f32 is returned through the stack. $4 is set up by the caller and passed
|
| + // implicitly as the first argument. The callee then copies the return vector
|
| + // to the address held in $4.
|
| + if (isVectorFloatingType(Func->getReturnType())) {
|
| + Variable *ImplicitRetVec = Func->makeVariable(IceType_i32);
|
| + ImplicitRetVec->setName(Func, "ImplicitRet_v4f32");
|
| + ImplicitRetVec->setIsArg();
|
| + Args.insert(Args.begin(), ImplicitRetVec);
|
| + Func->setImplicitRet(ImplicitRetVec);
|
| + Context.insert<InstFakeDef>(ImplicitRetVec);
|
| + for (CfgNode *Node : Func->getNodes()) {
|
| + for (Inst &Instr : Node->getInsts()) {
|
| + if (llvm::isa<InstRet>(&Instr)) {
|
| + Context.setInsertPoint(Instr);
|
| + Context.insert<InstFakeUse>(ImplicitRetVec);
|
| + break;
|
| + }
|
| + }
|
| + }
|
| + Context.setInsertPoint(Context.getCur());
|
| + }
|
| +
|
| for (SizeT I = 0, E = Args.size(); I < E; ++I) {
|
| Variable *Arg = Args[I];
|
| Type Ty = Arg->getType();
|
| @@ -1011,16 +1092,40 @@ void TargetMIPS32::lowerArguments() {
|
| RegisterArg->setIsArg();
|
| Arg->setIsArg(false);
|
| Args[I] = RegisterArg;
|
| - switch (Ty) {
|
| - default: { RegisterArg->setRegNum(RegNum); } break;
|
| - case IceType_i64: {
|
| - auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
|
| - RegisterArg64->initHiLo(Func);
|
| - RegisterArg64->getLo()->setRegNum(
|
| - RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum)));
|
| - RegisterArg64->getHi()->setRegNum(
|
| - RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum)));
|
| - } break;
|
| +
|
| + if (isVectorType(Ty)) {
|
| + auto *RegisterArgVec = llvm::cast<VariableVecOn32>(RegisterArg);
|
| + RegisterArgVec->initVecElement(Func, Ty);
|
| + RegisterArgVec->getContainerAtIndex(0)
|
| + ->setRegNum(RegNumT::fixme((unsigned)RegNum + 0));
|
| + RegisterArgVec->getContainerAtIndex(1)
|
| + ->setRegNum(RegNumT::fixme((unsigned)RegNum + 1));
|
| + // The first two elements of the second vector argument are passed
|
| + // in $6:$7 and the remaining two on the stack. Do not assign registers
|
| + // to the last two elements if this is the second vector argument.
|
| + if (I == 0) {
|
| + RegisterArgVec->getContainerAtIndex(2)
|
| + ->setRegNum(RegNumT::fixme((unsigned)RegNum + 2));
|
| + RegisterArgVec->getContainerAtIndex(3)
|
| + ->setRegNum(RegNumT::fixme((unsigned)RegNum + 3));
|
| + } else {
|
| + RegisterArgVec->getContainerAtIndex(2)
|
| + ->setRegNum(RegNumT::fixme(RegNumT()));
|
| + RegisterArgVec->getContainerAtIndex(3)
|
| + ->setRegNum(RegNumT::fixme(RegNumT()));
|
| + }
|
| + } else {
|
| + switch (Ty) {
|
| + default: { RegisterArg->setRegNum(RegNum); } break;
|
| + case IceType_i64: {
|
| + auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
|
| + RegisterArg64->initHiLo(Func);
|
| + RegisterArg64->getLo()->setRegNum(
|
| + RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum)));
|
| + RegisterArg64->getHi()->setRegNum(
|
| + RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum)));
|
| + } break;
|
| + }
|
| }
|
| Context.insert<InstAssign>(Arg, RegisterArg);
|
| }
|
| @@ -1036,20 +1141,46 @@ Type TargetMIPS32::stackSlotType() { return IceType_i32; }
|
| // recursively on the components, taking care to handle Lo first because of the
|
| // little-endian architecture. Lastly, this function generates an instruction
|
| // to copy Arg into its assigned register if applicable.
|
| -void TargetMIPS32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
|
| +void TargetMIPS32::finishArgumentLowering(Variable *Arg, bool PartialOnStack,
|
| + Variable *FramePtr,
|
| size_t BasicFrameOffset,
|
| size_t *InArgsSizeBytes) {
|
| const Type Ty = Arg->getType();
|
| *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);
|
|
|
| + // If $4 is used for any scalar type (or for returning v4f32) then the next
|
| + // vector argument is passed in $6:$7:stack:stack. Load the 3rd and 4th
|
| + // elements from the argument stack.
|
| + if (auto *ArgVecOn32 = llvm::dyn_cast<VariableVecOn32>(Arg)) {
|
| + if (PartialOnStack == false) {
|
| + auto *Elem0 = ArgVecOn32->getContainerAtIndex(0);
|
| + auto *Elem1 = ArgVecOn32->getContainerAtIndex(1);
|
| + finishArgumentLowering(Elem0, PartialOnStack, FramePtr, BasicFrameOffset,
|
| + InArgsSizeBytes);
|
| + finishArgumentLowering(Elem1, PartialOnStack, FramePtr, BasicFrameOffset,
|
| + InArgsSizeBytes);
|
| + }
|
| + auto *Elem2 = ArgVecOn32->getContainerAtIndex(2);
|
| + auto *Elem3 = ArgVecOn32->getContainerAtIndex(3);
|
| + finishArgumentLowering(Elem2, PartialOnStack, FramePtr, BasicFrameOffset,
|
| + InArgsSizeBytes);
|
| + finishArgumentLowering(Elem3, PartialOnStack, FramePtr, BasicFrameOffset,
|
| + InArgsSizeBytes);
|
| + return;
|
| + }
|
| +
|
| if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
|
| Variable *const Lo = Arg64On32->getLo();
|
| Variable *const Hi = Arg64On32->getHi();
|
| - finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
|
| - finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
|
| + finishArgumentLowering(Lo, PartialOnStack, FramePtr, BasicFrameOffset,
|
| + InArgsSizeBytes);
|
| + finishArgumentLowering(Hi, PartialOnStack, FramePtr, BasicFrameOffset,
|
| + InArgsSizeBytes);
|
| return;
|
| }
|
| +
|
| assert(Ty != IceType_i64);
|
| + assert(!isVectorType(Ty));
|
|
|
| const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
|
| *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
|
| @@ -1262,13 +1393,25 @@ void TargetMIPS32::addProlog(CfgNode *Node) {
|
| for (Variable *Arg : Args) {
|
| RegNumT DummyReg;
|
| const Type Ty = Arg->getType();
|
| + bool PartialOnStack;
|
| // Skip arguments passed in registers.
|
| if (CC.argInReg(Ty, ArgNo, &DummyReg)) {
|
| - ArgNo++;
|
| - continue;
|
| + // Load the argument from the stack:
|
| + // 1. If this is the first vector argument and the return type is v4f32:
|
| + // $4 is then used to pass the stack address implicitly, and the 3rd and
|
| + // 4th elements of the vector argument are passed on the stack.
|
| + // 2. If this is the second vector argument.
|
| + if (ArgNo != 0 && isVectorType(Ty)) {
|
| + PartialOnStack = true;
|
| + finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
|
| + &InArgsSizeBytes);
|
| + }
|
| } else {
|
| - finishArgumentLowering(Arg, FP, TotalStackSizeBytes, &InArgsSizeBytes);
|
| + PartialOnStack = false;
|
| + finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
|
| + &InArgsSizeBytes);
|
| }
|
| + ArgNo++;
|
| }
|
|
|
| // Fill in stack offsets for locals.
|
| @@ -1571,6 +1714,47 @@ Operand *TargetMIPS32::loOperand(Operand *Operand) {
|
| return nullptr;
|
| }
|
|
|
| +Operand *TargetMIPS32::getOperandAtIndex(Operand *Operand, Type BaseType,
|
| + uint32_t Index) {
|
| + if (!isVectorType(Operand->getType())) {
|
| + llvm::report_fatal_error("getOperandAtIndex: Operand is not vector");
|
| + return nullptr;
|
| + }
|
| +
|
| + if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
|
| + llvm::report_fatal_error("getOperandAtIndex: Operand is 64-bit constant");
|
| + return nullptr;
|
| + }
|
| +
|
| + if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
|
| + assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
|
| + Variable *Base = Mem->getBase();
|
| + auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
|
| + assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
|
| + int32_t NextOffsetVal =
|
| + Offset->getValue() + (Index * typeWidthInBytes(BaseType));
|
| + constexpr bool SignExt = false;
|
| + if (!OperandMIPS32Mem::canHoldOffset(BaseType, SignExt, NextOffsetVal)) {
|
| + Constant *Four = Ctx->getConstantInt32(4);
|
| + Variable *NewBase = Func->makeVariable(Base->getType());
|
| + lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
|
| + Base, Four));
|
| + Base = NewBase;
|
| + } else {
|
| + Offset =
|
| + llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
|
| + }
|
| + return OperandMIPS32Mem::create(Func, BaseType, Base, Offset,
|
| + Mem->getAddrMode());
|
| + }
|
| +
|
| + if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Operand))
|
| + return VarVecOn32->getContainerAtIndex(Index);
|
| +
|
| + llvm_unreachable("Unsupported operand type");
|
| + return nullptr;
|
| +}
|
| +
|
| Operand *TargetMIPS32::hiOperand(Operand *Operand) {
|
| assert(Operand->getType() == IceType_i64);
|
| if (Operand->getType() != IceType_i64)
|
| @@ -2005,6 +2189,16 @@ void TargetMIPS32::lowerAssign(const InstAssign *Instr) {
|
| _mov(DestLo, T_Lo);
|
| _mov(T_Hi, Src0Hi);
|
| _mov(DestHi, T_Hi);
|
| + } else if (isVectorType(Dest->getType())) {
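|
| + // Copy the vector one 32-bit container at a time, going through a
|
| + // temporary register for each container.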
|
| + auto *DstVec = llvm::dyn_cast<VariableVecOn32>(Dest);
|
| + for (size_t I = 0; I < DstVec->getNumContainers(); I++) {
|
| + auto *DCont = DstVec->getContainerAtIndex(I);
|
| + auto *SCont = legalize(
|
| + getOperandAtIndex(Src0, DstVec->getContainerType(), I), Legal_Reg);
|
| + auto *TReg = makeReg(DstVec->getContainerType());
|
| + _mov(TReg, SCont);
|
| + _mov(DCont, TReg);
|
| + }
|
| } else {
|
| Operand *SrcR;
|
| if (Dest->hasReg()) {
|
| @@ -2018,11 +2212,7 @@ void TargetMIPS32::lowerAssign(const InstAssign *Instr) {
|
| // legalize this to a register.
|
| SrcR = legalize(Src0, Legal_Reg);
|
| }
|
| - if (isVectorType(Dest->getType())) {
|
| - UnimplementedLoweringError(this, Instr);
|
| - } else {
|
| - _mov(Dest, SrcR);
|
| - }
|
| + _mov(Dest, SrcR);
|
| }
|
| }
|
|
|
| @@ -2112,6 +2302,7 @@ void TargetMIPS32::lowerBr(const InstBr *Instr) {
|
| }
|
|
|
| void TargetMIPS32::lowerCall(const InstCall *Instr) {
|
| + CfgVector<Variable *> RegArgs;
|
| NeedsStackAlignment = true;
|
|
|
| // Assign arguments to registers and stack. Also reserve stack.
|
| @@ -2127,23 +2318,78 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) {
|
| // Classify each argument operand according to the location where the
|
| // argument is passed.
|
|
|
| - for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
|
| - Operand *Arg = legalizeUndef(Instr->getArg(i));
|
| + // v4f32 is returned through stack. $4 is setup by the caller and passed as
|
| + // first argument implicitly. Callee then copies the return vector at $4.
|
| + SizeT ArgNum = 0;
|
| + Variable *Dest = Instr->getDest();
|
| + Variable *RetVecFloat = nullptr;
|
| + if (Dest && isVectorFloatingType(Dest->getType())) {
|
| + ArgNum = 1;
|
| + CC.discardReg(RegMIPS32::Reg_A0);
|
| + RetVecFloat = Func->makeVariable(IceType_i32);
|
| + auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, 16);
|
| + constexpr SizeT Alignment = 4;
|
| + lowerAlloca(InstAlloca::create(Func, RetVecFloat, ByteCount, Alignment));
|
| + RegArgs.emplace_back(
|
| + legalizeToReg(RetVecFloat, RegNumT::fixme(RegMIPS32::Reg_A0)));
|
| + }
|
| +
|
| + for (SizeT I = 0, NumArgs = Instr->getNumArgs(); I < NumArgs; ++I) {
|
| + Operand *Arg = legalizeUndef(Instr->getArg(I));
|
| const Type Ty = Arg->getType();
|
| bool InReg = false;
|
| RegNumT Reg;
|
|
|
| - InReg = CC.argInReg(Ty, i, &Reg);
|
| + InReg = CC.argInReg(Ty, I, &Reg);
|
|
|
| if (!InReg) {
|
| - ParameterAreaSizeBytes =
|
| - applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
|
| - StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
|
| - ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
|
| + if (isVectorType(Ty)) {
|
| + auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
|
| + for (SizeT I = 0; I < ArgVec->getNumContainers(); I++) {
|
| + Operand *Elem = ArgVec->getContainerAtIndex(I);
|
| + ParameterAreaSizeBytes =
|
| + applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i32);
|
| + StackArgs.push_back(std::make_pair(Elem, ParameterAreaSizeBytes));
|
| + ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
|
| + }
|
| + } else {
|
| + ParameterAreaSizeBytes =
|
| + applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
|
| + StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
|
| + ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
|
| + }
|
| + ArgNum++;
|
| continue;
|
| }
|
|
|
| - if (Ty == IceType_i64) {
|
| + if (isVectorType(Ty)) {
|
| + auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
|
| + Operand *Elem0 = ArgVec->getContainerAtIndex(0);
|
| + Operand *Elem1 = ArgVec->getContainerAtIndex(1);
|
| + GPRArgs.push_back(
|
| + std::make_pair(Elem0, RegNumT::fixme((unsigned)Reg + 0)));
|
| + GPRArgs.push_back(
|
| + std::make_pair(Elem1, RegNumT::fixme((unsigned)Reg + 1)));
|
| + Operand *Elem2 = ArgVec->getContainerAtIndex(2);
|
| + Operand *Elem3 = ArgVec->getContainerAtIndex(3);
|
| + // A vector passed as the first argument goes in $4:$5:$6:$7.
|
| + // Later vector arguments are passed in $6:$7:stack:stack.
|
| + if (ArgNum == 0) {
|
| + GPRArgs.push_back(
|
| + std::make_pair(Elem2, RegNumT::fixme((unsigned)Reg + 2)));
|
| + GPRArgs.push_back(
|
| + std::make_pair(Elem3, RegNumT::fixme((unsigned)Reg + 3)));
|
| + } else {
|
| + ParameterAreaSizeBytes =
|
| + applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i32);
|
| + StackArgs.push_back(std::make_pair(Elem2, ParameterAreaSizeBytes));
|
| + ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
|
| + ParameterAreaSizeBytes =
|
| + applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i32);
|
| + StackArgs.push_back(std::make_pair(Elem3, ParameterAreaSizeBytes));
|
| + ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
|
| + }
|
| + } else if (Ty == IceType_i64) {
|
| Operand *Lo = loOperand(Arg);
|
| Operand *Hi = hiOperand(Arg);
|
| GPRArgs.push_back(
|
| @@ -2155,6 +2401,7 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) {
|
| } else {
|
| FPArgs.push_back(std::make_pair(Arg, Reg));
|
| }
|
| + ArgNum++;
|
| }
|
|
|
| // Adjust the parameter area so that the stack is aligned. It is assumed that
|
| @@ -2183,7 +2430,7 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) {
|
|
|
| // Generate the call instruction. Assign its result to a temporary with high
|
| // register allocation weight.
|
| - Variable *Dest = Instr->getDest();
|
| +
|
| // ReturnReg doubles as ReturnRegLo as necessary.
|
| Variable *ReturnReg = nullptr;
|
| Variable *ReturnRegHi = nullptr;
|
| @@ -2215,10 +2462,19 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) {
|
| case IceType_v16i1:
|
| case IceType_v16i8:
|
| case IceType_v8i16:
|
| - case IceType_v4i32:
|
| + case IceType_v4i32: {
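|
| + // Integer vector results come back in $v0:$v1:$a0:$a1, so pin each
|
| + // container of the return value to the corresponding register.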
|
| + ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
|
| + auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg);
|
| + RetVec->initVecElement(Func, Dest->getType());
|
| + for (SizeT I = 0; I < RetVec->getNumContainers(); I++) {
|
| + RetVec->getContainerAtIndex(I)
|
| + ->setRegNum(RegNumT::fixme(RegMIPS32::Reg_V0 + I));
|
| + }
|
| + break;
|
| + }
|
| case IceType_v4f32:
|
| - UnimplementedLoweringError(this, Instr);
|
| - return;
|
| + ReturnReg = makeReg(IceType_i32, RegMIPS32::Reg_V0);
|
| + break;
|
| }
|
| }
|
| Operand *CallTarget = Instr->getCallTarget();
|
| @@ -2230,7 +2486,6 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) {
|
| }
|
|
|
| // Copy arguments to be passed in registers to the appropriate registers.
|
| - CfgVector<Variable *> RegArgs;
|
| for (auto &FPArg : FPArgs) {
|
| RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
|
| }
|
| @@ -2251,7 +2506,16 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) {
|
| if (VariableAllocaUsed)
|
| _addiu(SP, SP, -MaxOutArgsSizeBytes);
|
|
|
| - Inst *NewCall = InstMIPS32Call::create(Func, ReturnReg, CallTarget);
|
| + Inst *NewCall;
|
| +
|
| + // We don't need to define the return register if it is a vector;
|
| + // fake defs of its containers are inserted just after the call instead.
|
| + if (ReturnReg && isVectorIntegerType(ReturnReg->getType())) {
|
| + Variable *RetReg = nullptr;
|
| + NewCall = InstMIPS32Call::create(Func, RetReg, CallTarget);
|
| + } else {
|
| + NewCall = InstMIPS32Call::create(Func, ReturnReg, CallTarget);
|
| + }
|
| Context.insert(NewCall);
|
|
|
| if (VariableAllocaUsed)
|
| @@ -2263,18 +2527,51 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) {
|
|
|
| if (ReturnRegHi)
|
| Context.insert(InstFakeDef::create(Func, ReturnRegHi));
|
| +
|
| + if (ReturnReg) {
|
| + if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
|
| + for (SizeT I = 0; I < RetVec->getNumContainers(); I++) {
|
| + Context.insert(
|
| + InstFakeDef::create(Func, RetVec->getContainerAtIndex(I)));
|
| + }
|
| + }
|
| + }
|
| +
|
| // Insert a register-kill pseudo instruction.
|
| Context.insert(InstFakeKill::create(Func, NewCall));
|
| +
|
| // Generate a FakeUse to keep the call live if necessary.
|
| if (Instr->hasSideEffects() && ReturnReg) {
|
| - Context.insert<InstFakeUse>(ReturnReg);
|
| + if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
|
| + for (SizeT I = 0; I < RetVec->getNumContainers(); I++) {
|
| + Context.insert<InstFakeUse>(RetVec->getContainerAtIndex(I));
|
| + }
|
| + } else {
|
| + Context.insert<InstFakeUse>(ReturnReg);
|
| + }
|
| }
|
| +
|
| if (Dest == nullptr)
|
| return;
|
|
|
| // Assign the result of the call to Dest.
|
| if (ReturnReg) {
|
| - if (ReturnRegHi) {
|
| + if (RetVecFloat) {
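|
| + // The callee has written the v4f32 result into the stack buffer whose
|
| + // address was passed in $4; load it back into the destination containers.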
|
| + auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
|
| + for (SizeT I = 0; I < DestVecOn32->getNumContainers(); I++) {
|
| + OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
|
| + Func, IceType_i32, RetVecFloat,
|
| + llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(I * 4)));
|
| + Variable *Dest = DestVecOn32->getContainerAtIndex(I);
|
| + _lw(Dest, Mem);
|
| + }
|
| + } else if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
|
| + auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
|
| + for (SizeT I = 0; I < DestVecOn32->getNumContainers(); I++) {
|
| + Variable *Dest = DestVecOn32->getContainerAtIndex(I);
|
| + _mov(Dest, RetVec->getContainerAtIndex(I));
|
| + }
|
| + } else if (ReturnRegHi) {
|
| assert(Dest->getType() == IceType_i64);
|
| auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
|
| Variable *DestLo = Dest64On32->getLo();
|
| @@ -2286,12 +2583,7 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) {
|
| Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
|
| isScalarFloatingType(Dest->getType()) ||
|
| isVectorType(Dest->getType()));
|
| - if (isVectorType(Dest->getType())) {
|
| - UnimplementedLoweringError(this, Instr);
|
| - return;
|
| - } else {
|
| - _mov(Dest, ReturnReg);
|
| - }
|
| + _mov(Dest, ReturnReg);
|
| }
|
| }
|
| }
|
| @@ -2453,7 +2745,64 @@ void TargetMIPS32::lowerCast(const InstCast *Instr) {
|
| }
|
|
|
| void TargetMIPS32::lowerExtractElement(const InstExtractElement *Instr) {
|
| - UnimplementedLoweringError(this, Instr);
|
| + Variable *Dest = Instr->getDest();
|
| + Type DestTy = Dest->getType();
|
| + Operand *Src1 = Instr->getSrc(1);
|
| + if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
|
| + const uint32_t Index = Imm->getValue();
|
| + Variable *TDest = makeReg(DestTy);
|
| + Variable *TReg = makeReg(DestTy);
|
| + auto *Src0 = legalizeUndef(Instr->getSrc(0));
|
| + auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
|
| + auto *SrcE = Src0R->getVecElementAtIndex(Index);
|
| + // Number of elements in each container
|
| + uint32_t ElemPerCont = Src0R->getNumElements() / Src0R->getNumContainers();
|
| + // Position of the element in the container
|
| + uint32_t PosInCont = Index % ElemPerCont;
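|
| + // Elements that fill a whole container are simply moved; elements packed
|
| + // two or four per container are shifted and masked out.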
|
| + if (ElemPerCont == 1) {
|
| + _mov(TDest, SrcE);
|
| + } else if (ElemPerCont == 2) {
|
| + switch (PosInCont) {
|
| + case 0:
|
| + _andi(TDest, SrcE, 0xffff);
|
| + break;
|
| + case 1:
|
| + _srl(TDest, SrcE, 16);
|
| + break;
|
| + default:
|
| + llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
|
| + break;
|
| + }
|
| + } else if (ElemPerCont == 4) {
|
| + switch (PosInCont) {
|
| + case 0:
|
| + _andi(TDest, SrcE, 0xff);
|
| + break;
|
| + case 1:
|
| + _srl(TReg, SrcE, 8);
|
| + _andi(TDest, TReg, 0xff);
|
| + break;
|
| + case 2:
|
| + _srl(TReg, SrcE, 16);
|
| + _andi(TDest, TReg, 0xff);
|
| + break;
|
| + case 3:
|
| + _srl(TDest, SrcE, 24);
|
| + break;
|
| + default:
|
| + llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
|
| + break;
|
| + }
|
| + }
|
| + if (Src0R->getElementType() == IceType_i1) {
|
| + _andi(TReg, TDest, 0x1);
|
| + _mov(Dest, TReg);
|
| + } else {
|
| + _mov(Dest, TDest);
|
| + }
|
| + return;
|
| + }
|
| + llvm::report_fatal_error("ExtractElement requires a constant index");
|
| }
|
|
|
| void TargetMIPS32::lowerFcmp(const InstFcmp *Instr) {
|
| @@ -2765,7 +3114,108 @@ void TargetMIPS32::lowerIcmp(const InstIcmp *Instr) {
|
| }
|
|
|
| void TargetMIPS32::lowerInsertElement(const InstInsertElement *Instr) {
|
| - UnimplementedLoweringError(this, Instr);
|
| + Variable *Dest = Instr->getDest();
|
| + Type DestTy = Dest->getType();
|
| + Operand *Src2 = Instr->getSrc(2);
|
| + if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
|
| + const uint32_t Index = Imm->getValue();
|
| + // Vector to insert in
|
| + auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Instr->getSrc(0));
|
| + // Source Element
|
| + auto *SrcE = Src0R->getVecElementAtIndex(Index);
|
| + // Dest is a vector
|
| + auto *VDest = llvm::dyn_cast<VariableVecOn32>(Dest);
|
| + VDest->initVecElement(Func, DestTy);
|
| + // Temp vector variable
|
| + auto *TDest = makeReg(DestTy);
|
| + auto *TVDest = llvm::dyn_cast<VariableVecOn32>(TDest);
|
| + TVDest->initVecElement(Func, DestTy);
|
| + // Destination element
|
| + auto *DstE = TVDest->getVecElementAtIndex(Index);
|
| + // Element to insert
|
| + auto *Src1R = legalizeToReg(Instr->getSrc(1));
|
| + auto *TReg1 = makeReg(Src1R->getType());
|
| + auto *TReg2 = makeReg(Src1R->getType());
|
| + auto *TReg3 = makeReg(Src1R->getType());
|
| + auto *TReg4 = makeReg(Src1R->getType());
|
| + auto *TReg5 = makeReg(Src1R->getType());
|
| + // Number of elements in each container
|
| + uint32_t ElemPerCont = Src0R->getNumElements() / Src0R->getNumContainers();
|
| + // Position of the element in the container
|
| + uint32_t PosInCont = Index % ElemPerCont;
|
| + // Load source vector in a temporary vector
|
| + for (size_t I = 0; I < TVDest->getNumContainers(); I++) {
|
| + auto *DCont = TVDest->getContainerAtIndex(I);
|
| + // Do not define DstE as we are going to redefine it
|
| + if (DCont == DstE)
|
| + continue;
|
| + auto *SCont = Src0R->getContainerAtIndex(I);
|
| + auto *TReg = makeReg(TVDest->getContainerType());
|
| + _mov(TReg, SCont);
|
| + _mov(DCont, TReg);
|
| + }
|
| + // Insert the element
|
| + if (ElemPerCont == 1) {
|
| + _mov(DstE, Src1R);
|
| + } else if (ElemPerCont == 2) {
|
| + switch (PosInCont) {
|
| + case 0:
|
| + _andi(TReg1, Src1R, 0xffff); // Clear upper 16-bits of source
|
| + _srl(TReg2, SrcE, 16);
|
| + _sll(TReg3, TReg2, 16); // Clear lower 16-bits of element
|
| + _or(DstE, TReg1, TReg3);
|
| + break;
|
| + case 1:
|
| + _sll(TReg1, Src1R, 16); // Clear lower 16-bits of source
|
| + _sll(TReg2, SrcE, 16);
|
| + _srl(TReg3, TReg2, 16); // Clear upper 16-bits of element
|
| + _or(DstE, TReg1, TReg3);
|
| + break;
|
| + default:
|
| + llvm::report_fatal_error("InsertElement: Invalid PosInCont");
|
| + break;
|
| + }
|
| + } else if (ElemPerCont == 4) {
|
| + switch (PosInCont) {
|
| + case 0:
|
| + _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
|
| + _srl(TReg2, SrcE, 8);
|
| + _sll(TReg3, TReg2, 8); // Clear bits[7:0] of element
|
| + _or(DstE, TReg1, TReg3);
|
| + break;
|
| + case 1:
|
| + _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
|
| + _sll(TReg5, TReg1, 8); // Position in the destination
|
| + _lui(TReg2, Ctx->getConstantInt32(0xffff));
|
| + _ori(TReg3, TReg2, 0x00ff);
|
| + _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
|
| + _or(DstE, TReg5, TReg4);
|
| + break;
|
| + case 2:
|
| + _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
|
| + _sll(TReg5, TReg1, 16); // Position in the destination
|
| + _lui(TReg2, Ctx->getConstantInt32(0xff00));
|
| + _ori(TReg3, TReg2, 0xffff);
|
| + _and(TReg4, SrcE, TReg3); // Clear bits[23:16] of element
|
| + _or(DstE, TReg5, TReg4);
|
| + break;
|
| + case 3:
|
| + _sll(TReg1, Src1R, 24); // Position in the destination
|
| + _sll(TReg2, SrcE, 8);
|
| + _srl(TReg3, TReg2, 8); // Clear bits[31:24] of element
|
| + _or(DstE, TReg1, TReg3);
|
| + break;
|
| + default:
|
| + llvm::report_fatal_error("InsertElement: Invalid PosInCont");
|
| + break;
|
| + }
|
| + }
|
| + // Write back temporary vector to the destination
|
| + auto *Assign = InstAssign::create(Func, Dest, TDest);
|
| + lowerAssign(Assign);
|
| + return;
|
| + }
|
| + llvm::report_fatal_error("InsertElement requires a constant index");
|
| }
|
|
|
| void TargetMIPS32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
|
| @@ -3201,8 +3651,47 @@ void TargetMIPS32::lowerRet(const InstRet *Instr) {
|
| Context.insert<InstFakeUse>(R1);
|
| break;
|
| }
|
| + case IceType_v4i1:
|
| + case IceType_v8i1:
|
| + case IceType_v16i1:
|
| + case IceType_v16i8:
|
| + case IceType_v8i16:
|
| + case IceType_v4i32: {
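|
| + // Integer vectors are returned in $v0:$v1:$a0:$a1.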
|
| + auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(Src0);
|
| + Variable *V0 =
|
| + legalizeToReg(SrcVec->getContainerAtIndex(0), RegMIPS32::Reg_V0);
|
| + Variable *V1 =
|
| + legalizeToReg(SrcVec->getContainerAtIndex(1), RegMIPS32::Reg_V1);
|
| + Variable *A0 =
|
| + legalizeToReg(SrcVec->getContainerAtIndex(2), RegMIPS32::Reg_A0);
|
| + Variable *A1 =
|
| + legalizeToReg(SrcVec->getContainerAtIndex(3), RegMIPS32::Reg_A1);
|
| + Reg = V0;
|
| + Context.insert<InstFakeUse>(V1);
|
| + Context.insert<InstFakeUse>(A0);
|
| + Context.insert<InstFakeUse>(A1);
|
| + break;
|
| + }
|
| + case IceType_v4f32: {
|
| + auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(Src0);
|
| + Reg = Func->getImplicitRet();
|
| + auto *RegT = legalizeToReg(Reg);
|
| + // Return the vector through the buffer whose address is in implicit argument $4
|
| + for (size_t I = 0; I < SrcVec->getNumContainers(); I++) {
|
| + OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
|
| + Func, IceType_f32, RegT,
|
| + llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(I * 4)));
|
| + Variable *Var = legalizeToReg(SrcVec->getContainerAtIndex(I));
|
| + _sw(Var, Mem);
|
| + }
|
| + Variable *V0 = makeReg(IceType_i32, RegMIPS32::Reg_V0);
|
| + _mov(V0, Reg); // move v0,a0
|
| + Context.insert<InstFakeUse>(Reg);
|
| + Context.insert<InstFakeUse>(V0);
|
| + break;
|
| + }
|
| default:
|
| - UnimplementedLoweringError(this, Instr);
|
| + break;
|
| }
|
| }
|
| _ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg);
|
| @@ -3265,6 +3754,14 @@ void TargetMIPS32::lowerStore(const InstStore *Instr) {
|
| Variable *ValueLo = legalizeToReg(loOperand(Value));
|
| _sw(ValueHi, llvm::cast<OperandMIPS32Mem>(hiOperand(NewAddr)));
|
| _sw(ValueLo, llvm::cast<OperandMIPS32Mem>(loOperand(NewAddr)));
|
| + } else if (isVectorType(Value->getType())) {
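|
| + // Store the vector one 32-bit container at a time.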
|
| + auto *DataVec = llvm::dyn_cast<VariableVecOn32>(Value);
|
| + for (size_t I = 0; I < DataVec->getNumContainers(); I++) {
|
| + auto *DCont = legalizeToReg(DataVec->getContainerAtIndex(I));
|
| + auto *MCont = llvm::cast<OperandMIPS32Mem>(
|
| + getOperandAtIndex(NewAddr, DataVec->getContainerType(), I));
|
| + _sw(DCont, MCont);
|
| + }
|
| } else {
|
| Variable *ValueR = legalizeToReg(Value);
|
| _sw(ValueR, NewAddr);
|
| @@ -3496,7 +3993,7 @@ Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) {
|
| Type Ty = Src->getType();
|
| Variable *Reg = makeReg(Ty, RegNum);
|
| if (isVectorType(Ty)) {
|
| - UnimplementedError(getFlags());
|
| + llvm::report_fatal_error("Invalid copy from vector type.");
|
| } else {
|
| if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Src)) {
|
| _lw(Reg, Mem);
|
| @@ -3568,6 +4065,11 @@ Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed,
|
| }
|
|
|
| if (llvm::isa<Constant>(From)) {
|
| + if (llvm::isa<ConstantUndef>(From)) {
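|
| + // For vectors, legalizeUndef already produces a zero-filled register that
|
| + // can be returned directly.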
|
| + From = legalizeUndef(From, RegNum);
|
| + if (isVectorType(Ty))
|
| + return From;
|
| + }
|
| if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
|
| (void)C;
|
| // TODO(reed kotler): complete this case for proper implementation
|
| @@ -3576,23 +4078,15 @@ Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed,
|
| return Reg;
|
| } else if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
|
| const uint32_t Value = C32->getValue();
|
| - // Check if the immediate will fit in a Flexible second operand,
|
| - // if a Flexible second operand is allowed. We need to know the exact
|
| - // value, so that rules out relocatable constants.
|
| - // Also try the inverse and use MVN if possible.
|
| - // Do a movw/movt to a register.
|
| - Variable *Reg;
|
| - if (RegNum.hasValue())
|
| - Reg = getPhysicalRegister(RegNum);
|
| - else
|
| - Reg = makeReg(Ty, RegNum);
|
| + // Use addiu if the immediate is a 16-bit value. Otherwise load it
|
| + // using a lui/ori instruction pair.
|
| + Variable *Reg = makeReg(Ty, RegNum);
|
| if (isInt<16>(int32_t(Value))) {
|
| Variable *Zero = getPhysicalRegister(RegMIPS32::Reg_ZERO, Ty);
|
| Context.insert<InstFakeDef>(Zero);
|
| _addiu(Reg, Zero, Value);
|
| } else {
|
| uint32_t UpperBits = (Value >> 16) & 0xFFFF;
|
| - (void)UpperBits;
|
| uint32_t LowerBits = Value & 0xFFFF;
|
| Variable *TReg = makeReg(Ty, RegNum);
|
| if (LowerBits) {
|
|
|