Chromium Code Reviews| Index: src/IceTargetLoweringMIPS32.cpp |
| diff --git a/src/IceTargetLoweringMIPS32.cpp b/src/IceTargetLoweringMIPS32.cpp |
| index 6d6c6b9316f43f8e48f4e759d7493ea7a7eea595..6acbac31bb8d31ac7b5c2949b0fdc14b3d0f12b6 100644 |
| --- a/src/IceTargetLoweringMIPS32.cpp |
| +++ b/src/IceTargetLoweringMIPS32.cpp |
| @@ -90,8 +90,9 @@ constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 16; |
| // stack alignment required for the given type. |
| uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) { |
| size_t typeAlignInBytes = typeWidthInBytes(Ty); |
| + // Vectors are stored on stack with the same alignment as that of int type |
| if (isVectorType(Ty)) |
| - UnimplementedError(getFlags()); |
| + typeAlignInBytes = typeWidthInBytes(IceType_i32); |
| return Utils::applyAlignment(Value, typeAlignInBytes); |
| } |
| @@ -228,19 +229,9 @@ void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) { |
| const InstArithmetic::OpKind Op = |
| llvm::cast<InstArithmetic>(Instr)->getOp(); |
| if (isVectorType(DestTy)) { |
| - switch (Op) { |
| - default: |
| - break; |
| - case InstArithmetic::Fdiv: |
| - case InstArithmetic::Frem: |
| - case InstArithmetic::Sdiv: |
| - case InstArithmetic::Srem: |
| - case InstArithmetic::Udiv: |
| - case InstArithmetic::Urem: |
| - scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1)); |
| - Instr->setDeleted(); |
| - return; |
| - } |
| + scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1)); |
| + Instr->setDeleted(); |
| + return; |
| } |
| switch (DestTy) { |
| default: |
| @@ -303,7 +294,6 @@ void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) { |
| const Type SrcTy = Src0->getType(); |
| auto *CastInstr = llvm::cast<InstCast>(Instr); |
| const InstCast::OpKind CastKind = CastInstr->getCastKind(); |
| - |
| switch (CastKind) { |
| default: |
| return; |
| @@ -424,6 +414,39 @@ void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) { |
| Variable *Dest = Instr->getDest(); |
| auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr); |
| Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID; |
| + if (Dest && isVectorType(Dest->getType()) && ID == Intrinsics::Fabs) { |
| + Operand *Src0 = IntrinsicCall->getArg(0); |
| + GlobalString FabsFloat = Ctx->getGlobalString("llvm.fabs.f32"); |
| + Operand *CallTarget = Ctx->getConstantExternSym(FabsFloat); |
| + GlobalString FabsVec = Ctx->getGlobalString("llvm.fabs.v4f32"); |
| + bool BadIntrinsic = false; |
| + const Intrinsics::FullIntrinsicInfo *FullInfo = |
| + Ctx->getIntrinsicsInfo().find(FabsVec, BadIntrinsic); |
| + Intrinsics::IntrinsicInfo Info = FullInfo->Info; |
| + |
| + Variable *T = Func->makeVariable(IceType_v4f32); |
| + auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(T); |
| + VarVecOn32->initVecElement(Func); |
| + Context.insert<InstFakeDef>(T); |
| + |
| + for (SizeT i = 0; i < VarVecOn32->ElementsPerContainer; ++i) { |
| + auto *Index = Ctx->getConstantInt32(i); |
| + auto *Op = Func->makeVariable(IceType_f32); |
| + Context.insert<InstExtractElement>(Op, Src0, Index); |
| + auto *Res = Func->makeVariable(IceType_f32); |
| + Variable *DestT = Func->makeVariable(IceType_v4f32); |
| + auto *Call = |
| + Context.insert<InstIntrinsicCall>(1, Res, CallTarget, Info); |
| + Call->addArg(Op); |
| + Context.insert<InstInsertElement>(DestT, T, Res, Index); |
| + T = DestT; |
| + } |
| + |
| + Context.insert<InstAssign>(Dest, T); |
| + |
| + Instr->setDeleted(); |
| + return; |
| + } |
| switch (ID) { |
| default: |
| return; |
| @@ -788,8 +811,17 @@ Operand *TargetMIPS32::legalizeUndef(Operand *From, RegNumT RegNum) { |
| // overestimated. If the constant being lowered is a 64 bit value, |
| // then the result should be split and the lo and hi components will |
| // need to go in uninitialized registers. |
| - if (isVectorType(Ty)) |
| - UnimplementedError(getFlags()); |
| + if (isVectorType(Ty)) { |
| + Variable *Var = makeReg(Ty, RegNum); |
| + auto *Reg = llvm::cast<VariableVecOn32>(Var); |
| + Reg->initVecElement(Func); |
| + auto *Zero = getZero(); |
| + Context.insert<InstFakeDef>(Zero); |
| + for (Variable *Var : Reg->getContainers()) { |
| + _mov(Var, Zero); |
| + } |
| + return Reg; |
| + } |
| return Ctx->getConstantZero(Ty); |
| } |
| return From; |
| @@ -859,7 +891,7 @@ TargetMIPS32::CallingConv::CallingConv() |
| // number to make register allocation decisions. |
| bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo, |
| RegNumT *Reg) { |
| - if (isScalarIntegerType(Ty)) |
| + if (isScalarIntegerType(Ty) || isVectorType(Ty)) |
| return argInGPR(Ty, Reg); |
| if (isScalarFloatingType(Ty)) { |
| if (ArgNo == 0) { |
| @@ -884,6 +916,13 @@ bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) { |
| UnimplementedError(getFlags()); |
| return false; |
| } break; |
| + case IceType_v4i1: |
| + case IceType_v8i1: |
| + case IceType_v16i1: |
| + case IceType_v16i8: |
| + case IceType_v8i16: |
| + case IceType_v4i32: |
| + case IceType_v4f32: |
| case IceType_i32: |
| case IceType_f32: { |
| Source = &GPRArgs; |
| @@ -896,6 +935,12 @@ bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) { |
| discardUnavailableGPRsAndTheirAliases(Source); |
| + // If $4 is used for any scalar type (or returining v4f32) then the next |
| + // vector type if passed in $6:$7:stack:stack |
| + if (isVectorType(Ty)) { |
| + alignGPR(Source); |
| + } |
| + |
| if (Source->empty()) { |
| GPRegsUsed.set(); |
| return false; |
| @@ -907,6 +952,21 @@ bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) { |
| // Source->back() is marked as unavailable, and it is thus implicitly popped |
| // from the stack. |
| GPRegsUsed |= RegisterAliases[*Reg]; |
| + |
| + // All vector arguments irrespective of their base type are passed in GP |
| + // registers. First vector argument is passed in $4:$5:$6:$7 and 2nd |
| + // is passed in $6:$7:stack:stack. If it is 1st argument then discard |
| + // $4:$5:$6:$7 otherwise discard $6:$7 only. |
| + if (isVectorType(Ty)) { |
| + if (((unsigned)*Reg) == RegMIPS32::Reg_A0) { |
| + GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A1]; |
| + GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A2]; |
| + GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3]; |
| + } else { |
| + GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3]; |
| + } |
| + } |
| + |
| return true; |
| } |
| @@ -997,11 +1057,32 @@ void TargetMIPS32::lowerArguments() { |
| Context.init(Func->getEntryNode()); |
| Context.setInsertPoint(Context.getCur()); |
| - for (SizeT I = 0, E = Args.size(); I < E; ++I) { |
| - Variable *Arg = Args[I]; |
| + // v4f32 is returned through stack. $4 is setup by the caller and passed as |
| + // first argument implicitly. Callee then copies the return vector at $4. |
| + if (isVectorFloatingType(Func->getReturnType())) { |
| + Variable *ImplicitRetVec = Func->makeVariable(IceType_i32); |
| + ImplicitRetVec->setName(Func, "ImplicitRet_v4f32"); |
| + ImplicitRetVec->setIsArg(); |
| + Args.insert(Args.begin(), ImplicitRetVec); |
| + setImplicitRet(ImplicitRetVec); |
| + Context.insert<InstFakeDef>(ImplicitRetVec); |
| + for (CfgNode *Node : Func->getNodes()) { |
| + for (Inst &Instr : Node->getInsts()) { |
| + if (llvm::isa<InstRet>(&Instr)) { |
| + Context.setInsertPoint(Instr); |
| + Context.insert<InstFakeUse>(ImplicitRetVec); |
| + break; |
| + } |
| + } |
| + } |
| + Context.setInsertPoint(Context.getCur()); |
| + } |
| + |
| + for (SizeT i = 0, E = Args.size(); i < E; ++i) { |
| + Variable *Arg = Args[i]; |
| Type Ty = Arg->getType(); |
| RegNumT RegNum; |
| - if (!CC.argInReg(Ty, I, &RegNum)) { |
| + if (!CC.argInReg(Ty, i, &RegNum)) { |
| continue; |
| } |
| Variable *RegisterArg = Func->makeVariable(Ty); |
| @@ -1010,17 +1091,41 @@ void TargetMIPS32::lowerArguments() { |
| } |
| RegisterArg->setIsArg(); |
| Arg->setIsArg(false); |
| - Args[I] = RegisterArg; |
| - switch (Ty) { |
| - default: { RegisterArg->setRegNum(RegNum); } break; |
| - case IceType_i64: { |
| - auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg); |
| - RegisterArg64->initHiLo(Func); |
| - RegisterArg64->getLo()->setRegNum( |
| - RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum))); |
| - RegisterArg64->getHi()->setRegNum( |
| - RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum))); |
| - } break; |
| + Args[i] = RegisterArg; |
| + |
| + if (isVectorType(Ty)) { |
| + auto *RegisterArgVec = llvm::cast<VariableVecOn32>(RegisterArg); |
| + RegisterArgVec->initVecElement(Func); |
| + RegisterArgVec->getContainers()[0]->setRegNum( |
| + RegNumT::fixme((unsigned)RegNum + 0)); |
| + RegisterArgVec->getContainers()[1]->setRegNum( |
| + RegNumT::fixme((unsigned)RegNum + 1)); |
| + // First two elements of second vector argument are passed |
| + // in $6:$7 and remaining two on stack. Do not assign register |
| + // to this is second vector argument. |
| + if (i == 0) { |
| + RegisterArgVec->getContainers()[2]->setRegNum( |
| + RegNumT::fixme((unsigned)RegNum + 2)); |
| + RegisterArgVec->getContainers()[3]->setRegNum( |
| + RegNumT::fixme((unsigned)RegNum + 3)); |
| + } else { |
| + RegisterArgVec->getContainers()[2]->setRegNum( |
| + RegNumT::fixme(RegNumT())); |
| + RegisterArgVec->getContainers()[3]->setRegNum( |
| + RegNumT::fixme(RegNumT())); |
| + } |
| + } else { |
| + switch (Ty) { |
| + default: { RegisterArg->setRegNum(RegNum); } break; |
| + case IceType_i64: { |
| + auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg); |
| + RegisterArg64->initHiLo(Func); |
| + RegisterArg64->getLo()->setRegNum( |
| + RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum))); |
| + RegisterArg64->getHi()->setRegNum( |
| + RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum))); |
| + } break; |
| + } |
| } |
| Context.insert<InstAssign>(Arg, RegisterArg); |
| } |
| @@ -1036,20 +1141,46 @@ Type TargetMIPS32::stackSlotType() { return IceType_i32; } |
| // recursively on the components, taking care to handle Lo first because of the |
| // little-endian architecture. Lastly, this function generates an instruction |
| // to copy Arg into its assigned register if applicable. |
| -void TargetMIPS32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
| +void TargetMIPS32::finishArgumentLowering(Variable *Arg, bool PartialOnStack, |
| + Variable *FramePtr, |
| size_t BasicFrameOffset, |
| size_t *InArgsSizeBytes) { |
| const Type Ty = Arg->getType(); |
| *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty); |
| + // If $4 is used for any scalar type (or returining v4f32) then the next |
| + // vector type if passed in $6:$7:stack:stack. Load 3nd and 4th element |
| + // from agument stack. |
| + if (auto *ArgVecOn32 = llvm::dyn_cast<VariableVecOn32>(Arg)) { |
| + if (PartialOnStack == false) { |
| + auto *Elem0 = ArgVecOn32->getContainers()[0]; |
| + auto *Elem1 = ArgVecOn32->getContainers()[1]; |
| + finishArgumentLowering(Elem0, PartialOnStack, FramePtr, BasicFrameOffset, |
| + InArgsSizeBytes); |
| + finishArgumentLowering(Elem1, PartialOnStack, FramePtr, BasicFrameOffset, |
| + InArgsSizeBytes); |
| + } |
| + auto *Elem2 = ArgVecOn32->getContainers()[2]; |
| + auto *Elem3 = ArgVecOn32->getContainers()[3]; |
| + finishArgumentLowering(Elem2, PartialOnStack, FramePtr, BasicFrameOffset, |
| + InArgsSizeBytes); |
| + finishArgumentLowering(Elem3, PartialOnStack, FramePtr, BasicFrameOffset, |
| + InArgsSizeBytes); |
| + return; |
| + } |
| + |
| if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) { |
| Variable *const Lo = Arg64On32->getLo(); |
| Variable *const Hi = Arg64On32->getHi(); |
| - finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| - finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| + finishArgumentLowering(Lo, PartialOnStack, FramePtr, BasicFrameOffset, |
| + InArgsSizeBytes); |
| + finishArgumentLowering(Hi, PartialOnStack, FramePtr, BasicFrameOffset, |
| + InArgsSizeBytes); |
| return; |
| } |
| + |
| assert(Ty != IceType_i64); |
| + assert(!isVectorType(Ty)); |
| const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes; |
| *InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
| @@ -1262,13 +1393,25 @@ void TargetMIPS32::addProlog(CfgNode *Node) { |
| for (Variable *Arg : Args) { |
| RegNumT DummyReg; |
| const Type Ty = Arg->getType(); |
| + bool PartialOnStack; |
| // Skip arguments passed in registers. |
| if (CC.argInReg(Ty, ArgNo, &DummyReg)) { |
| - ArgNo++; |
| - continue; |
| + // Load argument from stack: |
| + // 1. If this is first vector argument and return type is v4f32. |
| + // In this case $4 is used to pass stack address implicitly. |
| + // 3rd and 4th element of vector argument is passed through stack. |
| + // 2. If this is second vector argument. |
| + if (ArgNo != 0 && isVectorType(Ty)) { |
| + PartialOnStack = true; |
| + finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes, |
| + &InArgsSizeBytes); |
| + } |
| } else { |
| - finishArgumentLowering(Arg, FP, TotalStackSizeBytes, &InArgsSizeBytes); |
| + PartialOnStack = false; |
| + finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes, |
| + &InArgsSizeBytes); |
| } |
| + ++ArgNo; |
| } |
| // Fill in stack offsets for locals. |
| @@ -1571,6 +1714,42 @@ Operand *TargetMIPS32::loOperand(Operand *Operand) { |
| return nullptr; |
| } |
| +Operand *TargetMIPS32::getOperandAtIndex(Operand *Operand, Type BaseType, |
| + uint32_t Index) { |
| + if (!isVectorType(Operand->getType())) { |
| + llvm::report_fatal_error("getOperandAtIndex: Operand is not vector"); |
| + return nullptr; |
| + } |
| + |
| + if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) { |
| + assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset); |
| + Variable *Base = Mem->getBase(); |
| + auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset()); |
| + assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4)); |
| + int32_t NextOffsetVal = |
| + Offset->getValue() + (Index * typeWidthInBytes(BaseType)); |
| + constexpr bool NoSignExt = false; |
| + if (!OperandMIPS32Mem::canHoldOffset(BaseType, NoSignExt, NextOffsetVal)) { |
| + Constant *_4 = Ctx->getConstantInt32(4); |
| + Variable *NewBase = Func->makeVariable(Base->getType()); |
| + lowerArithmetic( |
| + InstArithmetic::create(Func, InstArithmetic::Add, NewBase, Base, _4)); |
| + Base = NewBase; |
| + } else { |
| + Offset = |
| + llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal)); |
| + } |
| + return OperandMIPS32Mem::create(Func, BaseType, Base, Offset, |
| + Mem->getAddrMode()); |
| + } |
| + |
| + if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Operand)) |
| + return VarVecOn32->getContainers()[Index]; |
| + |
| + llvm_unreachable("Unsupported operand type"); |
| + return nullptr; |
| +} |
| + |
| Operand *TargetMIPS32::hiOperand(Operand *Operand) { |
| assert(Operand->getType() == IceType_i64); |
| if (Operand->getType() != IceType_i64) |
| @@ -2005,25 +2184,33 @@ void TargetMIPS32::lowerAssign(const InstAssign *Instr) { |
| _mov(DestLo, T_Lo); |
| _mov(T_Hi, Src0Hi); |
| _mov(DestHi, T_Hi); |
| - } else { |
| - Operand *SrcR; |
| - if (Dest->hasReg()) { |
| - // If Dest already has a physical register, then legalize the Src operand |
| - // into a Variable with the same register assignment. This especially |
| - // helps allow the use of Flex operands. |
| - SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum()); |
| - } else { |
| - // Dest could be a stack operand. Since we could potentially need |
| - // to do a Store (and store can only have Register operands), |
| - // legalize this to a register. |
| - SrcR = legalize(Src0, Legal_Reg); |
| - } |
| - if (isVectorType(Dest->getType())) { |
| - UnimplementedLoweringError(this, Instr); |
| - } else { |
| - _mov(Dest, SrcR); |
| + return; |
| + } |
| + if (isVectorType(Dest->getType())) { |
| + auto *DstVec = llvm::dyn_cast<VariableVecOn32>(Dest); |
| + for (SizeT i = 0; i < DstVec->ElementsPerContainer; ++i) { |
| + auto *DCont = DstVec->getContainers()[i]; |
| + auto *SCont = |
| + legalize(getOperandAtIndex(Src0, IceType_i32, i), Legal_Reg); |
| + auto *TReg = makeReg(IceType_i32); |
| + _mov(TReg, SCont); |
| + _mov(DCont, TReg); |
| } |
| + return; |
| } |
| + Operand *SrcR; |
| + if (Dest->hasReg()) { |
| + // If Dest already has a physical register, then legalize the Src operand |
| + // into a Variable with the same register assignment. This especially |
| + // helps allow the use of Flex operands. |
| + SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum()); |
| + } else { |
| + // Dest could be a stack operand. Since we could potentially need |
| + // to do a Store (and store can only have Register operands), |
| + // legalize this to a register. |
| + SrcR = legalize(Src0, Legal_Reg); |
| + } |
| + _mov(Dest, SrcR); |
| } |
| void TargetMIPS32::lowerBr(const InstBr *Instr) { |
| @@ -2112,6 +2299,7 @@ void TargetMIPS32::lowerBr(const InstBr *Instr) { |
| } |
| void TargetMIPS32::lowerCall(const InstCall *Instr) { |
| + CfgVector<Variable *> RegArgs; |
| NeedsStackAlignment = true; |
| // Assign arguments to registers and stack. Also reserve stack. |
| @@ -2127,6 +2315,22 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
| // Classify each argument operand according to the location where the |
| // argument is passed. |
| + // v4f32 is returned through stack. $4 is setup by the caller and passed as |
| + // first argument implicitly. Callee then copies the return vector at $4. |
| + SizeT ArgNum = 0; |
| + Variable *Dest = Instr->getDest(); |
| + Variable *RetVecFloat = nullptr; |
| + if (Dest && isVectorFloatingType(Dest->getType())) { |
| + ArgNum = 1; |
| + CC.discardReg(RegMIPS32::Reg_A0); |
| + RetVecFloat = Func->makeVariable(IceType_i32); |
| + auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, 16); |
| + constexpr SizeT Alignment = 4; |
| + lowerAlloca(InstAlloca::create(Func, RetVecFloat, ByteCount, Alignment)); |
| + RegArgs.emplace_back( |
| + legalizeToReg(RetVecFloat, RegNumT::fixme(RegMIPS32::Reg_A0))); |
| + } |
| + |
| for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
| Operand *Arg = legalizeUndef(Instr->getArg(i)); |
| const Type Ty = Arg->getType(); |
| @@ -2136,14 +2340,52 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
| InReg = CC.argInReg(Ty, i, &Reg); |
| if (!InReg) { |
| - ParameterAreaSizeBytes = |
| - applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); |
| - StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); |
| - ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty); |
| + if (isVectorType(Ty)) { |
| + auto *ArgVec = llvm::cast<VariableVecOn32>(Arg); |
| + for (Variable *Elem : ArgVec->getContainers()) { |
| + ParameterAreaSizeBytes = |
| + applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i32); |
| + StackArgs.push_back(std::make_pair(Elem, ParameterAreaSizeBytes)); |
| + ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32); |
| + } |
| + } else { |
| + ParameterAreaSizeBytes = |
| + applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); |
| + StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); |
| + ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty); |
| + } |
| + ++ArgNum; |
| continue; |
| } |
| - if (Ty == IceType_i64) { |
| + if (isVectorType(Ty)) { |
| + auto *ArgVec = llvm::cast<VariableVecOn32>(Arg); |
| + Operand *Elem0 = ArgVec->getContainers()[0]; |
| + Operand *Elem1 = ArgVec->getContainers()[1]; |
| + GPRArgs.push_back( |
| + std::make_pair(Elem0, RegNumT::fixme((unsigned)Reg + 0))); |
| + GPRArgs.push_back( |
| + std::make_pair(Elem1, RegNumT::fixme((unsigned)Reg + 1))); |
| + Operand *Elem2 = ArgVec->getContainers()[2]; |
| + Operand *Elem3 = ArgVec->getContainers()[3]; |
| + // First argument is passed in $4:$5:$6:$7 |
| + // Second and rest arguments are passed in $6:$7:stack:stack |
| + if (ArgNum == 0) { |
| + GPRArgs.push_back( |
| + std::make_pair(Elem2, RegNumT::fixme((unsigned)Reg + 2))); |
| + GPRArgs.push_back( |
| + std::make_pair(Elem3, RegNumT::fixme((unsigned)Reg + 3))); |
| + } else { |
| + ParameterAreaSizeBytes = |
| + applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i32); |
| + StackArgs.push_back(std::make_pair(Elem2, ParameterAreaSizeBytes)); |
| + ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32); |
| + ParameterAreaSizeBytes = |
| + applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i32); |
| + StackArgs.push_back(std::make_pair(Elem3, ParameterAreaSizeBytes)); |
| + ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32); |
| + } |
| + } else if (Ty == IceType_i64) { |
| Operand *Lo = loOperand(Arg); |
| Operand *Hi = hiOperand(Arg); |
| GPRArgs.push_back( |
| @@ -2155,6 +2397,7 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
| } else { |
| FPArgs.push_back(std::make_pair(Arg, Reg)); |
| } |
| + ArgNum++; |
|
Jim Stichnoth
2016/10/03 14:52:22
++ArgNum
|
| } |
| // Adjust the parameter area so that the stack is aligned. It is assumed that |
| @@ -2183,7 +2426,7 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
| // Generate the call instruction. Assign its result to a temporary with high |
| // register allocation weight. |
| - Variable *Dest = Instr->getDest(); |
| + |
| // ReturnReg doubles as ReturnRegLo as necessary. |
| Variable *ReturnReg = nullptr; |
| Variable *ReturnRegHi = nullptr; |
| @@ -2215,10 +2458,19 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
| case IceType_v16i1: |
| case IceType_v16i8: |
| case IceType_v8i16: |
| - case IceType_v4i32: |
| + case IceType_v4i32: { |
| + ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0); |
| + auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg); |
| + RetVec->initVecElement(Func); |
| + for (SizeT i = 0; i < RetVec->ElementsPerContainer; ++i) { |
| + auto *Var = RetVec->getContainers()[i]; |
| + Var->setRegNum(RegNumT::fixme(RegMIPS32::Reg_V0 + i)); |
| + } |
| + break; |
| + } |
| case IceType_v4f32: |
| - UnimplementedLoweringError(this, Instr); |
| - return; |
| + ReturnReg = makeReg(IceType_i32, RegMIPS32::Reg_V0); |
| + break; |
| } |
| } |
| Operand *CallTarget = Instr->getCallTarget(); |
| @@ -2230,7 +2482,6 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
| } |
| // Copy arguments to be passed in registers to the appropriate registers. |
| - CfgVector<Variable *> RegArgs; |
| for (auto &FPArg : FPArgs) { |
| RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second)); |
| } |
| @@ -2251,7 +2502,16 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
| if (VariableAllocaUsed) |
| _addiu(SP, SP, -MaxOutArgsSizeBytes); |
| - Inst *NewCall = InstMIPS32Call::create(Func, ReturnReg, CallTarget); |
| + Inst *NewCall; |
| + |
| + // We don't need to define the return register if it is a vector. |
| + // We have inserted fake defs of it just after the call. |
| + if (ReturnReg && isVectorIntegerType(ReturnReg->getType())) { |
| + Variable *RetReg = nullptr; |
| + NewCall = InstMIPS32Call::create(Func, RetReg, CallTarget); |
| + } else { |
| + NewCall = InstMIPS32Call::create(Func, ReturnReg, CallTarget); |
| + } |
| Context.insert(NewCall); |
| if (VariableAllocaUsed) |
| @@ -2263,18 +2523,49 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
| if (ReturnRegHi) |
| Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| + |
| + if (ReturnReg) { |
| + if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) { |
| + for (Variable *Var : RetVec->getContainers()) { |
| + Context.insert(InstFakeDef::create(Func, Var)); |
| + } |
| + } |
| + } |
| + |
| // Insert a register-kill pseudo instruction. |
| Context.insert(InstFakeKill::create(Func, NewCall)); |
| + |
| // Generate a FakeUse to keep the call live if necessary. |
| if (Instr->hasSideEffects() && ReturnReg) { |
| - Context.insert<InstFakeUse>(ReturnReg); |
| + if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) { |
| + for (Variable *Var : RetVec->getContainers()) { |
| + Context.insert<InstFakeUse>(Var); |
| + } |
| + } else { |
| + Context.insert<InstFakeUse>(ReturnReg); |
| + } |
| } |
| + |
| if (Dest == nullptr) |
| return; |
| // Assign the result of the call to Dest. |
| if (ReturnReg) { |
| - if (ReturnRegHi) { |
| + if (RetVecFloat) { |
| + auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest); |
| + for (SizeT i = 0; i < DestVecOn32->ElementsPerContainer; ++i) { |
| + auto *Var = DestVecOn32->getContainers()[i]; |
| + OperandMIPS32Mem *Mem = OperandMIPS32Mem::create( |
| + Func, IceType_i32, RetVecFloat, |
| + llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4))); |
| + _lw(Var, Mem); |
| + } |
| + } else if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) { |
| + auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest); |
| + for (SizeT i = 0; i < DestVecOn32->ElementsPerContainer; ++i) { |
| + _mov(DestVecOn32->getContainers()[i], RetVec->getContainers()[i]); |
| + } |
| + } else if (ReturnRegHi) { |
| assert(Dest->getType() == IceType_i64); |
| auto *Dest64On32 = llvm::cast<Variable64On32>(Dest); |
| Variable *DestLo = Dest64On32->getLo(); |
| @@ -2286,12 +2577,7 @@ void TargetMIPS32::lowerCall(const InstCall *Instr) { |
| Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || |
| isScalarFloatingType(Dest->getType()) || |
| isVectorType(Dest->getType())); |
| - if (isVectorType(Dest->getType())) { |
| - UnimplementedLoweringError(this, Instr); |
| - return; |
| - } else { |
| - _mov(Dest, ReturnReg); |
| - } |
| + _mov(Dest, ReturnReg); |
| } |
| } |
| } |
| @@ -2453,7 +2739,65 @@ void TargetMIPS32::lowerCast(const InstCast *Instr) { |
| } |
| void TargetMIPS32::lowerExtractElement(const InstExtractElement *Instr) { |
| - UnimplementedLoweringError(this, Instr); |
| + Variable *Dest = Instr->getDest(); |
| + const Type DestTy = Dest->getType(); |
| + Operand *Src1 = Instr->getSrc(1); |
| + if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| + const uint32_t Index = Imm->getValue(); |
| + Variable *TDest = makeReg(DestTy); |
| + Variable *TReg = makeReg(DestTy); |
| + auto *Src0 = legalizeUndef(Instr->getSrc(0)); |
| + auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0); |
| + // Number of elements in each container |
| + uint32_t ElemPerCont = |
| + typeNumElements(Src0->getType()) / Src0R->ElementsPerContainer; |
| + auto *SrcE = Src0R->getContainers()[Index / ElemPerCont]; |
| + // Position of the element in the container |
| + uint32_t PosInCont = Index % ElemPerCont; |
| + if (ElemPerCont == 1) { |
| + _mov(TDest, SrcE); |
| + } else if (ElemPerCont == 2) { |
| + switch (PosInCont) { |
| + case 0: |
| + _andi(TDest, SrcE, 0xffff); |
| + break; |
| + case 1: |
| + _srl(TDest, SrcE, 16); |
| + break; |
| + default: |
| + llvm::report_fatal_error("ExtractElement: Invalid PosInCont"); |
| + break; |
| + } |
| + } else if (ElemPerCont == 4) { |
| + switch (PosInCont) { |
| + case 0: |
| + _andi(TDest, SrcE, 0xff); |
| + break; |
| + case 1: |
| + _srl(TReg, SrcE, 8); |
| + _andi(TDest, TReg, 0xff); |
| + break; |
| + case 2: |
| + _srl(TReg, SrcE, 16); |
| + _andi(TDest, TReg, 0xff); |
| + break; |
| + case 3: |
| + _srl(TDest, SrcE, 24); |
| + break; |
| + default: |
| + llvm::report_fatal_error("ExtractElement: Invalid PosInCont"); |
| + break; |
| + } |
| + } |
| + if (typeElementType(Src0R->getType()) == IceType_i1) { |
| + _andi(TReg, TDest, 0x1); |
| + _mov(Dest, TReg); |
| + } else { |
| + _mov(Dest, TDest); |
| + } |
| + return; |
| + } |
| + llvm::report_fatal_error("ExtractElement requires a constant index"); |
| } |
| void TargetMIPS32::lowerFcmp(const InstFcmp *Instr) { |
| @@ -2765,7 +3109,111 @@ void TargetMIPS32::lowerIcmp(const InstIcmp *Instr) { |
| } |
| void TargetMIPS32::lowerInsertElement(const InstInsertElement *Instr) { |
| - UnimplementedLoweringError(this, Instr); |
| + Variable *Dest = Instr->getDest(); |
| + const Type DestTy = Dest->getType(); |
| + Operand *Src2 = Instr->getSrc(2); |
| + if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) { |
| + const uint32_t Index = Imm->getValue(); |
| + // Vector to insert in |
| + auto *Src0 = Instr->getSrc(0); |
| + auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0); |
| + // Number of elements in each container |
| + uint32_t ElemPerCont = |
| + typeNumElements(Src0->getType()) / Src0R->ElementsPerContainer; |
| + // Source Element |
| + auto *SrcE = Src0R->getContainers()[Index / ElemPerCont]; |
| + Context.insert<InstFakeDef>(SrcE); |
| + // Dest is a vector |
| + auto *VDest = llvm::dyn_cast<VariableVecOn32>(Dest); |
| + VDest->initVecElement(Func); |
| + // Temp vector variable |
| + auto *TDest = makeReg(DestTy); |
| + auto *TVDest = llvm::dyn_cast<VariableVecOn32>(TDest); |
| + TVDest->initVecElement(Func); |
| + // Destination element |
| + auto *DstE = TVDest->getContainers()[Index / ElemPerCont]; |
| + // Element to insert |
| + auto *Src1R = legalizeToReg(Instr->getSrc(1)); |
| + auto *TReg1 = makeReg(Src1R->getType()); |
| + auto *TReg2 = makeReg(Src1R->getType()); |
| + auto *TReg3 = makeReg(Src1R->getType()); |
| + auto *TReg4 = makeReg(Src1R->getType()); |
| + auto *TReg5 = makeReg(Src1R->getType()); |
| + // Position of the element in the container |
| + uint32_t PosInCont = Index % ElemPerCont; |
| + // Load source vector in a temporary vector |
| + for (SizeT i = 0; i < TVDest->ElementsPerContainer; ++i) { |
| + auto *DCont = TVDest->getContainers()[i]; |
| + // Do not define DstE as we are going to redefine it |
| + if (DCont == DstE) |
| + continue; |
| + auto *SCont = Src0R->getContainers()[i]; |
| + auto *TReg = makeReg(IceType_i32); |
| + _mov(TReg, SCont); |
| + _mov(DCont, TReg); |
| + } |
| + // Insert the element |
| + if (ElemPerCont == 1) { |
| + _mov(DstE, Src1R); |
| + } else if (ElemPerCont == 2) { |
| + switch (PosInCont) { |
| + case 0: |
| + _andi(TReg1, Src1R, 0xffff); // Clear upper 16-bits of source |
| + _srl(TReg2, SrcE, 16); |
| + _sll(TReg3, TReg2, 16); // Clear lower 16-bits of element |
| + _or(DstE, TReg1, TReg3); |
| + break; |
| + case 1: |
| + _sll(TReg1, Src1R, 16); // Clear lower 16-bits of source |
| + _sll(TReg2, SrcE, 16); |
| + _srl(TReg3, TReg2, 16); // Clear upper 16-bits of element |
| + _or(DstE, TReg1, TReg3); |
| + break; |
| + default: |
| + llvm::report_fatal_error("InsertElement: Invalid PosInCont"); |
| + break; |
| + } |
| + } else if (ElemPerCont == 4) { |
| + switch (PosInCont) { |
| + case 0: |
| + _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source |
| + _srl(TReg2, SrcE, 8); |
| + _sll(TReg3, TReg2, 8); // Clear bits[7:0] of element |
| + _or(DstE, TReg1, TReg3); |
| + break; |
| + case 1: |
| + _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source |
| + _sll(TReg5, TReg1, 8); // Position in the destination |
| + _lui(TReg2, Ctx->getConstantInt32(0xffff)); |
| + _ori(TReg3, TReg2, 0x00ff); |
| + _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element |
| + _or(DstE, TReg5, TReg4); |
| + break; |
| + case 2: |
| + _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source |
| + _sll(TReg5, TReg1, 16); // Position in the destination |
| + _lui(TReg2, Ctx->getConstantInt32(0xff00)); |
| + _ori(TReg3, TReg2, 0xffff); |
| + _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element |
| + _or(DstE, TReg5, TReg4); |
| + break; |
| + case 3: |
| + _srl(TReg1, Src1R, 24); // Position in the destination |
| + _sll(TReg2, SrcE, 8); |
| + _srl(TReg3, TReg2, 8); // Clear bits[31:24] of element |
| + _or(DstE, TReg1, TReg3); |
| + break; |
| + default: |
| + llvm::report_fatal_error("InsertElement: Invalid PosInCont"); |
| + break; |
| + } |
| + } |
| + // Write back temporary vector to the destination |
| + auto *Assign = InstAssign::create(Func, Dest, TDest); |
| + lowerAssign(Assign); |
| + return; |
| + } |
| + llvm::report_fatal_error("InsertElement requires a constant index"); |
| } |
| void TargetMIPS32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| @@ -3201,8 +3649,48 @@ void TargetMIPS32::lowerRet(const InstRet *Instr) { |
| Context.insert<InstFakeUse>(R1); |
| break; |
| } |
| + case IceType_v4i1: |
| + case IceType_v8i1: |
| + case IceType_v16i1: |
| + case IceType_v16i8: |
| + case IceType_v8i16: |
| + case IceType_v4i32: { |
| + auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(Src0); |
| + Variable *V0 = |
| + legalizeToReg(SrcVec->getContainers()[0], RegMIPS32::Reg_V0); |
| + Variable *V1 = |
| + legalizeToReg(SrcVec->getContainers()[1], RegMIPS32::Reg_V1); |
| + Variable *A0 = |
| + legalizeToReg(SrcVec->getContainers()[2], RegMIPS32::Reg_A0); |
| + Variable *A1 = |
| + legalizeToReg(SrcVec->getContainers()[3], RegMIPS32::Reg_A1); |
| + Reg = V0; |
| + Context.insert<InstFakeUse>(V1); |
| + Context.insert<InstFakeUse>(A0); |
| + Context.insert<InstFakeUse>(A1); |
| + break; |
| + } |
| + case IceType_v4f32: { |
| + auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(Src0); |
| + Reg = getImplicitRet(); |
| + auto *RegT = legalizeToReg(Reg); |
| + // Return the vector through buffer in implicit argument a0 |
| + for (SizeT i = 0; i < SrcVec->ElementsPerContainer; ++i) { |
| + OperandMIPS32Mem *Mem = OperandMIPS32Mem::create( |
| + Func, IceType_f32, RegT, |
| + llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4))); |
| + Variable *Var = legalizeToReg(SrcVec->getContainers()[i]); |
| + _sw(Var, Mem); |
| + } |
| + Variable *V0 = makeReg(IceType_i32, RegMIPS32::Reg_V0); |
| + _mov(V0, Reg); // move v0,a0 |
| + Context.insert<InstFakeUse>(Reg); |
| + Context.insert<InstFakeUse>(V0); |
| + break; |
| + } |
| default: |
| - UnimplementedLoweringError(this, Instr); |
| + llvm::report_fatal_error("Ret: Invalid type."); |
| + break; |
| } |
| } |
| _ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg); |
| @@ -3265,6 +3753,14 @@ void TargetMIPS32::lowerStore(const InstStore *Instr) { |
| Variable *ValueLo = legalizeToReg(loOperand(Value)); |
| _sw(ValueHi, llvm::cast<OperandMIPS32Mem>(hiOperand(NewAddr))); |
| _sw(ValueLo, llvm::cast<OperandMIPS32Mem>(loOperand(NewAddr))); |
| + } else if (isVectorType(Value->getType())) { |
| + auto *DataVec = llvm::dyn_cast<VariableVecOn32>(Value); |
| + for (SizeT i = 0; i < DataVec->ElementsPerContainer; ++i) { |
| + auto *DCont = legalizeToReg(DataVec->getContainers()[i]); |
| + auto *MCont = llvm::cast<OperandMIPS32Mem>( |
| + getOperandAtIndex(NewAddr, IceType_i32, i)); |
| + _sw(DCont, MCont); |
| + } |
| } else { |
| Variable *ValueR = legalizeToReg(Value); |
| _sw(ValueR, NewAddr); |
| @@ -3496,7 +3992,7 @@ Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) { |
| Type Ty = Src->getType(); |
| Variable *Reg = makeReg(Ty, RegNum); |
| if (isVectorType(Ty)) { |
| - UnimplementedError(getFlags()); |
| + llvm::report_fatal_error("Invalid copy from vector type."); |
| } else { |
| if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Src)) { |
| _lw(Reg, Mem); |
| @@ -3568,6 +4064,11 @@ Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed, |
| } |
| if (llvm::isa<Constant>(From)) { |
| + if (llvm::isa<ConstantUndef>(From)) { |
| + From = legalizeUndef(From, RegNum); |
| + if (isVectorType(Ty)) |
| + return From; |
| + } |
| if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { |
| (void)C; |
| // TODO(reed kotler): complete this case for proper implementation |
| @@ -3576,23 +4077,15 @@ Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed, |
| return Reg; |
| } else if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { |
| const uint32_t Value = C32->getValue(); |
| - // Check if the immediate will fit in a Flexible second operand, |
| - // if a Flexible second operand is allowed. We need to know the exact |
| - // value, so that rules out relocatable constants. |
| - // Also try the inverse and use MVN if possible. |
| - // Do a movw/movt to a register. |
| - Variable *Reg; |
| - if (RegNum.hasValue()) |
| - Reg = getPhysicalRegister(RegNum); |
| - else |
| - Reg = makeReg(Ty, RegNum); |
| + // Use addiu if the immediate is a 16bit value. Otherwise load it |
| + // using a lui-ori instructions. |
| + Variable *Reg = makeReg(Ty, RegNum); |
| if (isInt<16>(int32_t(Value))) { |
| Variable *Zero = getPhysicalRegister(RegMIPS32::Reg_ZERO, Ty); |
| Context.insert<InstFakeDef>(Zero); |
| _addiu(Reg, Zero, Value); |
| } else { |
| uint32_t UpperBits = (Value >> 16) & 0xFFFF; |
| - (void)UpperBits; |
| uint32_t LowerBits = Value & 0xFFFF; |
| Variable *TReg = makeReg(Ty, RegNum); |
| if (LowerBits) { |