Index: src/IceTargetLoweringX86BaseImpl.h
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 90df3deea326ed2cb31f11e4bf940fac6d4f35d3..c392e038f46139516278d2ca09164b1863b81b92 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -1096,6 +1096,67 @@ void TargetX86Base<TraitsType>::lowerAlloca(const InstAlloca *Inst) {
   }
 }
 
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerArguments() {
+  VarList &Args = Func->getArgs();
+  unsigned NumXmmArgs = 0;
+  bool XmmSlotsRemain = true;
+  unsigned NumGprArgs = 0;
+  bool GprSlotsRemain = true;
+
+  Context.init(Func->getEntryNode());
+  Context.setInsertPoint(Context.getCur());
+
+  for (SizeT i = 0, End = Args.size();
+       i < End && (XmmSlotsRemain || GprSlotsRemain); ++i) {
+    Variable *Arg = Args[i];
+    Type Ty = Arg->getType();
+    Variable *RegisterArg = nullptr;
+    int32_t RegNum = Variable::NoRegister;
+    if (isVectorType(Ty)) {
+      RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs);
+      if (RegNum == Variable::NoRegister) {
+        XmmSlotsRemain = false;
+        continue;
+      }
+      ++NumXmmArgs;
+      RegisterArg = Func->makeVariable(Ty);
+    } else if (isScalarFloatingType(Ty)) {
+      if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
+        continue;
+      }
+      RegNum = Traits::getRegisterForXmmArgNum(NumXmmArgs);
+      if (RegNum == Variable::NoRegister) {
+        XmmSlotsRemain = false;
+        continue;
+      }
+      ++NumXmmArgs;
+      RegisterArg = Func->makeVariable(Ty);
+    } else if (isScalarIntegerType(Ty)) {
+      RegNum = Traits::getRegisterForGprArgNum(Ty, NumGprArgs);
+      if (RegNum == Variable::NoRegister) {
+        GprSlotsRemain = false;
+        continue;
+      }
+      ++NumGprArgs;
+      RegisterArg = Func->makeVariable(Ty);
+    }
+    assert(RegNum != Variable::NoRegister);
+    assert(RegisterArg != nullptr);
+    // Replace Arg in the argument list with the home register. Then generate
+    // an instruction in the prolog to copy the home register to the assigned
+    // location of Arg.
+    if (BuildDefs::dump())
+      RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
+    RegisterArg->setRegNum(RegNum);
+    RegisterArg->setIsArg();
+    Arg->setIsArg(false);
+
+    Args[i] = RegisterArg;
+    Context.insert<InstAssign>(Arg, RegisterArg);
+  }
+}
+
 /// Strength-reduce scalar integer multiplication by a constant (for i32 or
 /// narrower) for certain constants. The lea instruction can be used to multiply
 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of
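
The hunk above implements the standard argument-homing scheme: each incoming argument either claims the next free slot in its register class or keeps its stack home once that class is exhausted, and the prolog then copies each home register into the Variable the rest of the function uses. A standalone C++ sketch of that classification loop (without the SlotsRemain early-exit bookkeeping), using illustrative register tables rather than Subzero's Traits:

#include <cstdio>
#include <string>
#include <vector>

// Each argument claims the next free slot in its register class; once a
// class runs out, its arguments stay in their stack homes. The register
// names and slot counts here are illustrative, not Subzero's tables.
enum class ArgKind { Gpr, Xmm };
struct Arg { ArgKind Kind; std::string Name; };

int main() {
  const std::vector<std::string> GprRegs = {"rdi", "rsi", "rdx",
                                            "rcx", "r8",  "r9"};
  const std::vector<std::string> XmmRegs = {"xmm0", "xmm1", "xmm2", "xmm3"};
  const std::vector<Arg> Args = {{ArgKind::Gpr, "n"},
                                 {ArgKind::Xmm, "v0"},
                                 {ArgKind::Gpr, "flags"}};
  size_t NumGprArgs = 0, NumXmmArgs = 0;
  for (const Arg &A : Args) {
    if (A.Kind == ArgKind::Gpr && NumGprArgs < GprRegs.size())
      std::printf("%-5s -> home register %s\n", A.Name.c_str(),
                  GprRegs[NumGprArgs++].c_str());
    else if (A.Kind == ArgKind::Xmm && NumXmmArgs < XmmRegs.size())
      std::printf("%-5s -> home register %s\n", A.Name.c_str(),
                  XmmRegs[NumXmmArgs++].c_str());
    else
      std::printf("%-5s -> stays on the stack\n", A.Name.c_str());
  }
  return 0;
}

Running it prints one line per argument, showing which home register (if any) the prolog copy would read from.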
@@ -2028,6 +2089,214 @@ void TargetX86Base<TraitsType>::lowerBr(const InstBr *Br) {
   _br(Traits::Cond::Br_ne, Br->getTargetTrue(), Br->getTargetFalse());
 }
 
+namespace {
+// constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
+// OperandList in lowerCall. std::max() would be the natural choice, but it is
+// not constexpr until C++14.
+constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; }
+//
+} // end of anonymous namespace
+
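
The hand-rolled constexprMax exists because std::max only became constexpr in C++14, while this code (presumably built as C++11) needs the result in a template argument: llvm::SmallVector's inline capacity must be a constant expression. A minimal reproduction of the constraint:

#include <cstddef>

// Usable in constant expressions under C++11, where std::max is not yet
// constexpr.
constexpr std::size_t constexprMax(std::size_t S0, std::size_t S1) {
  return S0 < S1 ? S1 : S0;
}

// The inline capacity of llvm::SmallVector is a template argument, so it
// must be a constant expression; a plain array makes the same point:
int Buffer[constexprMax(4, 6)];
static_assert(sizeof(Buffer) == 6 * sizeof(int), "capacity is max(4, 6)");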
+template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerCall(const InstCall *Instr) {
+  // Common x86 calling convention lowering:
+  //
+  // * At the point before the call, the stack must be aligned to 16 bytes.
+  //
+  // * Non-register arguments are pushed onto the stack in right-to-left order,
+  // such that the left-most argument ends up on the top of the stack at the
+  // lowest memory address.
+  //
+  // * Stack arguments of vector type are aligned to start at the next highest
+  // multiple of 16 bytes. Other stack arguments are aligned to the next word
+  // size boundary (4 bytes on x86-32, 8 bytes on x86-64).
+  NeedsStackAlignment = true;
+
+  using OperandList =
+      llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
+                                                Traits::X86_MAX_GPR_ARGS)>;
+  OperandList XmmArgs;
+  CfgVector<std::pair<const Type, Operand *>> GprArgs;
+  OperandList StackArgs, StackArgLocations;
+  int32_t ParameterAreaSizeBytes = 0;
Jim Stichnoth, 2016/01/19 20:54:06: Make this uint32_t instead.
sehr, 2016/01/19 21:47:35: Done.
+
+  // Classify each argument operand according to the location where the
+  // argument is passed.

Jim Stichnoth, 2016/01/19 20:54:05: reflow to 80-col
sehr, 2016/01/19 21:47:35: Done.
+  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
+    Operand *Arg = Instr->getArg(i);
+    Type Ty = Arg->getType();

Jim Stichnoth, 2016/01/19 20:54:06: const
sehr, 2016/01/19 21:47:35: Done.
+    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
+    assert(typeWidthInBytes(Ty) >= 4);
+    if (isVectorType(Ty) && (Traits::getRegisterForXmmArgNum(XmmArgs.size()) !=
+                             Variable::NoRegister)) {
+      XmmArgs.push_back(Arg);
+    } else if (isScalarFloatingType(Ty) && Traits::X86_PASS_SCALAR_FP_IN_XMM &&
+               (Traits::getRegisterForXmmArgNum(XmmArgs.size()) !=
+                Variable::NoRegister)) {
+      XmmArgs.push_back(Arg);
+    } else if (isScalarIntegerType(Ty) &&
+               (Traits::getRegisterForGprArgNum(Ty, GprArgs.size()) !=
+                Variable::NoRegister)) {
+      GprArgs.emplace_back(Ty, Arg);
+    } else {
+      // Place on stack.
+      StackArgs.push_back(Arg);
+      if (isVectorType(Arg->getType())) {
+        ParameterAreaSizeBytes =
+            Traits::applyStackAlignment(ParameterAreaSizeBytes);
+      }
+      Variable *esp = getPhysicalRegister(getStackReg(), Traits::WordType);
+      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
+      StackArgLocations.push_back(
+          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
+      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
+    }
+  }
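
The loop's stack bookkeeping can be checked by hand. Below is a sketch of the offsets for a hypothetical x86-32 call f(i32, <4 x i32>, f64) whose arguments all land on the stack; alignUp is a stand-in for what an applyStackAlignment-style helper computes, rounding up to a power-of-two multiple:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Round Size up to the next multiple of Align (a power of two).
constexpr uint32_t alignUp(uint32_t Size, uint32_t Align) {
  return (Size + Align - 1) & ~(Align - 1);
}

int main() {
  // Hypothetical x86-32 call f(i32, <4 x i32>, f64), all on the stack.
  // Offsets are esp-relative at the call site.
  uint32_t Offset = 0;
  std::printf("i32      at esp+%u\n", Offset); // esp+0
  Offset += 4;                                 // word-sized slot
  Offset = alignUp(Offset, 16);                // vectors start 16-aligned
  std::printf("<4 x i32> at esp+%u\n", Offset); // esp+16
  Offset += 16;
  std::printf("f64      at esp+%u\n", Offset); // esp+32
  Offset += 8;
  // Finally the whole area is padded so the stack stays 16-byte aligned.
  assert(alignUp(Offset, 16) == 48);
  return 0;
}

The final round-up to 48 corresponds to the applyStackAlignment call a few lines below, which keeps the stack 16-byte aligned at the call itself.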
+  // Ensure there is enough space for the fstp/movs for floating returns.
+  Variable *Dest = Instr->getDest();

Jim Stichnoth, 2016/01/19 20:54:06: Add something like: const Type DestTy = Dest ? D
sehr, 2016/01/19 21:47:35: Done.
+  if (Traits::X86_PASS_SCALAR_FP_IN_XMM) {
+    if (Dest != nullptr && isScalarFloatingType(Dest->getType())) {
+      ParameterAreaSizeBytes =
+          std::max(static_cast<size_t>(ParameterAreaSizeBytes),
+                   typeWidthInBytesOnStack(Dest->getType()));
+    }
+  }
+  // Adjust the parameter area so that the stack is aligned. It is assumed that
+  // the stack is already aligned at the start of the calling sequence.
+  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
+  assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <=
+         maxOutArgsSizeBytes());

Jim Stichnoth, 2016/01/19 20:54:06: Remove this static_cast, assuming ParameterAreaSiz
sehr, 2016/01/19 21:47:35: Done.
+  // Copy arguments that are passed on the stack to the appropriate stack
+  // locations.
+  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {

Jim Stichnoth, 2016/01/19 20:54:06: Use NumStackArgs instead of "e", for consistency w
sehr, 2016/01/19 21:47:35: Done.
+    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
+  }
+  // Copy arguments to be passed in registers to the appropriate registers.
+  // TODO: Investigate the impact of lowering arguments passed in registers
+  // after lowering stack arguments as opposed to the other way around.
+  // Lowering register arguments after stack arguments may reduce register
+  // pressure. On the other hand, lowering register arguments first (before
+  // stack arguments) may result in more compact code, as the memory operand
+  // displacements may end up being smaller before any stack adjustment is
+  // done.

Jim Stichnoth, 2016/01/19 20:54:06: I think this TODO should just be removed entirely.
sehr, 2016/01/19 21:47:35: Done.
+  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
+    Variable *Reg =
+        legalizeToReg(XmmArgs[i], Traits::getRegisterForXmmArgNum(i));
+    // Generate a FakeUse of register arguments so that they do not get dead
+    // code eliminated as a result of the FakeKill of scratch registers after
+    // the call.
+    Context.insert<InstFakeUse>(Reg);
+  }
+  // Materialize moves for arguments passed in GPRs.
+  for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
+    const Type SignatureTy = GprArgs[i].first;
+    Operand *Arg = GprArgs[i].second;
+    Variable *Reg =
+        legalizeToReg(Arg, Traits::getRegisterForGprArgNum(Arg->getType(), i));
+    assert(SignatureTy == IceType_i64 || SignatureTy == IceType_i32);
+    assert(SignatureTy == Arg->getType());
+    (void)SignatureTy;
+    Context.insert<InstFakeUse>(Reg);
+  }
+  // Generate the call instruction. Assign its result to a temporary with high
+  // register allocation weight.
+  // ReturnReg doubles as ReturnRegLo as necessary.
+  Variable *ReturnReg = nullptr;
+  Variable *ReturnRegHi = nullptr;
+  if (Dest) {
+    const Type DestTy = Dest->getType();
+    switch (DestTy) {
+    case IceType_NUM:
+    case IceType_void:
+    case IceType_i1:
+    case IceType_i8:
+    case IceType_i16:
+      llvm::report_fatal_error("Invalid Call dest type");
+      break;
+    case IceType_i32:
+      ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_eax);
+      break;
+    case IceType_i64:
+      if (Traits::Is64Bit) {
+        ReturnReg = makeReg(
+            IceType_i64,
+            Traits::getGprForType(IceType_i64, Traits::RegisterSet::Reg_eax));
+      } else {
+        ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
+        ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+      }
+      break;
+    case IceType_f32:
+    case IceType_f64:
+      if (!Traits::X86_PASS_SCALAR_FP_IN_XMM) {
+        // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
+        // the fstp instruction.
+        break;
+      }
+      // Fallthrough intended.
+    case IceType_v4i1:
+    case IceType_v8i1:
+    case IceType_v16i1:
+    case IceType_v16i8:
+    case IceType_v8i16:
+    case IceType_v4i32:
+    case IceType_v4f32:
+      ReturnReg = makeReg(DestTy, Traits::RegisterSet::Reg_xmm0);
+      break;
+    }
+  }
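
The switch encodes the usual x86 return conventions: i32 in eax; i64 in rax on x86-64 but split across edx:eax on x86-32; vectors in xmm0; and scalar FP either in xmm0 or in the x87 register st(0), depending on X86_PASS_SCALAR_FP_IN_XMM. A compact standalone restatement (illustrative, not Subzero code):

#include <cstdio>
#include <string>

// Map a return type to its register, following the switch above.
// x86-32 is shown; on x86-64, i64 also comes back in rax and scalar
// FP always in xmm0.
std::string returnLocation(const std::string &Ty, bool FpInXmm) {
  if (Ty == "i32")
    return "eax";
  if (Ty == "i64")
    return "edx:eax";                  // pair on 32-bit targets
  if (Ty == "f32" || Ty == "f64")
    return FpInXmm ? "xmm0" : "st(0)"; // st(0) is captured via fstp
  return "xmm0";                       // vector types
}

int main() {
  std::printf("f64 -> %s\n", returnLocation("f64", /*FpInXmm=*/false).c_str());
  return 0;
}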
+  // Emit the call to the function.
+  Operand *CallTarget =
+      legalize(Instr->getCallTarget(), Legal_Reg | Legal_Imm | Legal_AddrAbs);
+  Inst *NewCall = emitCallToTarget(CallTarget, ReturnReg);
+  // Keep the upper return register live on 32-bit platforms.
+  if (ReturnRegHi)
+    Context.insert<InstFakeDef>(ReturnRegHi);
+  // Mark the call as killing all the callee-saves registers.

Jim Stichnoth, 2016/01/19 20:54:06: caller-save
sehr, 2016/01/19 21:47:35: Duh. Fixed.
+  Context.insert<InstFakeKill>(NewCall);
+  // Handle x86-32 floating point returns.
+  if (Dest != nullptr && isScalarFloatingType(Dest->getType()) &&
+      !Traits::X86_PASS_SCALAR_FP_IN_XMM) {
+    // Special treatment for an FP function which returns its result in st(0).
+    // If Dest ends up being a physical xmm register, the fstp emit code will
+    // route st(0) through the space reserved in the function argument area
+    // we allocated.
+    _fstp(Dest);
+    // Create a fake use of Dest in case it actually isn't used, because st(0)
+    // still needs to be popped.
+    Context.insert<InstFakeUse>(Dest);
+  }
+  // Generate a FakeUse to keep the call live if necessary.
+  if (Instr->hasSideEffects() && ReturnReg) {
+    Context.insert<InstFakeUse>(ReturnReg);
+  }
+  // Process the return value, if any.
+  if (!Dest)
+    return;

Jim Stichnoth, 2016/01/19 20:54:06: Dest == nullptr
sehr, 2016/01/19 21:47:36: Done.
+  // Assign the result of the call to Dest.
+  const Type DestTy = Dest->getType();
+  if (isVectorType(DestTy)) {
+    assert(ReturnReg && "Vector type requires a return register");
+    _movp(Dest, ReturnReg);
+  } else if (isScalarFloatingType(DestTy)) {
+    if (Traits::X86_PASS_SCALAR_FP_IN_XMM) {
+      assert(ReturnReg && "FP type requires a return register");
+      _mov(Dest, ReturnReg);
+    }
+  } else {
+    assert(isScalarIntegerType(DestTy));
+    assert(ReturnReg && "Integer type requires a return register");
+    if (DestTy == IceType_i64 && !Traits::Is64Bit) {
+      assert(ReturnRegHi && "64-bit type requires two return registers");
+      auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
+      Variable *DestLo = Dest64On32->getLo();
+      Variable *DestHi = Dest64On32->getHi();
+      _mov(DestLo, ReturnReg);
+      _mov(DestHi, ReturnRegHi);
+    } else {
+      _mov(Dest, ReturnReg);
+    }
+  }
+}
+
 template <typename TraitsType>
 void TargetX86Base<TraitsType>::lowerCast(const InstCast *Inst) {
   // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
@@ -4821,6 +5090,25 @@ void TargetX86Base<TraitsType>::lowerPhi(const InstPhi * /*Inst*/) {
 }
 
 template <typename TraitsType>
+void TargetX86Base<TraitsType>::lowerRet(const InstRet *Inst) {
+  Variable *Reg = nullptr;
+  if (Inst->hasRetValue()) {
+    Operand *RetValue = legalize(Inst->getRetValue());
+    const Type ReturnType = RetValue->getType();
+    assert(isVectorType(ReturnType) || isScalarFloatingType(ReturnType) ||
+           (ReturnType == IceType_i32) || (ReturnType == IceType_i64));
+    Reg = moveReturnValueToRegister(RetValue, ReturnType);
+  }
+  // Add a ret instruction even if sandboxing is enabled, because addEpilog
+  // explicitly looks for a ret instruction as a marker for where to insert the
+  // frame removal instructions.
+  _ret(Reg);
+  // Add a fake use of esp to make sure esp stays alive for the entire
+  // function. Otherwise post-call esp adjustments get dead-code eliminated.
+  keepEspLiveAtExit();
+}
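
The closing comment is about liveness-driven dead-code elimination: an instruction survives only if something reads what it defines, so a post-call "add esp, N" that merely defines esp would be deleted unless the ret carries a synthetic reader of esp. A toy liveness check (illustrative only, not Subzero's pass) makes the effect visible:

#include <cstdio>
#include <vector>

// An instruction is kept if it defines nothing, or if some later
// instruction reads what it defines.
struct Inst {
  const char *Text;
  int Def;               // virtual register defined (-1: none)
  std::vector<int> Uses; // virtual registers read
};

int main() {
  enum { Esp = 0, Eax = 1 };
  const std::vector<Inst> Body = {
      {"add esp, 32 ; pop call args", Esp, {Esp}},
      {"fake_use esp                ", -1, {Esp}}, // keepEspLiveAtExit()
      {"ret eax                     ", -1, {Eax}},
  };
  for (size_t i = 0; i < Body.size(); ++i) {
    bool Live = Body[i].Def < 0; // instructions with no def always stay
    for (size_t j = i + 1; j < Body.size() && !Live; ++j)
      for (int U : Body[j].Uses)
        if (U == Body[i].Def)
          Live = true;
    std::printf("%s %s\n", Body[i].Text, Live ? "(kept)" : "(eliminated)");
  }
  return 0;
}

Dropping the fake_use line leaves the esp adjustment with no later reader, and the toy pass marks it eliminated, which is exactly what keepEspLiveAtExit() prevents.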
+
+template <typename TraitsType>
 void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) {
   Variable *Dest = Select->getDest();