Chromium Code Reviews| Index: src/IceTargetLoweringX86BaseImpl.h |
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h |
| index 2669fcfb837f25e96c8c02ae000fe3548635308c..f8c38b22ab3e205cc00611537545386e396e883e 100644 |
| --- a/src/IceTargetLoweringX86BaseImpl.h |
| +++ b/src/IceTargetLoweringX86BaseImpl.h |
| @@ -874,6 +874,275 @@ TargetX86Base<TraitsType>::stackVarToAsmOperand(const Variable *Var) const { |
| AssemblerFixup::NoFixup); |
| } |
| +template <typename TraitsType> |
| +void TargetX86Base<TraitsType>::addProlog(CfgNode *Node) { |
| + // Stack frame layout: |
| + // |
| + // +------------------------+ |
| + // | 1. return address | |
| + // +------------------------+ |
| + // | 2. preserved registers | |
| + // +------------------------+ |
| + // | 3. padding | |
| + // +------------------------+ |
| + // | 4. global spill area | |
| + // +------------------------+ |
| + // | 5. padding | |
| + // +------------------------+ |
| + // | 6. local spill area | |
| + // +------------------------+ |
| + // | 7. padding | |
| + // +------------------------+ |
| + // | 8. allocas | |
| + // +------------------------+ |
| + // | 9. padding | |
| + // +------------------------+ |
| + // | 10. out args | |
| + // +------------------------+ <--- StackPointer |
| + // |
| + // The following variables record the size in bytes of the given areas: |
| + // * X86_RET_IP_SIZE_BYTES: area 1 |
| + // * PreservedRegsSizeBytes: area 2 |
| + // * SpillAreaPaddingBytes: area 3 |
| + // * GlobalsSize: area 4 |
| + // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 |
| + // * LocalsSpillAreaSize: area 6 |
| + // * SpillAreaSizeBytes: areas 3 - 10 |
| + // * maxOutArgsSizeBytes(): area 10 |
| + |
| + // Determine stack frame offsets for each Variable without a register |
| + // assignment. This can be done as one variable per stack slot. Or, do |
| + // coalescing by running the register allocator again with an infinite set of |
| + // registers (as a side effect, this gives variables a second chance at |
| + // physical register assignment). |
| + // |
| + // A middle ground approach is to leverage sparsity and allocate one block of |
| + // space on the frame for globals (variables with multi-block lifetime), and |
| + // one block to share for locals (single-block lifetime). |
| + |
| + Context.init(Node); |
| + Context.setInsertPoint(Context.getCur()); |
| + |
| + llvm::SmallBitVector CalleeSaves = |
| + getRegisterSet(RegSet_CalleeSave, RegSet_None); |
| + RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); |
| + VarList SortedSpilledVariables, VariablesLinkedToSpillSlots; |
| + size_t GlobalsSize = 0; |
| + // If there is a separate locals area, this represents that area. Otherwise |
| + // it counts any variable not counted by GlobalsSize. |
| + SpillAreaSizeBytes = 0; |
| + // If there is a separate locals area, this specifies the alignment for it. |
| + uint32_t LocalsSlotsAlignmentBytes = 0; |
| + // The entire spill locations area gets aligned to largest natural alignment |
| + // of the variables that have a spill slot. |
| + uint32_t SpillAreaAlignmentBytes = 0; |
| + // A spill slot linked to a variable with a stack slot should reuse that |
| + // stack slot. |
| + std::function<bool(Variable *)> TargetVarHook = |
| + [&VariablesLinkedToSpillSlots](Variable *Var) { |
| + if (auto *SpillVar = |
| + llvm::dyn_cast<typename Traits::SpillVariable>(Var)) { |
| + assert(Var->mustNotHaveReg()); |
| + if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { |
| + VariablesLinkedToSpillSlots.push_back(Var); |
| + return true; |
| + } |
| + } |
| + return false; |
| + }; |
| + |
| + // Compute the list of spilled variables and bounds for GlobalsSize, etc. |
| + getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, |
| + &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, |
| + &LocalsSlotsAlignmentBytes, TargetVarHook); |
| + uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
| + SpillAreaSizeBytes += GlobalsSize; |
| + |
| + // Add push instructions for preserved registers. |
| + uint32_t NumCallee = 0; |
| + size_t PreservedRegsSizeBytes = 0; |
| + llvm::SmallBitVector Pushed(CalleeSaves.size()); |
| + for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| + const int32_t Canonical = Traits::getBaseReg(i); |
| + assert(Canonical == Traits::getBaseReg(Canonical)); |
| + if (CalleeSaves[i] && RegsUsed[i]) { |
| + Pushed[Canonical] = true; |
| + } |
| + } |
| + for (SizeT i = 0; i < Pushed.size(); ++i) { |
| + if (!Pushed[i]) |
| + continue; |
| + assert(static_cast<int32_t>(i) == Traits::getBaseReg(i)); |
| + ++NumCallee; |
| + PreservedRegsSizeBytes += typeWidthInBytes(Traits::WordType); |
| + _push_reg(getPhysicalRegister(i, Traits::WordType)); |
| + } |
| + Ctx->statsUpdateRegistersSaved(NumCallee); |
| + |
| + // Generate "push frameptr; mov frameptr, stackptr" |
| + if (IsEbpBasedFrame) { |
| + assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) |
| + .count() == 0); |
| + PreservedRegsSizeBytes += typeWidthInBytes(Traits::WordType); |
| + _link_bp(); |
| + } |
| + |
| + // Align the variables area. SpillAreaPaddingBytes is the size of the region |
| + // after the preserved registers and before the spill areas. |
| + // LocalsSlotsPaddingBytes is the amount of padding between the globals and |
| + // locals area if they are separate. |
| + assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES); |
| + assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
| + uint32_t SpillAreaPaddingBytes = 0; |
| + uint32_t LocalsSlotsPaddingBytes = 0; |
| + alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes, |
| + SpillAreaAlignmentBytes, GlobalsSize, |
| + LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes, |
| + &LocalsSlotsPaddingBytes); |
| + SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
| + uint32_t GlobalsAndSubsequentPaddingSize = |
| + GlobalsSize + LocalsSlotsPaddingBytes; |
| + |
| + // Functions returning scalar floating point types may need to convert values |
| + // from an in-register xmm value to the top of the x87 floating point stack. |
| + // This is done by a movp[sd] and an fld[sd]. Ensure there is enough scratch |
| + // space on the stack for this. |
| + const Type ReturnType = Func->getReturnType(); |
| + if (!Traits::X86_PASS_SCALAR_FP_IN_XMM && isScalarFloatingType(ReturnType)) { |
|
John
2016/01/21 15:04:52
optional: I personally prefer
if (!Traits:BLAH) {
sehr
2016/01/21 16:02:10
Kernighan's "Elements of Programming Style" teache
|
| + // Avoid misaligned double-precicion load/store. |
| + NeedsStackAlignment = true; |
| + SpillAreaSizeBytes = |
| + std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes); |
| + } |
| + |
| + // Align esp if necessary. |
| + if (NeedsStackAlignment) { |
| + uint32_t StackOffset = |
| + Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| + uint32_t StackSize = |
| + Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
| + StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes()); |
| + SpillAreaSizeBytes = StackSize - StackOffset; |
| + } else { |
| + SpillAreaSizeBytes += maxOutArgsSizeBytes(); |
| + } |
| + |
| + // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the |
| + // fixed allocations in the prolog. |
| + if (PrologEmitsFixedAllocas) |
| + SpillAreaSizeBytes += FixedAllocaSizeBytes; |
| + if (SpillAreaSizeBytes) { |
| + // Generate "sub stackptr, SpillAreaSizeBytes" |
| + _sub_sp(Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| + // If the fixed allocas are aligned more than the stack frame, align the |
| + // stack pointer accordingly. |
| + if (PrologEmitsFixedAllocas && |
| + FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) { |
| + assert(IsEbpBasedFrame); |
| + _and(getPhysicalRegister(getStackReg(), Traits::WordType), |
| + Ctx->getConstantInt32(-FixedAllocaAlignBytes)); |
| + } |
| + } |
| + |
| + // Account for known-frame-offset alloca instructions that were not already |
| + // combined into the prolog. |
| + if (!PrologEmitsFixedAllocas) |
| + SpillAreaSizeBytes += FixedAllocaSizeBytes; |
| + |
| + Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
| + |
| + // Fill in stack offsets for stack args, and copy args into registers for |
| + // those that were register-allocated. Args are pushed right to left, so |
| + // Arg[0] is closest to the stack/frame pointer. |
| + Variable *FramePtr = |
| + getPhysicalRegister(getFrameOrStackReg(), Traits::WordType); |
| + size_t BasicFrameOffset = |
| + PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; |
| + if (!IsEbpBasedFrame) |
| + BasicFrameOffset += SpillAreaSizeBytes; |
| + |
| + emitGetIP(Node); |
| + |
| + const VarList &Args = Func->getArgs(); |
| + size_t InArgsSizeBytes = 0; |
| + unsigned NumXmmArgs = 0; |
| + unsigned NumGPRArgs = 0; |
| + for (Variable *Arg : Args) { |
| + // Skip arguments passed in registers. |
| + if (isVectorType(Arg->getType())) { |
| + if (Traits::getRegisterForXmmArgNum(NumXmmArgs) != Variable::NoRegister) { |
| + ++NumXmmArgs; |
| + continue; |
| + } |
| + } else if (isScalarFloatingType(Arg->getType())) { |
| + if (Traits::X86_PASS_SCALAR_FP_IN_XMM && |
| + Traits::getRegisterForXmmArgNum(NumXmmArgs) != Variable::NoRegister) { |
| + ++NumXmmArgs; |
| + continue; |
| + } |
| + } else { |
| + assert(isScalarIntegerType(Arg->getType())); |
| + if (Traits::getRegisterForGprArgNum(Traits::WordType, NumGPRArgs) != |
| + Variable::NoRegister) { |
| + ++NumGPRArgs; |
| + continue; |
| + } |
| + } |
| + // For esp-based frames where the allocas are done outside the prolog, the |
| + // esp value may not stabilize to its home value until after all the |
| + // fixed-size alloca instructions have executed. In this case, a stack |
| + // adjustment is needed when accessing in-args in order to copy them into |
| + // registers. |
| + size_t StackAdjBytes = 0; |
| + if (!IsEbpBasedFrame && !PrologEmitsFixedAllocas) |
| + StackAdjBytes -= FixedAllocaSizeBytes; |
| + finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes, |
| + InArgsSizeBytes); |
| + } |
| + |
| + // Fill in stack offsets for locals. |
| + assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, |
| + SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, |
| + IsEbpBasedFrame); |
| + // Assign stack offsets to variables that have been linked to spilled |
| + // variables. |
| + for (Variable *Var : VariablesLinkedToSpillSlots) { |
| + Variable *Linked = |
| + (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo(); |
| + Var->setStackOffset(Linked->getStackOffset()); |
| + } |
| + this->HasComputedFrame = true; |
| + |
| + if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { |
| + OstreamLocker L(Func->getContext()); |
| + Ostream &Str = Func->getContext()->getStrDump(); |
| + |
| + Str << "Stack layout:\n"; |
| + uint32_t EspAdjustmentPaddingSize = |
| + SpillAreaSizeBytes - LocalsSpillAreaSize - |
| + GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes - |
| + maxOutArgsSizeBytes(); |
| + Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
| + << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n" |
| + << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" |
| + << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" |
| + << " globals spill area = " << GlobalsSize << " bytes\n" |
| + << " globals-locals spill areas intermediate padding = " |
| + << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" |
| + << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" |
| + << " esp alignment padding = " << EspAdjustmentPaddingSize |
| + << " bytes\n"; |
| + |
| + Str << "Stack details:\n" |
| + << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" |
| + << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" |
| + << " outgoing args size = " << maxOutArgsSizeBytes() << " bytes\n" |
| + << " locals spill area alignment = " << LocalsSlotsAlignmentBytes |
| + << " bytes\n" |
| + << " is ebp based = " << IsEbpBasedFrame << "\n"; |
| + } |
| +} |
| + |
| /// Helper function for addProlog(). |
| /// |
| /// This assumes Arg is an argument passed on the stack. This sets the frame |
| @@ -920,6 +1189,63 @@ void TargetX86Base<TraitsType>::finishArgumentLowering( |
| } |
| } |
| +template <typename TraitsType> |
| +void TargetX86Base<TraitsType>::addEpilog(CfgNode *Node) { |
| + InstList &Insts = Node->getInsts(); |
| + InstList::reverse_iterator RI, E; |
| + for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { |
| + if (llvm::isa<typename Traits::Insts::Ret>(*RI)) |
| + break; |
| + } |
| + if (RI == E) |
| + return; |
| + |
| + // Convert the reverse_iterator position into its corresponding (forward) |
| + // iterator position. |
| + InstList::iterator InsertPoint = RI.base(); |
| + --InsertPoint; |
| + Context.init(Node); |
| + Context.setInsertPoint(InsertPoint); |
| + |
| + if (IsEbpBasedFrame) { |
| + _unlink_bp(); |
| + } else { |
| + // add stackptr, SpillAreaSizeBytes |
| + if (SpillAreaSizeBytes != 0) { |
| + _add_sp(Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| + } |
| + } |
| + |
| + // Add pop instructions for preserved registers. |
| + llvm::SmallBitVector CalleeSaves = |
| + getRegisterSet(RegSet_CalleeSave, RegSet_None); |
| + llvm::SmallBitVector Popped(CalleeSaves.size()); |
| + for (int32_t i = CalleeSaves.size() - 1; i >= 0; --i) { |
| + if (static_cast<SizeT>(i) == getFrameReg() && IsEbpBasedFrame) |
| + continue; |
| + const SizeT Canonical = Traits::getBaseReg(i); |
| + if (CalleeSaves[i] && RegsUsed[i]) { |
| + Popped[Canonical] = true; |
| + } |
| + } |
| + for (int32_t i = Popped.size() - 1; i >= 0; --i) { |
| + if (!Popped[i]) |
| + continue; |
| + assert(i == Traits::getBaseReg(i)); |
| + _pop(getPhysicalRegister(i, Traits::WordType)); |
| + } |
| + |
| + if (!NeedSandboxing) { |
| + return; |
| + } |
| + emitSandboxedReturn(); |
| + if (RI->getSrcSize()) { |
| + auto *RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
| + Context.insert<InstFakeUse>(RetValue); |
| + } |
| + RI->setDeleted(); |
| +} |
| + |
| template <typename TraitsType> Type TargetX86Base<TraitsType>::stackSlotType() { |
| return Traits::WordType; |
| } |