src/IceTargetLoweringX8632.cpp - Issue 1616483003: Merged addProlog and addEpilog on x86.

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 1616483003: Merged addProlog and addEpilog on x86. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/IceTargetLoweringX8632.cpp

diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp

index c154901f9ce6deded22d4001ad139172810b5160..e9164f624504a741752a2dd22d1150debc44ff7f 100644

--- a/src/IceTargetLoweringX8632.cpp

+++ b/src/IceTargetLoweringX8632.cpp

@@ -141,6 +141,67 @@ void TargetX8632::_sub_sp(Operand *Adjustment) {

_sub(esp, Adjustment);

}

+void TargetX8632::_link_bp() {

+ Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);

+ Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);

+ _push(ebp);

+ _mov(ebp, esp);

+ // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode).

+ Context.insert<InstFakeUse>(ebp);

+void TargetX8632::_unlink_bp() {

+ Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);

+ Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);

+ // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake

+ // use of esp before the assignment of esp=ebp keeps previous esp

+ // adjustments from being dead-code eliminated.

+ Context.insert<InstFakeUse>(esp);

+ _mov(esp, ebp);

+ _pop(ebp);

+void TargetX8632::_push_reg(Variable *Reg) { _push(Reg); }

+void TargetX8632::emitGetIP(CfgNode *Node) {

+ // If there is a non-deleted InstX86GetIP instruction, we need to move it to

+ // the point after the stack frame has stabilized but before

+ // register-allocated in-args are copied into their home registers. It would

+ // be slightly faster to search for the GetIP instruction before other prolog

+ // instructions are inserted, but it's more clear to do the whole

+ // transformation in a single place.

+ Traits::Insts::GetIP *GetIPInst = nullptr;

+ if (Ctx->getFlags().getUseNonsfi()) {

+ for (Inst &Instr : Node->getInsts()) {

+ if (auto *GetIP = llvm::dyn_cast<Traits::Insts::GetIP>(&Instr)) {

+ if (!Instr.isDeleted())

+ GetIPInst = GetIP;

+ break;

+ }

+ // Delete any existing InstX86GetIP instruction and reinsert it here. Also,

+ // insert the call to the helper function and the spill to the stack, to

+ // simplify emission.

+ if (GetIPInst) {

+ GetIPInst->setDeleted();

+ Variable *Dest = GetIPInst->getDest();

+ Variable *CallDest =

+ Dest->hasReg() ? Dest

+ : getPhysicalRegister(Traits::RegisterSet::Reg_eax);

+ // Call the getIP_<reg> helper.

+ IceString RegName = Traits::getRegName(CallDest->getRegNum());

+ Constant *CallTarget = Ctx->getConstantExternSym(H_getIP_prefix + RegName);

+ Context.insert<Traits::Insts::Call>(CallDest, CallTarget);

+ // Insert a new version of InstX86GetIP.

+ Context.insert<Traits::Insts::GetIP>(CallDest);

+ // Spill the register to its home stack location if necessary.

+ if (!Dest->hasReg()) {

+ _mov(Dest, CallDest);

+ }

void TargetX8632::lowerIndirectJump(Variable *JumpTarget) {

AutoBundle _(this);

@@ -195,352 +256,7 @@ Variable *TargetX8632::moveReturnValueToRegister(Operand *Value,

}

-void TargetX8632::addProlog(CfgNode *Node) {

- // Stack frame layout:

- //

- // +------------------------+

- // | 1. return address |

- // +------------------------+

- // | 2. preserved registers |

- // +------------------------+

- // | 3. padding |

- // +------------------------+

- // | 4. global spill area |

- // +------------------------+

- // | 5. padding |

- // +------------------------+

- // | 6. local spill area |

- // +------------------------+

- // | 7. padding |

- // +------------------------+

- // | 8. allocas |

- // +------------------------+

- // | 9. padding |

- // +------------------------+

- // | 10. out args |

- // +------------------------+ <--- StackPointer

- //

- // The following variables record the size in bytes of the given areas:

- // * X86_RET_IP_SIZE_BYTES: area 1

- // * PreservedRegsSizeBytes: area 2

- // * SpillAreaPaddingBytes: area 3

- // * GlobalsSize: area 4

- // * GlobalsAndSubsequentPaddingSize: areas 4 - 5

- // * LocalsSpillAreaSize: area 6

- // * SpillAreaSizeBytes: areas 3 - 10

- // * maxOutArgsSizeBytes(): area 10

- // Determine stack frame offsets for each Variable without a register

- // assignment. This can be done as one variable per stack slot. Or, do

- // coalescing by running the register allocator again with an infinite set of

- // registers (as a side effect, this gives variables a second chance at

- // physical register assignment).

- //

- // A middle ground approach is to leverage sparsity and allocate one block of

- // space on the frame for globals (variables with multi-block lifetime), and

- // one block to share for locals (single-block lifetime).

- Context.init(Node);

- Context.setInsertPoint(Context.getCur());

- llvm::SmallBitVector CalleeSaves =

- getRegisterSet(RegSet_CalleeSave, RegSet_None);

- RegsUsed = llvm::SmallBitVector(CalleeSaves.size());

- VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;

- size_t GlobalsSize = 0;

- // If there is a separate locals area, this represents that area. Otherwise

- // it counts any variable not counted by GlobalsSize.

- SpillAreaSizeBytes = 0;

- // If there is a separate locals area, this specifies the alignment for it.

- uint32_t LocalsSlotsAlignmentBytes = 0;

- // The entire spill locations area gets aligned to largest natural alignment

- // of the variables that have a spill slot.

- uint32_t SpillAreaAlignmentBytes = 0;

- // A spill slot linked to a variable with a stack slot should reuse that

- // stack slot.

- std::function<bool(Variable *)> TargetVarHook =

- [&VariablesLinkedToSpillSlots](Variable *Var) {

- if (auto *SpillVar =

- llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {

- assert(Var->mustNotHaveReg());

- if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {

- VariablesLinkedToSpillSlots.push_back(Var);

- return true;

- }

- return false;

- };

- // Compute the list of spilled variables and bounds for GlobalsSize, etc.

- getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,

- &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,

- &LocalsSlotsAlignmentBytes, TargetVarHook);

- uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;

- SpillAreaSizeBytes += GlobalsSize;

- // Add push instructions for preserved registers.

- uint32_t NumCallee = 0;

- size_t PreservedRegsSizeBytes = 0;

- llvm::SmallBitVector Pushed(CalleeSaves.size());

- for (SizeT i = 0; i < CalleeSaves.size(); ++i) {

- SizeT Canonical = Traits::getBaseReg(i);

- if (CalleeSaves[i] && RegsUsed[i]) {

- Pushed[Canonical] = true;

- }

- for (SizeT i = 0; i < Pushed.size(); ++i) {

- if (Pushed[i]) {

- ++NumCallee;

- PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);

- _push(getPhysicalRegister(i));

- }

- Ctx->statsUpdateRegistersSaved(NumCallee);

- // Generate "push ebp; mov ebp, esp"

- if (IsEbpBasedFrame) {

- assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))

- .count() == 0);

- PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);

- Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);

- Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);

- _push(ebp);

- _mov(ebp, esp);

- // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode).

- Context.insert<InstFakeUse>(ebp);

- }

- // Align the variables area. SpillAreaPaddingBytes is the size of the region

- // after the preserved registers and before the spill areas.

- // LocalsSlotsPaddingBytes is the amount of padding between the globals and

- // locals area if they are separate.

- assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);

- assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);

- uint32_t SpillAreaPaddingBytes = 0;

- uint32_t LocalsSlotsPaddingBytes = 0;

- alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,

- SpillAreaAlignmentBytes, GlobalsSize,

- LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,

- &LocalsSlotsPaddingBytes);

- SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;

- uint32_t GlobalsAndSubsequentPaddingSize =

- GlobalsSize + LocalsSlotsPaddingBytes;

- // Functions returning scalar floating point types may need to convert values

- // from an in-register xmm value to the top of the x87 floating point stack.

- // This is done by a movp[sd] and an fld[sd]. Ensure there is enough scratch

- // space on the stack for this.

- const Type ReturnType = Func->getReturnType();

- if (isScalarFloatingType(ReturnType)) {

- // Avoid misaligned double-precicion load/store.

- NeedsStackAlignment = true;

- SpillAreaSizeBytes =

- std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes);

- }

- // Align esp if necessary.

- if (NeedsStackAlignment) {

- uint32_t StackOffset =

- Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;

- uint32_t StackSize =

- Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);

- StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes());

- SpillAreaSizeBytes = StackSize - StackOffset;

- } else {

- SpillAreaSizeBytes += maxOutArgsSizeBytes();

- }

- // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the

- // fixed allocations in the prolog.

- if (PrologEmitsFixedAllocas)

- SpillAreaSizeBytes += FixedAllocaSizeBytes;

- if (SpillAreaSizeBytes) {

- // Generate "sub esp, SpillAreaSizeBytes"

- _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),

- Ctx->getConstantInt32(SpillAreaSizeBytes));

- // If the fixed allocas are aligned more than the stack frame, align the

- // stack pointer accordingly.

- if (PrologEmitsFixedAllocas &&

- FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) {

- assert(IsEbpBasedFrame);

- _and(getPhysicalRegister(Traits::RegisterSet::Reg_esp),

- Ctx->getConstantInt32(-FixedAllocaAlignBytes));

- }

- // Account for known-frame-offset alloca instructions that were not already

- // combined into the prolog.

- if (!PrologEmitsFixedAllocas)

- SpillAreaSizeBytes += FixedAllocaSizeBytes;

- Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

- // Fill in stack offsets for stack args, and copy args into registers for

- // those that were register-allocated. Args are pushed right to left, so

- // Arg[0] is closest to the stack/frame pointer.

- Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());

- size_t BasicFrameOffset =

- PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;

- if (!IsEbpBasedFrame)

- BasicFrameOffset += SpillAreaSizeBytes;

- // If there is a non-deleted InstX86GetIP instruction, we need to move it to

- // the point after the stack frame has stabilized but before

- // register-allocated in-args are copied into their home registers. It would

- // be slightly faster to search for the GetIP instruction before other prolog

- // instructions are inserted, but it's more clear to do the whole

- // transformation in a single place.

- Traits::Insts::GetIP *GetIPInst = nullptr;

- if (Ctx->getFlags().getUseNonsfi()) {

- for (Inst &Instr : Node->getInsts()) {

- if (auto *GetIP = llvm::dyn_cast<Traits::Insts::GetIP>(&Instr)) {

- if (!Instr.isDeleted())

- GetIPInst = GetIP;

- break;

- }

- // Delete any existing InstX86GetIP instruction and reinsert it here. Also,

- // insert the call to the helper function and the spill to the stack, to

- // simplify emission.

- if (GetIPInst) {

- GetIPInst->setDeleted();

- Variable *Dest = GetIPInst->getDest();

- Variable *CallDest =

- Dest->hasReg() ? Dest

- : getPhysicalRegister(Traits::RegisterSet::Reg_eax);

- // Call the getIP_<reg> helper.

- IceString RegName = Traits::getRegName(CallDest->getRegNum());

- Constant *CallTarget = Ctx->getConstantExternSym(H_getIP_prefix + RegName);

- Context.insert<Traits::Insts::Call>(CallDest, CallTarget);

- // Insert a new version of InstX86GetIP.

- Context.insert<Traits::Insts::GetIP>(CallDest);

- // Spill the register to its home stack location if necessary.

- if (!Dest->hasReg()) {

- _mov(Dest, CallDest);

- }

- const VarList &Args = Func->getArgs();

- size_t InArgsSizeBytes = 0;

- unsigned NumXmmArgs = 0;

- for (Variable *Arg : Args) {

- // Skip arguments passed in registers.

- if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {

- ++NumXmmArgs;

- continue;

- }

- // For esp-based frames where the allocas are done outside the prolog, the

- // esp value may not stabilize to its home value until after all the

- // fixed-size alloca instructions have executed. In this case, a stack

- // adjustment is needed when accessing in-args in order to copy them into

- // registers.

- size_t StackAdjBytes = 0;

- if (!IsEbpBasedFrame && !PrologEmitsFixedAllocas)

- StackAdjBytes -= FixedAllocaSizeBytes;

- finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes,

- InArgsSizeBytes);

- }

- // Fill in stack offsets for locals.

- assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,

- SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,

- IsEbpBasedFrame);

- // Assign stack offsets to variables that have been linked to spilled

- // variables.

- for (Variable *Var : VariablesLinkedToSpillSlots) {

- Variable *Linked =

- (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();

- Var->setStackOffset(Linked->getStackOffset());

- }

- this->HasComputedFrame = true;

- if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {

- OstreamLocker L(Func->getContext());

- Ostream &Str = Func->getContext()->getStrDump();

- Str << "Stack layout:\n";

- uint32_t EspAdjustmentPaddingSize =

- SpillAreaSizeBytes - LocalsSpillAreaSize -

- GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -

- maxOutArgsSizeBytes();

- Str << " in-args = " << InArgsSizeBytes << " bytes\n"

- << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"

- << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"

- << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"

- << " globals spill area = " << GlobalsSize << " bytes\n"

- << " globals-locals spill areas intermediate padding = "

- << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"

- << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"

- << " esp alignment padding = " << EspAdjustmentPaddingSize

- << " bytes\n";

- Str << "Stack details:\n"

- << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"

- << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"

- << " outgoing args size = " << maxOutArgsSizeBytes() << " bytes\n"

- << " locals spill area alignment = " << LocalsSlotsAlignmentBytes

- << " bytes\n"

- << " is ebp based = " << IsEbpBasedFrame << "\n";

- }

-void TargetX8632::addEpilog(CfgNode *Node) {

- InstList &Insts = Node->getInsts();

- InstList::reverse_iterator RI, E;

- for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {

- if (llvm::isa<typename Traits::Insts::Ret>(*RI))

- break;

- }

- if (RI == E)

- return;

- // Convert the reverse_iterator position into its corresponding (forward)

- // iterator position.

- InstList::iterator InsertPoint = RI.base();

- --InsertPoint;

- Context.init(Node);

- Context.setInsertPoint(InsertPoint);

- Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);

- if (IsEbpBasedFrame) {

- Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);

- // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake

- // use of esp before the assignment of esp=ebp keeps previous esp

- // adjustments from being dead-code eliminated.

- Context.insert<InstFakeUse>(esp);

- _mov(esp, ebp);

- _pop(ebp);

- } else {

- // add esp, SpillAreaSizeBytes

- if (SpillAreaSizeBytes)

- _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));

- }

- // Add pop instructions for preserved registers.

- llvm::SmallBitVector CalleeSaves =

- getRegisterSet(RegSet_CalleeSave, RegSet_None);

- llvm::SmallBitVector Popped(CalleeSaves.size());

- for (SizeT i = 0; i < CalleeSaves.size(); ++i) {

- SizeT Canonical = Traits::getBaseReg(i);

- if (CalleeSaves[i] && RegsUsed[i]) {

- Popped[Canonical] = true;

- }

- for (SizeT i = 0; i < Popped.size(); ++i) {

- SizeT j = Popped.size() - i - 1;

- SizeT Canonical = Traits::getBaseReg(j);

- if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)

- continue;

- if (Popped[j]) {

- _pop(getPhysicalRegister(Canonical));

- }

- if (!NeedSandboxing) {

- return;

- }

+void TargetX8632::emitSandboxedReturn() {

// Change the original ret instruction into a sandboxed return sequence.

// t:ecx = pop

// bundle_lock

@@ -551,11 +267,6 @@ void TargetX8632::addEpilog(CfgNode *Node) {

Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);

_pop(T_ecx);

lowerIndirectJump(T_ecx);

- if (RI->getSrcSize()) {

- auto *RetValue = llvm::cast<Variable>(RI->getSrc(0));

- Context.insert<InstFakeUse>(RetValue);

- }

- RI->setDeleted();

}

void TargetX8632::emitJumpTable(const Cfg *Func,

« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | src/IceTargetLoweringX8664.h » ('j') | src/IceTargetLoweringX8664.cpp » ('J')