| Index: src/IceTargetLoweringX8632.cpp
|
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
|
| index 8adfab218b9955444b5f9ef855e71c02d260da47..6724a6189f31e75cd09283710f7c14d26addd1ab 100644
|
| --- a/src/IceTargetLoweringX8632.cpp
|
| +++ b/src/IceTargetLoweringX8632.cpp
|
| @@ -89,6 +89,563 @@ const char *MachineTraits<TargetX8632>::TargetName = "X8632";
|
|
|
| } // end of namespace X86Internal
|
|
|
| +//------------------------------------------------------------------------------
|
| +// __ ______ __ __ ______ ______ __ __ __ ______
|
| +// /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
|
| +// \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
|
| +// \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
|
| +// \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +void TargetX8632::lowerCall(const InstCall *Instr) {
|
| + // x86-32 calling convention:
|
| + //
|
| + // * At the point before the call, the stack must be aligned to 16
|
| + // bytes.
|
| + //
|
| + // * The first four arguments of vector type, regardless of their
|
| + // position relative to the other arguments in the argument list, are
|
| + // placed in registers xmm0 - xmm3.
|
| + //
|
| + // * Other arguments are pushed onto the stack in right-to-left order,
|
| + // such that the left-most argument ends up on the top of the stack at
|
| + // the lowest memory address.
|
| + //
|
| + // * Stack arguments of vector type are aligned to start at the next
|
| + // highest multiple of 16 bytes. Other stack arguments are aligned to
|
| + // 4 bytes.
|
| + //
|
| + // This is intended to match the section "IA-32 Function Calling
|
| + // Convention" of Apple's "OS X ABI Function Call Guide"; an
|
| + // illustrative example follows.
|
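| + // Illustrative example (hypothetical signature): for a call
|
| + // f(v4f32 a, i32 b, v4f32 c), a is placed in xmm0 and c in xmm1,
|
| + // while b is passed on the stack, since only the vector-typed
|
| + // arguments are assigned xmm registers.
|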
| + NeedsStackAlignment = true;
|
| +
|
| + typedef std::vector<Operand *> OperandList;
|
| + OperandList XmmArgs;
|
| + OperandList StackArgs, StackArgLocations;
|
| + uint32_t ParameterAreaSizeBytes = 0;
|
| +
|
| + // Classify each argument operand according to the location where the
|
| + // argument is passed.
|
| + for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
|
| + Operand *Arg = Instr->getArg(i);
|
| + Type Ty = Arg->getType();
|
| + // The PNaCl ABI requires the width of arguments to be at least 32 bits.
|
| + assert(typeWidthInBytes(Ty) >= 4);
|
| + if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
|
| + XmmArgs.push_back(Arg);
|
| + } else {
|
| + StackArgs.push_back(Arg);
|
| + if (isVectorType(Arg->getType())) {
|
| + ParameterAreaSizeBytes =
|
| + Traits::applyStackAlignment(ParameterAreaSizeBytes);
|
| + }
|
| + Variable *esp =
|
| + Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
|
| + Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
|
| + StackArgLocations.push_back(
|
| + Traits::X86OperandMem::create(Func, Ty, esp, Loc));
|
| + ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
|
| + }
|
| + }
|
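| + // Illustrative trace of the loop above, assuming xmm0-xmm3 are
|
| + // already taken so that a v4i32 argument spills to the stack: an
|
| + // i32 is placed at [esp+0], advancing ParameterAreaSizeBytes to 4;
|
| + // the v4i32 is then aligned up to offset 16, placed at [esp+16],
|
| + // and advances the total to 32 before the final alignment below.
|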
| +
|
| + // Adjust the parameter area so that the stack is aligned. It is
|
| + // assumed that the stack is already aligned at the start of the
|
| + // calling sequence.
|
| + ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
|
| +
|
| + // Subtract the appropriate amount for the argument area. This also
|
| + // takes care of setting the stack adjustment during emission.
|
| + //
|
| + // TODO: If for some reason the call instruction gets dead-code
|
| + // eliminated after lowering, we would need to ensure that the
|
| + // pre-call and post-call esp adjustments get eliminated as well.
|
| + if (ParameterAreaSizeBytes) {
|
| + _adjust_stack(ParameterAreaSizeBytes);
|
| + }
|
| +
|
| + // Copy arguments that are passed on the stack to the appropriate
|
| + // stack locations.
|
| + for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
|
| + lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
|
| + }
|
| +
|
| + // Copy arguments to be passed in registers to the appropriate
|
| + // registers.
|
| + // TODO: Investigate the impact of lowering arguments passed in
|
| + // registers after lowering stack arguments as opposed to the other
|
| + // way around. Lowering register arguments after stack arguments may
|
| + // reduce register pressure. On the other hand, lowering register
|
| + // arguments first (before stack arguments) may result in more compact
|
| + // code, as the memory operand displacements may end up being smaller
|
| + // before any stack adjustment is done.
|
| + for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
|
| + Variable *Reg =
|
| + legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
|
| + // Generate a FakeUse of register arguments so that they do not get
|
| + // dead code eliminated as a result of the FakeKill of scratch
|
| + // registers after the call.
|
| + Context.insert(InstFakeUse::create(Func, Reg));
|
| + }
|
| + // Generate the call instruction. Assign its result to a temporary
|
| + // with high register allocation weight.
|
| + Variable *Dest = Instr->getDest();
|
| + // ReturnReg doubles as ReturnRegLo as necessary.
|
| + Variable *ReturnReg = nullptr;
|
| + Variable *ReturnRegHi = nullptr;
|
| + if (Dest) {
|
| + switch (Dest->getType()) {
|
| + case IceType_NUM:
|
| + case IceType_void:
|
| + llvm::report_fatal_error("Invalid Call dest type");
|
| + break;
|
| + case IceType_i1:
|
| + case IceType_i8:
|
| + case IceType_i16:
|
| + case IceType_i32:
|
| + ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
|
| + break;
|
| + case IceType_i64:
|
| + ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
|
| + ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
|
| + break;
|
| + case IceType_f32:
|
| + case IceType_f64:
|
| + // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
|
| + // the fstp instruction.
|
| + break;
|
| + case IceType_v4i1:
|
| + case IceType_v8i1:
|
| + case IceType_v16i1:
|
| + case IceType_v16i8:
|
| + case IceType_v8i16:
|
| + case IceType_v4i32:
|
| + case IceType_v4f32:
|
| + ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
|
| + break;
|
| + }
|
| + }
|
| + Operand *CallTarget = legalize(Instr->getCallTarget());
|
| + const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
|
| + if (NeedSandboxing) {
|
| + if (llvm::isa<Constant>(CallTarget)) {
|
| + _bundle_lock(InstBundleLock::Opt_AlignToEnd);
|
| + } else {
|
| + Variable *CallTargetVar = nullptr;
|
| + _mov(CallTargetVar, CallTarget);
|
| + _bundle_lock(InstBundleLock::Opt_AlignToEnd);
|
| + const SizeT BundleSize =
|
| + 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
|
| + _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
|
| + CallTarget = CallTargetVar;
|
| + }
|
| + }
|
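| + // For reference, the sandboxed indirect-call sequence built above
|
| + // is roughly the following (assuming 32-byte NaCl bundles):
|
| + //   mov t, <target>
|
| + //   bundle_lock align_to_end
|
| + //   and t, ~31
|
| + //   call t        ; emitted below
|
| + //   bundle_unlock
|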
| + Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
|
| + Context.insert(NewCall);
|
| + if (NeedSandboxing)
|
| + _bundle_unlock();
|
| + if (ReturnRegHi)
|
| + Context.insert(InstFakeDef::create(Func, ReturnRegHi));
|
| +
|
| + // Add the appropriate offset to esp. The call instruction takes care
|
| + // of resetting the stack offset during emission.
|
| + if (ParameterAreaSizeBytes) {
|
| + Variable *esp =
|
| + Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
|
| + _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
|
| + }
|
| +
|
| + // Insert a register-kill pseudo instruction.
|
| + Context.insert(InstFakeKill::create(Func, NewCall));
|
| +
|
| + // Generate a FakeUse to keep the call live if necessary.
|
| + if (Instr->hasSideEffects() && ReturnReg) {
|
| + Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
|
| + Context.insert(FakeUse);
|
| + }
|
| +
|
| + if (!Dest)
|
| + return;
|
| +
|
| + // Assign the result of the call to Dest.
|
| + if (ReturnReg) {
|
| + if (ReturnRegHi) {
|
| + assert(Dest->getType() == IceType_i64);
|
| + split64(Dest);
|
| + Variable *DestLo = Dest->getLo();
|
| + Variable *DestHi = Dest->getHi();
|
| + _mov(DestLo, ReturnReg);
|
| + _mov(DestHi, ReturnRegHi);
|
| + } else {
|
| + assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
|
| + Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
|
| + isVectorType(Dest->getType()));
|
| + if (isVectorType(Dest->getType())) {
|
| + _movp(Dest, ReturnReg);
|
| + } else {
|
| + _mov(Dest, ReturnReg);
|
| + }
|
| + }
|
| + } else if (isScalarFloatingType(Dest->getType())) {
|
| + // Special treatment for an FP function which returns its result in
|
| + // st(0).
|
| + // If Dest ends up being a physical xmm register, the fstp emit code
|
| + // will route st(0) through a temporary stack slot.
|
| + _fstp(Dest);
|
| + // Create a fake use of Dest in case it actually isn't used,
|
| + // because st(0) still needs to be popped.
|
| + Context.insert(InstFakeUse::create(Func, Dest));
|
| + }
|
| +}
|
| +
|
| +void TargetX8632::lowerArguments() {
|
| + VarList &Args = Func->getArgs();
|
| + // The first four arguments of vector type, regardless of their
|
| + // position relative to the other arguments in the argument list, are
|
| + // passed in registers xmm0 - xmm3.
|
| + unsigned NumXmmArgs = 0;
|
| +
|
| + Context.init(Func->getEntryNode());
|
| + Context.setInsertPoint(Context.getCur());
|
| +
|
| + for (SizeT I = 0, E = Args.size();
|
| + I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {
|
| + Variable *Arg = Args[I];
|
| + Type Ty = Arg->getType();
|
| + if (!isVectorType(Ty))
|
| + continue;
|
| + // Replace Arg in the argument list with the home register. Then
|
| + // generate an instruction in the prolog to copy the home register
|
| + // to the assigned location of Arg.
|
| + int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
|
| + ++NumXmmArgs;
|
| + Variable *RegisterArg = Func->makeVariable(Ty);
|
| + if (BuildDefs::dump())
|
| + RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
|
| + RegisterArg->setRegNum(RegNum);
|
| + RegisterArg->setIsArg();
|
| + Arg->setIsArg(false);
|
| +
|
| + Args[I] = RegisterArg;
|
| + Context.insert(InstAssign::create(Func, Arg, RegisterArg));
|
| + }
|
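| + // E.g. (names are illustrative): a first vector argument %a is
|
| + // replaced in the argument list by a variable "home_reg:a" pinned
|
| + // to xmm0, and the prolog gets the copy "a = home_reg:a" so that
|
| + // the original variable is initialized from its home register.
|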
| +}
|
| +
|
| +void TargetX8632::lowerRet(const InstRet *Inst) {
|
| + Variable *Reg = nullptr;
|
| + if (Inst->hasRetValue()) {
|
| + Operand *Src0 = legalize(Inst->getRetValue());
|
| + // TODO(jpp): this is not needed.
|
| + if (Src0->getType() == IceType_i64) {
|
| + Variable *eax =
|
| + legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
|
| + Variable *edx =
|
| + legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
|
| + Reg = eax;
|
| + Context.insert(InstFakeUse::create(Func, edx));
|
| + } else if (isScalarFloatingType(Src0->getType())) {
|
| + _fld(Src0);
|
| + } else if (isVectorType(Src0->getType())) {
|
| + Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
|
| + } else {
|
| + _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
|
| + }
|
| + }
|
| + // Add a ret instruction even if sandboxing is enabled, because
|
| + // addEpilog explicitly looks for a ret instruction as a marker for
|
| + // where to insert the frame removal instructions.
|
| + _ret(Reg);
|
| + // Add a fake use of esp to make sure esp stays alive for the entire
|
| + // function. Otherwise post-call esp adjustments get dead-code
|
| + // eliminated. TODO: Are there more places where the fake use
|
| + // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
|
| + // have a ret instruction.
|
| + Variable *esp =
|
| + Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
|
| + Context.insert(InstFakeUse::create(Func, esp));
|
| +}
|
| +
|
| +void TargetX8632::addProlog(CfgNode *Node) {
|
| + // Stack frame layout:
|
| + //
|
| + // +------------------------+
|
| + // | 1. return address |
|
| + // +------------------------+
|
| + // | 2. preserved registers |
|
| + // +------------------------+
|
| + // | 3. padding |
|
| + // +------------------------+
|
| + // | 4. global spill area |
|
| + // +------------------------+
|
| + // | 5. padding |
|
| + // +------------------------+
|
| + // | 6. local spill area |
|
| + // +------------------------+
|
| + // | 7. padding |
|
| + // +------------------------+
|
| + // | 8. allocas |
|
| + // +------------------------+
|
| + //
|
| + // The following variables record the size in bytes of the given areas:
|
| + // * X86_RET_IP_SIZE_BYTES: area 1
|
| + // * PreservedRegsSizeBytes: area 2
|
| + // * SpillAreaPaddingBytes: area 3
|
| + // * GlobalsSize: area 4
|
| + // * GlobalsAndSubsequentPaddingSize: areas 4 and 5
|
| + // * LocalsSpillAreaSize: area 6
|
| + // * SpillAreaSizeBytes: areas 3 through 7
|
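| + //
|
| + // Worked example (illustrative numbers): with one preserved
|
| + // register, 8 bytes of globals, 12 bytes of locals, and no
|
| + // padding required, X86_RET_IP_SIZE_BYTES = 4,
|
| + // PreservedRegsSizeBytes = 4, GlobalsSize = 8,
|
| + // LocalsSpillAreaSize = 12, and SpillAreaSizeBytes = 20 before
|
| + // esp alignment (see the alignment step below).
|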
| +
|
| + // Determine stack frame offsets for each Variable without a
|
| + // register assignment. This can be done as one variable per stack
|
| + // slot. Or, do coalescing by running the register allocator again
|
| + // with an infinite set of registers (as a side effect, this gives
|
| + // variables a second chance at physical register assignment).
|
| + //
|
| + // A middle ground approach is to leverage sparsity and allocate one
|
| + // block of space on the frame for globals (variables with
|
| + // multi-block lifetime), and one block to share for locals
|
| + // (single-block lifetime).
|
| +
|
| + Context.init(Node);
|
| + Context.setInsertPoint(Context.getCur());
|
| +
|
| + llvm::SmallBitVector CalleeSaves =
|
| + getRegisterSet(RegSet_CalleeSave, RegSet_None);
|
| + RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
|
| + VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
|
| + size_t GlobalsSize = 0;
|
| + // If there is a separate locals area, this represents that area.
|
| + // Otherwise it counts any variable not counted by GlobalsSize.
|
| + SpillAreaSizeBytes = 0;
|
| + // If there is a separate locals area, this specifies the alignment
|
| + // for it.
|
| + uint32_t LocalsSlotsAlignmentBytes = 0;
|
| + // The entire spill-locations area is aligned to the largest
|
| + // natural alignment of the variables that have a spill slot.
|
| + uint32_t SpillAreaAlignmentBytes = 0;
|
| + // A spill slot linked to a variable with a stack slot should reuse
|
| + // that stack slot.
|
| + std::function<bool(Variable *)> TargetVarHook =
|
| + [&VariablesLinkedToSpillSlots](Variable *Var) {
|
| + if (auto *SpillVar =
|
| + llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
|
| + assert(Var->getWeight().isZero());
|
| + if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
|
| + VariablesLinkedToSpillSlots.push_back(Var);
|
| + return true;
|
| + }
|
| + }
|
| + return false;
|
| + };
|
| +
|
| + // Compute the list of spilled variables and bounds for GlobalsSize, etc.
|
| + getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
|
| + &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
|
| + &LocalsSlotsAlignmentBytes, TargetVarHook);
|
| + uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
|
| + SpillAreaSizeBytes += GlobalsSize;
|
| +
|
| + // Add push instructions for preserved registers.
|
| + uint32_t NumCallee = 0;
|
| + size_t PreservedRegsSizeBytes = 0;
|
| + for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
|
| + if (CalleeSaves[i] && RegsUsed[i]) {
|
| + ++NumCallee;
|
| + PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
|
| + _push(getPhysicalRegister(i));
|
| + }
|
| + }
|
| + Ctx->statsUpdateRegistersSaved(NumCallee);
|
| +
|
| + // Generate "push ebp; mov ebp, esp"
|
| + if (IsEbpBasedFrame) {
|
| + assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
|
| + .count() == 0);
|
| + PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
|
| + Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
|
| + Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
|
| + _push(ebp);
|
| + _mov(ebp, esp);
|
| + // Keep ebp live for late-stage liveness analysis
|
| + // (e.g. asm-verbose mode).
|
| + Context.insert(InstFakeUse::create(Func, ebp));
|
| + }
|
| +
|
| + // Align the variables area. SpillAreaPaddingBytes is the size of
|
| + // the region after the preserved registers and before the spill areas.
|
| + // LocalsSlotsPaddingBytes is the amount of padding between the globals
|
| + // and locals area if they are separate.
|
| + assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
|
| + assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
|
| + uint32_t SpillAreaPaddingBytes = 0;
|
| + uint32_t LocalsSlotsPaddingBytes = 0;
|
| + alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
|
| + SpillAreaAlignmentBytes, GlobalsSize,
|
| + LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
|
| + &LocalsSlotsPaddingBytes);
|
| + SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
|
| + uint32_t GlobalsAndSubsequentPaddingSize =
|
| + GlobalsSize + LocalsSlotsPaddingBytes;
|
| +
|
| + // Align esp if necessary.
|
| + if (NeedsStackAlignment) {
|
| + uint32_t StackOffset =
|
| + Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
|
| + uint32_t StackSize =
|
| + Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
|
| + SpillAreaSizeBytes = StackSize - StackOffset;
|
| + }
|
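| + // Continuing the illustrative numbers above: StackOffset = 4 + 4
|
| + // = 8, so StackSize = applyStackAlignment(8 + 20) = 32 and
|
| + // SpillAreaSizeBytes grows to 24, leaving esp 16-byte aligned
|
| + // after the sub below.
|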
| +
|
| + // Generate "sub esp, SpillAreaSizeBytes"
|
| + if (SpillAreaSizeBytes)
|
| + _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
|
| + Ctx->getConstantInt32(SpillAreaSizeBytes));
|
| + Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
|
| +
|
| + resetStackAdjustment();
|
| +
|
| + // Fill in stack offsets for stack args, and copy args into registers
|
| + // for those that were register-allocated. Args are pushed right to
|
| + // left, so Arg[0] is closest to the stack/frame pointer.
|
| + Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
|
| + size_t BasicFrameOffset =
|
| + PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
|
| + if (!IsEbpBasedFrame)
|
| + BasicFrameOffset += SpillAreaSizeBytes;
|
| +
|
| + const VarList &Args = Func->getArgs();
|
| + size_t InArgsSizeBytes = 0;
|
| + unsigned NumXmmArgs = 0;
|
| + for (Variable *Arg : Args) {
|
| + // Skip arguments passed in registers.
|
| + if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
|
| + ++NumXmmArgs;
|
| + continue;
|
| + }
|
| + finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
|
| + }
|
| +
|
| + // Fill in stack offsets for locals.
|
| + assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
|
| + SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
|
| + IsEbpBasedFrame);
|
| + // Assign stack offsets to variables that have been linked to spilled
|
| + // variables.
|
| + for (Variable *Var : VariablesLinkedToSpillSlots) {
|
| + Variable *Linked =
|
| + (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
|
| + Var->setStackOffset(Linked->getStackOffset());
|
| + }
|
| + this->HasComputedFrame = true;
|
| +
|
| + if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
|
| + OstreamLocker L(Func->getContext());
|
| + Ostream &Str = Func->getContext()->getStrDump();
|
| +
|
| + Str << "Stack layout:\n";
|
| + uint32_t EspAdjustmentPaddingSize =
|
| + SpillAreaSizeBytes - LocalsSpillAreaSize -
|
| + GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
|
| + Str << " in-args = " << InArgsSizeBytes << " bytes\n"
|
| + << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
|
| + << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
|
| + << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
|
| + << " globals spill area = " << GlobalsSize << " bytes\n"
|
| + << " globals-locals spill areas intermediate padding = "
|
| + << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
|
| + << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
|
| + << " esp alignment padding = " << EspAdjustmentPaddingSize
|
| + << " bytes\n";
|
| +
|
| + Str << "Stack details:\n"
|
| + << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
|
| + << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
|
| + << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
|
| + << " bytes\n"
|
| + << " is ebp based = " << IsEbpBasedFrame << "\n";
|
| + }
|
| +}
|
| +
|
| +void TargetX8632::addEpilog(CfgNode *Node) {
|
| + InstList &Insts = Node->getInsts();
|
| + InstList::reverse_iterator RI, E;
|
| + for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
|
| + if (llvm::isa<typename Traits::Insts::Ret>(*RI))
|
| + break;
|
| + }
|
| + if (RI == E)
|
| + return;
|
| +
|
| + // Convert the reverse_iterator position into its corresponding
|
| + // (forward) iterator position.
|
| + InstList::iterator InsertPoint = RI.base();
|
| + --InsertPoint;
|
| + Context.init(Node);
|
| + Context.setInsertPoint(InsertPoint);
|
| +
|
| + Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
|
| + if (IsEbpBasedFrame) {
|
| + Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
|
| + // For late-stage liveness analysis (e.g. asm-verbose mode),
|
| + // adding a fake use of esp before the assignment of esp=ebp keeps
|
| + // previous esp adjustments from being dead-code eliminated.
|
| + Context.insert(InstFakeUse::create(Func, esp));
|
| + _mov(esp, ebp);
|
| + _pop(ebp);
|
| + } else {
|
| + // add esp, SpillAreaSizeBytes
|
| + if (SpillAreaSizeBytes)
|
| + _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
|
| + }
|
| +
|
| + // Add pop instructions for preserved registers.
|
| + llvm::SmallBitVector CalleeSaves =
|
| + getRegisterSet(RegSet_CalleeSave, RegSet_None);
|
| + for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
|
| + SizeT j = CalleeSaves.size() - i - 1;
|
| + if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
|
| + continue;
|
| + if (CalleeSaves[j] && RegsUsed[j]) {
|
| + _pop(getPhysicalRegister(j));
|
| + }
|
| + }
|
| +
|
| + if (!Ctx->getFlags().getUseSandboxing())
|
| + return;
|
| + // Change the original ret instruction into a sandboxed return sequence.
|
| + // t:ecx = pop
|
| + // bundle_lock
|
| + // and t, ~31
|
| + // jmp *t
|
| + // bundle_unlock
|
| + // FakeUse <original_ret_operand>
|
| + Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
|
| + _pop(T_ecx);
|
| + lowerIndirectJump(T_ecx);
|
| + if (RI->getSrcSize()) {
|
| + Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
|
| + Context.insert(InstFakeUse::create(Func, RetValue));
|
| + }
|
| + RI->setDeleted();
|
| +}
|
| +
|
| +void TargetX8632::emitJumpTable(const Cfg *Func,
|
| + const InstJumpTable *JumpTable) const {
|
| + if (!BuildDefs::dump())
|
| + return;
|
| + Ostream &Str = Ctx->getStrEmit();
|
| + IceString MangledName = Ctx->mangleName(Func->getFunctionName());
|
| + Str << "\t.section\t.rodata." << MangledName
|
| + << "$jumptable,\"a\",@progbits\n";
|
| + Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
|
| + Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
|
| +
|
| + // On X8632, pointers are 32 bits wide, hence the use of .long.
|
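| + // Illustrative output (label and target names are placeholders;
|
| + // the actual label comes from InstJumpTable::makeName):
|
| + //   .section .rodata.<fn>$jumptable,"a",@progbits
|
| + //   .align 4
|
| + //   <label>:
|
| + //   .long <target0>
|
| + //   .long <target1>
|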
| + for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
|
| + Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
|
| + Str << "\n";
|
| +}
|
| +
|
| TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
|
| : TargetDataLowering(Ctx) {}
|
|
|
| @@ -159,23 +716,6 @@ const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte";
|
| const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";
|
| } // end of anonymous namespace
|
|
|
| -void TargetX8632::emitJumpTable(const Cfg *Func,
|
| - const InstJumpTable *JumpTable) const {
|
| - if (!BuildDefs::dump())
|
| - return;
|
| - Ostream &Str = Ctx->getStrEmit();
|
| - IceString MangledName = Ctx->mangleName(Func->getFunctionName());
|
| - Str << "\t.section\t.rodata." << MangledName
|
| - << "$jumptable,\"a\",@progbits\n";
|
| - Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
|
| - Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
|
| -
|
| - // On X8632 pointers are 32-bit hence the use of .long
|
| - for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
|
| - Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
|
| - Str << "\n";
|
| -}
|
| -
|
| template <typename T>
|
| void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
|
| if (!BuildDefs::dump())
|
| @@ -407,214 +947,4 @@ ICETYPE_TABLE
|
| } // end of namespace dummy3
|
| } // end of anonymous namespace
|
|
|
| -//------------------------------------------------------------------------------
|
| -// __ ______ __ __ ______ ______ __ __ __ ______
|
| -// /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
|
| -// \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
|
| -// \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
|
| -// \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
|
| -//
|
| -//------------------------------------------------------------------------------
|
| -void TargetX8632::lowerCall(const InstCall *Instr) {
|
| - // x86-32 calling convention:
|
| - //
|
| - // * At the point before the call, the stack must be aligned to 16
|
| - // bytes.
|
| - //
|
| - // * The first four arguments of vector type, regardless of their
|
| - // position relative to the other arguments in the argument list, are
|
| - // placed in registers xmm0 - xmm3.
|
| - //
|
| - // * Other arguments are pushed onto the stack in right-to-left order,
|
| - // such that the left-most argument ends up on the top of the stack at
|
| - // the lowest memory address.
|
| - //
|
| - // * Stack arguments of vector type are aligned to start at the next
|
| - // highest multiple of 16 bytes. Other stack arguments are aligned to
|
| - // 4 bytes.
|
| - //
|
| - // This intends to match the section "IA-32 Function Calling
|
| - // Convention" of the document "OS X ABI Function Call Guide" by
|
| - // Apple.
|
| - NeedsStackAlignment = true;
|
| -
|
| - typedef std::vector<Operand *> OperandList;
|
| - OperandList XmmArgs;
|
| - OperandList StackArgs, StackArgLocations;
|
| - uint32_t ParameterAreaSizeBytes = 0;
|
| -
|
| - // Classify each argument operand according to the location where the
|
| - // argument is passed.
|
| - for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
|
| - Operand *Arg = Instr->getArg(i);
|
| - Type Ty = Arg->getType();
|
| - // The PNaCl ABI requires the width of arguments to be at least 32 bits.
|
| - assert(typeWidthInBytes(Ty) >= 4);
|
| - if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
|
| - XmmArgs.push_back(Arg);
|
| - } else {
|
| - StackArgs.push_back(Arg);
|
| - if (isVectorType(Arg->getType())) {
|
| - ParameterAreaSizeBytes =
|
| - Traits::applyStackAlignment(ParameterAreaSizeBytes);
|
| - }
|
| - Variable *esp =
|
| - Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
|
| - Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
|
| - StackArgLocations.push_back(
|
| - Traits::X86OperandMem::create(Func, Ty, esp, Loc));
|
| - ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
|
| - }
|
| - }
|
| -
|
| - // Adjust the parameter area so that the stack is aligned. It is
|
| - // assumed that the stack is already aligned at the start of the
|
| - // calling sequence.
|
| - ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
|
| -
|
| - // Subtract the appropriate amount for the argument area. This also
|
| - // takes care of setting the stack adjustment during emission.
|
| - //
|
| - // TODO: If for some reason the call instruction gets dead-code
|
| - // eliminated after lowering, we would need to ensure that the
|
| - // pre-call and the post-call esp adjustment get eliminated as well.
|
| - if (ParameterAreaSizeBytes) {
|
| - _adjust_stack(ParameterAreaSizeBytes);
|
| - }
|
| -
|
| - // Copy arguments that are passed on the stack to the appropriate
|
| - // stack locations.
|
| - for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
|
| - lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
|
| - }
|
| -
|
| - // Copy arguments to be passed in registers to the appropriate
|
| - // registers.
|
| - // TODO: Investigate the impact of lowering arguments passed in
|
| - // registers after lowering stack arguments as opposed to the other
|
| - // way around. Lowering register arguments after stack arguments may
|
| - // reduce register pressure. On the other hand, lowering register
|
| - // arguments first (before stack arguments) may result in more compact
|
| - // code, as the memory operand displacements may end up being smaller
|
| - // before any stack adjustment is done.
|
| - for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
|
| - Variable *Reg =
|
| - legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
|
| - // Generate a FakeUse of register arguments so that they do not get
|
| - // dead code eliminated as a result of the FakeKill of scratch
|
| - // registers after the call.
|
| - Context.insert(InstFakeUse::create(Func, Reg));
|
| - }
|
| - // Generate the call instruction. Assign its result to a temporary
|
| - // with high register allocation weight.
|
| - Variable *Dest = Instr->getDest();
|
| - // ReturnReg doubles as ReturnRegLo as necessary.
|
| - Variable *ReturnReg = nullptr;
|
| - Variable *ReturnRegHi = nullptr;
|
| - if (Dest) {
|
| - switch (Dest->getType()) {
|
| - case IceType_NUM:
|
| - llvm_unreachable("Invalid Call dest type");
|
| - break;
|
| - case IceType_void:
|
| - break;
|
| - case IceType_i1:
|
| - case IceType_i8:
|
| - case IceType_i16:
|
| - case IceType_i32:
|
| - ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
|
| - break;
|
| - case IceType_i64:
|
| - ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
|
| - ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
|
| - break;
|
| - case IceType_f32:
|
| - case IceType_f64:
|
| - // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
|
| - // the fstp instruction.
|
| - break;
|
| - case IceType_v4i1:
|
| - case IceType_v8i1:
|
| - case IceType_v16i1:
|
| - case IceType_v16i8:
|
| - case IceType_v8i16:
|
| - case IceType_v4i32:
|
| - case IceType_v4f32:
|
| - ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
|
| - break;
|
| - }
|
| - }
|
| - Operand *CallTarget = legalize(Instr->getCallTarget());
|
| - const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
|
| - if (NeedSandboxing) {
|
| - if (llvm::isa<Constant>(CallTarget)) {
|
| - _bundle_lock(InstBundleLock::Opt_AlignToEnd);
|
| - } else {
|
| - Variable *CallTargetVar = nullptr;
|
| - _mov(CallTargetVar, CallTarget);
|
| - _bundle_lock(InstBundleLock::Opt_AlignToEnd);
|
| - const SizeT BundleSize =
|
| - 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
|
| - _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
|
| - CallTarget = CallTargetVar;
|
| - }
|
| - }
|
| - Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
|
| - Context.insert(NewCall);
|
| - if (NeedSandboxing)
|
| - _bundle_unlock();
|
| - if (ReturnRegHi)
|
| - Context.insert(InstFakeDef::create(Func, ReturnRegHi));
|
| -
|
| - // Add the appropriate offset to esp. The call instruction takes care
|
| - // of resetting the stack offset during emission.
|
| - if (ParameterAreaSizeBytes) {
|
| - Variable *esp =
|
| - Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
|
| - _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
|
| - }
|
| -
|
| - // Insert a register-kill pseudo instruction.
|
| - Context.insert(InstFakeKill::create(Func, NewCall));
|
| -
|
| - // Generate a FakeUse to keep the call live if necessary.
|
| - if (Instr->hasSideEffects() && ReturnReg) {
|
| - Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
|
| - Context.insert(FakeUse);
|
| - }
|
| -
|
| - if (!Dest)
|
| - return;
|
| -
|
| - // Assign the result of the call to Dest.
|
| - if (ReturnReg) {
|
| - if (ReturnRegHi) {
|
| - assert(Dest->getType() == IceType_i64);
|
| - split64(Dest);
|
| - Variable *DestLo = Dest->getLo();
|
| - Variable *DestHi = Dest->getHi();
|
| - _mov(DestLo, ReturnReg);
|
| - _mov(DestHi, ReturnRegHi);
|
| - } else {
|
| - assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
|
| - Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
|
| - isVectorType(Dest->getType()));
|
| - if (isVectorType(Dest->getType())) {
|
| - _movp(Dest, ReturnReg);
|
| - } else {
|
| - _mov(Dest, ReturnReg);
|
| - }
|
| - }
|
| - } else if (isScalarFloatingType(Dest->getType())) {
|
| - // Special treatment for an FP function which returns its result in
|
| - // st(0).
|
| - // If Dest ends up being a physical xmm register, the fstp emit code
|
| - // will route st(0) through a temporary stack slot.
|
| - _fstp(Dest);
|
| - // Create a fake use of Dest in case it actually isn't used,
|
| - // because st(0) still needs to be popped.
|
| - Context.insert(InstFakeUse::create(Func, Dest));
|
| - }
|
| -}
|
| -
|
| } // end of namespace Ice
|
|
|