Chromium Code Reviews| Index: src/IceTargetLoweringX8664.cpp |
| diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp |
| index ed586a375da8c9b76c9a72d93cf23d4e85205703..f0ebe57c36cb9b946e9f64555e6175d4773e015f 100644 |
| --- a/src/IceTargetLoweringX8664.cpp |
| +++ b/src/IceTargetLoweringX8664.cpp |
| @@ -21,6 +21,14 @@ |
| namespace Ice { |
| +//------------------------------------------------------------------------------ |
| +// ______ ______ ______ __ ______ ______ |
| +// /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\ |
| +// \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \ |
| +// \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\ |
| +// \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/ |
| +// |
| +//------------------------------------------------------------------------------ |
| namespace X86Internal { |
| const MachineTraits<TargetX8664>::TableFcmpType |
| MachineTraits<TargetX8664>::TableFcmp[] = { |
| @@ -81,6 +89,297 @@ const char *MachineTraits<TargetX8664>::TargetName = "X8664"; |
| } // end of namespace X86Internal |
| +//------------------------------------------------------------------------------ |
| +// __ ______ __ __ ______ ______ __ __ __ ______ |
| +// /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\ |
| +// \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \ |
| +// \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\ |
| +// \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/ |
| +// |
| +//------------------------------------------------------------------------------ |
| +namespace { |
| +static inline TargetX8664::Traits::RegisterSet::AllRegisters |
| +getRegisterForXmmArgNum(uint32_t ArgNum) { |
| + assert(ArgNum < TargetX8664::Traits::X86_MAX_XMM_ARGS); |
| + return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>( |
| + TargetX8664::Traits::RegisterSet::Reg_xmm0 + ArgNum); |
| +} |
| + |
| +static inline TargetX8664::Traits::RegisterSet::AllRegisters |
| +getRegisterForGprArgNum(uint32_t ArgNum) { |
| + assert(ArgNum < TargetX8664::Traits::X86_MAX_GPR_ARGS); |
| + static const TargetX8664::Traits::RegisterSet::AllRegisters GprForArgNum[] = { |
| + TargetX8664::Traits::RegisterSet::Reg_edi, |
| + TargetX8664::Traits::RegisterSet::Reg_esi, |
| + TargetX8664::Traits::RegisterSet::Reg_edx, |
| + TargetX8664::Traits::RegisterSet::Reg_ecx, |
| + TargetX8664::Traits::RegisterSet::Reg_r8d, |
| + TargetX8664::Traits::RegisterSet::Reg_r9d, |
| + }; |
| + static_assert(llvm::array_lengthof(GprForArgNum) == |
| + TargetX8664::Traits::X86_MAX_GPR_ARGS, |
| + "Mismatch between MAX_GPR_ARGS and GprForArgNum."); |
|
jvoung (off chromium)
2015/07/31 23:53:49
"Mismatch"
John
2015/08/01 15:49:55
"Doen." :)
|
| + return GprForArgNum[ArgNum]; |
| +} |
| +} |
| + |
| +void TargetX8664::lowerCall(const InstCall *Instr) { |
| + // x86-64 calling convention: |
| + // |
| + // * At the point before the call, the stack must be aligned to 16 |
| + // bytes. |
| + // |
| + // * The first eight arguments of vector/fp type, regardless of their |
| + // position relative to the other arguments in the argument list, are |
| + // placed in registers %xmm0 - %xmm7. |
| + // |
| + // * The first six arguments of integer types, regardless of their |
| + // position relative to the other arguments in the argument list, are |
| + // placed in registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9. |
| + // |
| + // * Other arguments are pushed onto the stack in right-to-left order, |
| + // such that the left-most argument ends up on the top of the stack at |
| + // the lowest memory address. |
| + // |
| + // * Stack arguments of vector type are aligned to start at the next |
| + // highest multiple of 16 bytes. Other stack arguments are aligned to |
| + // 8 bytes. |
| + // |
| + // This intends to match the section "Function Calling Sequence" of the |
| + // document "System V Application Binary Interface." |
| + NeedsStackAlignment = true; |
| + |
| + using OperandList = std::vector<Operand *>; |
|
jvoung (off chromium)
2015/08/03 17:04:55
Might be able to use "llvm::SmallVector<Operand *,
John
2015/08/05 16:53:03
Done.
|
| + OperandList XmmArgs; |
| + OperandList GprArgs; |
| + OperandList StackArgs, StackArgLocations; |
| + uint32_t ParameterAreaSizeBytes = 0; |
| + |
| + // Classify each argument operand according to the location where the |
| + // argument is passed. |
| + for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
| + Operand *Arg = Instr->getArg(i); |
| + Type Ty = Arg->getType(); |
| + // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
| + assert(typeWidthInBytes(Ty) >= 4); |
| + if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { |
| + XmmArgs.push_back(Arg); |
| + } else if (isScalarFloatingType(Ty) && |
| + XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { |
| + XmmArgs.push_back(Arg); |
| + } else if (isScalarIntegerType(Ty) && |
| + GprArgs.size() < Traits::X86_MAX_GPR_ARGS) { |
| + GprArgs.push_back(Arg); |
| + } else { |
| + StackArgs.push_back(Arg); |
| + if (isVectorType(Arg->getType())) { |
| + ParameterAreaSizeBytes = |
| + Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| + } |
| + Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| + Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); |
| + StackArgLocations.push_back( |
| + Traits::X86OperandMem::create(Func, Ty, esp, Loc)); |
| + ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
|
jvoung (off chromium)
2015/07/31 23:53:49
Is typeWidthInBytesOnStack defined differently for
John
2015/08/01 15:49:55
We could avoid the overhead, but if we want to be
jvoung (off chromium)
2015/08/03 17:04:55
Yes, my understanding is that the calling conventi
John
2015/08/05 16:53:03
Oh, now I see what you mean. :)
I added a TODO in
|
| + } |
| + } |
| + |
| + // Adjust the parameter area so that the stack is aligned. It is |
| + // assumed that the stack is already aligned at the start of the |
| + // calling sequence. |
| + ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| + |
| + // Subtract the appropriate amount for the argument area. This also |
| + // takes care of setting the stack adjustment during emission. |
| + // |
| + // TODO: If for some reason the call instruction gets dead-code |
| + // eliminated after lowering, we would need to ensure that the |
| + // pre-call and the post-call esp adjustment get eliminated as well. |
| + if (ParameterAreaSizeBytes) { |
| + _adjust_stack(ParameterAreaSizeBytes); |
| + } |
| + |
| + // Copy arguments that are passed on the stack to the appropriate |
| + // stack locations. |
| + for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { |
| + lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); |
| + } |
| + |
| + // Copy arguments to be passed in registers to the appropriate |
| + // registers. |
| + // TODO: Investigate the impact of lowering arguments passed in |
| + // registers after lowering stack arguments as opposed to the other |
| + // way around. Lowering register arguments after stack arguments may |
| + // reduce register pressure. On the other hand, lowering register |
| + // arguments first (before stack arguments) may result in more compact |
| + // code, as the memory operand displacements may end up being smaller |
| + // before any stack adjustment is done. |
| + for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
| + Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i)); |
| + // Generate a FakeUse of register arguments so that they do not get |
| + // dead code eliminated as a result of the FakeKill of scratch |
| + // registers after the call. |
| + Context.insert(InstFakeUse::create(Func, Reg)); |
| + } |
| + |
| + for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { |
| + Variable *Reg = legalizeToReg(GprArgs[i], getRegisterForGprArgNum(i)); |
| + Context.insert(InstFakeUse::create(Func, Reg)); |
| + } |
| + |
| + // Generate the call instruction. Assign its result to a temporary |
| + // with high register allocation weight. |
| + Variable *Dest = Instr->getDest(); |
| + // ReturnReg doubles as ReturnRegLo as necessary. |
| + Variable *ReturnReg = nullptr; |
| + Variable *ReturnRegHi = nullptr; |
| + if (Dest) { |
| + switch (Dest->getType()) { |
| + case IceType_NUM: |
| + llvm_unreachable("Invalid Call dest type"); |
| + break; |
| + case IceType_void: |
|
jvoung (off chromium)
2015/07/31 23:53:49
This is the same in the other targets, but it seem
Jim Stichnoth
2015/08/05 04:43:51
Yeah, I don't remember why it ended up this way, b
|
| + break; |
| + case IceType_i1: |
| + case IceType_i8: |
| + case IceType_i16: |
| + case IceType_i32: |
| + ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax); |
| + break; |
| + case IceType_i64: |
| + // TODO(jpp): return i64 in a GPR. |
| + ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| + ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| + break; |
| + case IceType_f32: |
| + case IceType_f64: |
| + case IceType_v4i1: |
| + case IceType_v8i1: |
| + case IceType_v16i1: |
| + case IceType_v16i8: |
| + case IceType_v8i16: |
| + case IceType_v4i32: |
| + case IceType_v4f32: |
| + ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0); |
| + break; |
| + } |
| + } |
| + |
| + Operand *CallTarget = legalize(Instr->getCallTarget()); |
| + const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| + if (NeedSandboxing) { |
| + if (llvm::isa<Constant>(CallTarget)) { |
| + _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| + } else { |
| + Variable *CallTargetVar = nullptr; |
| + _mov(CallTargetVar, CallTarget); |
| + _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| + const SizeT BundleSize = |
| + 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
| + _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); |
| + CallTarget = CallTargetVar; |
| + } |
| + } |
| + Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); |
| + Context.insert(NewCall); |
| + if (NeedSandboxing) |
| + _bundle_unlock(); |
| + if (ReturnRegHi) |
| + Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| + |
| + // Add the appropriate offset to esp. The call instruction takes care |
| + // of resetting the stack offset during emission. |
| + if (ParameterAreaSizeBytes) { |
| + Variable *Esp = |
| + Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| + _add(Esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); |
| + } |
| + |
| + // Insert a register-kill pseudo instruction. |
| + Context.insert(InstFakeKill::create(Func, NewCall)); |
| + |
| + // Generate a FakeUse to keep the call live if necessary. |
| + if (Instr->hasSideEffects() && ReturnReg) { |
| + Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
| + Context.insert(FakeUse); |
| + } |
| + |
| + if (!Dest) |
| + return; |
| + |
| + if (!ReturnReg) |
|
jvoung (off chromium)
2015/07/31 23:53:49
Seems a bit odd to have a Dest, but no ReturnReg.
John
2015/08/01 15:49:55
Yes, indeed. This became an assert(ReturnReg);
jvoung (off chromium)
2015/08/03 17:04:55
New patch upload?
John
2015/08/05 16:53:03
Done.
|
| + return; |
| + |
| + // Assign the result of the call to Dest. |
| + if (ReturnRegHi) { |
| + assert(Dest->getType() == IceType_i64); |
| + split64(Dest); |
| + Variable *DestLo = Dest->getLo(); |
| + Variable *DestHi = Dest->getHi(); |
| + _mov(DestLo, ReturnReg); |
| + _mov(DestHi, ReturnRegHi); |
| + return; |
| + } |
| + |
| + assert(Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64 || |
| + Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || |
| + Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || |
| + isVectorType(Dest->getType())); |
| + |
| + if (isScalarFloatingType(Dest->getType()) || isVectorType(Dest->getType())) { |
| + _movp(Dest, ReturnReg); |
| + } else { |
| + _mov(Dest, ReturnReg); |
| + } |
| +} |
| + |
| +void TargetX8664::emitJumpTable(const Cfg *Func, |
| + const InstJumpTable *JumpTable) const { |
| + if (!BuildDefs::dump()) |
| + return; |
| + Ostream &Str = Ctx->getStrEmit(); |
| + IceString MangledName = Ctx->mangleName(Func->getFunctionName()); |
| + Str << "\t.section\t.rodata." << MangledName |
| + << "$jumptable,\"a\",@progbits\n"; |
| + Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; |
| + Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":"; |
| + |
| + // On X8664 ILP32 pointers are 32-bit hence the use of .long |
| + for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I) |
| + Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName(); |
| + Str << "\n"; |
| +} |
| + |
| +void TargetDataX8664::lowerJumpTables() { |
| + switch (Ctx->getFlags().getOutFileType()) { |
| + case FT_Elf: { |
| + ELFObjectWriter *Writer = Ctx->getObjectWriter(); |
| + for (const JumpTableData &JumpTable : *Ctx->getJumpTables()) |
| + // TODO(jpp): not 386. |
| + Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32); |
| + } break; |
| + case FT_Asm: |
| + // Already emitted from Cfg |
| + break; |
| + case FT_Iasm: { |
| + if (!BuildDefs::dump()) |
| + return; |
| + Ostream &Str = Ctx->getStrEmit(); |
| + for (const JumpTableData &JT : *Ctx->getJumpTables()) { |
| + Str << "\t.section\t.rodata." << JT.getFunctionName() |
| + << "$jumptable,\"a\",@progbits\n"; |
| + Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; |
| + Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":"; |
| + |
| + // On X8664 ILP32 pointers are 32-bit hence the use of .long |
| + for (intptr_t TargetOffset : JT.getTargetOffsets()) |
| + Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset; |
| + Str << "\n"; |
| + } |
| + } break; |
| + } |
| +} |
| + |
| namespace { |
| template <typename T> struct PoolTypeConverter {}; |
| @@ -219,53 +518,6 @@ void TargetDataX8664::lowerConstants() { |
| } |
| } |
| -void TargetX8664::emitJumpTable(const Cfg *Func, |
| - const InstJumpTable *JumpTable) const { |
| - if (!BuildDefs::dump()) |
| - return; |
| - Ostream &Str = Ctx->getStrEmit(); |
| - IceString MangledName = Ctx->mangleName(Func->getFunctionName()); |
| - Str << "\t.section\t.rodata." << MangledName |
| - << "$jumptable,\"a\",@progbits\n"; |
| - Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; |
| - Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":"; |
| - |
| - // On X8664 ILP32 pointers are 32-bit hence the use of .long |
| - for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I) |
| - Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName(); |
| - Str << "\n"; |
| -} |
| - |
| -void TargetDataX8664::lowerJumpTables() { |
| - switch (Ctx->getFlags().getOutFileType()) { |
| - case FT_Elf: { |
| - ELFObjectWriter *Writer = Ctx->getObjectWriter(); |
| - for (const JumpTableData &JumpTable : *Ctx->getJumpTables()) |
| - // TODO(jpp): not 386. |
| - Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32); |
| - } break; |
| - case FT_Asm: |
| - // Already emitted from Cfg |
| - break; |
| - case FT_Iasm: { |
| - if (!BuildDefs::dump()) |
| - return; |
| - Ostream &Str = Ctx->getStrEmit(); |
| - for (const JumpTableData &JT : *Ctx->getJumpTables()) { |
| - Str << "\t.section\t.rodata." << JT.getFunctionName() |
| - << "$jumptable,\"a\",@progbits\n"; |
| - Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; |
| - Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":"; |
| - |
| - // On X8664 ILP32 pointers are 32-bit hence the use of .long |
| - for (intptr_t TargetOffset : JT.getTargetOffsets()) |
| - Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset; |
| - Str << "\n"; |
| - } |
| - } break; |
| - } |
| -} |
| - |
| void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars, |
| const IceString &SectionSuffix) { |
| switch (Ctx->getFlags().getOutFileType()) { |