| Index: src/IceTargetLoweringX8664.cpp
|
| diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp
|
| index 1fcf0b9254c3c7ae67db51349fc44289bb7313ad..f5d4eadae59ddd5a81699b24a4682d72e0b824d9 100644
|
| --- a/src/IceTargetLoweringX8664.cpp
|
| +++ b/src/IceTargetLoweringX8664.cpp
|
| @@ -21,6 +21,14 @@
|
|
|
| namespace Ice {
|
|
|
| +//------------------------------------------------------------------------------
|
| +// ______ ______ ______ __ ______ ______
|
| +// /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\
|
| +// \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \
|
| +// \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\
|
| +// \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/
|
| +//
|
| +//------------------------------------------------------------------------------
|
| namespace X86Internal {
|
| const MachineTraits<TargetX8664>::TableFcmpType
|
| MachineTraits<TargetX8664>::TableFcmp[] = {
|
| @@ -81,6 +89,286 @@ const char *MachineTraits<TargetX8664>::TargetName = "X8664";
|
|
|
| } // end of namespace X86Internal
|
|
|
| +//------------------------------------------------------------------------------
|
| +// __ ______ __ __ ______ ______ __ __ __ ______
|
| +// /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
|
| +// \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
|
| +// \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
|
| +// \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
|
| +//
|
| +//------------------------------------------------------------------------------
|
| +namespace {
|
| +static inline TargetX8664::Traits::RegisterSet::AllRegisters
|
| +getRegisterForXmmArgNum(uint32_t ArgNum) {
|
| + assert(ArgNum < TargetX8664::Traits::X86_MAX_XMM_ARGS);
|
| + return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>(
|
| + TargetX8664::Traits::RegisterSet::Reg_xmm0 + ArgNum);
|
| +}
|
| +
|
| +static inline TargetX8664::Traits::RegisterSet::AllRegisters
|
| +getRegisterForGprArgNum(uint32_t ArgNum) {
|
| +  assert(ArgNum < TargetX8664::Traits::X86_MAX_GPR_ARGS);
|
| +  static const TargetX8664::Traits::RegisterSet::AllRegisters GprForArgNum[] = {
|
| +      TargetX8664::Traits::RegisterSet::Reg_edi,
|
| +      TargetX8664::Traits::RegisterSet::Reg_esi,
|
| +      TargetX8664::Traits::RegisterSet::Reg_edx,
|
| +      TargetX8664::Traits::RegisterSet::Reg_ecx,
|
| +      TargetX8664::Traits::RegisterSet::Reg_r8d,
|
| +      TargetX8664::Traits::RegisterSet::Reg_r9d,
|
| +  };
|
| +  static_assert(llvm::array_lengthof(GprForArgNum) ==
|
| +                TargetX8664::Traits::X86_MAX_GPR_ARGS,
|
| +                "Mismatch between MAX_GPR_ARGS and GprForArgNum.");
|
| +  return GprForArgNum[ArgNum];
|
| +}
|
| +
|
| +// constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
|
| +// OperandList in lowerCall. std::max() was supposed to work, but it doesn't.
|
| +constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; }
|
| +
|
| +} // end of anonymous namespace
|
| +
|
| +void TargetX8664::lowerCall(const InstCall *Instr) {
|
| + // x86-64 calling convention:
|
| + //
|
| + // * At the point before the call, the stack must be aligned to 16
|
| + // bytes.
|
| + //
|
| + // * The first eight arguments of vector/fp type, regardless of their
|
| + // position relative to the other arguments in the argument list, are
|
| + // placed in registers %xmm0 - %xmm7.
|
| + //
|
| + // * The first six arguments of integer types, regardless of their
|
| + // position relative to the other arguments in the argument list, are
|
| + // placed in registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
|
| + //
|
| + // * Other arguments are pushed onto the stack in right-to-left order,
|
| + // such that the left-most argument ends up on the top of the stack at
|
| + // the lowest memory address.
|
| + //
|
| + // * Stack arguments of vector type are aligned to start at the next
|
| + // highest multiple of 16 bytes. Other stack arguments are aligned to
|
| + // 8 bytes.
|
| + //
|
| + // This intends to match the section "Function Calling Sequence" of the
|
| + // document "System V Application Binary Interface."
|
| + NeedsStackAlignment = true;
|
| +
|
| + using OperandList =
|
| + llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
|
| + Traits::X86_MAX_GPR_ARGS)>;
|
| + OperandList XmmArgs;
|
| + OperandList GprArgs;
|
| + OperandList StackArgs, StackArgLocations;
|
| + uint32_t ParameterAreaSizeBytes = 0;
|
| +
|
| + // Classify each argument operand according to the location where the
|
| + // argument is passed.
|
| + for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
|
| + Operand *Arg = Instr->getArg(i);
|
| + Type Ty = Arg->getType();
|
| + // The PNaCl ABI requires the width of arguments to be at least 32 bits.
|
| + assert(typeWidthInBytes(Ty) >= 4);
|
| + if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
|
| + XmmArgs.push_back(Arg);
|
| + } else if (isScalarFloatingType(Ty) &&
|
| + XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
|
| + XmmArgs.push_back(Arg);
|
| + } else if (isScalarIntegerType(Ty) &&
|
| + GprArgs.size() < Traits::X86_MAX_GPR_ARGS) {
|
| + GprArgs.push_back(Arg);
|
| + } else {
|
| + StackArgs.push_back(Arg);
|
| + if (isVectorType(Arg->getType())) {
|
| + ParameterAreaSizeBytes =
|
| + Traits::applyStackAlignment(ParameterAreaSizeBytes);
|
| + }
|
| +      Variable *Esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
|
| +      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
|
| +      StackArgLocations.push_back(
|
| +          Traits::X86OperandMem::create(Func, Ty, Esp, Loc));
|
| + ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
|
| + }
|
| + }
|
| +
|
| + // Adjust the parameter area so that the stack is aligned. It is
|
| + // assumed that the stack is already aligned at the start of the
|
| + // calling sequence.
|
| + ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
|
| +
|
| + // Subtract the appropriate amount for the argument area. This also
|
| + // takes care of setting the stack adjustment during emission.
|
| + //
|
| + // TODO: If for some reason the call instruction gets dead-code
|
| + // eliminated after lowering, we would need to ensure that the
|
| + // pre-call and the post-call esp adjustment get eliminated as well.
|
| + if (ParameterAreaSizeBytes) {
|
| + _adjust_stack(ParameterAreaSizeBytes);
|
| + }
|
| +
|
| + // Copy arguments that are passed on the stack to the appropriate
|
| + // stack locations.
|
| + for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
|
| + lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
|
| + }
|
| +
|
| + // Copy arguments to be passed in registers to the appropriate
|
| + // registers.
|
| + // TODO: Investigate the impact of lowering arguments passed in
|
| + // registers after lowering stack arguments as opposed to the other
|
| + // way around. Lowering register arguments after stack arguments may
|
| + // reduce register pressure. On the other hand, lowering register
|
| + // arguments first (before stack arguments) may result in more compact
|
| + // code, as the memory operand displacements may end up being smaller
|
| + // before any stack adjustment is done.
|
| + for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
|
| + Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i));
|
| + // Generate a FakeUse of register arguments so that they do not get
|
| + // dead code eliminated as a result of the FakeKill of scratch
|
| + // registers after the call.
|
| + Context.insert(InstFakeUse::create(Func, Reg));
|
| + }
|
| +
|
| + for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
|
| + Variable *Reg = legalizeToReg(GprArgs[i], getRegisterForGprArgNum(i));
|
| + Context.insert(InstFakeUse::create(Func, Reg));
|
| + }
|
| +
|
| + // Generate the call instruction. Assign its result to a temporary
|
| + // with high register allocation weight.
|
| + Variable *Dest = Instr->getDest();
|
| + // ReturnReg doubles as ReturnRegLo as necessary.
|
| + Variable *ReturnReg = nullptr;
|
| + Variable *ReturnRegHi = nullptr;
|
| + if (Dest) {
|
| + switch (Dest->getType()) {
|
| + case IceType_NUM:
|
| + llvm_unreachable("Invalid Call dest type");
|
| + break;
|
| + case IceType_void:
|
| + break;
|
| + case IceType_i1:
|
| + case IceType_i8:
|
| + case IceType_i16:
|
| + case IceType_i32:
|
| + ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
|
| + break;
|
| + case IceType_i64:
|
| + // TODO(jpp): return i64 in a GPR.
|
| + ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
|
| + ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
|
| + break;
|
| + case IceType_f32:
|
| + case IceType_f64:
|
| + case IceType_v4i1:
|
| + case IceType_v8i1:
|
| + case IceType_v16i1:
|
| + case IceType_v16i8:
|
| + case IceType_v8i16:
|
| + case IceType_v4i32:
|
| + case IceType_v4f32:
|
| + ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
|
| + break;
|
| + }
|
| + }
|
| +
|
| + Operand *CallTarget = legalize(Instr->getCallTarget());
|
| + const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
|
| + if (NeedSandboxing) {
|
| + if (llvm::isa<Constant>(CallTarget)) {
|
| + _bundle_lock(InstBundleLock::Opt_AlignToEnd);
|
| + } else {
|
| + Variable *CallTargetVar = nullptr;
|
| + _mov(CallTargetVar, CallTarget);
|
| + _bundle_lock(InstBundleLock::Opt_AlignToEnd);
|
| + const SizeT BundleSize =
|
| + 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
|
| + _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
|
| + CallTarget = CallTargetVar;
|
| + }
|
| + }
|
| + Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
|
| + Context.insert(NewCall);
|
| + if (NeedSandboxing)
|
| + _bundle_unlock();
|
| + if (ReturnRegHi)
|
| + Context.insert(InstFakeDef::create(Func, ReturnRegHi));
|
| +
|
| + // Add the appropriate offset to esp. The call instruction takes care
|
| + // of resetting the stack offset during emission.
|
| + if (ParameterAreaSizeBytes) {
|
| + Variable *Esp =
|
| + Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
|
| + _add(Esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
|
| + }
|
| +
|
| + // Insert a register-kill pseudo instruction.
|
| + Context.insert(InstFakeKill::create(Func, NewCall));
|
| +
|
| + // Generate a FakeUse to keep the call live if necessary.
|
| + if (Instr->hasSideEffects() && ReturnReg) {
|
| + Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
|
| + Context.insert(FakeUse);
|
| + }
|
| +
|
| + if (!Dest)
|
| + return;
|
| +
|
| + assert(ReturnReg && "x86-64 always returns value on registers.");
|
| +
|
| + // Assign the result of the call to Dest.
|
| + if (ReturnRegHi) {
|
| + assert(Dest->getType() == IceType_i64);
|
| + split64(Dest);
|
| + Variable *DestLo = Dest->getLo();
|
| + Variable *DestHi = Dest->getHi();
|
| + _mov(DestLo, ReturnReg);
|
| + _mov(DestHi, ReturnRegHi);
|
| + return;
|
| + }
|
| +
|
| + assert(Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64 ||
|
| + Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
|
| + Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
|
| + isVectorType(Dest->getType()));
|
| +
|
| + if (isScalarFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
|
| + _movp(Dest, ReturnReg);
|
| + } else {
|
| + _mov(Dest, ReturnReg);
|
| + }
|
| +}
|
| +
|
| +void TargetDataX8664::lowerJumpTables() {
|
| + switch (Ctx->getFlags().getOutFileType()) {
|
| + case FT_Elf: {
|
| + ELFObjectWriter *Writer = Ctx->getObjectWriter();
|
| + for (const JumpTableData &JumpTable : Ctx->getJumpTables())
|
| + // TODO(jpp): not 386.
|
| + Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32);
|
| + } break;
|
| + case FT_Asm:
|
| + // Already emitted from Cfg
|
| + break;
|
| + case FT_Iasm: {
|
| + if (!BuildDefs::dump())
|
| + return;
|
| + Ostream &Str = Ctx->getStrEmit();
|
| + for (const JumpTableData &JT : Ctx->getJumpTables()) {
|
| + Str << "\t.section\t.rodata." << JT.getFunctionName()
|
| + << "$jumptable,\"a\",@progbits\n";
|
| + Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
|
| + Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";
|
| +
|
| + // On X8664 ILP32 pointers are 32-bit hence the use of .long
|
| + for (intptr_t TargetOffset : JT.getTargetOffsets())
|
| + Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
|
| + Str << "\n";
|
| + }
|
| + } break;
|
| + }
|
| +}
|
| +
|
| namespace {
|
| template <typename T> struct PoolTypeConverter {};
|
|
|
| @@ -236,36 +524,6 @@ void TargetX8664::emitJumpTable(const Cfg *Func,
|
| Str << "\n";
|
| }
|
|
|
| -void TargetDataX8664::lowerJumpTables() {
|
| - switch (Ctx->getFlags().getOutFileType()) {
|
| - case FT_Elf: {
|
| - ELFObjectWriter *Writer = Ctx->getObjectWriter();
|
| - for (const JumpTableData &JT : Ctx->getJumpTables())
|
| - // TODO(jpp): not 386.
|
| - Writer->writeJumpTable(JT, llvm::ELF::R_386_32);
|
| - } break;
|
| - case FT_Asm:
|
| - // Already emitted from Cfg
|
| - break;
|
| - case FT_Iasm: {
|
| - if (!BuildDefs::dump())
|
| - return;
|
| - Ostream &Str = Ctx->getStrEmit();
|
| - for (const JumpTableData &JT : Ctx->getJumpTables()) {
|
| - Str << "\t.section\t.rodata." << JT.getFunctionName()
|
| - << "$jumptable,\"a\",@progbits\n";
|
| - Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
|
| - Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";
|
| -
|
| - // On X8664 ILP32 pointers are 32-bit hence the use of .long
|
| - for (intptr_t TargetOffset : JT.getTargetOffsets())
|
| - Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
|
| - Str << "\n";
|
| - }
|
| - } break;
|
| - }
|
| -}
|
| -
|
| void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars,
|
| const IceString &SectionSuffix) {
|
| switch (Ctx->getFlags().getOutFileType()) {
|
|
|