Chromium Code Reviews| Index: src/IceTargetLoweringX8664.cpp |
| diff --git a/src/IceTargetLoweringX8664.cpp b/src/IceTargetLoweringX8664.cpp |
| index ed586a375da8c9b76c9a72d93cf23d4e85205703..f0ebe57c36cb9b946e9f64555e6175d4773e015f 100644 |
| --- a/src/IceTargetLoweringX8664.cpp |
| +++ b/src/IceTargetLoweringX8664.cpp |
| @@ -21,6 +21,14 @@ |
| namespace Ice { |
| +//------------------------------------------------------------------------------ |
| +// ______ ______ ______ __ ______ ______ |
| +// /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\ |
| +// \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \ |
| +// \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\ |
| +// \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/ |
| +// |
| +//------------------------------------------------------------------------------ |
| namespace X86Internal { |
| const MachineTraits<TargetX8664>::TableFcmpType |
| MachineTraits<TargetX8664>::TableFcmp[] = { |
| @@ -81,6 +89,297 @@ const char *MachineTraits<TargetX8664>::TargetName = "X8664"; |
| } // end of namespace X86Internal |
| +//------------------------------------------------------------------------------ |
| +// __ ______ __ __ ______ ______ __ __ __ ______ |
| +// /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\ |
| +// \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \ |
| +// \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\ |
| +// \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/ |
| +// |
| +//------------------------------------------------------------------------------ |
| +namespace { |
| +static inline TargetX8664::Traits::RegisterSet::AllRegisters |
| +getRegisterForXmmArgNum(uint32_t ArgNum) { |
| + assert(ArgNum < TargetX8664::Traits::X86_MAX_XMM_ARGS); |
| + return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>( |
| + TargetX8664::Traits::RegisterSet::Reg_xmm0 + ArgNum); |
| +} |
| + |
| +static inline TargetX8664::Traits::RegisterSet::AllRegisters |
| +getRegisterForGprArgNum(uint32_t ArgNum) { |
| + assert(ArgNum < TargetX8664::Traits::X86_MAX_GPR_ARGS); |
| + static const TargetX8664::Traits::RegisterSet::AllRegisters GprForArgNum[] = { |
| + TargetX8664::Traits::RegisterSet::Reg_edi, |
| + TargetX8664::Traits::RegisterSet::Reg_esi, |
| + TargetX8664::Traits::RegisterSet::Reg_edx, |
| + TargetX8664::Traits::RegisterSet::Reg_ecx, |
| + TargetX8664::Traits::RegisterSet::Reg_r8d, |
| + TargetX8664::Traits::RegisterSet::Reg_r9d, |
| + }; |
| + static_assert(llvm::array_lengthof(GprForArgNum) == |
| + TargetX8664::Traits::X86_MAX_GPR_ARGS, |
| + "Mismatch between MAX_GPR_ARGS and GprForArgNum."); |
|
jvoung (off chromium)
2015/07/31 23:53:49
"Mismatch"
John
2015/08/01 15:49:55
"Doen." :)
|
| + return GprForArgNum[ArgNum]; |
| +} |
| +} |
| + |
| +void TargetX8664::lowerCall(const InstCall *Instr) { |
| + // x86-64 calling convention: |
| + // |
| + // * At the point before the call, the stack must be aligned to 16 |
| + // bytes. |
| + // |
| + // * The first eight arguments of vector/fp type, regardless of their |
| + // position relative to the other arguments in the argument list, are |
| + // placed in registers %xmm0 - %xmm7. |
| + // |
| + // * The first six arguments of integer types, regardless of their |
| + // position relative to the other arguments in the argument list, are |
| + // placed in registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9. |
| + // |
| + // * Other arguments are pushed onto the stack in right-to-left order, |
| + // such that the left-most argument ends up on the top of the stack at |
| + // the lowest memory address. |
| + // |
| + // * Stack arguments of vector type are aligned to start at the next |
| + // highest multiple of 16 bytes. Other stack arguments are aligned to |
| + // 8 bytes. |
| + // |
| + // This intends to match the section "Function Calling Sequence" of the |
| + // document "System V Application Binary Interface." |
| + NeedsStackAlignment = true; |
| + |
| + using OperandList = std::vector<Operand *>; |
|
jvoung (off chromium)
2015/08/03 17:04:55
Might be able to use "llvm::SmallVector<Operand *,
John
2015/08/05 16:53:03
Done.
|
| + OperandList XmmArgs; |
| + OperandList GprArgs; |
| + OperandList StackArgs, StackArgLocations; |
| + uint32_t ParameterAreaSizeBytes = 0; |
| + |
| + // Classify each argument operand according to the location where the |
| + // argument is passed. |
| + for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
| + Operand *Arg = Instr->getArg(i); |
| + Type Ty = Arg->getType(); |
| + // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
| + assert(typeWidthInBytes(Ty) >= 4); |
| + if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { |
| + XmmArgs.push_back(Arg); |
| + } else if (isScalarFloatingType(Ty) && |
| + XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { |
| + XmmArgs.push_back(Arg); |
| + } else if (isScalarIntegerType(Ty) && |
| + GprArgs.size() < Traits::X86_MAX_GPR_ARGS) { |
| + GprArgs.push_back(Arg); |
| + } else { |
| + StackArgs.push_back(Arg); |
| + if (isVectorType(Arg->getType())) { |
| + ParameterAreaSizeBytes = |
| + Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| + } |
| + Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| + Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); |
| + StackArgLocations.push_back( |
| + Traits::X86OperandMem::create(Func, Ty, esp, Loc)); |
| + ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
|
jvoung (off chromium)
2015/07/31 23:53:49
Is typeWidthInBytesOnStack defined differently for
John
2015/08/01 15:49:55
We could avoid the overhead, but if we want to be
jvoung (off chromium)
2015/08/03 17:04:55
Yes, my understanding is that the calling conventi
John
2015/08/05 16:53:03
Oh, now I see what you mean. :)
I added a TODO in
|
| + } |
| + } |
| + |
| + // Adjust the parameter area so that the stack is aligned. It is |
| + // assumed that the stack is already aligned at the start of the |
| + // calling sequence. |
| + ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| + |
| + // Subtract the appropriate amount for the argument area. This also |
| + // takes care of setting the stack adjustment during emission. |
| + // |
| + // TODO: If for some reason the call instruction gets dead-code |
| + // eliminated after lowering, we would need to ensure that the |
| + // pre-call and the post-call esp adjustment get eliminated as well. |
| + if (ParameterAreaSizeBytes) { |
| + _adjust_stack(ParameterAreaSizeBytes); |
| + } |
| + |
| + // Copy arguments that are passed on the stack to the appropriate |
| + // stack locations. |
| + for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { |
| + lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); |
| + } |
| + |
| + // Copy arguments to be passed in registers to the appropriate |
| + // registers. |
| + // TODO: Investigate the impact of lowering arguments passed in |
| + // registers after lowering stack arguments as opposed to the other |
| + // way around. Lowering register arguments after stack arguments may |
| + // reduce register pressure. On the other hand, lowering register |
| + // arguments first (before stack arguments) may result in more compact |
| + // code, as the memory operand displacements may end up being smaller |
| + // before any stack adjustment is done. |
| + for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
| + Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i)); |
| + // Generate a FakeUse of register arguments so that they do not get |
| + // dead code eliminated as a result of the FakeKill of scratch |
| + // registers after the call. |
| + Context.insert(InstFakeUse::create(Func, Reg)); |
| + } |
| + |
| + for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) { |
| + Variable *Reg = legalizeToReg(GprArgs[i], getRegisterForGprArgNum(i)); |
| + Context.insert(InstFakeUse::create(Func, Reg)); |
| + } |
| + |
| + // Generate the call instruction. Assign its result to a temporary |
| + // with high register allocation weight. |
| + Variable *Dest = Instr->getDest(); |
| + // ReturnReg doubles as ReturnRegLo as necessary. |
| + Variable *ReturnReg = nullptr; |
| + Variable *ReturnRegHi = nullptr; |
| + if (Dest) { |
| + switch (Dest->getType()) { |
| + case IceType_NUM: |
| + llvm_unreachable("Invalid Call dest type"); |
| + break; |
| + case IceType_void: |
|
jvoung (off chromium)
2015/07/31 23:53:49
This is the same in the other targets, but it seem
Jim Stichnoth
2015/08/05 04:43:51
Yeah, I don't remember why it ended up this way, b
|
| + break; |
| + case IceType_i1: |
| + case IceType_i8: |
| + case IceType_i16: |
| + case IceType_i32: |
| + ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax); |
| + break; |
| + case IceType_i64: |
| + // TODO(jpp): return i64 in a GPR. |
| + ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| + ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| + break; |
| + case IceType_f32: |
| + case IceType_f64: |
| + case IceType_v4i1: |
| + case IceType_v8i1: |
| + case IceType_v16i1: |
| + case IceType_v16i8: |
| + case IceType_v8i16: |
| + case IceType_v4i32: |
| + case IceType_v4f32: |
| + ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0); |
| + break; |
| + } |
| + } |
| + |
| + Operand *CallTarget = legalize(Instr->getCallTarget()); |
| + const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| + if (NeedSandboxing) { |
| + if (llvm::isa<Constant>(CallTarget)) { |
| + _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| + } else { |
| + Variable *CallTargetVar = nullptr; |
| + _mov(CallTargetVar, CallTarget); |
| + _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| + const SizeT BundleSize = |
| + 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
| + _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); |
| + CallTarget = CallTargetVar; |
| + } |
| + } |
| + Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); |
| + Context.insert(NewCall); |
| + if (NeedSandboxing) |
| + _bundle_unlock(); |
| + if (ReturnRegHi) |
| + Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| + |
| + // Add the appropriate offset to esp. The call instruction takes care |
| + // of resetting the stack offset during emission. |
| + if (ParameterAreaSizeBytes) { |
| + Variable *Esp = |
| + Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| + _add(Esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); |
| + } |
| + |
| + // Insert a register-kill pseudo instruction. |
| + Context.insert(InstFakeKill::create(Func, NewCall)); |
| + |
| + // Generate a FakeUse to keep the call live if necessary. |
| + if (Instr->hasSideEffects() && ReturnReg) { |
| + Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
| + Context.insert(FakeUse); |
| + } |
| + |
| + if (!Dest) |
| + return; |
| + |
| + if (!ReturnReg) |
|
jvoung (off chromium)
2015/07/31 23:53:49
Seems a bit odd to have a Dest, but no ReturnReg.
John
2015/08/01 15:49:55
Yes, indeed. This became an assert(ReturnReg);
jvoung (off chromium)
2015/08/03 17:04:55
New patch upload?
John
2015/08/05 16:53:03
Done.
|
| + return; |
| + |
| + // Assign the result of the call to Dest. |
| + if (ReturnRegHi) { |
| + assert(Dest->getType() == IceType_i64); |
| + split64(Dest); |
| + Variable *DestLo = Dest->getLo(); |
| + Variable *DestHi = Dest->getHi(); |
| + _mov(DestLo, ReturnReg); |
| + _mov(DestHi, ReturnRegHi); |
| + return; |
| + } |
| + |
| + assert(Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64 || |
| + Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || |
| + Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || |
| + isVectorType(Dest->getType())); |
| + |
| + if (isScalarFloatingType(Dest->getType()) || isVectorType(Dest->getType())) { |
| + _movp(Dest, ReturnReg); |
| + } else { |
| + _mov(Dest, ReturnReg); |
| + } |
| +} |
| + |
| +void TargetX8664::emitJumpTable(const Cfg *Func, |
| + const InstJumpTable *JumpTable) const { |
| + if (!BuildDefs::dump()) |
| + return; |
| + Ostream &Str = Ctx->getStrEmit(); |
| + IceString MangledName = Ctx->mangleName(Func->getFunctionName()); |
| + Str << "\t.section\t.rodata." << MangledName |
| + << "$jumptable,\"a\",@progbits\n"; |
| + Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; |
| + Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":"; |
| + |
| + // On X8664 ILP32 pointers are 32-bit hence the use of .long |
| + for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I) |
| + Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName(); |
| + Str << "\n"; |
| +} |
| + |
| +void TargetDataX8664::lowerJumpTables() { |
| + switch (Ctx->getFlags().getOutFileType()) { |
| + case FT_Elf: { |
| + ELFObjectWriter *Writer = Ctx->getObjectWriter(); |
| + for (const JumpTableData &JumpTable : *Ctx->getJumpTables()) |
| + // TODO(jpp): not 386. |
| + Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32); |
| + } break; |
| + case FT_Asm: |
| + // Already emitted from Cfg |
| + break; |
| + case FT_Iasm: { |
| + if (!BuildDefs::dump()) |
| + return; |
| + Ostream &Str = Ctx->getStrEmit(); |
| + for (const JumpTableData &JT : *Ctx->getJumpTables()) { |
| + Str << "\t.section\t.rodata." << JT.getFunctionName() |
| + << "$jumptable,\"a\",@progbits\n"; |
| + Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; |
| + Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":"; |
| + |
| + // On X8664 ILP32 pointers are 32-bit hence the use of .long |
| + for (intptr_t TargetOffset : JT.getTargetOffsets()) |
| + Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset; |
| + Str << "\n"; |
| + } |
| + } break; |
| + } |
| +} |
| + |
| namespace { |
| template <typename T> struct PoolTypeConverter {}; |
| @@ -219,53 +518,6 @@ void TargetDataX8664::lowerConstants() { |
| } |
| } |
| -void TargetX8664::emitJumpTable(const Cfg *Func, |
| - const InstJumpTable *JumpTable) const { |
| - if (!BuildDefs::dump()) |
| - return; |
| - Ostream &Str = Ctx->getStrEmit(); |
| - IceString MangledName = Ctx->mangleName(Func->getFunctionName()); |
| - Str << "\t.section\t.rodata." << MangledName |
| - << "$jumptable,\"a\",@progbits\n"; |
| - Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; |
| - Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":"; |
| - |
| - // On X8664 ILP32 pointers are 32-bit hence the use of .long |
| - for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I) |
| - Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName(); |
| - Str << "\n"; |
| -} |
| - |
| -void TargetDataX8664::lowerJumpTables() { |
| - switch (Ctx->getFlags().getOutFileType()) { |
| - case FT_Elf: { |
| - ELFObjectWriter *Writer = Ctx->getObjectWriter(); |
| - for (const JumpTableData &JumpTable : *Ctx->getJumpTables()) |
| - // TODO(jpp): not 386. |
| - Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32); |
| - } break; |
| - case FT_Asm: |
| - // Already emitted from Cfg |
| - break; |
| - case FT_Iasm: { |
| - if (!BuildDefs::dump()) |
| - return; |
| - Ostream &Str = Ctx->getStrEmit(); |
| - for (const JumpTableData &JT : *Ctx->getJumpTables()) { |
| - Str << "\t.section\t.rodata." << JT.getFunctionName() |
| - << "$jumptable,\"a\",@progbits\n"; |
| - Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; |
| - Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":"; |
| - |
| - // On X8664 ILP32 pointers are 32-bit hence the use of .long |
| - for (intptr_t TargetOffset : JT.getTargetOffsets()) |
| - Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset; |
| - Str << "\n"; |
| - } |
| - } break; |
| - } |
| -} |
| - |
| void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars, |
| const IceString &SectionSuffix) { |
| switch (Ctx->getFlags().getOutFileType()) { |