Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(467)

Side by Side Diff: src/IceTargetLoweringX8664.cpp

Issue 1266673003: Subzero. Implements x86-64 lowerCall. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: git pull && addresses comments && git format Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8664.h ('k') | src/IceTargetLoweringX8664Traits.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
11 /// This file implements the TargetLoweringX8664 class, which 11 /// This file implements the TargetLoweringX8664 class, which
12 /// consists almost entirely of the lowering sequence for each 12 /// consists almost entirely of the lowering sequence for each
13 /// high-level instruction. 13 /// high-level instruction.
14 /// 14 ///
15 //===----------------------------------------------------------------------===// 15 //===----------------------------------------------------------------------===//
16 16
17 #include "IceTargetLoweringX8664.h" 17 #include "IceTargetLoweringX8664.h"
18 18
19 #include "IceTargetLoweringX8664Traits.h" 19 #include "IceTargetLoweringX8664Traits.h"
20 #include "IceTargetLoweringX86Base.h" 20 #include "IceTargetLoweringX86Base.h"
21 21
22 namespace Ice { 22 namespace Ice {
23 23
24 //------------------------------------------------------------------------------
25 // ______ ______ ______ __ ______ ______
26 // /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\
27 // \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \
28 // \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\
29 // \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/
30 //
31 //------------------------------------------------------------------------------
24 namespace X86Internal { 32 namespace X86Internal {
25 const MachineTraits<TargetX8664>::TableFcmpType 33 const MachineTraits<TargetX8664>::TableFcmpType
26 MachineTraits<TargetX8664>::TableFcmp[] = { 34 MachineTraits<TargetX8664>::TableFcmp[] = {
27 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ 35 #define X(val, dflt, swapS, C1, C2, swapV, pred) \
28 { \ 36 { \
29 dflt, swapS, X8664::Traits::Cond::C1, X8664::Traits::Cond::C2, swapV, \ 37 dflt, swapS, X8664::Traits::Cond::C1, X8664::Traits::Cond::C2, swapV, \
30 X8664::Traits::Cond::pred \ 38 X8664::Traits::Cond::pred \
31 } \ 39 } \
32 , 40 ,
33 FCMPX8664_TABLE 41 FCMPX8664_TABLE
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
74 }; 82 };
75 83
76 const size_t MachineTraits<TargetX8664>::TableTypeX8664AttributesSize = 84 const size_t MachineTraits<TargetX8664>::TableTypeX8664AttributesSize =
77 llvm::array_lengthof(TableTypeX8664Attributes); 85 llvm::array_lengthof(TableTypeX8664Attributes);
78 86
79 const uint32_t MachineTraits<TargetX8664>::X86_STACK_ALIGNMENT_BYTES = 16; 87 const uint32_t MachineTraits<TargetX8664>::X86_STACK_ALIGNMENT_BYTES = 16;
80 const char *MachineTraits<TargetX8664>::TargetName = "X8664"; 88 const char *MachineTraits<TargetX8664>::TargetName = "X8664";
81 89
82 } // end of namespace X86Internal 90 } // end of namespace X86Internal
83 91
92 //------------------------------------------------------------------------------
93 // __ ______ __ __ ______ ______ __ __ __ ______
94 // /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
95 // \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
96 // \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
97 // \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
98 //
99 //------------------------------------------------------------------------------
100 namespace {
101 static inline TargetX8664::Traits::RegisterSet::AllRegisters
102 getRegisterForXmmArgNum(uint32_t ArgNum) {
103 assert(ArgNum < TargetX8664::Traits::X86_MAX_XMM_ARGS);
104 return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>(
105 TargetX8664::Traits::RegisterSet::Reg_xmm0 + ArgNum);
106 }
107
108 static inline TargetX8664::Traits::RegisterSet::AllRegisters
109 getRegisterForGprArgNum(uint32_t ArgNum) {
110 assert(ArgNum < TargetX8664::Traits::X86_MAX_GPR_ARGS);
111 static const TargetX8664::Traits::RegisterSet::AllRegisters GprForArgNum[] = {
112 TargetX8664::Traits::RegisterSet::Reg_edi,
113 TargetX8664::Traits::RegisterSet::Reg_esi,
114 TargetX8664::Traits::RegisterSet::Reg_edx,
115 TargetX8664::Traits::RegisterSet::Reg_ecx,
116 TargetX8664::Traits::RegisterSet::Reg_r8d,
117 TargetX8664::Traits::RegisterSet::Reg_r9d,
118 };
119 static_assert(llvm::array_lengthof(GprForArgNum) ==
120 TargetX8664::TargetX8664::Traits::X86_MAX_GPR_ARGS,
121 "Mismatch between MAX_GPR_ARGS and GprForArgNum.");
122 return GprForArgNum[ArgNum];
123 }
124
125 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
126 // OperandList in lowerCall. std::max() was supposed to work, but it doesn't.
127 constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; }
128
129 } // end of anonymous namespace
130
void TargetX8664::lowerCall(const InstCall *Instr) {
  // x86-64 calling convention:
  //
  // * At the point before the call, the stack must be aligned to 16
  // bytes.
  //
  // * The first eight arguments of vector/fp type, regardless of their
  // position relative to the other arguments in the argument list, are
  // placed in registers %xmm0 - %xmm7.
  //
  // * The first six arguments of integer types, regardless of their
  // position relative to the other arguments in the argument list, are
  // placed in registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
  //
  // * Other arguments are pushed onto the stack in right-to-left order,
  // such that the left-most argument ends up on the top of the stack at
  // the lowest memory address.
  //
  // * Stack arguments of vector type are aligned to start at the next
  // highest multiple of 16 bytes. Other stack arguments are aligned to
  // 8 bytes.
  //
  // This intends to match the section "Function Calling Sequence" of the
  // document "System V Application Binary Interface."
  //
  // NOTE(review): 32-bit register names (Reg_esp, Reg_eax, Reg_edx) are used
  // below — presumably the Traits register set only defines 32-bit aliases at
  // this stage of the port; confirm against IceTargetLoweringX8664Traits.h.
  NeedsStackAlignment = true;

  // SmallVector sized so that neither a full set of xmm args nor a full set
  // of gpr args needs a heap allocation.
  using OperandList =
      llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
                                                Traits::X86_MAX_GPR_ARGS)>;
  OperandList XmmArgs;
  OperandList GprArgs;
  // StackArgs[i] is stored to the esp-relative location StackArgLocations[i].
  OperandList StackArgs, StackArgLocations;
  uint32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    Type Ty = Arg->getType();
    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
    assert(typeWidthInBytes(Ty) >= 4);
    if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
      XmmArgs.push_back(Arg);
    } else if (isScalarFloatingType(Ty) &&
               XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
      XmmArgs.push_back(Arg);
    } else if (isScalarIntegerType(Ty) &&
               GprArgs.size() < Traits::X86_MAX_GPR_ARGS) {
      GprArgs.push_back(Arg);
    } else {
      // Overflow argument: passed in memory. Vector arguments start at the
      // next stack-alignment boundary (see the ABI comment above).
      StackArgs.push_back(Arg);
      if (isVectorType(Arg->getType())) {
        ParameterAreaSizeBytes =
            Traits::applyStackAlignment(ParameterAreaSizeBytes);
      }
      // Build an esp-relative memory operand for this argument's slot. The
      // offset is relative to esp after the _adjust_stack() below, since the
      // stores are emitted after that adjustment.
      Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
      StackArgLocations.push_back(
          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call esp adjustment get eliminated as well.
  if (ParameterAreaSizeBytes) {
    _adjust_stack(ParameterAreaSizeBytes);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.
  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
  }

  // Copy arguments to be passed in registers to the appropriate
  // registers.
  // TODO: Investigate the impact of lowering arguments passed in
  // registers after lowering stack arguments as opposed to the other
  // way around. Lowering register arguments after stack arguments may
  // reduce register pressure. On the other hand, lowering register
  // arguments first (before stack arguments) may result in more compact
  // code, as the memory operand displacements may end up being smaller
  // before any stack adjustment is done.
  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
    Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i));
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }

  for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
    Variable *Reg = legalizeToReg(GprArgs[i], getRegisterForGprArgNum(i));
    Context.insert(InstFakeUse::create(Func, Reg));
  }

  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      // Scalar integer results come back in eax.
      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
      break;
    case IceType_i64:
      // i64 is currently returned split across eax/edx, 32-bit style.
      // TODO(jpp): return i64 in a GPR.
      ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
      ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      // Floating-point and vector results come back in xmm0.
      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
      break;
    }
  }

  Operand *CallTarget = legalize(Instr->getCallTarget());
  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
  if (NeedSandboxing) {
    // Sandboxed (NaCl) calls must end at a bundle boundary. Direct calls to
    // a constant target only need the bundle lock; indirect targets are
    // first masked down to a bundle-aligned address inside the locked
    // bundle so the mask and the call cannot be separated.
    if (llvm::isa<Constant>(CallTarget)) {
      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
    } else {
      Variable *CallTargetVar = nullptr;
      _mov(CallTargetVar, CallTarget);
      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
      const SizeT BundleSize =
          1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
      _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
      CallTarget = CallTargetVar;
    }
  }
  Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (NeedSandboxing)
    _bundle_unlock();
  // ReturnRegHi is not an operand of the call instruction itself, so insert
  // a FakeDef to mark it as defined by the call.
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to esp. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Variable *Esp =
        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
    _add(Esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  assert(ReturnReg && "x86-64 always returns value on registers.");

  // Assign the result of the call to Dest.
  if (ReturnRegHi) {
    // 64-bit result: copy the eax/edx halves into the lo/hi halves of Dest.
    assert(Dest->getType() == IceType_i64);
    split64(Dest);
    Variable *DestLo = Dest->getLo();
    Variable *DestHi = Dest->getHi();
    _mov(DestLo, ReturnReg);
    _mov(DestHi, ReturnRegHi);
    return;
  }

  assert(Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64 ||
         Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
         Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
         isVectorType(Dest->getType()));

  // xmm results use movp (packed move); GPR results use a plain mov.
  if (isScalarFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
    _movp(Dest, ReturnReg);
  } else {
    _mov(Dest, ReturnReg);
  }
}
341
// Emits every jump table registered in the GlobalContext, in a form that
// depends on the requested output file type (ELF object, textual asm, or
// integrated-assembler textual dump).
void TargetDataX8664::lowerJumpTables() {
  switch (Ctx->getFlags().getOutFileType()) {
  case FT_Elf: {
    // Write the tables directly into the ELF object file.
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    for (const JumpTableData &JumpTable : Ctx->getJumpTables())
      // TODO(jpp): not 386.
      Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32);
  } break;
  case FT_Asm:
    // Already emitted from Cfg
    break;
  case FT_Iasm: {
    if (!BuildDefs::dump())
      return;
    // Emit one .rodata.<function>$jumptable section per table, with one
    // .long entry per target (function name + offset).
    Ostream &Str = Ctx->getStrEmit();
    for (const JumpTableData &JT : Ctx->getJumpTables()) {
      Str << "\t.section\t.rodata." << JT.getFunctionName()
          << "$jumptable,\"a\",@progbits\n";
      Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
      Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";

      // On X8664 ILP32 pointers are 32-bit hence the use of .long
      for (intptr_t TargetOffset : JT.getTargetOffsets())
        Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
      Str << "\n";
    }
  } break;
  }
}
371
84 namespace { 372 namespace {
85 template <typename T> struct PoolTypeConverter {}; 373 template <typename T> struct PoolTypeConverter {};
86 374
87 template <> struct PoolTypeConverter<float> { 375 template <> struct PoolTypeConverter<float> {
88 typedef uint32_t PrimitiveIntType; 376 typedef uint32_t PrimitiveIntType;
89 typedef ConstantFloat IceType; 377 typedef ConstantFloat IceType;
90 static const Type Ty = IceType_f32; 378 static const Type Ty = IceType_f32;
91 static const char *TypeName; 379 static const char *TypeName;
92 static const char *AsmTag; 380 static const char *AsmTag;
93 static const char *PrintfString; 381 static const char *PrintfString;
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after
229 << "$jumptable,\"a\",@progbits\n"; 517 << "$jumptable,\"a\",@progbits\n";
230 Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; 518 Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
231 Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":"; 519 Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
232 520
233 // On X8664 ILP32 pointers are 32-bit hence the use of .long 521 // On X8664 ILP32 pointers are 32-bit hence the use of .long
234 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I) 522 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
235 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName(); 523 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
236 Str << "\n"; 524 Str << "\n";
237 } 525 }
238 526
239 void TargetDataX8664::lowerJumpTables() {
240 switch (Ctx->getFlags().getOutFileType()) {
241 case FT_Elf: {
242 ELFObjectWriter *Writer = Ctx->getObjectWriter();
243 for (const JumpTableData &JT : Ctx->getJumpTables())
244 // TODO(jpp): not 386.
245 Writer->writeJumpTable(JT, llvm::ELF::R_386_32);
246 } break;
247 case FT_Asm:
248 // Already emitted from Cfg
249 break;
250 case FT_Iasm: {
251 if (!BuildDefs::dump())
252 return;
253 Ostream &Str = Ctx->getStrEmit();
254 for (const JumpTableData &JT : Ctx->getJumpTables()) {
255 Str << "\t.section\t.rodata." << JT.getFunctionName()
256 << "$jumptable,\"a\",@progbits\n";
257 Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
258 Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";
259
260 // On X8664 ILP32 pointers are 32-bit hence the use of .long
261 for (intptr_t TargetOffset : JT.getTargetOffsets())
262 Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
263 Str << "\n";
264 }
265 } break;
266 }
267 }
268
269 void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars, 527 void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars,
270 const IceString &SectionSuffix) { 528 const IceString &SectionSuffix) {
271 switch (Ctx->getFlags().getOutFileType()) { 529 switch (Ctx->getFlags().getOutFileType()) {
272 case FT_Elf: { 530 case FT_Elf: {
273 ELFObjectWriter *Writer = Ctx->getObjectWriter(); 531 ELFObjectWriter *Writer = Ctx->getObjectWriter();
274 // TODO(jpp): not 386. 532 // TODO(jpp): not 386.
275 Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix); 533 Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
276 } break; 534 } break;
277 case FT_Asm: 535 case FT_Asm:
278 case FT_Iasm: { 536 case FT_Iasm: {
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after
389 // entries in case the high-level table has extra entries. 647 // entries in case the high-level table has extra entries.
390 #define X(tag, sizeLog2, align, elts, elty, str) \ 648 #define X(tag, sizeLog2, align, elts, elty, str) \
391 static_assert(_table1_##tag == _table2_##tag, \ 649 static_assert(_table1_##tag == _table2_##tag, \
392 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); 650 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
393 ICETYPE_TABLE 651 ICETYPE_TABLE
394 #undef X 652 #undef X
395 } // end of namespace dummy3 653 } // end of namespace dummy3
396 } // end of anonymous namespace 654 } // end of anonymous namespace
397 655
398 } // end of namespace Ice 656 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8664.h ('k') | src/IceTargetLoweringX8664Traits.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698