Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(467)

Side by Side Diff: src/IceTargetLoweringX8664.cpp

Issue 1266673003: Subzero. Implements x86-64 lowerCall. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: git pull && addresses comments && git format Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8664.h ('k') | src/IceTargetLoweringX8664Traits.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
11 /// This file implements the TargetLoweringX8664 class, which 11 /// This file implements the TargetLoweringX8664 class, which
12 /// consists almost entirely of the lowering sequence for each 12 /// consists almost entirely of the lowering sequence for each
13 /// high-level instruction. 13 /// high-level instruction.
14 /// 14 ///
15 //===----------------------------------------------------------------------===// 15 //===----------------------------------------------------------------------===//
16 16
17 #include "IceTargetLoweringX8664.h" 17 #include "IceTargetLoweringX8664.h"
18 18
19 #include "IceTargetLoweringX8664Traits.h" 19 #include "IceTargetLoweringX8664Traits.h"
20 #include "IceTargetLoweringX86Base.h" 20 #include "IceTargetLoweringX86Base.h"
21 21
22 namespace Ice { 22 namespace Ice {
23 23
24 //------------------------------------------------------------------------------
25 // ______ ______ ______ __ ______ ______
26 // /\__ _\ /\ == \ /\ __ \ /\ \ /\__ _\ /\ ___\
27 // \/_/\ \/ \ \ __< \ \ __ \ \ \ \ \/_/\ \/ \ \___ \
28 // \ \_\ \ \_\ \_\ \ \_\ \_\ \ \_\ \ \_\ \/\_____\
29 // \/_/ \/_/ /_/ \/_/\/_/ \/_/ \/_/ \/_____/
30 //
31 //------------------------------------------------------------------------------
24 namespace X86Internal { 32 namespace X86Internal {
25 const MachineTraits<TargetX8664>::TableFcmpType 33 const MachineTraits<TargetX8664>::TableFcmpType
26 MachineTraits<TargetX8664>::TableFcmp[] = { 34 MachineTraits<TargetX8664>::TableFcmp[] = {
27 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ 35 #define X(val, dflt, swapS, C1, C2, swapV, pred) \
28 { \ 36 { \
29 dflt, swapS, X8664::Traits::Cond::C1, X8664::Traits::Cond::C2, swapV, \ 37 dflt, swapS, X8664::Traits::Cond::C1, X8664::Traits::Cond::C2, swapV, \
30 X8664::Traits::Cond::pred \ 38 X8664::Traits::Cond::pred \
31 } \ 39 } \
32 , 40 ,
33 FCMPX8664_TABLE 41 FCMPX8664_TABLE
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
74 }; 82 };
75 83
76 const size_t MachineTraits<TargetX8664>::TableTypeX8664AttributesSize = 84 const size_t MachineTraits<TargetX8664>::TableTypeX8664AttributesSize =
77 llvm::array_lengthof(TableTypeX8664Attributes); 85 llvm::array_lengthof(TableTypeX8664Attributes);
78 86
79 const uint32_t MachineTraits<TargetX8664>::X86_STACK_ALIGNMENT_BYTES = 16; 87 const uint32_t MachineTraits<TargetX8664>::X86_STACK_ALIGNMENT_BYTES = 16;
80 const char *MachineTraits<TargetX8664>::TargetName = "X8664"; 88 const char *MachineTraits<TargetX8664>::TargetName = "X8664";
81 89
82 } // end of namespace X86Internal 90 } // end of namespace X86Internal
83 91
92 //------------------------------------------------------------------------------
93 // __ ______ __ __ ______ ______ __ __ __ ______
94 // /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
95 // \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
96 // \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
97 // \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
98 //
99 //------------------------------------------------------------------------------
100 namespace {
101 static inline TargetX8664::Traits::RegisterSet::AllRegisters
102 getRegisterForXmmArgNum(uint32_t ArgNum) {
103 assert(ArgNum < TargetX8664::Traits::X86_MAX_XMM_ARGS);
104 return static_cast<TargetX8664::Traits::RegisterSet::AllRegisters>(
105 TargetX8664::Traits::RegisterSet::Reg_xmm0 + ArgNum);
106 }
107
108 static inline TargetX8664::Traits::RegisterSet::AllRegisters
109 getRegisterForGprArgNum(uint32_t ArgNum) {
110 assert(ArgNum < TargetX8664::Traits::X86_MAX_GPR_ARGS);
111 static const TargetX8664::Traits::RegisterSet::AllRegisters GprForArgNum[] = {
112 TargetX8664::Traits::RegisterSet::Reg_edi,
113 TargetX8664::Traits::RegisterSet::Reg_esi,
114 TargetX8664::Traits::RegisterSet::Reg_edx,
115 TargetX8664::Traits::RegisterSet::Reg_ecx,
116 TargetX8664::Traits::RegisterSet::Reg_r8d,
117 TargetX8664::Traits::RegisterSet::Reg_r9d,
118 };
119 static_assert(llvm::array_lengthof(GprForArgNum) ==
120 TargetX8664::TargetX8664::Traits::X86_MAX_GPR_ARGS,
121 "Mismatch between MAX_GPR_ARGS and GprForArgNum.");
122 return GprForArgNum[ArgNum];
123 }
124
125 // constexprMax returns a (constexpr) max(S0, S1), and it is used for defining
126 // OperandList in lowerCall. std::max() was supposed to work, but it doesn't.
127 constexpr SizeT constexprMax(SizeT S0, SizeT S1) { return S0 < S1 ? S1 : S0; }
128
129 } // end of anonymous namespace
130
void TargetX8664::lowerCall(const InstCall *Instr) {
  // x86-64 calling convention:
  //
  // * At the point before the call, the stack must be aligned to 16
  // bytes.
  //
  // * The first eight arguments of vector/fp type, regardless of their
  // position relative to the other arguments in the argument list, are
  // placed in registers %xmm0 - %xmm7.
  //
  // * The first six arguments of integer types, regardless of their
  // position relative to the other arguments in the argument list, are
  // placed in registers %rdi, %rsi, %rdx, %rcx, %r8, and %r9.
  //
  // * Other arguments are pushed onto the stack in right-to-left order,
  // such that the left-most argument ends up on the top of the stack at
  // the lowest memory address.
  //
  // * Stack arguments of vector type are aligned to start at the next
  // highest multiple of 16 bytes. Other stack arguments are aligned to
  // 8 bytes.
  //
  // This intends to match the section "Function Calling Sequence" of the
  // document "System V Application Binary Interface."
  //
  // NOTE(review): 32-bit register names (Reg_esp, Reg_eax, Reg_edx) are used
  // below — presumably the Traits register set only defines 32-bit aliases at
  // this stage of the port; confirm against IceTargetLoweringX8664Traits.h.
  NeedsStackAlignment = true;

  // SmallVector sized so that neither a full set of xmm args nor a full set
  // of gpr args needs a heap allocation.
  using OperandList =
      llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
                                                Traits::X86_MAX_GPR_ARGS)>;
  OperandList XmmArgs;
  OperandList GprArgs;
  // StackArgs[i] is stored to the esp-relative location StackArgLocations[i].
  OperandList StackArgs, StackArgLocations;
  uint32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    Type Ty = Arg->getType();
    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
    assert(typeWidthInBytes(Ty) >= 4);
    if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
      XmmArgs.push_back(Arg);
    } else if (isScalarFloatingType(Ty) &&
               XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
      XmmArgs.push_back(Arg);
    } else if (isScalarIntegerType(Ty) &&
               GprArgs.size() < Traits::X86_MAX_GPR_ARGS) {
      GprArgs.push_back(Arg);
    } else {
      // Overflow argument: passed in memory. Vector arguments start at the
      // next stack-alignment boundary (see the ABI comment above).
      StackArgs.push_back(Arg);
      if (isVectorType(Arg->getType())) {
        ParameterAreaSizeBytes =
            Traits::applyStackAlignment(ParameterAreaSizeBytes);
      }
      // Build an esp-relative memory operand for this argument's slot. The
      // offset is relative to esp after the _adjust_stack() below, since the
      // stores are emitted after that adjustment.
      Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
      StackArgLocations.push_back(
          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call esp adjustment get eliminated as well.
  if (ParameterAreaSizeBytes) {
    _adjust_stack(ParameterAreaSizeBytes);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.
  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
  }

  // Copy arguments to be passed in registers to the appropriate
  // registers.
  // TODO: Investigate the impact of lowering arguments passed in
  // registers after lowering stack arguments as opposed to the other
  // way around. Lowering register arguments after stack arguments may
  // reduce register pressure. On the other hand, lowering register
  // arguments first (before stack arguments) may result in more compact
  // code, as the memory operand displacements may end up being smaller
  // before any stack adjustment is done.
  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
    Variable *Reg = legalizeToReg(XmmArgs[i], getRegisterForXmmArgNum(i));
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }

  for (SizeT i = 0, NumGprArgs = GprArgs.size(); i < NumGprArgs; ++i) {
    Variable *Reg = legalizeToReg(GprArgs[i], getRegisterForGprArgNum(i));
    Context.insert(InstFakeUse::create(Func, Reg));
  }

  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      // Scalar integer results come back in eax.
      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
      break;
    case IceType_i64:
      // i64 is currently returned split across eax/edx, 32-bit style.
      // TODO(jpp): return i64 in a GPR.
      ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
      ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      // Floating-point and vector results come back in xmm0.
      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
      break;
    }
  }

  Operand *CallTarget = legalize(Instr->getCallTarget());
  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
  if (NeedSandboxing) {
    // Sandboxed (NaCl) calls must end at a bundle boundary. Direct calls to
    // a constant target only need the bundle lock; indirect targets are
    // first masked down to a bundle-aligned address inside the locked
    // bundle so the mask and the call cannot be separated.
    if (llvm::isa<Constant>(CallTarget)) {
      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
    } else {
      Variable *CallTargetVar = nullptr;
      _mov(CallTargetVar, CallTarget);
      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
      const SizeT BundleSize =
          1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
      _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
      CallTarget = CallTargetVar;
    }
  }
  Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (NeedSandboxing)
    _bundle_unlock();
  // ReturnRegHi is not an operand of the call instruction itself, so insert
  // a FakeDef to mark it as defined by the call.
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to esp. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Variable *Esp =
        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
    _add(Esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  assert(ReturnReg && "x86-64 always returns value on registers.");

  // Assign the result of the call to Dest.
  if (ReturnRegHi) {
    // 64-bit result: copy the eax/edx halves into the lo/hi halves of Dest.
    assert(Dest->getType() == IceType_i64);
    split64(Dest);
    Variable *DestLo = Dest->getLo();
    Variable *DestHi = Dest->getHi();
    _mov(DestLo, ReturnReg);
    _mov(DestHi, ReturnRegHi);
    return;
  }

  assert(Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64 ||
         Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
         Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
         isVectorType(Dest->getType()));

  // xmm results use movp (packed move); GPR results use a plain mov.
  if (isScalarFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
    _movp(Dest, ReturnReg);
  } else {
    _mov(Dest, ReturnReg);
  }
}
341
// Emits every jump table registered in the GlobalContext, in a form that
// depends on the requested output file type (ELF object, textual asm, or
// integrated-assembler textual dump).
void TargetDataX8664::lowerJumpTables() {
  switch (Ctx->getFlags().getOutFileType()) {
  case FT_Elf: {
    // Write the tables directly into the ELF object file.
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    for (const JumpTableData &JumpTable : Ctx->getJumpTables())
      // TODO(jpp): not 386.
      Writer->writeJumpTable(JumpTable, llvm::ELF::R_386_32);
  } break;
  case FT_Asm:
    // Already emitted from Cfg
    break;
  case FT_Iasm: {
    if (!BuildDefs::dump())
      return;
    // Emit one .rodata.<function>$jumptable section per table, with one
    // .long entry per target (function name + offset).
    Ostream &Str = Ctx->getStrEmit();
    for (const JumpTableData &JT : Ctx->getJumpTables()) {
      Str << "\t.section\t.rodata." << JT.getFunctionName()
          << "$jumptable,\"a\",@progbits\n";
      Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
      Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";

      // On X8664 ILP32 pointers are 32-bit hence the use of .long
      for (intptr_t TargetOffset : JT.getTargetOffsets())
        Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
      Str << "\n";
    }
  } break;
  }
}
371
84 namespace { 372 namespace {
85 template <typename T> struct PoolTypeConverter {}; 373 template <typename T> struct PoolTypeConverter {};
86 374
87 template <> struct PoolTypeConverter<float> { 375 template <> struct PoolTypeConverter<float> {
88 typedef uint32_t PrimitiveIntType; 376 typedef uint32_t PrimitiveIntType;
89 typedef ConstantFloat IceType; 377 typedef ConstantFloat IceType;
90 static const Type Ty = IceType_f32; 378 static const Type Ty = IceType_f32;
91 static const char *TypeName; 379 static const char *TypeName;
92 static const char *AsmTag; 380 static const char *AsmTag;
93 static const char *PrintfString; 381 static const char *PrintfString;
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after
229 << "$jumptable,\"a\",@progbits\n"; 517 << "$jumptable,\"a\",@progbits\n";
230 Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; 518 Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
231 Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":"; 519 Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
232 520
233 // On X8664 ILP32 pointers are 32-bit hence the use of .long 521 // On X8664 ILP32 pointers are 32-bit hence the use of .long
234 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I) 522 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
235 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName(); 523 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
236 Str << "\n"; 524 Str << "\n";
237 } 525 }
238 526
239 void TargetDataX8664::lowerJumpTables() {
240 switch (Ctx->getFlags().getOutFileType()) {
241 case FT_Elf: {
242 ELFObjectWriter *Writer = Ctx->getObjectWriter();
243 for (const JumpTableData &JT : Ctx->getJumpTables())
244 // TODO(jpp): not 386.
245 Writer->writeJumpTable(JT, llvm::ELF::R_386_32);
246 } break;
247 case FT_Asm:
248 // Already emitted from Cfg
249 break;
250 case FT_Iasm: {
251 if (!BuildDefs::dump())
252 return;
253 Ostream &Str = Ctx->getStrEmit();
254 for (const JumpTableData &JT : Ctx->getJumpTables()) {
255 Str << "\t.section\t.rodata." << JT.getFunctionName()
256 << "$jumptable,\"a\",@progbits\n";
257 Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
258 Str << InstJumpTable::makeName(JT.getFunctionName(), JT.getId()) << ":";
259
260 // On X8664 ILP32 pointers are 32-bit hence the use of .long
261 for (intptr_t TargetOffset : JT.getTargetOffsets())
262 Str << "\n\t.long\t" << JT.getFunctionName() << "+" << TargetOffset;
263 Str << "\n";
264 }
265 } break;
266 }
267 }
268
269 void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars, 527 void TargetDataX8664::lowerGlobals(const VariableDeclarationList &Vars,
270 const IceString &SectionSuffix) { 528 const IceString &SectionSuffix) {
271 switch (Ctx->getFlags().getOutFileType()) { 529 switch (Ctx->getFlags().getOutFileType()) {
272 case FT_Elf: { 530 case FT_Elf: {
273 ELFObjectWriter *Writer = Ctx->getObjectWriter(); 531 ELFObjectWriter *Writer = Ctx->getObjectWriter();
274 // TODO(jpp): not 386. 532 // TODO(jpp): not 386.
275 Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix); 533 Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
276 } break; 534 } break;
277 case FT_Asm: 535 case FT_Asm:
278 case FT_Iasm: { 536 case FT_Iasm: {
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after
389 // entries in case the high-level table has extra entries. 647 // entries in case the high-level table has extra entries.
390 #define X(tag, sizeLog2, align, elts, elty, str) \ 648 #define X(tag, sizeLog2, align, elts, elty, str) \
391 static_assert(_table1_##tag == _table2_##tag, \ 649 static_assert(_table1_##tag == _table2_##tag, \
392 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); 650 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
393 ICETYPE_TABLE 651 ICETYPE_TABLE
394 #undef X 652 #undef X
395 } // end of namespace dummy3 653 } // end of namespace dummy3
396 } // end of anonymous namespace 654 } // end of anonymous namespace
397 655
398 } // end of namespace Ice 656 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8664.h ('k') | src/IceTargetLoweringX8664Traits.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698