Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1261383002: Subzero. Moves code around in preparation for 64-bit lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 5 years, 4 months ago
//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
(...skipping 71 matching lines...)
};

const size_t MachineTraits<TargetX8632>::TableTypeX8632AttributesSize =
    llvm::array_lengthof(TableTypeX8632Attributes);

const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16;
const char *MachineTraits<TargetX8632>::TargetName = "X8632";

} // end of namespace X86Internal

//------------------------------------------------------------------------------
//     __      ______  __     __  ______  ______  __  __   __  ______
//    /\ \    /\  __ \/\ \  _ \ \/\  ___\/\  == \/\ \/\ "-.\ \/\  ___\
//    \ \ \___\ \ \/\ \ \ \/ ".\ \ \  __\\ \  __<\ \ \ \ \-.  \ \ \__ \
//     \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
//      \/_____/\/_____/\/_/   \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
//
//------------------------------------------------------------------------------
void TargetX8632::lowerCall(const InstCall *Instr) {
  // x86-32 calling convention:
  //
  // * At the point before the call, the stack must be aligned to 16
  //   bytes.
  //
  // * The first four arguments of vector type, regardless of their
  //   position relative to the other arguments in the argument list, are
  //   placed in registers xmm0 - xmm3.
  //
  // * Other arguments are pushed onto the stack in right-to-left order,
  //   such that the left-most argument ends up on the top of the stack at
  //   the lowest memory address.
  //
  // * Stack arguments of vector type are aligned to start at the next
  //   highest multiple of 16 bytes. Other stack arguments are aligned to
  //   4 bytes.
  //
  // This is intended to match the section "IA-32 Function Calling
  // Convention" of the document "OS X ABI Function Call Guide" by
  // Apple.
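  //
  // Illustrative example (added for this write-up, not part of the patch):
  // for a call f(v4f32 a, i32 b, v4f32 c, v4f32 d, v4f32 e, v4f32 g), the
  // vector arguments a, c, d, and e are the first four of vector type and
  // land in xmm0-xmm3. The remaining arguments go on the stack: b is stored
  // to [esp+0], and g, being a vector, is aligned up and stored to
  // [esp+16], for a 32-byte parameter area.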
  NeedsStackAlignment = true;

  typedef std::vector<Operand *> OperandList;
  OperandList XmmArgs;
  OperandList StackArgs, StackArgLocations;
  uint32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    Type Ty = Arg->getType();
    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
    assert(typeWidthInBytes(Ty) >= 4);
    if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
      XmmArgs.push_back(Arg);
    } else {
      StackArgs.push_back(Arg);
      if (isVectorType(Arg->getType())) {
        ParameterAreaSizeBytes =
            Traits::applyStackAlignment(ParameterAreaSizeBytes);
      }
      Variable *esp =
          Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
      StackArgLocations.push_back(
          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
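  // Note (added; behavior inferred from X86_STACK_ALIGNMENT_BYTES = 16
  // above): applyStackAlignment rounds up to the next multiple of 16,
  // i.e. aligned = (bytes + 15) & ~15, so a 20-byte parameter area becomes
  // 32 bytes.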

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call esp adjustments get eliminated as well.
  if (ParameterAreaSizeBytes) {
    _adjust_stack(ParameterAreaSizeBytes);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.
  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
  }

  // Copy arguments to be passed in registers to the appropriate
  // registers.
  // TODO: Investigate the impact of lowering arguments passed in
  // registers after lowering stack arguments, as opposed to the other
  // way around. Lowering register arguments after stack arguments may
  // reduce register pressure. On the other hand, lowering register
  // arguments first (before stack arguments) may result in more compact
  // code, as the memory operand displacements may end up being smaller
  // before any stack adjustment is done.
  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
    Variable *Reg =
        legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
    // Generate a FakeUse of register arguments so that they do not get
    // dead-code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }
  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
    case IceType_void:
      llvm::report_fatal_error("Invalid Call dest type");
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
      ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
      // the fstp instruction.
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
      break;
    }
  }
  Operand *CallTarget = legalize(Instr->getCallTarget());
  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
  if (NeedSandboxing) {
    if (llvm::isa<Constant>(CallTarget)) {
      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
    } else {
      Variable *CallTargetVar = nullptr;
      _mov(CallTargetVar, CallTarget);
      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
      const SizeT BundleSize =
          1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
      _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
      CallTarget = CallTargetVar;
    }
  }
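  // Illustrative note (not part of the patch): NaCl bundles on x86 are 32
  // bytes, so getBundleAlignLog2Bytes() is expected to return 5. Then
  // BundleSize == 32 and ~(BundleSize - 1) == 0xFFFFFFE0, and the "and"
  // clears the low five bits of the target so the indirect call can only
  // land on a bundle boundary.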
  Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (NeedSandboxing)
    _bundle_unlock();
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to esp. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Variable *esp =
        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
    _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      if (isVectorType(Dest->getType())) {
        _movp(Dest, ReturnReg);
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  } else if (isScalarFloatingType(Dest->getType())) {
    // Special treatment for an FP function that returns its result in
    // st(0). If Dest ends up being a physical xmm register, the fstp emit
    // code will route st(0) through a temporary stack slot.
    _fstp(Dest);
    // Create a fake use of Dest in case it actually isn't used, because
    // st(0) still needs to be popped.
    Context.insert(InstFakeUse::create(Func, Dest));
  }
}

void TargetX8632::lowerArguments() {
  VarList &Args = Func->getArgs();
  // The first four arguments of vector type, regardless of their
  // position relative to the other arguments in the argument list, are
  // passed in registers xmm0 - xmm3.
  unsigned NumXmmArgs = 0;

  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size();
       I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    if (!isVectorType(Ty))
      continue;
    // Replace Arg in the argument list with the home register. Then
    // generate an instruction in the prolog to copy the home register
    // to the assigned location of Arg.
    int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
    ++NumXmmArgs;
    Variable *RegisterArg = Func->makeVariable(Ty);
    if (BuildDefs::dump())
      RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
    RegisterArg->setRegNum(RegNum);
    RegisterArg->setIsArg();
    Arg->setIsArg(false);

    Args[I] = RegisterArg;
    Context.insert(InstAssign::create(Func, Arg, RegisterArg));
  }
}
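
// Illustrative example for lowerArguments (added for this write-up, not
// part of the patch): for "void f(v4f32 v, ...)", v arrives in xmm0, so a
// new variable "home_reg:v" is created, pinned to xmm0, and made the formal
// argument; the entry node then gets "v = home_reg:v", leaving the register
// allocator free to keep v in any register or spill it.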

void TargetX8632::lowerRet(const InstRet *Inst) {
  Variable *Reg = nullptr;
  if (Inst->hasRetValue()) {
    Operand *Src0 = legalize(Inst->getRetValue());
    // TODO(jpp): this is not needed.
    if (Src0->getType() == IceType_i64) {
      Variable *eax =
          legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
      Variable *edx =
          legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
      Reg = eax;
      Context.insert(InstFakeUse::create(Func, edx));
    } else if (isScalarFloatingType(Src0->getType())) {
      _fld(Src0);
    } else if (isVectorType(Src0->getType())) {
      Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
    } else {
      _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
    }
  }
  // Add a ret instruction even if sandboxing is enabled, because
  // addEpilog explicitly looks for a ret instruction as a marker for
  // where to insert the frame removal instructions.
  _ret(Reg);
  // Add a fake use of esp to make sure esp stays alive for the entire
  // function. Otherwise post-call esp adjustments get dead-code
  // eliminated. TODO: Are there more places where the fake use
  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
  // have a ret instruction.
  Variable *esp =
      Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  Context.insert(InstFakeUse::create(Func, esp));
}
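
// Sketch of the result for "ret i64 %x" (illustrative, not from the patch):
// the low half of %x is legalized into eax and the high half into edx; only
// eax is attached to the ret instruction, so the FakeUse of edx is what
// keeps the edx copy from being dead-code eliminated.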

void TargetX8632::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. return address      |
  // +------------------------+
  // | 2. preserved registers |
  // +------------------------+
  // | 3. padding             |
  // +------------------------+
  // | 4. global spill area   |
  // +------------------------+
  // | 5. padding             |
  // +------------------------+
  // | 6. local spill area    |
  // +------------------------+
  // | 7. padding             |
  // +------------------------+
  // | 8. allocas             |
  // +------------------------+
  //
  // The following variables record the size in bytes of the given areas:
  // * X86_RET_IP_SIZE_BYTES: area 1
  // * PreservedRegsSizeBytes: area 2
  // * SpillAreaPaddingBytes: area 3
  // * GlobalsSize: area 4
  // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
  // * LocalsSpillAreaSize: area 6
  // * SpillAreaSizeBytes: areas 3 - 7

  // Determine stack frame offsets for each Variable without a
  // register assignment. This can be done as one variable per stack
  // slot. Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle-ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area.
  // Otherwise it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment
  // for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to the largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // A spill slot linked to a variable with a stack slot should reuse
  // that stack slot.
  std::function<bool(Variable *)> TargetVarHook =
      [&VariablesLinkedToSpillSlots](Variable *Var) {
        if (auto *SpillVar =
                llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
          assert(Var->getWeight().isZero());
          if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
            VariablesLinkedToSpillSlots.push_back(Var);
            return true;
          }
        }
        return false;
      };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      ++NumCallee;
      PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
      _push(getPhysicalRegister(i));
    }
  }
  Ctx->statsUpdateRegistersSaved(NumCallee);

  // Generate "push ebp; mov ebp, esp".
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
    Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
    _push(ebp);
    _mov(ebp, esp);
    // Keep ebp live for late-stage liveness analysis
    // (e.g. asm-verbose mode).
    Context.insert(InstFakeUse::create(Func, ebp));
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of
  // the region after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals
  // and locals areas if they are separate.
  assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
                       SpillAreaAlignmentBytes, GlobalsSize,
                       LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
                       &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Align esp if necessary.
  if (NeedsStackAlignment) {
    uint32_t StackOffset =
        Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
    uint32_t StackSize =
        Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }
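  // Hypothetical numbers (added for illustration): with a 4-byte return
  // address, one 4-byte preserved register, and 20 bytes of spill area,
  // StackOffset = 8 and StackSize = applyStackAlignment(28) = 32, so
  // SpillAreaSizeBytes grows to 24 and the "sub esp" below leaves esp
  // 16-byte aligned, matching the alignment assumed at call sites.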

  // Generate "sub esp, SpillAreaSizeBytes".
  if (SpillAreaSizeBytes)
    _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
         Ctx->getConstantInt32(SpillAreaSizeBytes));
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  resetStackAdjustment();

  // Fill in stack offsets for stack args, and copy args into registers
  // for those that were register-allocated. Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset =
      PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  unsigned NumXmmArgs = 0;
  for (Variable *Arg : Args) {
    // Skip arguments passed in registers.
    if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
      ++NumXmmArgs;
      continue;
    }
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      IsEbpBasedFrame);
  // Assign stack offsets to variables that have been linked to spilled
  // variables.
  for (Variable *Var : VariablesLinkedToSpillSlots) {
    Variable *Linked =
        (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
    Var->setStackOffset(Linked->getStackOffset());
  }
  this->HasComputedFrame = true;

  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker L(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t EspAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " esp alignment padding = " << EspAdjustmentPaddingSize
        << " bytes\n";

    Str << "Stack details:\n"
        << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is ebp based = " << IsEbpBasedFrame << "\n";
  }
}

void TargetX8632::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<typename Traits::Insts::Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
  if (IsEbpBasedFrame) {
    Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
    // For late-stage liveness analysis (e.g. asm-verbose mode),
    // adding a fake use of esp before the assignment of esp=ebp keeps
    // previous esp adjustments from being dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, esp));
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, SpillAreaSizeBytes
    if (SpillAreaSizeBytes)
      _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
  }

  // Add pop instructions for preserved registers.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }

  if (!Ctx->getFlags().getUseSandboxing())
    return;
  // Change the original ret instruction into a sandboxed return sequence.
  // t:ecx = pop
  // bundle_lock
  // and t, ~31
  // jmp *t
  // bundle_unlock
  // FakeUse <original_ret_operand>
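  //
  // In emitted assembly this is roughly (illustrative, AT&T syntax):
  //   popl %ecx
  //   .bundle_lock
  //   andl $0xffffffe0, %ecx
  //   jmp *%ecx
  //   .bundle_unlock
  // so the return target is forced to a 32-byte bundle boundary.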
  Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
  _pop(T_ecx);
  lowerIndirectJump(T_ecx);
  if (RI->getSrcSize()) {
    Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
    Context.insert(InstFakeUse::create(Func, RetValue));
  }
  RI->setDeleted();
}

void TargetX8632::emitJumpTable(const Cfg *Func,
                                const InstJumpTable *JumpTable) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  IceString MangledName = Ctx->mangleName(Func->getFunctionName());
  Str << "\t.section\t.rodata." << MangledName
      << "$jumptable,\"a\",@progbits\n";
  Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
  Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";

  // On X8632 pointers are 32-bit hence the use of .long
  for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
    Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
  Str << "\n";
}
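
// For a two-entry jump table in a function "foo", the emitted text would
// look roughly like this (labels are illustrative; the real names come from
// InstJumpTable::makeName and getAsmName):
//   .section .rodata.foo$jumptable,"a",@progbits
//   .align 4
//   <table label>:
//   .long <target 0 label>
//   .long <target 1 label>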

TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}

namespace {
template <typename T> struct PoolTypeConverter {};

template <> struct PoolTypeConverter<float> {
  typedef uint32_t PrimitiveIntType;
  typedef ConstantFloat IceType;
  static const Type Ty = IceType_f32;
(...skipping 50 matching lines...)
  static const Type Ty = IceType_i8;
  static const char *TypeName;
  static const char *AsmTag;
  static const char *PrintfString;
};
const char *PoolTypeConverter<uint8_t>::TypeName = "i8";
const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte";
const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";
} // end of anonymous namespace
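
// Note on PoolTypeConverter (an inference added for this write-up, not part
// of the patch): each specialization supplies the pieces emitConstantPool
// needs to print one pool, e.g. the uint8_t specialization above pairs
// IceType_i8 with the ".byte" AsmTag and a "0x%x" printf format; the
// elided float specialization presumably does the same with a 32-bit
// integer reinterpretation of the float's bits.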

void TargetX8632::emitJumpTable(const Cfg *Func,
                                const InstJumpTable *JumpTable) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  IceString MangledName = Ctx->mangleName(Func->getFunctionName());
  Str << "\t.section\t.rodata." << MangledName
      << "$jumptable,\"a\",@progbits\n";
  Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
  Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";

  // On X8632 pointers are 32-bit hence the use of .long
  for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
    Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
  Str << "\n";
}

template <typename T>
void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Type Ty = T::Ty;
  SizeT Align = typeAlignInBytes(Ty);
  ConstantList Pool = Ctx->getConstantPool(Ty);

  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
(...skipping 211 matching lines...)
// Repeat the static asserts with respect to the high-level table
// entries in case the high-level table has extra entries.
#define X(tag, sizeLog2, align, elts, elty, str)                               \
  static_assert(_table1_##tag == _table2_##tag,                                \
                "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
ICETYPE_TABLE
#undef X
} // end of namespace dummy3
} // end of anonymous namespace

//------------------------------------------------------------------------------
//     __      ______  __     __  ______  ______  __  __   __  ______
//    /\ \    /\  __ \/\ \  _ \ \/\  ___\/\  == \/\ \/\ "-.\ \/\  ___\
//    \ \ \___\ \ \/\ \ \ \/ ".\ \ \  __\\ \  __<\ \ \ \ \-.  \ \ \__ \
//     \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
//      \/_____/\/_____/\/_/   \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
//
//------------------------------------------------------------------------------
void TargetX8632::lowerCall(const InstCall *Instr) {
  // x86-32 calling convention:
  //
  // * At the point before the call, the stack must be aligned to 16
  //   bytes.
  //
  // * The first four arguments of vector type, regardless of their
  //   position relative to the other arguments in the argument list, are
  //   placed in registers xmm0 - xmm3.
  //
  // * Other arguments are pushed onto the stack in right-to-left order,
  //   such that the left-most argument ends up on the top of the stack at
  //   the lowest memory address.
  //
  // * Stack arguments of vector type are aligned to start at the next
  //   highest multiple of 16 bytes. Other stack arguments are aligned to
  //   4 bytes.
  //
  // This intends to match the section "IA-32 Function Calling
  // Convention" of the document "OS X ABI Function Call Guide" by
  // Apple.
  NeedsStackAlignment = true;

  typedef std::vector<Operand *> OperandList;
  OperandList XmmArgs;
  OperandList StackArgs, StackArgLocations;
  uint32_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = Instr->getArg(i);
    Type Ty = Arg->getType();
    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
    assert(typeWidthInBytes(Ty) >= 4);
    if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
      XmmArgs.push_back(Arg);
    } else {
      StackArgs.push_back(Arg);
      if (isVectorType(Arg->getType())) {
        ParameterAreaSizeBytes =
            Traits::applyStackAlignment(ParameterAreaSizeBytes);
      }
      Variable *esp =
          Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
      Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
      StackArgLocations.push_back(
          Traits::X86OperandMem::create(Func, Ty, esp, Loc));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is
  // assumed that the stack is already aligned at the start of the
  // calling sequence.
  ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);

  // Subtract the appropriate amount for the argument area. This also
  // takes care of setting the stack adjustment during emission.
  //
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call and the post-call esp adjustment get eliminated as well.
  if (ParameterAreaSizeBytes) {
    _adjust_stack(ParameterAreaSizeBytes);
  }

  // Copy arguments that are passed on the stack to the appropriate
  // stack locations.
  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
  }

  // Copy arguments to be passed in registers to the appropriate
  // registers.
  // TODO: Investigate the impact of lowering arguments passed in
  // registers after lowering stack arguments as opposed to the other
  // way around. Lowering register arguments after stack arguments may
  // reduce register pressure. On the other hand, lowering register
  // arguments first (before stack arguments) may result in more compact
  // code, as the memory operand displacements may end up being smaller
  // before any stack adjustment is done.
  for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
    Variable *Reg =
        legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
    // Generate a FakeUse of register arguments so that they do not get
    // dead code eliminated as a result of the FakeKill of scratch
    // registers after the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }
  // Generate the call instruction. Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
      break;
    case IceType_i64:
      ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
      ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
      // the fstp instruction.
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32:
    case IceType_v4f32:
      ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
      break;
    }
  }
  Operand *CallTarget = legalize(Instr->getCallTarget());
  const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
  if (NeedSandboxing) {
    if (llvm::isa<Constant>(CallTarget)) {
      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
    } else {
      Variable *CallTargetVar = nullptr;
      _mov(CallTargetVar, CallTarget);
      _bundle_lock(InstBundleLock::Opt_AlignToEnd);
      const SizeT BundleSize =
          1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
      _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
      CallTarget = CallTargetVar;
    }
  }
  Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (NeedSandboxing)
    _bundle_unlock();
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  // Add the appropriate offset to esp. The call instruction takes care
  // of resetting the stack offset during emission.
  if (ParameterAreaSizeBytes) {
    Variable *esp =
        Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
    _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isVectorType(Dest->getType()));
      if (isVectorType(Dest->getType())) {
        _movp(Dest, ReturnReg);
      } else {
        _mov(Dest, ReturnReg);
      }
    }
  } else if (isScalarFloatingType(Dest->getType())) {
    // Special treatment for an FP function which returns its result in
    // st(0).
    // If Dest ends up being a physical xmm register, the fstp emit code
    // will route st(0) through a temporary stack slot.
    _fstp(Dest);
    // Create a fake use of Dest in case it actually isn't used,
    // because st(0) still needs to be popped.
    Context.insert(InstFakeUse::create(Func, Dest));
  }
}

} // end of namespace Ice