Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1261383002: Subzero. Moves code around in preparation for 64-bit lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Moves methods in TargetLoweringX8632.cpp so they match TargetLoweringX8664.cpp Created 5 years, 4 months ago
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 71 matching lines...)
82 }; 82 };
83 83
84 const size_t MachineTraits<TargetX8632>::TableTypeX8632AttributesSize = 84 const size_t MachineTraits<TargetX8632>::TableTypeX8632AttributesSize =
85 llvm::array_lengthof(TableTypeX8632Attributes); 85 llvm::array_lengthof(TableTypeX8632Attributes);
86 86
87 const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16; 87 const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16;
88 const char *MachineTraits<TargetX8632>::TargetName = "X8632"; 88 const char *MachineTraits<TargetX8632>::TargetName = "X8632";
89 89
90 } // end of namespace X86Internal 90 } // end of namespace X86Internal
91 91
92 //------------------------------------------------------------------------------
93 // __ ______ __ __ ______ ______ __ __ __ ______
94 // /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
95 // \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
96 // \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
97 // \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
98 //
99 //------------------------------------------------------------------------------
100 void TargetX8632::lowerCall(const InstCall *Instr) {
101 // x86-32 calling convention:
102 //
103 // * At the point before the call, the stack must be aligned to 16
104 // bytes.
105 //
106 // * The first four arguments of vector type, regardless of their
107 // position relative to the other arguments in the argument list, are
108 // placed in registers xmm0 - xmm3.
109 //
110 // * Other arguments are pushed onto the stack in right-to-left order,
111 // such that the left-most argument ends up on the top of the stack at
112 // the lowest memory address.
113 //
114 // * Stack arguments of vector type are aligned to start at the next
115 // highest multiple of 16 bytes. Other stack arguments are aligned to
116 // 4 bytes.
117 //
118 // This is intended to match the section "IA-32 Function Calling
119 // Convention" of the document "OS X ABI Function Call Guide" by
120 // Apple.
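// Worked example (illustrative, not part of this CL): for a call
// f(<4 x i32> %v0, %v1, %v2, %v3, %v4, i32 %a), %v0-%v3 go in
// xmm0-xmm3, and the two remaining arguments are laid out as
//   esp+0  : %v4 (16 bytes, already 16-byte aligned)
//   esp+16 : %a  (4 bytes)
// giving a 20-byte parameter area that is then padded to 32 bytes so
// the stack stays 16-byte aligned at the call.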
121 NeedsStackAlignment = true;
122
123 typedef std::vector<Operand *> OperandList;
124 OperandList XmmArgs;
125 OperandList StackArgs, StackArgLocations;
126 uint32_t ParameterAreaSizeBytes = 0;
127
128 // Classify each argument operand according to the location where the
129 // argument is passed.
130 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
131 Operand *Arg = Instr->getArg(i);
132 Type Ty = Arg->getType();
133 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
134 assert(typeWidthInBytes(Ty) >= 4);
135 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
136 XmmArgs.push_back(Arg);
137 } else {
138 StackArgs.push_back(Arg);
139 if (isVectorType(Arg->getType())) {
140 ParameterAreaSizeBytes =
141 Traits::applyStackAlignment(ParameterAreaSizeBytes);
142 }
143 Variable *esp =
144 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
145 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
146 StackArgLocations.push_back(
147 Traits::X86OperandMem::create(Func, Ty, esp, Loc));
148 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
149 }
150 }
151
152 // Adjust the parameter area so that the stack is aligned. It is
153 // assumed that the stack is already aligned at the start of the
154 // calling sequence.
155 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
156
157 // Subtract the appropriate amount for the argument area. This also
158 // takes care of setting the stack adjustment during emission.
159 //
160 // TODO: If for some reason the call instruction gets dead-code
161 // eliminated after lowering, we would need to ensure that the
162 // pre-call and the post-call esp adjustment get eliminated as well.
163 if (ParameterAreaSizeBytes) {
164 _adjust_stack(ParameterAreaSizeBytes);
165 }
166
167 // Copy arguments that are passed on the stack to the appropriate
168 // stack locations.
169 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
170 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
171 }
172
173 // Copy arguments to be passed in registers to the appropriate
174 // registers.
175 // TODO: Investigate the impact of lowering arguments passed in
176 // registers after lowering stack arguments as opposed to the other
177 // way around. Lowering register arguments after stack arguments may
178 // reduce register pressure. On the other hand, lowering register
179 // arguments first (before stack arguments) may result in more compact
180 // code, as the memory operand displacements may end up being smaller
181 // before any stack adjustment is done.
182 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
183 Variable *Reg =
184 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
185 // Generate a FakeUse of register arguments so that they do not get
186 // dead code eliminated as a result of the FakeKill of scratch
187 // registers after the call.
188 Context.insert(InstFakeUse::create(Func, Reg));
189 }
190 // Generate the call instruction. Assign its result to a temporary
191 // with high register allocation weight.
192 Variable *Dest = Instr->getDest();
193 // ReturnReg doubles as ReturnRegLo as necessary.
194 Variable *ReturnReg = nullptr;
195 Variable *ReturnRegHi = nullptr;
196 if (Dest) {
197 switch (Dest->getType()) {
198 case IceType_NUM:
199 llvm_unreachable("Invalid Call dest type");
200 break;
201 case IceType_void:
Jim Stichnoth 2015/08/06 13:49:19 Wasn't there discussion on another CL that IceType
John 2015/08/06 14:44:08 There was a comment about it, but I did not unders
202 break;
203 case IceType_i1:
204 case IceType_i8:
205 case IceType_i16:
206 case IceType_i32:
207 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
208 break;
209 case IceType_i64:
210 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
211 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
212 break;
213 case IceType_f32:
214 case IceType_f64:
215 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
216 // the fstp instruction.
217 break;
218 case IceType_v4i1:
219 case IceType_v8i1:
220 case IceType_v16i1:
221 case IceType_v16i8:
222 case IceType_v8i16:
223 case IceType_v4i32:
224 case IceType_v4f32:
225 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
226 break;
227 }
228 }
229 Operand *CallTarget = legalize(Instr->getCallTarget());
230 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
231 if (NeedSandboxing) {
232 if (llvm::isa<Constant>(CallTarget)) {
233 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
234 } else {
235 Variable *CallTargetVar = nullptr;
236 _mov(CallTargetVar, CallTarget);
237 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
238 const SizeT BundleSize =
239 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
240 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
241 CallTarget = CallTargetVar;
242 }
243 }
244 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
245 Context.insert(NewCall);
246 if (NeedSandboxing)
247 _bundle_unlock();
248 if (ReturnRegHi)
249 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
250
251 // Add the appropriate offset to esp. The call instruction takes care
252 // of resetting the stack offset during emission.
253 if (ParameterAreaSizeBytes) {
254 Variable *esp =
255 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
256 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
257 }
258
259 // Insert a register-kill pseudo instruction.
260 Context.insert(InstFakeKill::create(Func, NewCall));
261
262 // Generate a FakeUse to keep the call live if necessary.
263 if (Instr->hasSideEffects() && ReturnReg) {
264 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
265 Context.insert(FakeUse);
266 }
267
268 if (!Dest)
269 return;
270
271 // Assign the result of the call to Dest.
272 if (ReturnReg) {
273 if (ReturnRegHi) {
274 assert(Dest->getType() == IceType_i64);
275 split64(Dest);
276 Variable *DestLo = Dest->getLo();
277 Variable *DestHi = Dest->getHi();
278 _mov(DestLo, ReturnReg);
279 _mov(DestHi, ReturnRegHi);
280 } else {
281 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
282 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
283 isVectorType(Dest->getType()));
284 if (isVectorType(Dest->getType())) {
285 _movp(Dest, ReturnReg);
286 } else {
287 _mov(Dest, ReturnReg);
288 }
289 }
290 } else if (isScalarFloatingType(Dest->getType())) {
291 // Special treatment for an FP function which returns its result in
292 // st(0).
293 // If Dest ends up being a physical xmm register, the fstp emit code
294 // will route st(0) through a temporary stack slot.
295 _fstp(Dest);
296 // Create a fake use of Dest in case it actually isn't used,
297 // because st(0) still needs to be popped.
298 Context.insert(InstFakeUse::create(Func, Dest));
299 }
300 }
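As an aside for reviewers, the classification loop above can be summarized by this standalone sketch (illustrative only; the names and the 16-byte rounding assumed for applyStackAlignment are not code from this CL):

#include <cstdint>
#include <vector>

struct ArgInfo {
  bool IsVector;         // isVectorType(Ty)
  uint32_t BytesOnStack; // typeWidthInBytesOnStack(Ty)
};

// Mirrors lowerCall(): at most four vector arguments are assigned to
// xmm0-xmm3; every other argument receives an esp-relative offset, and
// vector stack slots are first rounded up to a 16-byte boundary.
static void classifyArgs(const std::vector<ArgInfo> &Args,
                         std::vector<int> &XmmIndex,         // -1 => stack
                         std::vector<uint32_t> &StackOffset, // esp-relative
                         uint32_t &ParameterAreaSizeBytes) {
  constexpr unsigned MaxXmmArgs = 4; // Traits::X86_MAX_XMM_ARGS
  unsigned NumXmm = 0;
  ParameterAreaSizeBytes = 0;
  for (const ArgInfo &A : Args) {
    if (A.IsVector && NumXmm < MaxXmmArgs) {
      XmmIndex.push_back(static_cast<int>(NumXmm++));
      StackOffset.push_back(0); // unused for register args
    } else {
      if (A.IsVector) // assumed effect of applyStackAlignment
        ParameterAreaSizeBytes = (ParameterAreaSizeBytes + 15) & ~15u;
      XmmIndex.push_back(-1);
      StackOffset.push_back(ParameterAreaSizeBytes);
      ParameterAreaSizeBytes += A.BytesOnStack;
    }
  }
  // The whole area is then itself padded to a 16-byte multiple.
  ParameterAreaSizeBytes = (ParameterAreaSizeBytes + 15) & ~15u;
}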
301
302 void TargetX8632::lowerArguments() {
303 VarList &Args = Func->getArgs();
304 // The first four arguments of vector type, regardless of their
305 // position relative to the other arguments in the argument list, are
306 // passed in registers xmm0 - xmm3.
307 unsigned NumXmmArgs = 0;
308
309 Context.init(Func->getEntryNode());
310 Context.setInsertPoint(Context.getCur());
311
312 for (SizeT I = 0, E = Args.size();
313 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {
314 Variable *Arg = Args[I];
315 Type Ty = Arg->getType();
316 if (!isVectorType(Ty))
317 continue;
318 // Replace Arg in the argument list with the home register. Then
319 // generate an instruction in the prolog to copy the home register
320 // to the assigned location of Arg.
321 int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
322 ++NumXmmArgs;
323 Variable *RegisterArg = Func->makeVariable(Ty);
324 if (BuildDefs::dump())
325 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
326 RegisterArg->setRegNum(RegNum);
327 RegisterArg->setIsArg();
328 Arg->setIsArg(false);
329
330 Args[I] = RegisterArg;
331 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
332 }
333 }
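To make the rewrite concrete (schematic, not compiler output): for a function taking <4 x i32> %v as its first vector argument, %v is replaced in Args by a new variable named home_reg:%v pinned to xmm0 (the home_reg: prefix comes from the setName call above), and the assignment

  %v = home_reg:%v

is inserted into the entry node so the original variable receives its value from the home register.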
334
335 void TargetX8632::lowerRet(const InstRet *Inst) {
336 Variable *Reg = nullptr;
337 if (Inst->hasRetValue()) {
338 Operand *Src0 = legalize(Inst->getRetValue());
339 // TODO(jpp): this is not needed.
340 if (Src0->getType() == IceType_i64) {
341 Variable *eax =
342 legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
343 Variable *edx =
344 legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
345 Reg = eax;
346 Context.insert(InstFakeUse::create(Func, edx));
347 } else if (isScalarFloatingType(Src0->getType())) {
348 _fld(Src0);
349 } else if (isVectorType(Src0->getType())) {
350 Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
351 } else {
352 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
353 }
354 }
355 // Add a ret instruction even if sandboxing is enabled, because
356 // addEpilog explicitly looks for a ret instruction as a marker for
357 // where to insert the frame removal instructions.
358 _ret(Reg);
359 // Add a fake use of esp to make sure esp stays alive for the entire
360 // function. Otherwise post-call esp adjustments get dead-code
361 // eliminated. TODO: Are there more places where the fake use
362 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
363 // have a ret instruction.
364 Variable *esp =
365 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
366 Context.insert(InstFakeUse::create(Func, esp));
367 }
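Aside: the eax/edx pairing used here for i64 mirrors the ReturnReg/ReturnRegHi handling in lowerCall above. A minimal sketch of how a caller reassembles the value (the helper name is hypothetical):

#include <cstdint>

// eax carries the low 32 bits of an i64 return value, edx the high 32.
static inline uint64_t combineEaxEdx(uint32_t Eax, uint32_t Edx) {
  return (static_cast<uint64_t>(Edx) << 32) | Eax;
}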
368
369 void TargetX8632::addProlog(CfgNode *Node) {
370 // Stack frame layout:
371 //
372 // +------------------------+
373 // | 1. return address |
374 // +------------------------+
375 // | 2. preserved registers |
376 // +------------------------+
377 // | 3. padding |
378 // +------------------------+
379 // | 4. global spill area |
380 // +------------------------+
381 // | 5. padding |
382 // +------------------------+
383 // | 6. local spill area |
384 // +------------------------+
385 // | 7. padding |
386 // +------------------------+
387 // | 8. allocas |
388 // +------------------------+
389 //
390 // The following variables record the size in bytes of the given areas:
391 // * X86_RET_IP_SIZE_BYTES: area 1
392 // * PreservedRegsSizeBytes: area 2
393 // * SpillAreaPaddingBytes: area 3
394 // * GlobalsSize: area 4
395 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
396 // * LocalsSpillAreaSize: area 6
397 // * SpillAreaSizeBytes: areas 3 - 7
398
399 // Determine stack frame offsets for each Variable without a
400 // register assignment. This can be done as one variable per stack
401 // slot. Or, do coalescing by running the register allocator again
402 // with an infinite set of registers (as a side effect, this gives
403 // variables a second chance at physical register assignment).
404 //
405 // A middle ground approach is to leverage sparsity and allocate one
406 // block of space on the frame for globals (variables with
407 // multi-block lifetime), and one block to share for locals
408 // (single-block lifetime).
409
410 Context.init(Node);
411 Context.setInsertPoint(Context.getCur());
412
413 llvm::SmallBitVector CalleeSaves =
414 getRegisterSet(RegSet_CalleeSave, RegSet_None);
415 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
416 VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
417 size_t GlobalsSize = 0;
418 // If there is a separate locals area, this represents that area.
419 // Otherwise it counts any variable not counted by GlobalsSize.
420 SpillAreaSizeBytes = 0;
421 // If there is a separate locals area, this specifies the alignment
422 // for it.
423 uint32_t LocalsSlotsAlignmentBytes = 0;
424 // The entire spill locations area gets aligned to largest natural
425 // alignment of the variables that have a spill slot.
426 uint32_t SpillAreaAlignmentBytes = 0;
427 // A spill slot linked to a variable with a stack slot should reuse
428 // that stack slot.
429 std::function<bool(Variable *)> TargetVarHook =
430 [&VariablesLinkedToSpillSlots](Variable *Var) {
431 if (auto *SpillVar =
432 llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
433 assert(Var->getWeight().isZero());
434 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
435 VariablesLinkedToSpillSlots.push_back(Var);
436 return true;
437 }
438 }
439 return false;
440 };
441
442 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
443 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
444 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
445 &LocalsSlotsAlignmentBytes, TargetVarHook);
446 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
447 SpillAreaSizeBytes += GlobalsSize;
448
449 // Add push instructions for preserved registers.
450 uint32_t NumCallee = 0;
451 size_t PreservedRegsSizeBytes = 0;
452 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
453 if (CalleeSaves[i] && RegsUsed[i]) {
454 ++NumCallee;
455 PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
456 _push(getPhysicalRegister(i));
457 }
458 }
459 Ctx->statsUpdateRegistersSaved(NumCallee);
460
461 // Generate "push ebp; mov ebp, esp"
462 if (IsEbpBasedFrame) {
463 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
464 .count() == 0);
465 PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
466 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
467 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
468 _push(ebp);
469 _mov(ebp, esp);
470 // Keep ebp live for late-stage liveness analysis
471 // (e.g. asm-verbose mode).
472 Context.insert(InstFakeUse::create(Func, ebp));
473 }
474
475 // Align the variables area. SpillAreaPaddingBytes is the size of
476 // the region after the preserved registers and before the spill areas.
477 // LocalsSlotsPaddingBytes is the amount of padding between the globals
478 // and locals area if they are separate.
479 assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
480 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
481 uint32_t SpillAreaPaddingBytes = 0;
482 uint32_t LocalsSlotsPaddingBytes = 0;
483 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
484 SpillAreaAlignmentBytes, GlobalsSize,
485 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
486 &LocalsSlotsPaddingBytes);
487 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
488 uint32_t GlobalsAndSubsequentPaddingSize =
489 GlobalsSize + LocalsSlotsPaddingBytes;
490
491 // Align esp if necessary.
492 if (NeedsStackAlignment) {
493 uint32_t StackOffset =
494 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
495 uint32_t StackSize =
496 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
497 SpillAreaSizeBytes = StackSize - StackOffset;
498 }
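// Worked example (illustrative): with X86_RET_IP_SIZE_BYTES == 4 and two
// pushed callee-save registers, StackOffset == 4 + 8 == 12. A 24-byte
// spill area then gives StackSize == applyStackAlignment(36) == 48, so
// SpillAreaSizeBytes grows to 48 - 12 == 36 and the "sub esp" below
// leaves esp 16-byte aligned.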
499
500 // Generate "sub esp, SpillAreaSizeBytes"
501 if (SpillAreaSizeBytes)
502 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
503 Ctx->getConstantInt32(SpillAreaSizeBytes));
504 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
505
506 resetStackAdjustment();
507
508 // Fill in stack offsets for stack args, and copy args into registers
509 // for those that were register-allocated. Args are pushed right to
510 // left, so Arg[0] is closest to the stack/frame pointer.
511 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
512 size_t BasicFrameOffset =
513 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
514 if (!IsEbpBasedFrame)
515 BasicFrameOffset += SpillAreaSizeBytes;
516
517 const VarList &Args = Func->getArgs();
518 size_t InArgsSizeBytes = 0;
519 unsigned NumXmmArgs = 0;
520 for (Variable *Arg : Args) {
521 // Skip arguments passed in registers.
522 if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
523 ++NumXmmArgs;
524 continue;
525 }
526 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
527 }
528
529 // Fill in stack offsets for locals.
530 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
531 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
532 IsEbpBasedFrame);
533 // Assign stack offsets to variables that have been linked to spilled
534 // variables.
535 for (Variable *Var : VariablesLinkedToSpillSlots) {
536 Variable *Linked =
537 (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
538 Var->setStackOffset(Linked->getStackOffset());
539 }
540 this->HasComputedFrame = true;
541
542 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
543 OstreamLocker L(Func->getContext());
544 Ostream &Str = Func->getContext()->getStrDump();
545
546 Str << "Stack layout:\n";
547 uint32_t EspAdjustmentPaddingSize =
548 SpillAreaSizeBytes - LocalsSpillAreaSize -
549 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
550 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
551 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
552 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
553 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
554 << " globals spill area = " << GlobalsSize << " bytes\n"
555 << " globals-locals spill areas intermediate padding = "
556 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
557 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
558 << " esp alignment padding = " << EspAdjustmentPaddingSize
559 << " bytes\n";
560
561 Str << "Stack details:\n"
562 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
563 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
564 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
565 << " bytes\n"
566 << " is ebp based = " << IsEbpBasedFrame << "\n";
567 }
568 }
569
570 void TargetX8632::addEpilog(CfgNode *Node) {
571 InstList &Insts = Node->getInsts();
572 InstList::reverse_iterator RI, E;
573 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
574 if (llvm::isa<typename Traits::Insts::Ret>(*RI))
575 break;
576 }
577 if (RI == E)
578 return;
579
580 // Convert the reverse_iterator position into its corresponding
581 // (forward) iterator position.
582 InstList::iterator InsertPoint = RI.base();
583 --InsertPoint;
584 Context.init(Node);
585 Context.setInsertPoint(InsertPoint);
586
587 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
588 if (IsEbpBasedFrame) {
589 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
590 // For late-stage liveness analysis (e.g. asm-verbose mode),
591 // adding a fake use of esp before the assignment of esp=ebp keeps
592 // previous esp adjustments from being dead-code eliminated.
593 Context.insert(InstFakeUse::create(Func, esp));
594 _mov(esp, ebp);
595 _pop(ebp);
596 } else {
597 // add esp, SpillAreaSizeBytes
598 if (SpillAreaSizeBytes)
599 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
600 }
601
602 // Add pop instructions for preserved registers.
603 llvm::SmallBitVector CalleeSaves =
604 getRegisterSet(RegSet_CalleeSave, RegSet_None);
605 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
606 SizeT j = CalleeSaves.size() - i - 1;
607 if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
608 continue;
609 if (CalleeSaves[j] && RegsUsed[j]) {
610 _pop(getPhysicalRegister(j));
611 }
612 }
613
614 if (!Ctx->getFlags().getUseSandboxing())
615 return;
616 // Change the original ret instruction into a sandboxed return sequence.
617 // t:ecx = pop
618 // bundle_lock
619 // and t, ~31
620 // jmp *t
621 // bundle_unlock
622 // FakeUse <original_ret_operand>
623 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
624 _pop(T_ecx);
625 lowerIndirectJump(T_ecx);
626 if (RI->getSrcSize()) {
627 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
628 Context.insert(InstFakeUse::create(Func, RetValue));
629 }
630 RI->setDeleted();
631 }
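Note on the mask: the "and t, ~31" in the sequence above hard-codes a 32-byte NaCl bundle, while lowerCall derives the equivalent mask from the assembler. A sketch of that computation, assuming 32-byte bundles:

// getBundleAlignLog2Bytes() == 5 for a 32-byte bundle:
const SizeT BundleSize = 1 << 5;                              // 32
const uint32_t Mask = ~static_cast<uint32_t>(BundleSize - 1); // 0xFFFFFFE0, i.e. ~31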
632
633 void TargetX8632::emitJumpTable(const Cfg *Func,
634 const InstJumpTable *JumpTable) const {
635 if (!BuildDefs::dump())
636 return;
637 Ostream &Str = Ctx->getStrEmit();
638 IceString MangledName = Ctx->mangleName(Func->getFunctionName());
639 Str << "\t.section\t.rodata." << MangledName
640 << "$jumptable,\"a\",@progbits\n";
641 Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
642 Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
643
644 // On X8632 pointers are 32-bit hence the use of .long
645 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
646 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
647 Str << "\n";
648 }
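For a function whose mangled name is f and a jump table with three targets, the code above emits roughly the following (the label comes from InstJumpTable::makeName and the target names from getAsmName(); both are shown schematically):

	.section	.rodata.f$jumptable,"a",@progbits
	.align	4
<jump table label>:
	.long	<target 0 asm name>
	.long	<target 1 asm name>
	.long	<target 2 asm name>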
649
92 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx) 650 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
93 : TargetDataLowering(Ctx) {} 651 : TargetDataLowering(Ctx) {}
94 652
95 namespace { 653 namespace {
96 template <typename T> struct PoolTypeConverter {}; 654 template <typename T> struct PoolTypeConverter {};
97 655
98 template <> struct PoolTypeConverter<float> { 656 template <> struct PoolTypeConverter<float> {
99 typedef uint32_t PrimitiveIntType; 657 typedef uint32_t PrimitiveIntType;
100 typedef ConstantFloat IceType; 658 typedef ConstantFloat IceType;
101 static const Type Ty = IceType_f32; 659 static const Type Ty = IceType_f32;
(...skipping 50 matching lines...)
152 static const Type Ty = IceType_i8; 710 static const Type Ty = IceType_i8;
153 static const char *TypeName; 711 static const char *TypeName;
154 static const char *AsmTag; 712 static const char *AsmTag;
155 static const char *PrintfString; 713 static const char *PrintfString;
156 }; 714 };
157 const char *PoolTypeConverter<uint8_t>::TypeName = "i8"; 715 const char *PoolTypeConverter<uint8_t>::TypeName = "i8";
158 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte"; 716 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte";
159 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x"; 717 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";
160 } // end of anonymous namespace 718 } // end of anonymous namespace
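Each converter pairs an Ice constant type with a primitive integer of the same width so the emitter can print the raw bit pattern. A minimal sketch of that idea for f32 (illustrative; the .long directive here is an assumption, chosen by analogy with the .byte tag of the uint8_t specialization above):

#include <cstdint>
#include <cstdio>
#include <cstring>

static void emitF32Bits(float Value) {
  uint32_t Bits; // PoolTypeConverter<float>::PrimitiveIntType
  static_assert(sizeof(Bits) == sizeof(Value), "width mismatch");
  std::memcpy(&Bits, &Value, sizeof(Bits)); // reinterpret the bits without UB
  std::printf("\t.long\t0x%x\n", Bits);     // AsmTag + PrintfString style
}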
161 719
162 void TargetX8632::emitJumpTable(const Cfg *Func,
163 const InstJumpTable *JumpTable) const {
164 if (!BuildDefs::dump())
165 return;
166 Ostream &Str = Ctx->getStrEmit();
167 IceString MangledName = Ctx->mangleName(Func->getFunctionName());
168 Str << "\t.section\t.rodata." << MangledName
169 << "$jumptable,\"a\",@progbits\n";
170 Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
171 Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
172
173 // On X8632 pointers are 32-bit hence the use of .long
174 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
175 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
176 Str << "\n";
177 }
178
179 template <typename T> 720 template <typename T>
180 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) { 721 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
181 if (!BuildDefs::dump()) 722 if (!BuildDefs::dump())
182 return; 723 return;
183 Ostream &Str = Ctx->getStrEmit(); 724 Ostream &Str = Ctx->getStrEmit();
184 Type Ty = T::Ty; 725 Type Ty = T::Ty;
185 SizeT Align = typeAlignInBytes(Ty); 726 SizeT Align = typeAlignInBytes(Ty);
186 ConstantList Pool = Ctx->getConstantPool(Ty); 727 ConstantList Pool = Ctx->getConstantPool(Ty);
187 728
188 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align 729 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
(...skipping 211 matching lines...)
400 // Repeat the static asserts with respect to the high-level table 941 // Repeat the static asserts with respect to the high-level table
401 // entries in case the high-level table has extra entries. 942 // entries in case the high-level table has extra entries.
402 #define X(tag, sizeLog2, align, elts, elty, str) \ 943 #define X(tag, sizeLog2, align, elts, elty, str) \
403 static_assert(_table1_##tag == _table2_##tag, \ 944 static_assert(_table1_##tag == _table2_##tag, \
404 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); 945 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
405 ICETYPE_TABLE 946 ICETYPE_TABLE
406 #undef X 947 #undef X
407 } // end of namespace dummy3 948 } // end of namespace dummy3
408 } // end of anonymous namespace 949 } // end of anonymous namespace
409 950
410 //------------------------------------------------------------------------------
411 // __ ______ __ __ ______ ______ __ __ __ ______
412 // /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
413 // \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
414 // \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
415 // \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
416 //
417 //------------------------------------------------------------------------------
418 void TargetX8632::lowerCall(const InstCall *Instr) {
419 // x86-32 calling convention:
420 //
421 // * At the point before the call, the stack must be aligned to 16
422 // bytes.
423 //
424 // * The first four arguments of vector type, regardless of their
425 // position relative to the other arguments in the argument list, are
426 // placed in registers xmm0 - xmm3.
427 //
428 // * Other arguments are pushed onto the stack in right-to-left order,
429 // such that the left-most argument ends up on the top of the stack at
430 // the lowest memory address.
431 //
432 // * Stack arguments of vector type are aligned to start at the next
433 // highest multiple of 16 bytes. Other stack arguments are aligned to
434 // 4 bytes.
435 //
436 // This is intended to match the section "IA-32 Function Calling
437 // Convention" of the document "OS X ABI Function Call Guide" by
438 // Apple.
439 NeedsStackAlignment = true;
440
441 typedef std::vector<Operand *> OperandList;
442 OperandList XmmArgs;
443 OperandList StackArgs, StackArgLocations;
444 uint32_t ParameterAreaSizeBytes = 0;
445
446 // Classify each argument operand according to the location where the
447 // argument is passed.
448 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
449 Operand *Arg = Instr->getArg(i);
450 Type Ty = Arg->getType();
451 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
452 assert(typeWidthInBytes(Ty) >= 4);
453 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
454 XmmArgs.push_back(Arg);
455 } else {
456 StackArgs.push_back(Arg);
457 if (isVectorType(Arg->getType())) {
458 ParameterAreaSizeBytes =
459 Traits::applyStackAlignment(ParameterAreaSizeBytes);
460 }
461 Variable *esp =
462 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
463 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
464 StackArgLocations.push_back(
465 Traits::X86OperandMem::create(Func, Ty, esp, Loc));
466 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
467 }
468 }
469
470 // Adjust the parameter area so that the stack is aligned. It is
471 // assumed that the stack is already aligned at the start of the
472 // calling sequence.
473 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
474
475 // Subtract the appropriate amount for the argument area. This also
476 // takes care of setting the stack adjustment during emission.
477 //
478 // TODO: If for some reason the call instruction gets dead-code
479 // eliminated after lowering, we would need to ensure that the
480 // pre-call and the post-call esp adjustment get eliminated as well.
481 if (ParameterAreaSizeBytes) {
482 _adjust_stack(ParameterAreaSizeBytes);
483 }
484
485 // Copy arguments that are passed on the stack to the appropriate
486 // stack locations.
487 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
488 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
489 }
490
491 // Copy arguments to be passed in registers to the appropriate
492 // registers.
493 // TODO: Investigate the impact of lowering arguments passed in
494 // registers after lowering stack arguments as opposed to the other
495 // way around. Lowering register arguments after stack arguments may
496 // reduce register pressure. On the other hand, lowering register
497 // arguments first (before stack arguments) may result in more compact
498 // code, as the memory operand displacements may end up being smaller
499 // before any stack adjustment is done.
500 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
501 Variable *Reg =
502 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
503 // Generate a FakeUse of register arguments so that they do not get
504 // dead code eliminated as a result of the FakeKill of scratch
505 // registers after the call.
506 Context.insert(InstFakeUse::create(Func, Reg));
507 }
508 // Generate the call instruction. Assign its result to a temporary
509 // with high register allocation weight.
510 Variable *Dest = Instr->getDest();
511 // ReturnReg doubles as ReturnRegLo as necessary.
512 Variable *ReturnReg = nullptr;
513 Variable *ReturnRegHi = nullptr;
514 if (Dest) {
515 switch (Dest->getType()) {
516 case IceType_NUM:
517 llvm_unreachable("Invalid Call dest type");
518 break;
519 case IceType_void:
520 break;
521 case IceType_i1:
522 case IceType_i8:
523 case IceType_i16:
524 case IceType_i32:
525 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
526 break;
527 case IceType_i64:
528 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
529 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
530 break;
531 case IceType_f32:
532 case IceType_f64:
533 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
534 // the fstp instruction.
535 break;
536 case IceType_v4i1:
537 case IceType_v8i1:
538 case IceType_v16i1:
539 case IceType_v16i8:
540 case IceType_v8i16:
541 case IceType_v4i32:
542 case IceType_v4f32:
543 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
544 break;
545 }
546 }
547 Operand *CallTarget = legalize(Instr->getCallTarget());
548 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
549 if (NeedSandboxing) {
550 if (llvm::isa<Constant>(CallTarget)) {
551 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
552 } else {
553 Variable *CallTargetVar = nullptr;
554 _mov(CallTargetVar, CallTarget);
555 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
556 const SizeT BundleSize =
557 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
558 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
559 CallTarget = CallTargetVar;
560 }
561 }
562 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
563 Context.insert(NewCall);
564 if (NeedSandboxing)
565 _bundle_unlock();
566 if (ReturnRegHi)
567 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
568
569 // Add the appropriate offset to esp. The call instruction takes care
570 // of resetting the stack offset during emission.
571 if (ParameterAreaSizeBytes) {
572 Variable *esp =
573 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
574 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
575 }
576
577 // Insert a register-kill pseudo instruction.
578 Context.insert(InstFakeKill::create(Func, NewCall));
579
580 // Generate a FakeUse to keep the call live if necessary.
581 if (Instr->hasSideEffects() && ReturnReg) {
582 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
583 Context.insert(FakeUse);
584 }
585
586 if (!Dest)
587 return;
588
589 // Assign the result of the call to Dest.
590 if (ReturnReg) {
591 if (ReturnRegHi) {
592 assert(Dest->getType() == IceType_i64);
593 split64(Dest);
594 Variable *DestLo = Dest->getLo();
595 Variable *DestHi = Dest->getHi();
596 _mov(DestLo, ReturnReg);
597 _mov(DestHi, ReturnRegHi);
598 } else {
599 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
600 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
601 isVectorType(Dest->getType()));
602 if (isVectorType(Dest->getType())) {
603 _movp(Dest, ReturnReg);
604 } else {
605 _mov(Dest, ReturnReg);
606 }
607 }
608 } else if (isScalarFloatingType(Dest->getType())) {
609 // Special treatment for an FP function which returns its result in
610 // st(0).
611 // If Dest ends up being a physical xmm register, the fstp emit code
612 // will route st(0) through a temporary stack slot.
613 _fstp(Dest);
614 // Create a fake use of Dest in case it actually isn't used,
615 // because st(0) still needs to be popped.
616 Context.insert(InstFakeUse::create(Func, Dest));
617 }
618 }
619
620 } // end of namespace Ice 951 } // end of namespace Ice