Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(170)

Side by Side Diff: src/IceTargetLoweringX8664.cpp

Issue 1472623002: Unify alloca, outgoing arg, and prolog construction (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fixed missing out args in subtraction. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after
163 // This intends to match the section "Function Calling Sequence" of the 163 // This intends to match the section "Function Calling Sequence" of the
164 // document "System V Application Binary Interface." 164 // document "System V Application Binary Interface."
165 NeedsStackAlignment = true; 165 NeedsStackAlignment = true;
166 166
167 using OperandList = 167 using OperandList =
168 llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS, 168 llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS,
169 Traits::X86_MAX_GPR_ARGS)>; 169 Traits::X86_MAX_GPR_ARGS)>;
170 OperandList XmmArgs; 170 OperandList XmmArgs;
171 OperandList GprArgs; 171 OperandList GprArgs;
172 OperandList StackArgs, StackArgLocations; 172 OperandList StackArgs, StackArgLocations;
173 uint32_t ParameterAreaSizeBytes = 0; 173 int32_t ParameterAreaSizeBytes = 0;
174 174
175 // Classify each argument operand according to the location where the 175 // Classify each argument operand according to the location where the
176 // argument is passed. 176 // argument is passed.
177 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { 177 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
178 Operand *Arg = Instr->getArg(i); 178 Operand *Arg = Instr->getArg(i);
179 Type Ty = Arg->getType(); 179 Type Ty = Arg->getType();
180 // The PNaCl ABI requires the width of arguments to be at least 32 bits. 180 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
181 assert(typeWidthInBytes(Ty) >= 4); 181 assert(typeWidthInBytes(Ty) >= 4);
182 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { 182 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
183 XmmArgs.push_back(Arg); 183 XmmArgs.push_back(Arg);
(...skipping 13 matching lines...) Expand all
197 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); 197 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
198 StackArgLocations.push_back( 198 StackArgLocations.push_back(
199 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); 199 Traits::X86OperandMem::create(Func, Ty, esp, Loc));
200 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 200 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
201 } 201 }
202 } 202 }
203 203
204 // Adjust the parameter area so that the stack is aligned. It is assumed that 204 // Adjust the parameter area so that the stack is aligned. It is assumed that
205 // the stack is already aligned at the start of the calling sequence. 205 // the stack is already aligned at the start of the calling sequence.
206 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); 206 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
207 207 assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <=
208 // Subtract the appropriate amount for the argument area. This also takes 208 maxOutArgsSizeBytes());
209 // care of setting the stack adjustment during emission.
210 //
211 // TODO: If for some reason the call instruction gets dead-code eliminated
212 // after lowering, we would need to ensure that the pre-call and the
213 // post-call esp adjustment get eliminated as well.
214 if (ParameterAreaSizeBytes) {
215 _adjust_stack(ParameterAreaSizeBytes);
216 }
217 209
218 // Copy arguments that are passed on the stack to the appropriate stack 210 // Copy arguments that are passed on the stack to the appropriate stack
219 // locations. 211 // locations.
220 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { 212 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
221 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); 213 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
222 } 214 }
223 215
224 // Copy arguments to be passed in registers to the appropriate registers. 216 // Copy arguments to be passed in registers to the appropriate registers.
225 // TODO: Investigate the impact of lowering arguments passed in registers 217 // TODO: Investigate the impact of lowering arguments passed in registers
226 // after lowering stack arguments as opposed to the other way around. 218 // after lowering stack arguments as opposed to the other way around.
(...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after
402 // | 4. global spill area | 394 // | 4. global spill area |
403 // +------------------------+ 395 // +------------------------+
404 // | 5. padding | 396 // | 5. padding |
405 // +------------------------+ 397 // +------------------------+
406 // | 6. local spill area | 398 // | 6. local spill area |
407 // +------------------------+ 399 // +------------------------+
408 // | 7. padding | 400 // | 7. padding |
409 // +------------------------+ 401 // +------------------------+
410 // | 8. allocas | 402 // | 8. allocas |
411 // +------------------------+ 403 // +------------------------+
404 // | 9. padding |
405 // +------------------------+
406 // | 10. out args |
407 // +------------------------+ <--- StackPointer
412 // 408 //
413 // The following variables record the size in bytes of the given areas: 409 // The following variables record the size in bytes of the given areas:
414 // * X86_RET_IP_SIZE_BYTES: area 1 410 // * X86_RET_IP_SIZE_BYTES: area 1
415 // * PreservedRegsSizeBytes: area 2 411 // * PreservedRegsSizeBytes: area 2
416 // * SpillAreaPaddingBytes: area 3 412 // * SpillAreaPaddingBytes: area 3
417 // * GlobalsSize: area 4 413 // * GlobalsSize: area 4
418 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 414 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
419 // * LocalsSpillAreaSize: area 6 415 // * LocalsSpillAreaSize: area 6
420 // * SpillAreaSizeBytes: areas 3 - 7 416 // * SpillAreaSizeBytes: areas 3 - 10
417 // * maxOutArgsSizeBytes(): area 10
421 418
422 // Determine stack frame offsets for each Variable without a register 419 // Determine stack frame offsets for each Variable without a register
423 // assignment. This can be done as one variable per stack slot. Or, do 420 // assignment. This can be done as one variable per stack slot. Or, do
424 // coalescing by running the register allocator again with an infinite set of 421 // coalescing by running the register allocator again with an infinite set of
425 // registers (as a side effect, this gives variables a second chance at 422 // registers (as a side effect, this gives variables a second chance at
426 // physical register assignment). 423 // physical register assignment).
427 // 424 //
428 // A middle ground approach is to leverage sparsity and allocate one block of 425 // A middle ground approach is to leverage sparsity and allocate one block of
429 // space on the frame for globals (variables with multi-block lifetime), and 426 // space on the frame for globals (variables with multi-block lifetime), and
430 // one block to share for locals (single-block lifetime). 427 // one block to share for locals (single-block lifetime).
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after
502 uint32_t LocalsSlotsPaddingBytes = 0; 499 uint32_t LocalsSlotsPaddingBytes = 0;
503 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes, 500 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
504 SpillAreaAlignmentBytes, GlobalsSize, 501 SpillAreaAlignmentBytes, GlobalsSize,
505 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes, 502 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
506 &LocalsSlotsPaddingBytes); 503 &LocalsSlotsPaddingBytes);
507 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; 504 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
508 uint32_t GlobalsAndSubsequentPaddingSize = 505 uint32_t GlobalsAndSubsequentPaddingSize =
509 GlobalsSize + LocalsSlotsPaddingBytes; 506 GlobalsSize + LocalsSlotsPaddingBytes;
510 507
511 // Align esp if necessary. 508 // Align esp if necessary.
512 if (NeedsStackAlignment) { 509 if (!NeedsStackAlignment) {
510 SpillAreaSizeBytes += maxOutArgsSizeBytes();
511 } else {
513 uint32_t StackOffset = 512 uint32_t StackOffset =
514 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; 513 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
515 uint32_t StackSize = 514 uint32_t StackSize =
516 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); 515 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
516 StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes());
517 SpillAreaSizeBytes = StackSize - StackOffset; 517 SpillAreaSizeBytes = StackSize - StackOffset;
518 } 518 }
519 519
520 // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the
521 // fixed allocations in the prolog.
522 if (PrologEmitsFixedAllocas)
523 SpillAreaSizeBytes += FixedAllocaSizeBytes;
520 // Generate "sub esp, SpillAreaSizeBytes" 524 // Generate "sub esp, SpillAreaSizeBytes"
521 if (SpillAreaSizeBytes) 525 if (SpillAreaSizeBytes) {
522 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), 526 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
523 Ctx->getConstantInt32(SpillAreaSizeBytes)); 527 Ctx->getConstantInt32(SpillAreaSizeBytes));
528 // If the fixed allocas are aligned more than the stack frame, align the
529 // stack pointer accordingly.
530 if (PrologEmitsFixedAllocas &&
531 FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) {
532 assert(IsEbpBasedFrame);
533 _and(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
534 Ctx->getConstantInt32(-FixedAllocaAlignBytes));
535 }
536 }
524 537
525 // Account for alloca instructions with known frame offsets. 538 // Account for alloca instructions with known frame offsets.
526 SpillAreaSizeBytes += FixedAllocaSizeBytes; 539 if (!PrologEmitsFixedAllocas)
540 SpillAreaSizeBytes += FixedAllocaSizeBytes;
527 541
528 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 542 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
529 543
530 // Initialize the stack adjustment so that after all the known-frame-offset
531 // alloca instructions are emitted, the stack adjustment will reach zero.
532 resetStackAdjustment();
533 updateStackAdjustment(-FixedAllocaSizeBytes);
534
535 // Fill in stack offsets for stack args, and copy args into registers for 544 // Fill in stack offsets for stack args, and copy args into registers for
536 // those that were register-allocated. Args are pushed right to left, so 545 // those that were register-allocated. Args are pushed right to left, so
537 // Arg[0] is closest to the stack/frame pointer. 546 // Arg[0] is closest to the stack/frame pointer.
538 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 547 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
539 size_t BasicFrameOffset = 548 size_t BasicFrameOffset =
540 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; 549 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
541 if (!IsEbpBasedFrame) 550 if (!IsEbpBasedFrame)
542 BasicFrameOffset += SpillAreaSizeBytes; 551 BasicFrameOffset += SpillAreaSizeBytes;
543 552
544 const VarList &Args = Func->getArgs(); 553 const VarList &Args = Func->getArgs();
(...skipping 11 matching lines...) Expand all
556 assert(isScalarIntegerType(Arg->getType())); 565 assert(isScalarIntegerType(Arg->getType()));
557 if (NumGPRArgs < Traits::X86_MAX_GPR_ARGS) { 566 if (NumGPRArgs < Traits::X86_MAX_GPR_ARGS) {
558 ++NumGPRArgs; 567 ++NumGPRArgs;
559 continue; 568 continue;
560 } 569 }
561 } 570 }
562 // For esp-based frames, the esp value may not stabilize to its home value 571 // For esp-based frames, the esp value may not stabilize to its home value
563 // until after all the fixed-size alloca instructions have executed. In 572 // until after all the fixed-size alloca instructions have executed. In
564 // this case, a stack adjustment is needed when accessing in-args in order 573 // this case, a stack adjustment is needed when accessing in-args in order
565 // to copy them into registers. 574 // to copy them into registers.
566 size_t StackAdjBytes = IsEbpBasedFrame ? 0 : -FixedAllocaSizeBytes; 575 size_t StackAdjBytes = 0;
576 if (!IsEbpBasedFrame && !PrologEmitsFixedAllocas)
577 StackAdjBytes -= FixedAllocaSizeBytes;
567 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes, 578 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes,
568 InArgsSizeBytes); 579 InArgsSizeBytes);
569 } 580 }
570 581
571 // Fill in stack offsets for locals. 582 // Fill in stack offsets for locals.
572 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, 583 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
573 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, 584 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
574 IsEbpBasedFrame); 585 IsEbpBasedFrame);
575 // Assign stack offsets to variables that have been linked to spilled 586 // Assign stack offsets to variables that have been linked to spilled
576 // variables. 587 // variables.
577 for (Variable *Var : VariablesLinkedToSpillSlots) { 588 for (Variable *Var : VariablesLinkedToSpillSlots) {
578 Variable *Linked = 589 Variable *Linked =
579 (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo(); 590 (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
580 Var->setStackOffset(Linked->getStackOffset()); 591 Var->setStackOffset(Linked->getStackOffset());
581 } 592 }
582 this->HasComputedFrame = true; 593 this->HasComputedFrame = true;
583 594
584 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { 595 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
585 OstreamLocker L(Func->getContext()); 596 OstreamLocker L(Func->getContext());
586 Ostream &Str = Func->getContext()->getStrDump(); 597 Ostream &Str = Func->getContext()->getStrDump();
587 598
588 Str << "Stack layout:\n"; 599 Str << "Stack layout:\n";
589 uint32_t EspAdjustmentPaddingSize = 600 uint32_t EspAdjustmentPaddingSize =
590 SpillAreaSizeBytes - LocalsSpillAreaSize - 601 SpillAreaSizeBytes - LocalsSpillAreaSize -
591 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; 602 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
603 maxOutArgsSizeBytes();
592 Str << " in-args = " << InArgsSizeBytes << " bytes\n" 604 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
593 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n" 605 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
594 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" 606 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
595 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" 607 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
596 << " globals spill area = " << GlobalsSize << " bytes\n" 608 << " globals spill area = " << GlobalsSize << " bytes\n"
597 << " globals-locals spill areas intermediate padding = " 609 << " globals-locals spill areas intermediate padding = "
598 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" 610 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
599 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" 611 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
600 << " esp alignment padding = " << EspAdjustmentPaddingSize 612 << " esp alignment padding = " << EspAdjustmentPaddingSize
601 << " bytes\n"; 613 << " bytes\n";
602 614
603 Str << "Stack details:\n" 615 Str << "Stack details:\n"
604 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" 616 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
605 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" 617 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
618 << " outgoing args size = " << maxOutArgsSizeBytes() << " bytes\n"
606 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes 619 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
607 << " bytes\n" 620 << " bytes\n"
608 << " is ebp based = " << IsEbpBasedFrame << "\n"; 621 << " is ebp based = " << IsEbpBasedFrame << "\n";
609 } 622 }
610 } 623 }
611 624
612 void TargetX8664::addEpilog(CfgNode *Node) { 625 void TargetX8664::addEpilog(CfgNode *Node) {
613 InstList &Insts = Node->getInsts(); 626 InstList &Insts = Node->getInsts();
614 InstList::reverse_iterator RI, E; 627 InstList::reverse_iterator RI, E;
615 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { 628 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
(...skipping 350 matching lines...) Expand 10 before | Expand all | Expand 10 after
966 // case the high-level table has extra entries. 979 // case the high-level table has extra entries.
967 #define X(tag, sizeLog2, align, elts, elty, str) \ 980 #define X(tag, sizeLog2, align, elts, elty, str) \
968 static_assert(_table1_##tag == _table2_##tag, \ 981 static_assert(_table1_##tag == _table2_##tag, \
969 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); 982 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE");
970 ICETYPE_TABLE 983 ICETYPE_TABLE
971 #undef X 984 #undef X
972 } // end of namespace dummy3 985 } // end of namespace dummy3
973 } // end of anonymous namespace 986 } // end of anonymous namespace
974 987
975 } // end of namespace Ice 988 } // end of namespace Ice
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698