Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(255)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1472623002: Unify alloca, outgoing arg, and prolog construction (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fixed missing out args in subtraction. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after
162 Variable *Dest = Instr->getDest(); 162 Variable *Dest = Instr->getDest();
163 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { 163 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) {
164 ParameterAreaSizeBytes = 164 ParameterAreaSizeBytes =
165 std::max(static_cast<size_t>(ParameterAreaSizeBytes), 165 std::max(static_cast<size_t>(ParameterAreaSizeBytes),
166 typeWidthInBytesOnStack(Dest->getType())); 166 typeWidthInBytesOnStack(Dest->getType()));
167 } 167 }
168 168
169 // Adjust the parameter area so that the stack is aligned. It is assumed that 169 // Adjust the parameter area so that the stack is aligned. It is assumed that
170 // the stack is already aligned at the start of the calling sequence. 170 // the stack is already aligned at the start of the calling sequence.
171 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); 171 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
172 172 assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <=
173 // Subtract the appropriate amount for the argument area. This also takes 173 maxOutArgsSizeBytes());
174 // care of setting the stack adjustment during emission.
175 //
176 // TODO: If for some reason the call instruction gets dead-code eliminated
177 // after lowering, we would need to ensure that the pre-call and the
178 // post-call esp adjustment get eliminated as well.
179 if (ParameterAreaSizeBytes) {
180 _adjust_stack(ParameterAreaSizeBytes);
181 }
182 174
183 // Copy arguments that are passed on the stack to the appropriate stack 175 // Copy arguments that are passed on the stack to the appropriate stack
184 // locations. 176 // locations.
185 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { 177 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
186 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); 178 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
187 } 179 }
188 180
189 // Copy arguments to be passed in registers to the appropriate registers. 181 // Copy arguments to be passed in registers to the appropriate registers.
190 // TODO: Investigate the impact of lowering arguments passed in registers 182 // TODO: Investigate the impact of lowering arguments passed in registers
191 // after lowering stack arguments as opposed to the other way around. 183 // after lowering stack arguments as opposed to the other way around.
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
268 // Special treatment for an FP function which returns its result in st(0). 260 // Special treatment for an FP function which returns its result in st(0).
269 // If Dest ends up being a physical xmm register, the fstp emit code will 261 // If Dest ends up being a physical xmm register, the fstp emit code will
270 // route st(0) through the space reserved in the function argument area 262 // route st(0) through the space reserved in the function argument area
271 // we allocated. 263 // we allocated.
272 _fstp(Dest); 264 _fstp(Dest);
273 // Create a fake use of Dest in case it actually isn't used, because st(0) 265 // Create a fake use of Dest in case it actually isn't used, because st(0)
274 // still needs to be popped. 266 // still needs to be popped.
275 Context.insert(InstFakeUse::create(Func, Dest)); 267 Context.insert(InstFakeUse::create(Func, Dest));
276 } 268 }
277 269
278 // Add the appropriate offset to esp.
279 if (ParameterAreaSizeBytes)
280 _adjust_stack(-ParameterAreaSizeBytes);
281
282 // Generate a FakeUse to keep the call live if necessary. 270 // Generate a FakeUse to keep the call live if necessary.
283 if (Instr->hasSideEffects() && ReturnReg) { 271 if (Instr->hasSideEffects() && ReturnReg) {
284 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); 272 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
285 Context.insert(FakeUse); 273 Context.insert(FakeUse);
286 } 274 }
287 275
288 if (!Dest) 276 if (!Dest)
289 return; 277 return;
290 278
291 // Assign the result of the call to Dest. 279 // Assign the result of the call to Dest.
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
384 // | 4. global spill area | 372 // | 4. global spill area |
385 // +------------------------+ 373 // +------------------------+
386 // | 5. padding | 374 // | 5. padding |
387 // +------------------------+ 375 // +------------------------+
388 // | 6. local spill area | 376 // | 6. local spill area |
389 // +------------------------+ 377 // +------------------------+
390 // | 7. padding | 378 // | 7. padding |
391 // +------------------------+ 379 // +------------------------+
392 // | 8. allocas | 380 // | 8. allocas |
393 // +------------------------+ 381 // +------------------------+
382 // | 9. padding |
383 // +------------------------+
384 // | 10. out args |
385 // +------------------------+ <--- StackPointer
394 // 386 //
395 // The following variables record the size in bytes of the given areas: 387 // The following variables record the size in bytes of the given areas:
396 // * X86_RET_IP_SIZE_BYTES: area 1 388 // * X86_RET_IP_SIZE_BYTES: area 1
397 // * PreservedRegsSizeBytes: area 2 389 // * PreservedRegsSizeBytes: area 2
398 // * SpillAreaPaddingBytes: area 3 390 // * SpillAreaPaddingBytes: area 3
399 // * GlobalsSize: area 4 391 // * GlobalsSize: area 4
400 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 392 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
401 // * LocalsSpillAreaSize: area 6 393 // * LocalsSpillAreaSize: area 6
402 // * SpillAreaSizeBytes: areas 3 - 7 394 // * SpillAreaSizeBytes: areas 3 - 10
395 // * maxOutArgsSizeBytes(): area 10
403 396
404 // Determine stack frame offsets for each Variable without a register 397 // Determine stack frame offsets for each Variable without a register
405 // assignment. This can be done as one variable per stack slot. Or, do 398 // assignment. This can be done as one variable per stack slot. Or, do
406 // coalescing by running the register allocator again with an infinite set of 399 // coalescing by running the register allocator again with an infinite set of
407 // registers (as a side effect, this gives variables a second chance at 400 // registers (as a side effect, this gives variables a second chance at
408 // physical register assignment). 401 // physical register assignment).
409 // 402 //
410 // A middle ground approach is to leverage sparsity and allocate one block of 403 // A middle ground approach is to leverage sparsity and allocate one block of
411 // space on the frame for globals (variables with multi-block lifetime), and 404 // space on the frame for globals (variables with multi-block lifetime), and
412 // one block to share for locals (single-block lifetime). 405 // one block to share for locals (single-block lifetime).
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
503 // space on the stack for this. 496 // space on the stack for this.
504 const Type ReturnType = Func->getReturnType(); 497 const Type ReturnType = Func->getReturnType();
505 if (isScalarFloatingType(ReturnType)) { 498 if (isScalarFloatingType(ReturnType)) {
506 // Avoid misaligned double-precision load/store. 499 // Avoid misaligned double-precision load/store.
507 NeedsStackAlignment = true; 500 NeedsStackAlignment = true;
508 SpillAreaSizeBytes = 501 SpillAreaSizeBytes =
509 std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes); 502 std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes);
510 } 503 }
511 504
512 // Align esp if necessary. 505 // Align esp if necessary.
513 if (NeedsStackAlignment) { 506 if (!NeedsStackAlignment) {
Jim Stichnoth 2015/11/26 18:32:06 Instead of "if (!c) a; else b;", use "if (c) b; else a;".
sehr 2015/11/26 21:09:23 I did it to parallel ARM. I fixed that also.
507 SpillAreaSizeBytes += maxOutArgsSizeBytes();
508 } else {
514 uint32_t StackOffset = 509 uint32_t StackOffset =
515 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; 510 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
516 uint32_t StackSize = 511 uint32_t StackSize =
517 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); 512 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
513 StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes());
518 SpillAreaSizeBytes = StackSize - StackOffset; 514 SpillAreaSizeBytes = StackSize - StackOffset;
519 } 515 }
520 516
521 // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the 517 // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the
522 // fixed allocations in the prolog. 518 // fixed allocations in the prolog.
523 if (PrologEmitsFixedAllocas) 519 if (PrologEmitsFixedAllocas)
524 SpillAreaSizeBytes += FixedAllocaSizeBytes; 520 SpillAreaSizeBytes += FixedAllocaSizeBytes;
525 if (SpillAreaSizeBytes) { 521 if (SpillAreaSizeBytes) {
526 // Generate "sub esp, SpillAreaSizeBytes" 522 // Generate "sub esp, SpillAreaSizeBytes"
527 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), 523 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
528 Ctx->getConstantInt32(SpillAreaSizeBytes)); 524 Ctx->getConstantInt32(SpillAreaSizeBytes));
529 // If the fixed allocas are aligned more than the stack frame, align the 525 // If the fixed allocas are aligned more than the stack frame, align the
530 // stack pointer accordingly. 526 // stack pointer accordingly.
531 if (PrologEmitsFixedAllocas && 527 if (PrologEmitsFixedAllocas &&
532 FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) { 528 FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) {
533 assert(IsEbpBasedFrame); 529 assert(IsEbpBasedFrame);
534 _and(getPhysicalRegister(Traits::RegisterSet::Reg_esp), 530 _and(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
535 Ctx->getConstantInt32(-FixedAllocaAlignBytes)); 531 Ctx->getConstantInt32(-FixedAllocaAlignBytes));
536 } 532 }
537 } 533 }
538 534
539 // Account for known-frame-offset alloca instructions that were not already 535 // Account for known-frame-offset alloca instructions that were not already
540 // combined into the prolog. 536 // combined into the prolog.
541 if (!PrologEmitsFixedAllocas) 537 if (!PrologEmitsFixedAllocas)
542 SpillAreaSizeBytes += FixedAllocaSizeBytes; 538 SpillAreaSizeBytes += FixedAllocaSizeBytes;
543 539
544 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 540 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
545 541
546 // Initialize the stack adjustment so that after all the known-frame-offset
547 // alloca instructions are emitted, the stack adjustment will reach zero.
548 resetStackAdjustment();
549 if (!PrologEmitsFixedAllocas)
550 updateStackAdjustment(-FixedAllocaSizeBytes);
551
552 // Fill in stack offsets for stack args, and copy args into registers for 542 // Fill in stack offsets for stack args, and copy args into registers for
553 // those that were register-allocated. Args are pushed right to left, so 543 // those that were register-allocated. Args are pushed right to left, so
554 // Arg[0] is closest to the stack/frame pointer. 544 // Arg[0] is closest to the stack/frame pointer.
555 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 545 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
556 size_t BasicFrameOffset = 546 size_t BasicFrameOffset =
557 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; 547 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
558 if (!IsEbpBasedFrame) 548 if (!IsEbpBasedFrame)
559 BasicFrameOffset += SpillAreaSizeBytes; 549 BasicFrameOffset += SpillAreaSizeBytes;
560 550
561 const VarList &Args = Func->getArgs(); 551 const VarList &Args = Func->getArgs();
(...skipping 30 matching lines...) Expand all
592 } 582 }
593 this->HasComputedFrame = true; 583 this->HasComputedFrame = true;
594 584
595 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { 585 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
596 OstreamLocker L(Func->getContext()); 586 OstreamLocker L(Func->getContext());
597 Ostream &Str = Func->getContext()->getStrDump(); 587 Ostream &Str = Func->getContext()->getStrDump();
598 588
599 Str << "Stack layout:\n"; 589 Str << "Stack layout:\n";
600 uint32_t EspAdjustmentPaddingSize = 590 uint32_t EspAdjustmentPaddingSize =
601 SpillAreaSizeBytes - LocalsSpillAreaSize - 591 SpillAreaSizeBytes - LocalsSpillAreaSize -
602 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; 592 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
593 maxOutArgsSizeBytes();
603 Str << " in-args = " << InArgsSizeBytes << " bytes\n" 594 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
604 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n" 595 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
605 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" 596 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
606 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" 597 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
607 << " globals spill area = " << GlobalsSize << " bytes\n" 598 << " globals spill area = " << GlobalsSize << " bytes\n"
608 << " globals-locals spill areas intermediate padding = " 599 << " globals-locals spill areas intermediate padding = "
609 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" 600 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
610 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" 601 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
611 << " esp alignment padding = " << EspAdjustmentPaddingSize 602 << " esp alignment padding = " << EspAdjustmentPaddingSize
612 << " bytes\n"; 603 << " bytes\n";
613 604
614 Str << "Stack details:\n" 605 Str << "Stack details:\n"
615 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" 606 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
616 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" 607 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
608 << " outgoing args size = " << maxOutArgsSizeBytes() << " bytes\n"
617 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes 609 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
618 << " bytes\n" 610 << " bytes\n"
619 << " is ebp based = " << IsEbpBasedFrame << "\n"; 611 << " is ebp based = " << IsEbpBasedFrame << "\n";
620 } 612 }
621 } 613 }
622 614
623 void TargetX8632::addEpilog(CfgNode *Node) { 615 void TargetX8632::addEpilog(CfgNode *Node) {
624 InstList &Insts = Node->getInsts(); 616 InstList &Insts = Node->getInsts();
625 InstList::reverse_iterator RI, E; 617 InstList::reverse_iterator RI, E;
626 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { 618 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
(...skipping 379 matching lines...) Expand 10 before | Expand all | Expand 10 after
1006 // case the high-level table has extra entries. 998 // case the high-level table has extra entries.
1007 #define X(tag, sizeLog2, align, elts, elty, str) \ 999 #define X(tag, sizeLog2, align, elts, elty, str) \
1008 static_assert(_table1_##tag == _table2_##tag, \ 1000 static_assert(_table1_##tag == _table2_##tag, \
1009 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); 1001 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
1010 ICETYPE_TABLE 1002 ICETYPE_TABLE
1011 #undef X 1003 #undef X
1012 } // end of namespace dummy3 1004 } // end of namespace dummy3
1013 } // end of anonymous namespace 1005 } // end of anonymous namespace
1014 1006
1015 } // end of namespace Ice 1007 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698