Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 151 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 162 Variable *Dest = Instr->getDest(); | 162 Variable *Dest = Instr->getDest(); |
| 163 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { | 163 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { |
| 164 ParameterAreaSizeBytes = | 164 ParameterAreaSizeBytes = |
| 165 std::max(static_cast<size_t>(ParameterAreaSizeBytes), | 165 std::max(static_cast<size_t>(ParameterAreaSizeBytes), |
| 166 typeWidthInBytesOnStack(Dest->getType())); | 166 typeWidthInBytesOnStack(Dest->getType())); |
| 167 } | 167 } |
| 168 | 168 |
| 169 // Adjust the parameter area so that the stack is aligned. It is assumed that | 169 // Adjust the parameter area so that the stack is aligned. It is assumed that |
| 170 // the stack is already aligned at the start of the calling sequence. | 170 // the stack is already aligned at the start of the calling sequence. |
| 171 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | 171 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 172 | 172 assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <= |
| 173 // Subtract the appropriate amount for the argument area. This also takes | 173 maxOutArgsSizeBytes()); |
| 174 // care of setting the stack adjustment during emission. | |
| 175 // | |
| 176 // TODO: If for some reason the call instruction gets dead-code eliminated | |
| 177 // after lowering, we would need to ensure that the pre-call and the | |
| 178 // post-call esp adjustment get eliminated as well. | |
| 179 if (ParameterAreaSizeBytes) { | |
| 180 _adjust_stack(ParameterAreaSizeBytes); | |
| 181 } | |
| 182 | 174 |
| 183 // Copy arguments that are passed on the stack to the appropriate stack | 175 // Copy arguments that are passed on the stack to the appropriate stack |
| 184 // locations. | 176 // locations. |
| 185 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | 177 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { |
| 186 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | 178 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); |
| 187 } | 179 } |
| 188 | 180 |
| 189 // Copy arguments to be passed in registers to the appropriate registers. | 181 // Copy arguments to be passed in registers to the appropriate registers. |
| 190 // TODO: Investigate the impact of lowering arguments passed in registers | 182 // TODO: Investigate the impact of lowering arguments passed in registers |
| 191 // after lowering stack arguments as opposed to the other way around. | 183 // after lowering stack arguments as opposed to the other way around. |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 268 // Special treatment for an FP function which returns its result in st(0). | 260 // Special treatment for an FP function which returns its result in st(0). |
| 269 // If Dest ends up being a physical xmm register, the fstp emit code will | 261 // If Dest ends up being a physical xmm register, the fstp emit code will |
| 270 // route st(0) through the space reserved in the function argument area | 262 // route st(0) through the space reserved in the function argument area |
| 271 // we allocated. | 263 // we allocated. |
| 272 _fstp(Dest); | 264 _fstp(Dest); |
| 273 // Create a fake use of Dest in case it actually isn't used, because st(0) | 265 // Create a fake use of Dest in case it actually isn't used, because st(0) |
| 274 // still needs to be popped. | 266 // still needs to be popped. |
| 275 Context.insert(InstFakeUse::create(Func, Dest)); | 267 Context.insert(InstFakeUse::create(Func, Dest)); |
| 276 } | 268 } |
| 277 | 269 |
| 278 // Add the appropriate offset to esp. | |
| 279 if (ParameterAreaSizeBytes) | |
| 280 _adjust_stack(-ParameterAreaSizeBytes); | |
| 281 | |
| 282 // Generate a FakeUse to keep the call live if necessary. | 270 // Generate a FakeUse to keep the call live if necessary. |
| 283 if (Instr->hasSideEffects() && ReturnReg) { | 271 if (Instr->hasSideEffects() && ReturnReg) { |
| 284 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); | 272 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
| 285 Context.insert(FakeUse); | 273 Context.insert(FakeUse); |
| 286 } | 274 } |
| 287 | 275 |
| 288 if (!Dest) | 276 if (!Dest) |
| 289 return; | 277 return; |
| 290 | 278 |
| 291 // Assign the result of the call to Dest. | 279 // Assign the result of the call to Dest. |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 384 // | 4. global spill area | | 372 // | 4. global spill area | |
| 385 // +------------------------+ | 373 // +------------------------+ |
| 386 // | 5. padding | | 374 // | 5. padding | |
| 387 // +------------------------+ | 375 // +------------------------+ |
| 388 // | 6. local spill area | | 376 // | 6. local spill area | |
| 389 // +------------------------+ | 377 // +------------------------+ |
| 390 // | 7. padding | | 378 // | 7. padding | |
| 391 // +------------------------+ | 379 // +------------------------+ |
| 392 // | 8. allocas | | 380 // | 8. allocas | |
| 393 // +------------------------+ | 381 // +------------------------+ |
| 382 // | 9. padding | | |
| 383 // +------------------------+ | |
| 384 // | 10. out args | | |
| 385 // +------------------------+ <--- StackPointer | |
| 394 // | 386 // |
| 395 // The following variables record the size in bytes of the given areas: | 387 // The following variables record the size in bytes of the given areas: |
| 396 // * X86_RET_IP_SIZE_BYTES: area 1 | 388 // * X86_RET_IP_SIZE_BYTES: area 1 |
| 397 // * PreservedRegsSizeBytes: area 2 | 389 // * PreservedRegsSizeBytes: area 2 |
| 398 // * SpillAreaPaddingBytes: area 3 | 390 // * SpillAreaPaddingBytes: area 3 |
| 399 // * GlobalsSize: area 4 | 391 // * GlobalsSize: area 4 |
| 400 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 | 392 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 |
| 401 // * LocalsSpillAreaSize: area 6 | 393 // * LocalsSpillAreaSize: area 6 |
| 402 // * SpillAreaSizeBytes: areas 3 - 7 | 394 // * SpillAreaSizeBytes: areas 3 - 10 |
| 395 // * maxOutArgsSizeBytes(): area 10 | |
| 403 | 396 |
| 404 // Determine stack frame offsets for each Variable without a register | 397 // Determine stack frame offsets for each Variable without a register |
| 405 // assignment. This can be done as one variable per stack slot. Or, do | 398 // assignment. This can be done as one variable per stack slot. Or, do |
| 406 // coalescing by running the register allocator again with an infinite set of | 399 // coalescing by running the register allocator again with an infinite set of |
| 407 // registers (as a side effect, this gives variables a second chance at | 400 // registers (as a side effect, this gives variables a second chance at |
| 408 // physical register assignment). | 401 // physical register assignment). |
| 409 // | 402 // |
| 410 // A middle ground approach is to leverage sparsity and allocate one block of | 403 // A middle ground approach is to leverage sparsity and allocate one block of |
| 411 // space on the frame for globals (variables with multi-block lifetime), and | 404 // space on the frame for globals (variables with multi-block lifetime), and |
| 412 // one block to share for locals (single-block lifetime). | 405 // one block to share for locals (single-block lifetime). |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 503 // space on the stack for this. | 496 // space on the stack for this. |
| 504 const Type ReturnType = Func->getReturnType(); | 497 const Type ReturnType = Func->getReturnType(); |
| 505 if (isScalarFloatingType(ReturnType)) { | 498 if (isScalarFloatingType(ReturnType)) { |
| 506 // Avoid misaligned double-precision load/store. | 499 // Avoid misaligned double-precision load/store. |
| 507 NeedsStackAlignment = true; | 500 NeedsStackAlignment = true; |
| 508 SpillAreaSizeBytes = | 501 SpillAreaSizeBytes = |
| 509 std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes); | 502 std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes); |
| 510 } | 503 } |
| 511 | 504 |
| 512 // Align esp if necessary. | 505 // Align esp if necessary. |
| 513 if (NeedsStackAlignment) { | 506 if (!NeedsStackAlignment) { |
|
Jim Stichnoth
2015/11/26 18:32:06
Instead of "if (!c) a; else b;", use "if (c) b; else a;".
sehr
2015/11/26 21:09:23
I did it to parallel ARM. I fixed that also.
| |
| 507 SpillAreaSizeBytes += maxOutArgsSizeBytes(); | |
| 508 } else { | |
| 514 uint32_t StackOffset = | 509 uint32_t StackOffset = |
| 515 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; | 510 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| 516 uint32_t StackSize = | 511 uint32_t StackSize = |
| 517 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 512 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
| 513 StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes()); | |
| 518 SpillAreaSizeBytes = StackSize - StackOffset; | 514 SpillAreaSizeBytes = StackSize - StackOffset; |
| 519 } | 515 } |
| 520 | 516 |
| 521 // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the | 517 // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the |
| 522 // fixed allocations in the prolog. | 518 // fixed allocations in the prolog. |
| 523 if (PrologEmitsFixedAllocas) | 519 if (PrologEmitsFixedAllocas) |
| 524 SpillAreaSizeBytes += FixedAllocaSizeBytes; | 520 SpillAreaSizeBytes += FixedAllocaSizeBytes; |
| 525 if (SpillAreaSizeBytes) { | 521 if (SpillAreaSizeBytes) { |
| 526 // Generate "sub esp, SpillAreaSizeBytes" | 522 // Generate "sub esp, SpillAreaSizeBytes" |
| 527 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), | 523 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), |
| 528 Ctx->getConstantInt32(SpillAreaSizeBytes)); | 524 Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| 529 // If the fixed allocas are aligned more than the stack frame, align the | 525 // If the fixed allocas are aligned more than the stack frame, align the |
| 530 // stack pointer accordingly. | 526 // stack pointer accordingly. |
| 531 if (PrologEmitsFixedAllocas && | 527 if (PrologEmitsFixedAllocas && |
| 532 FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) { | 528 FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) { |
| 533 assert(IsEbpBasedFrame); | 529 assert(IsEbpBasedFrame); |
| 534 _and(getPhysicalRegister(Traits::RegisterSet::Reg_esp), | 530 _and(getPhysicalRegister(Traits::RegisterSet::Reg_esp), |
| 535 Ctx->getConstantInt32(-FixedAllocaAlignBytes)); | 531 Ctx->getConstantInt32(-FixedAllocaAlignBytes)); |
| 536 } | 532 } |
| 537 } | 533 } |
| 538 | 534 |
| 539 // Account for known-frame-offset alloca instructions that were not already | 535 // Account for known-frame-offset alloca instructions that were not already |
| 540 // combined into the prolog. | 536 // combined into the prolog. |
| 541 if (!PrologEmitsFixedAllocas) | 537 if (!PrologEmitsFixedAllocas) |
| 542 SpillAreaSizeBytes += FixedAllocaSizeBytes; | 538 SpillAreaSizeBytes += FixedAllocaSizeBytes; |
| 543 | 539 |
| 544 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 540 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
| 545 | 541 |
| 546 // Initialize the stack adjustment so that after all the known-frame-offset | |
| 547 // alloca instructions are emitted, the stack adjustment will reach zero. | |
| 548 resetStackAdjustment(); | |
| 549 if (!PrologEmitsFixedAllocas) | |
| 550 updateStackAdjustment(-FixedAllocaSizeBytes); | |
| 551 | |
| 552 // Fill in stack offsets for stack args, and copy args into registers for | 542 // Fill in stack offsets for stack args, and copy args into registers for |
| 553 // those that were register-allocated. Args are pushed right to left, so | 543 // those that were register-allocated. Args are pushed right to left, so |
| 554 // Arg[0] is closest to the stack/frame pointer. | 544 // Arg[0] is closest to the stack/frame pointer. |
| 555 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 545 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
| 556 size_t BasicFrameOffset = | 546 size_t BasicFrameOffset = |
| 557 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; | 547 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; |
| 558 if (!IsEbpBasedFrame) | 548 if (!IsEbpBasedFrame) |
| 559 BasicFrameOffset += SpillAreaSizeBytes; | 549 BasicFrameOffset += SpillAreaSizeBytes; |
| 560 | 550 |
| 561 const VarList &Args = Func->getArgs(); | 551 const VarList &Args = Func->getArgs(); |
| (...skipping 30 matching lines...) Expand all Loading... | |
| 592 } | 582 } |
| 593 this->HasComputedFrame = true; | 583 this->HasComputedFrame = true; |
| 594 | 584 |
| 595 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { | 585 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { |
| 596 OstreamLocker L(Func->getContext()); | 586 OstreamLocker L(Func->getContext()); |
| 597 Ostream &Str = Func->getContext()->getStrDump(); | 587 Ostream &Str = Func->getContext()->getStrDump(); |
| 598 | 588 |
| 599 Str << "Stack layout:\n"; | 589 Str << "Stack layout:\n"; |
| 600 uint32_t EspAdjustmentPaddingSize = | 590 uint32_t EspAdjustmentPaddingSize = |
| 601 SpillAreaSizeBytes - LocalsSpillAreaSize - | 591 SpillAreaSizeBytes - LocalsSpillAreaSize - |
| 602 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; | 592 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes - |
| 593 maxOutArgsSizeBytes(); | |
| 603 Str << " in-args = " << InArgsSizeBytes << " bytes\n" | 594 Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
| 604 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n" | 595 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n" |
| 605 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" | 596 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" |
| 606 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" | 597 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" |
| 607 << " globals spill area = " << GlobalsSize << " bytes\n" | 598 << " globals spill area = " << GlobalsSize << " bytes\n" |
| 608 << " globals-locals spill areas intermediate padding = " | 599 << " globals-locals spill areas intermediate padding = " |
| 609 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" | 600 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" |
| 610 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" | 601 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" |
| 611 << " esp alignment padding = " << EspAdjustmentPaddingSize | 602 << " esp alignment padding = " << EspAdjustmentPaddingSize |
| 612 << " bytes\n"; | 603 << " bytes\n"; |
| 613 | 604 |
| 614 Str << "Stack details:\n" | 605 Str << "Stack details:\n" |
| 615 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" | 606 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" |
| 616 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" | 607 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" |
| 608 << " outgoing args size = " << maxOutArgsSizeBytes() << " bytes\n" | |
| 617 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes | 609 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes |
| 618 << " bytes\n" | 610 << " bytes\n" |
| 619 << " is ebp based = " << IsEbpBasedFrame << "\n"; | 611 << " is ebp based = " << IsEbpBasedFrame << "\n"; |
| 620 } | 612 } |
| 621 } | 613 } |
| 622 | 614 |
| 623 void TargetX8632::addEpilog(CfgNode *Node) { | 615 void TargetX8632::addEpilog(CfgNode *Node) { |
| 624 InstList &Insts = Node->getInsts(); | 616 InstList &Insts = Node->getInsts(); |
| 625 InstList::reverse_iterator RI, E; | 617 InstList::reverse_iterator RI, E; |
| 626 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { | 618 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { |
| (...skipping 379 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1006 // case the high-level table has extra entries. | 998 // case the high-level table has extra entries. |
| 1007 #define X(tag, sizeLog2, align, elts, elty, str) \ | 999 #define X(tag, sizeLog2, align, elts, elty, str) \ |
| 1008 static_assert(_table1_##tag == _table2_##tag, \ | 1000 static_assert(_table1_##tag == _table2_##tag, \ |
| 1009 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | 1001 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
| 1010 ICETYPE_TABLE | 1002 ICETYPE_TABLE |
| 1011 #undef X | 1003 #undef X |
| 1012 } // end of namespace dummy3 | 1004 } // end of namespace dummy3 |
| 1013 } // end of anonymous namespace | 1005 } // end of anonymous namespace |
| 1014 | 1006 |
| 1015 } // end of namespace Ice | 1007 } // end of namespace Ice |
| OLD | NEW |