| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 145 } else { | 145 } else { |
| 146 StackArgs.push_back(Arg); | 146 StackArgs.push_back(Arg); |
| 147 if (isVectorType(Arg->getType())) { | 147 if (isVectorType(Arg->getType())) { |
| 148 ParameterAreaSizeBytes = | 148 ParameterAreaSizeBytes = |
| 149 Traits::applyStackAlignment(ParameterAreaSizeBytes); | 149 Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 150 } | 150 } |
| 151 Variable *esp = | 151 Variable *esp = |
| 152 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | 152 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 153 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | 153 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); |
| 154 auto *Mem = Traits::X86OperandMem::create(Func, Ty, esp, Loc); | 154 auto *Mem = Traits::X86OperandMem::create(Func, Ty, esp, Loc); |
| 155 // Stack stores for arguments are fixed to esp. | |
| 156 Mem->setIgnoreStackAdjust(true); | |
| 157 StackArgLocations.push_back(Mem); | 155 StackArgLocations.push_back(Mem); |
| 158 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | 156 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
| 159 } | 157 } |
| 160 } | 158 } |
| 161 // Ensure there is enough space for the fstp/movs for floating returns. | 159 // Ensure there is enough space for the fstp/movs for floating returns. |
| 162 Variable *Dest = Instr->getDest(); | 160 Variable *Dest = Instr->getDest(); |
| 163 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { | 161 if (Dest != nullptr && isScalarFloatingType(Dest->getType())) { |
| 164 ParameterAreaSizeBytes = | 162 ParameterAreaSizeBytes = |
| 165 std::max(static_cast<size_t>(ParameterAreaSizeBytes), | 163 std::max(static_cast<size_t>(ParameterAreaSizeBytes), |
| 166 typeWidthInBytesOnStack(Dest->getType())); | 164 typeWidthInBytesOnStack(Dest->getType())); |
| 167 } | 165 } |
| 168 | 166 |
| 169 // Adjust the parameter area so that the stack is aligned. It is assumed that | 167 // Adjust the parameter area so that the stack is aligned. It is assumed that |
| 170 // the stack is already aligned at the start of the calling sequence. | 168 // the stack is already aligned at the start of the calling sequence. |
| 171 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | 169 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 172 | 170 assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <= |
| 173 // Subtract the appropriate amount for the argument area. This also takes | 171 maxOutArgsSizeBytes()); |
| 174 // care of setting the stack adjustment during emission. | |
| 175 // | |
| 176 // TODO: If for some reason the call instruction gets dead-code eliminated | |
| 177 // after lowering, we would need to ensure that the pre-call and the | |
| 178 // post-call esp adjustment get eliminated as well. | |
| 179 if (ParameterAreaSizeBytes) { | |
| 180 _adjust_stack(ParameterAreaSizeBytes); | |
| 181 } | |
| 182 | 172 |
| 183 // Copy arguments that are passed on the stack to the appropriate stack | 173 // Copy arguments that are passed on the stack to the appropriate stack |
| 184 // locations. | 174 // locations. |
| 185 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | 175 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { |
| 186 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | 176 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); |
| 187 } | 177 } |
| 188 | 178 |
| 189 // Copy arguments to be passed in registers to the appropriate registers. | 179 // Copy arguments to be passed in registers to the appropriate registers. |
| 190 // TODO: Investigate the impact of lowering arguments passed in registers | 180 // TODO: Investigate the impact of lowering arguments passed in registers |
| 191 // after lowering stack arguments as opposed to the other way around. | 181 // after lowering stack arguments as opposed to the other way around. |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 268 // Special treatment for an FP function which returns its result in st(0). | 258 // Special treatment for an FP function which returns its result in st(0). |
| 269 // If Dest ends up being a physical xmm register, the fstp emit code will | 259 // If Dest ends up being a physical xmm register, the fstp emit code will |
| 270 // route st(0) through the space reserved in the function argument area | 260 // route st(0) through the space reserved in the function argument area |
| 271 // we allocated. | 261 // we allocated. |
| 272 _fstp(Dest); | 262 _fstp(Dest); |
| 273 // Create a fake use of Dest in case it actually isn't used, because st(0) | 263 // Create a fake use of Dest in case it actually isn't used, because st(0) |
| 274 // still needs to be popped. | 264 // still needs to be popped. |
| 275 Context.insert(InstFakeUse::create(Func, Dest)); | 265 Context.insert(InstFakeUse::create(Func, Dest)); |
| 276 } | 266 } |
| 277 | 267 |
| 278 // Add the appropriate offset to esp. | |
| 279 if (ParameterAreaSizeBytes) | |
| 280 _adjust_stack(-ParameterAreaSizeBytes); | |
| 281 | |
| 282 // Generate a FakeUse to keep the call live if necessary. | 268 // Generate a FakeUse to keep the call live if necessary. |
| 283 if (Instr->hasSideEffects() && ReturnReg) { | 269 if (Instr->hasSideEffects() && ReturnReg) { |
| 284 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); | 270 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
| 285 Context.insert(FakeUse); | 271 Context.insert(FakeUse); |
| 286 } | 272 } |
| 287 | 273 |
| 288 if (!Dest) | 274 if (!Dest) |
| 289 return; | 275 return; |
| 290 | 276 |
| 291 // Assign the result of the call to Dest. | 277 // Assign the result of the call to Dest. |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 384 // | 4. global spill area | | 370 // | 4. global spill area | |
| 385 // +------------------------+ | 371 // +------------------------+ |
| 386 // | 5. padding | | 372 // | 5. padding | |
| 387 // +------------------------+ | 373 // +------------------------+ |
| 388 // | 6. local spill area | | 374 // | 6. local spill area | |
| 389 // +------------------------+ | 375 // +------------------------+ |
| 390 // | 7. padding | | 376 // | 7. padding | |
| 391 // +------------------------+ | 377 // +------------------------+ |
| 392 // | 8. allocas | | 378 // | 8. allocas | |
| 393 // +------------------------+ | 379 // +------------------------+ |
| 380 // | 9. padding | |
| 381 // +------------------------+ |
| 382 // | 10. out args | |
| 383 // +------------------------+ <--- StackPointer |
| 394 // | 384 // |
| 395 // The following variables record the size in bytes of the given areas: | 385 // The following variables record the size in bytes of the given areas: |
| 396 // * X86_RET_IP_SIZE_BYTES: area 1 | 386 // * X86_RET_IP_SIZE_BYTES: area 1 |
| 397 // * PreservedRegsSizeBytes: area 2 | 387 // * PreservedRegsSizeBytes: area 2 |
| 398 // * SpillAreaPaddingBytes: area 3 | 388 // * SpillAreaPaddingBytes: area 3 |
| 399 // * GlobalsSize: area 4 | 389 // * GlobalsSize: area 4 |
| 400 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 | 390 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 |
| 401 // * LocalsSpillAreaSize: area 6 | 391 // * LocalsSpillAreaSize: area 6 |
| 402 // * SpillAreaSizeBytes: areas 3 - 7 | 392 // * SpillAreaSizeBytes: areas 3 - 10 |
| 393 // * maxOutArgsSizeBytes(): area 10 |
| 403 | 394 |
| 404 // Determine stack frame offsets for each Variable without a register | 395 // Determine stack frame offsets for each Variable without a register |
| 405 // assignment. This can be done as one variable per stack slot. Or, do | 396 // assignment. This can be done as one variable per stack slot. Or, do |
| 406 // coalescing by running the register allocator again with an infinite set of | 397 // coalescing by running the register allocator again with an infinite set of |
| 407 // registers (as a side effect, this gives variables a second chance at | 398 // registers (as a side effect, this gives variables a second chance at |
| 408 // physical register assignment). | 399 // physical register assignment). |
| 409 // | 400 // |
| 410 // A middle ground approach is to leverage sparsity and allocate one block of | 401 // A middle ground approach is to leverage sparsity and allocate one block of |
| 411 // space on the frame for globals (variables with multi-block lifetime), and | 402 // space on the frame for globals (variables with multi-block lifetime), and |
| 412 // one block to share for locals (single-block lifetime). | 403 // one block to share for locals (single-block lifetime). |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 508 SpillAreaSizeBytes = | 499 SpillAreaSizeBytes = |
| 509 std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes); | 500 std::max(typeWidthInBytesOnStack(ReturnType), SpillAreaSizeBytes); |
| 510 } | 501 } |
| 511 | 502 |
| 512 // Align esp if necessary. | 503 // Align esp if necessary. |
| 513 if (NeedsStackAlignment) { | 504 if (NeedsStackAlignment) { |
| 514 uint32_t StackOffset = | 505 uint32_t StackOffset = |
| 515 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; | 506 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| 516 uint32_t StackSize = | 507 uint32_t StackSize = |
| 517 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 508 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
| 509 StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes()); |
| 518 SpillAreaSizeBytes = StackSize - StackOffset; | 510 SpillAreaSizeBytes = StackSize - StackOffset; |
| 511 } else { |
| 512 SpillAreaSizeBytes += maxOutArgsSizeBytes(); |
| 519 } | 513 } |
| 520 | 514 |
| 521 // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the | 515 // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the |
| 522 // fixed allocations in the prolog. | 516 // fixed allocations in the prolog. |
| 523 if (PrologEmitsFixedAllocas) | 517 if (PrologEmitsFixedAllocas) |
| 524 SpillAreaSizeBytes += FixedAllocaSizeBytes; | 518 SpillAreaSizeBytes += FixedAllocaSizeBytes; |
| 525 if (SpillAreaSizeBytes) { | 519 if (SpillAreaSizeBytes) { |
| 526 // Generate "sub esp, SpillAreaSizeBytes" | 520 // Generate "sub esp, SpillAreaSizeBytes" |
| 527 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), | 521 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), |
| 528 Ctx->getConstantInt32(SpillAreaSizeBytes)); | 522 Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| 529 // If the fixed allocas are aligned more than the stack frame, align the | 523 // If the fixed allocas are aligned more than the stack frame, align the |
| 530 // stack pointer accordingly. | 524 // stack pointer accordingly. |
| 531 if (PrologEmitsFixedAllocas && | 525 if (PrologEmitsFixedAllocas && |
| 532 FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) { | 526 FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) { |
| 533 assert(IsEbpBasedFrame); | 527 assert(IsEbpBasedFrame); |
| 534 _and(getPhysicalRegister(Traits::RegisterSet::Reg_esp), | 528 _and(getPhysicalRegister(Traits::RegisterSet::Reg_esp), |
| 535 Ctx->getConstantInt32(-FixedAllocaAlignBytes)); | 529 Ctx->getConstantInt32(-FixedAllocaAlignBytes)); |
| 536 } | 530 } |
| 537 } | 531 } |
| 538 | 532 |
| 539 // Account for known-frame-offset alloca instructions that were not already | 533 // Account for known-frame-offset alloca instructions that were not already |
| 540 // combined into the prolog. | 534 // combined into the prolog. |
| 541 if (!PrologEmitsFixedAllocas) | 535 if (!PrologEmitsFixedAllocas) |
| 542 SpillAreaSizeBytes += FixedAllocaSizeBytes; | 536 SpillAreaSizeBytes += FixedAllocaSizeBytes; |
| 543 | 537 |
| 544 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 538 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
| 545 | 539 |
| 546 // Initialize the stack adjustment so that after all the known-frame-offset | |
| 547 // alloca instructions are emitted, the stack adjustment will reach zero. | |
| 548 resetStackAdjustment(); | |
| 549 if (!PrologEmitsFixedAllocas) | |
| 550 updateStackAdjustment(-FixedAllocaSizeBytes); | |
| 551 | |
| 552 // Fill in stack offsets for stack args, and copy args into registers for | 540 // Fill in stack offsets for stack args, and copy args into registers for |
| 553 // those that were register-allocated. Args are pushed right to left, so | 541 // those that were register-allocated. Args are pushed right to left, so |
| 554 // Arg[0] is closest to the stack/frame pointer. | 542 // Arg[0] is closest to the stack/frame pointer. |
| 555 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 543 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
| 556 size_t BasicFrameOffset = | 544 size_t BasicFrameOffset = |
| 557 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; | 545 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; |
| 558 if (!IsEbpBasedFrame) | 546 if (!IsEbpBasedFrame) |
| 559 BasicFrameOffset += SpillAreaSizeBytes; | 547 BasicFrameOffset += SpillAreaSizeBytes; |
| 560 | 548 |
| 561 const VarList &Args = Func->getArgs(); | 549 const VarList &Args = Func->getArgs(); |
| (...skipping 30 matching lines...) Expand all Loading... |
| 592 } | 580 } |
| 593 this->HasComputedFrame = true; | 581 this->HasComputedFrame = true; |
| 594 | 582 |
| 595 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { | 583 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { |
| 596 OstreamLocker L(Func->getContext()); | 584 OstreamLocker L(Func->getContext()); |
| 597 Ostream &Str = Func->getContext()->getStrDump(); | 585 Ostream &Str = Func->getContext()->getStrDump(); |
| 598 | 586 |
| 599 Str << "Stack layout:\n"; | 587 Str << "Stack layout:\n"; |
| 600 uint32_t EspAdjustmentPaddingSize = | 588 uint32_t EspAdjustmentPaddingSize = |
| 601 SpillAreaSizeBytes - LocalsSpillAreaSize - | 589 SpillAreaSizeBytes - LocalsSpillAreaSize - |
| 602 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; | 590 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes - |
| 591 maxOutArgsSizeBytes(); |
| 603 Str << " in-args = " << InArgsSizeBytes << " bytes\n" | 592 Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
| 604 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n" | 593 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n" |
| 605 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" | 594 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" |
| 606 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" | 595 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" |
| 607 << " globals spill area = " << GlobalsSize << " bytes\n" | 596 << " globals spill area = " << GlobalsSize << " bytes\n" |
| 608 << " globals-locals spill areas intermediate padding = " | 597 << " globals-locals spill areas intermediate padding = " |
| 609 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" | 598 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" |
| 610 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" | 599 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" |
| 611 << " esp alignment padding = " << EspAdjustmentPaddingSize | 600 << " esp alignment padding = " << EspAdjustmentPaddingSize |
| 612 << " bytes\n"; | 601 << " bytes\n"; |
| 613 | 602 |
| 614 Str << "Stack details:\n" | 603 Str << "Stack details:\n" |
| 615 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" | 604 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" |
| 616 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" | 605 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" |
| 606 << " outgoing args size = " << maxOutArgsSizeBytes() << " bytes\n" |
| 617 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes | 607 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes |
| 618 << " bytes\n" | 608 << " bytes\n" |
| 619 << " is ebp based = " << IsEbpBasedFrame << "\n"; | 609 << " is ebp based = " << IsEbpBasedFrame << "\n"; |
| 620 } | 610 } |
| 621 } | 611 } |
| 622 | 612 |
| 623 void TargetX8632::addEpilog(CfgNode *Node) { | 613 void TargetX8632::addEpilog(CfgNode *Node) { |
| 624 InstList &Insts = Node->getInsts(); | 614 InstList &Insts = Node->getInsts(); |
| 625 InstList::reverse_iterator RI, E; | 615 InstList::reverse_iterator RI, E; |
| 626 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { | 616 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { |
| (...skipping 379 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1006 // case the high-level table has extra entries. | 996 // case the high-level table has extra entries. |
| 1007 #define X(tag, sizeLog2, align, elts, elty, str) \ | 997 #define X(tag, sizeLog2, align, elts, elty, str) \ |
| 1008 static_assert(_table1_##tag == _table2_##tag, \ | 998 static_assert(_table1_##tag == _table2_##tag, \ |
| 1009 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | 999 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
| 1010 ICETYPE_TABLE | 1000 ICETYPE_TABLE |
| 1011 #undef X | 1001 #undef X |
| 1012 } // end of namespace dummy3 | 1002 } // end of namespace dummy3 |
| 1013 } // end of anonymous namespace | 1003 } // end of anonymous namespace |
| 1014 | 1004 |
| 1015 } // end of namespace Ice | 1005 } // end of namespace Ice |
| OLD | NEW |