| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8664.cpp - x86-64 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 163 // This intends to match the section "Function Calling Sequence" of the | 163 // This intends to match the section "Function Calling Sequence" of the |
| 164 // document "System V Application Binary Interface." | 164 // document "System V Application Binary Interface." |
| 165 NeedsStackAlignment = true; | 165 NeedsStackAlignment = true; |
| 166 | 166 |
| 167 using OperandList = | 167 using OperandList = |
| 168 llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS, | 168 llvm::SmallVector<Operand *, constexprMax(Traits::X86_MAX_XMM_ARGS, |
| 169 Traits::X86_MAX_GPR_ARGS)>; | 169 Traits::X86_MAX_GPR_ARGS)>; |
| 170 OperandList XmmArgs; | 170 OperandList XmmArgs; |
| 171 OperandList GprArgs; | 171 OperandList GprArgs; |
| 172 OperandList StackArgs, StackArgLocations; | 172 OperandList StackArgs, StackArgLocations; |
| 173 uint32_t ParameterAreaSizeBytes = 0; | 173 int32_t ParameterAreaSizeBytes = 0; |
| 174 | 174 |
| 175 // Classify each argument operand according to the location where the | 175 // Classify each argument operand according to the location where the |
| 176 // argument is passed. | 176 // argument is passed. |
| 177 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | 177 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
| 178 Operand *Arg = Instr->getArg(i); | 178 Operand *Arg = Instr->getArg(i); |
| 179 Type Ty = Arg->getType(); | 179 Type Ty = Arg->getType(); |
| 180 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | 180 // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
| 181 assert(typeWidthInBytes(Ty) >= 4); | 181 assert(typeWidthInBytes(Ty) >= 4); |
| 182 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | 182 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { |
| 183 XmmArgs.push_back(Arg); | 183 XmmArgs.push_back(Arg); |
| (...skipping 13 matching lines...) Expand all Loading... |
| 197 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | 197 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); |
| 198 StackArgLocations.push_back( | 198 StackArgLocations.push_back( |
| 199 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); | 199 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); |
| 200 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | 200 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
| 201 } | 201 } |
| 202 } | 202 } |
| 203 | 203 |
| 204 // Adjust the parameter area so that the stack is aligned. It is assumed that | 204 // Adjust the parameter area so that the stack is aligned. It is assumed that |
| 205 // the stack is already aligned at the start of the calling sequence. | 205 // the stack is already aligned at the start of the calling sequence. |
| 206 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | 206 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 207 | 207 assert(static_cast<uint32_t>(ParameterAreaSizeBytes) <= |
| 208 // Subtract the appropriate amount for the argument area. This also takes | 208 maxOutArgsSizeBytes()); |
| 209 // care of setting the stack adjustment during emission. | |
| 210 // | |
| 211 // TODO: If for some reason the call instruction gets dead-code eliminated | |
| 212 // after lowering, we would need to ensure that the pre-call and the | |
| 213 // post-call esp adjustment get eliminated as well. | |
| 214 if (ParameterAreaSizeBytes) { | |
| 215 _adjust_stack(ParameterAreaSizeBytes); | |
| 216 } | |
| 217 | 209 |
| 218 // Copy arguments that are passed on the stack to the appropriate stack | 210 // Copy arguments that are passed on the stack to the appropriate stack |
| 219 // locations. | 211 // locations. |
| 220 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | 212 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { |
| 221 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | 213 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); |
| 222 } | 214 } |
| 223 | 215 |
| 224 // Copy arguments to be passed in registers to the appropriate registers. | 216 // Copy arguments to be passed in registers to the appropriate registers. |
| 225 // TODO: Investigate the impact of lowering arguments passed in registers | 217 // TODO: Investigate the impact of lowering arguments passed in registers |
| 226 // after lowering stack arguments as opposed to the other way around. | 218 // after lowering stack arguments as opposed to the other way around. |
| (...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 402 // | 4. global spill area | | 394 // | 4. global spill area | |
| 403 // +------------------------+ | 395 // +------------------------+ |
| 404 // | 5. padding | | 396 // | 5. padding | |
| 405 // +------------------------+ | 397 // +------------------------+ |
| 406 // | 6. local spill area | | 398 // | 6. local spill area | |
| 407 // +------------------------+ | 399 // +------------------------+ |
| 408 // | 7. padding | | 400 // | 7. padding | |
| 409 // +------------------------+ | 401 // +------------------------+ |
| 410 // | 8. allocas | | 402 // | 8. allocas | |
| 411 // +------------------------+ | 403 // +------------------------+ |
| 404 // | 9. padding | |
| 405 // +------------------------+ |
| 406 // | 10. out args | |
| 407 // +------------------------+ <--- StackPointer |
| 412 // | 408 // |
| 413 // The following variables record the size in bytes of the given areas: | 409 // The following variables record the size in bytes of the given areas: |
| 414 // * X86_RET_IP_SIZE_BYTES: area 1 | 410 // * X86_RET_IP_SIZE_BYTES: area 1 |
| 415 // * PreservedRegsSizeBytes: area 2 | 411 // * PreservedRegsSizeBytes: area 2 |
| 416 // * SpillAreaPaddingBytes: area 3 | 412 // * SpillAreaPaddingBytes: area 3 |
| 417 // * GlobalsSize: area 4 | 413 // * GlobalsSize: area 4 |
| 418 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 | 414 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 |
| 419 // * LocalsSpillAreaSize: area 6 | 415 // * LocalsSpillAreaSize: area 6 |
| 420 // * SpillAreaSizeBytes: areas 3 - 7 | 416 // * SpillAreaSizeBytes: areas 3 - 10 |
| 417 // * maxOutArgsSizeBytes(): area 10 |
| 421 | 418 |
| 422 // Determine stack frame offsets for each Variable without a register | 419 // Determine stack frame offsets for each Variable without a register |
| 423 // assignment. This can be done as one variable per stack slot. Or, do | 420 // assignment. This can be done as one variable per stack slot. Or, do |
| 424 // coalescing by running the register allocator again with an infinite set of | 421 // coalescing by running the register allocator again with an infinite set of |
| 425 // registers (as a side effect, this gives variables a second chance at | 422 // registers (as a side effect, this gives variables a second chance at |
| 426 // physical register assignment). | 423 // physical register assignment). |
| 427 // | 424 // |
| 428 // A middle ground approach is to leverage sparsity and allocate one block of | 425 // A middle ground approach is to leverage sparsity and allocate one block of |
| 429 // space on the frame for globals (variables with multi-block lifetime), and | 426 // space on the frame for globals (variables with multi-block lifetime), and |
| 430 // one block to share for locals (single-block lifetime). | 427 // one block to share for locals (single-block lifetime). |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 507 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; | 504 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
| 508 uint32_t GlobalsAndSubsequentPaddingSize = | 505 uint32_t GlobalsAndSubsequentPaddingSize = |
| 509 GlobalsSize + LocalsSlotsPaddingBytes; | 506 GlobalsSize + LocalsSlotsPaddingBytes; |
| 510 | 507 |
| 511 // Align esp if necessary. | 508 // Align esp if necessary. |
| 512 if (NeedsStackAlignment) { | 509 if (NeedsStackAlignment) { |
| 513 uint32_t StackOffset = | 510 uint32_t StackOffset = |
| 514 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; | 511 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| 515 uint32_t StackSize = | 512 uint32_t StackSize = |
| 516 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 513 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
| 514 StackSize = Traits::applyStackAlignment(StackSize + maxOutArgsSizeBytes()); |
| 517 SpillAreaSizeBytes = StackSize - StackOffset; | 515 SpillAreaSizeBytes = StackSize - StackOffset; |
| 516 } else { |
| 517 SpillAreaSizeBytes += maxOutArgsSizeBytes(); |
| 518 } | 518 } |
| 519 | 519 |
| 520 // Combine fixed allocations into SpillAreaSizeBytes if we are emitting the |
| 521 // fixed allocations in the prolog. |
| 522 if (PrologEmitsFixedAllocas) |
| 523 SpillAreaSizeBytes += FixedAllocaSizeBytes; |
| 520 // Generate "sub esp, SpillAreaSizeBytes" | 524 // Generate "sub esp, SpillAreaSizeBytes" |
| 521 if (SpillAreaSizeBytes) | 525 if (SpillAreaSizeBytes) { |
| 522 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), | 526 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), |
| 523 Ctx->getConstantInt32(SpillAreaSizeBytes)); | 527 Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| 528 // If the fixed allocas are aligned more than the stack frame, align the |
| 529 // stack pointer accordingly. |
| 530 if (PrologEmitsFixedAllocas && |
| 531 FixedAllocaAlignBytes > Traits::X86_STACK_ALIGNMENT_BYTES) { |
| 532 assert(IsEbpBasedFrame); |
| 533 _and(getPhysicalRegister(Traits::RegisterSet::Reg_esp), |
| 534 Ctx->getConstantInt32(-FixedAllocaAlignBytes)); |
| 535 } |
| 536 } |
| 524 | 537 |
| 525 // Account for alloca instructions with known frame offsets. | 538 // Account for alloca instructions with known frame offsets. |
| 526 SpillAreaSizeBytes += FixedAllocaSizeBytes; | 539 if (!PrologEmitsFixedAllocas) |
| 540 SpillAreaSizeBytes += FixedAllocaSizeBytes; |
| 527 | 541 |
| 528 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 542 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
| 529 | 543 |
| 530 // Initialize the stack adjustment so that after all the known-frame-offset | |
| 531 // alloca instructions are emitted, the stack adjustment will reach zero. | |
| 532 resetStackAdjustment(); | |
| 533 updateStackAdjustment(-FixedAllocaSizeBytes); | |
| 534 | |
| 535 // Fill in stack offsets for stack args, and copy args into registers for | 544 // Fill in stack offsets for stack args, and copy args into registers for |
| 536 // those that were register-allocated. Args are pushed right to left, so | 545 // those that were register-allocated. Args are pushed right to left, so |
| 537 // Arg[0] is closest to the stack/frame pointer. | 546 // Arg[0] is closest to the stack/frame pointer. |
| 538 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 547 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
| 539 size_t BasicFrameOffset = | 548 size_t BasicFrameOffset = |
| 540 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; | 549 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; |
| 541 if (!IsEbpBasedFrame) | 550 if (!IsEbpBasedFrame) |
| 542 BasicFrameOffset += SpillAreaSizeBytes; | 551 BasicFrameOffset += SpillAreaSizeBytes; |
| 543 | 552 |
| 544 const VarList &Args = Func->getArgs(); | 553 const VarList &Args = Func->getArgs(); |
| (...skipping 11 matching lines...) Expand all Loading... |
| 556 assert(isScalarIntegerType(Arg->getType())); | 565 assert(isScalarIntegerType(Arg->getType())); |
| 557 if (NumGPRArgs < Traits::X86_MAX_GPR_ARGS) { | 566 if (NumGPRArgs < Traits::X86_MAX_GPR_ARGS) { |
| 558 ++NumGPRArgs; | 567 ++NumGPRArgs; |
| 559 continue; | 568 continue; |
| 560 } | 569 } |
| 561 } | 570 } |
| 562 // For esp-based frames, the esp value may not stabilize to its home value | 571 // For esp-based frames, the esp value may not stabilize to its home value |
| 563 // until after all the fixed-size alloca instructions have executed. In | 572 // until after all the fixed-size alloca instructions have executed. In |
| 564 // this case, a stack adjustment is needed when accessing in-args in order | 573 // this case, a stack adjustment is needed when accessing in-args in order |
| 565 // to copy them into registers. | 574 // to copy them into registers. |
| 566 size_t StackAdjBytes = IsEbpBasedFrame ? 0 : -FixedAllocaSizeBytes; | 575 size_t StackAdjBytes = 0; |
| 576 if (!IsEbpBasedFrame && !PrologEmitsFixedAllocas) |
| 577 StackAdjBytes -= FixedAllocaSizeBytes; |
| 567 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes, | 578 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, StackAdjBytes, |
| 568 InArgsSizeBytes); | 579 InArgsSizeBytes); |
| 569 } | 580 } |
| 570 | 581 |
| 571 // Fill in stack offsets for locals. | 582 // Fill in stack offsets for locals. |
| 572 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, | 583 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, |
| 573 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, | 584 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, |
| 574 IsEbpBasedFrame); | 585 IsEbpBasedFrame); |
| 575 // Assign stack offsets to variables that have been linked to spilled | 586 // Assign stack offsets to variables that have been linked to spilled |
| 576 // variables. | 587 // variables. |
| 577 for (Variable *Var : VariablesLinkedToSpillSlots) { | 588 for (Variable *Var : VariablesLinkedToSpillSlots) { |
| 578 Variable *Linked = | 589 Variable *Linked = |
| 579 (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo(); | 590 (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo(); |
| 580 Var->setStackOffset(Linked->getStackOffset()); | 591 Var->setStackOffset(Linked->getStackOffset()); |
| 581 } | 592 } |
| 582 this->HasComputedFrame = true; | 593 this->HasComputedFrame = true; |
| 583 | 594 |
| 584 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { | 595 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { |
| 585 OstreamLocker L(Func->getContext()); | 596 OstreamLocker L(Func->getContext()); |
| 586 Ostream &Str = Func->getContext()->getStrDump(); | 597 Ostream &Str = Func->getContext()->getStrDump(); |
| 587 | 598 |
| 588 Str << "Stack layout:\n"; | 599 Str << "Stack layout:\n"; |
| 589 uint32_t EspAdjustmentPaddingSize = | 600 uint32_t EspAdjustmentPaddingSize = |
| 590 SpillAreaSizeBytes - LocalsSpillAreaSize - | 601 SpillAreaSizeBytes - LocalsSpillAreaSize - |
| 591 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; | 602 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes - |
| 603 maxOutArgsSizeBytes(); |
| 592 Str << " in-args = " << InArgsSizeBytes << " bytes\n" | 604 Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
| 593 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n" | 605 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n" |
| 594 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" | 606 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" |
| 595 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" | 607 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" |
| 596 << " globals spill area = " << GlobalsSize << " bytes\n" | 608 << " globals spill area = " << GlobalsSize << " bytes\n" |
| 597 << " globals-locals spill areas intermediate padding = " | 609 << " globals-locals spill areas intermediate padding = " |
| 598 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" | 610 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" |
| 599 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" | 611 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" |
| 600 << " esp alignment padding = " << EspAdjustmentPaddingSize | 612 << " esp alignment padding = " << EspAdjustmentPaddingSize |
| 601 << " bytes\n"; | 613 << " bytes\n"; |
| 602 | 614 |
| 603 Str << "Stack details:\n" | 615 Str << "Stack details:\n" |
| 604 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" | 616 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" |
| 605 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" | 617 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" |
| 618 << " outgoing args size = " << maxOutArgsSizeBytes() << " bytes\n" |
| 606 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes | 619 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes |
| 607 << " bytes\n" | 620 << " bytes\n" |
| 608 << " is ebp based = " << IsEbpBasedFrame << "\n"; | 621 << " is ebp based = " << IsEbpBasedFrame << "\n"; |
| 609 } | 622 } |
| 610 } | 623 } |
| 611 | 624 |
| 612 void TargetX8664::addEpilog(CfgNode *Node) { | 625 void TargetX8664::addEpilog(CfgNode *Node) { |
| 613 InstList &Insts = Node->getInsts(); | 626 InstList &Insts = Node->getInsts(); |
| 614 InstList::reverse_iterator RI, E; | 627 InstList::reverse_iterator RI, E; |
| 615 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { | 628 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { |
| (...skipping 350 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 966 // case the high-level table has extra entries. | 979 // case the high-level table has extra entries. |
| 967 #define X(tag, sizeLog2, align, elts, elty, str) \ | 980 #define X(tag, sizeLog2, align, elts, elty, str) \ |
| 968 static_assert(_table1_##tag == _table2_##tag, \ | 981 static_assert(_table1_##tag == _table2_##tag, \ |
| 969 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); | 982 "Inconsistency between ICETYPEX8664_TABLE and ICETYPE_TABLE"); |
| 970 ICETYPE_TABLE | 983 ICETYPE_TABLE |
| 971 #undef X | 984 #undef X |
| 972 } // end of namespace dummy3 | 985 } // end of namespace dummy3 |
| 973 } // end of anonymous namespace | 986 } // end of anonymous namespace |
| 974 | 987 |
| 975 } // end of namespace Ice | 988 } // end of namespace Ice |
| OLD | NEW |