Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2705)

Side by Side Diff: src/IceTargetLoweringMIPS32.cpp

Issue 2051713002: [Subzero][MIPS32] Adds prolog instructions for MIPS32 (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // 1 //
2 // The Subzero Code Generator 2 // The Subzero Code Generator
3 // 3 //
4 // This file is distributed under the University of Illinois Open Source 4 // This file is distributed under the University of Illinois Open Source
5 // License. See LICENSE.TXT for details. 5 // License. See LICENSE.TXT for details.
6 // 6 //
7 //===----------------------------------------------------------------------===// 7 //===----------------------------------------------------------------------===//
8 /// 8 ///
9 /// \file 9 /// \file
10 /// \brief Implements the TargetLoweringMIPS32 class, which consists almost 10 /// \brief Implements the TargetLoweringMIPS32 class, which consists almost
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
122 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; 122 TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
123 123
124 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i) 124 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
125 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i]; 125 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
126 126
127 filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet, 127 filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
128 llvm::array_lengthof(TypeToRegisterSet), 128 llvm::array_lengthof(TypeToRegisterSet),
129 RegMIPS32::getRegName, getRegClassName); 129 RegMIPS32::getRegName, getRegClassName);
130 } 130 }
131 131
132 void TargetMIPS32::findMaxStackOutArgsSize() {
133 // MinNeededOutArgsBytes should be updated if the Target ever creates a
134 // high-level InstCall that requires more stack bytes.
135 constexpr size_t MinNeededOutArgsBytes = 16;
136 MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
137 for (CfgNode *Node : Func->getNodes()) {
138 Context.init(Node);
139 while (!Context.atEnd()) {
140 PostIncrLoweringContext PostIncrement(Context);
141 Inst *CurInstr = iteratorToInst(Context.getCur());
142 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
143 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
144 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
145 }
146 }
147 }
148 }
149
132 void TargetMIPS32::translateO2() { 150 void TargetMIPS32::translateO2() {
133 TimerMarker T(TimerStack::TT_O2, Func); 151 TimerMarker T(TimerStack::TT_O2, Func);
134 152
135 // TODO(stichnot): share passes with X86? 153 // TODO(stichnot): share passes with X86?
136 // https://code.google.com/p/nativeclient/issues/detail?id=4094 154 // https://code.google.com/p/nativeclient/issues/detail?id=4094
137 genTargetHelperCalls(); 155 genTargetHelperCalls();
138 156
157 findMaxStackOutArgsSize();
158
139 // Merge Alloca instructions, and lay out the stack. 159 // Merge Alloca instructions, and lay out the stack.
140 static constexpr bool SortAndCombineAllocas = false; 160 static constexpr bool SortAndCombineAllocas = false;
141 Func->processAllocas(SortAndCombineAllocas); 161 Func->processAllocas(SortAndCombineAllocas);
142 Func->dump("After Alloca processing"); 162 Func->dump("After Alloca processing");
143 163
144 if (!getFlags().getEnablePhiEdgeSplit()) { 164 if (!getFlags().getEnablePhiEdgeSplit()) {
145 // Lower Phi instructions. 165 // Lower Phi instructions.
146 Func->placePhiLoads(); 166 Func->placePhiLoads();
147 if (Func->hasError()) 167 if (Func->hasError())
148 return; 168 return;
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
230 Func->doNopInsertion(); 250 Func->doNopInsertion();
231 } 251 }
232 } 252 }
233 253
234 void TargetMIPS32::translateOm1() { 254 void TargetMIPS32::translateOm1() {
235 TimerMarker T(TimerStack::TT_Om1, Func); 255 TimerMarker T(TimerStack::TT_Om1, Func);
236 256
237 // TODO: share passes with X86? 257 // TODO: share passes with X86?
238 genTargetHelperCalls(); 258 genTargetHelperCalls();
239 259
260 findMaxStackOutArgsSize();
261
240 // Do not merge Alloca instructions, and lay out the stack. 262 // Do not merge Alloca instructions, and lay out the stack.
241 static constexpr bool SortAndCombineAllocas = false; 263 static constexpr bool SortAndCombineAllocas = false;
242 Func->processAllocas(SortAndCombineAllocas); 264 Func->processAllocas(SortAndCombineAllocas);
243 Func->dump("After Alloca processing"); 265 Func->dump("After Alloca processing");
244 266
245 Func->placePhiLoads(); 267 Func->placePhiLoads();
246 if (Func->hasError()) 268 if (Func->hasError())
247 return; 269 return;
248 Func->placePhiStores(); 270 Func->placePhiStores();
249 if (Func->hasError()) 271 if (Func->hasError())
(...skipping 224 matching lines...) Expand 10 before | Expand all | Expand 10 after
474 RegisterArg->setIsArg(); 496 RegisterArg->setIsArg();
475 Arg->setIsArg(false); 497 Arg->setIsArg(false);
476 Args[I] = RegisterArg; 498 Args[I] = RegisterArg;
477 Context.insert<InstAssign>(Arg, RegisterArg); 499 Context.insert<InstAssign>(Arg, RegisterArg);
478 } 500 }
479 } 501 }
480 } 502 }
481 503
482 Type TargetMIPS32::stackSlotType() { return IceType_i32; } 504 Type TargetMIPS32::stackSlotType() { return IceType_i32; }
483 505
506 // Helper function for addProlog().
507 //
508 // This assumes Arg is an argument passed on the stack. This sets the frame
509 // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
510 // I64 arg that has been split into Lo and Hi components, it calls itself
511 // recursively on the components, taking care to handle Lo first because of the
512 // little-endian architecture. Lastly, this function generates an instruction
513 // to copy Arg into its assigned register if applicable.
514 void TargetMIPS32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
515 size_t BasicFrameOffset,
516 size_t *InArgsSizeBytes) {
517 const Type Ty = Arg->getType();
518 *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);
519
520 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
521 Variable *const Lo = Arg64On32->getLo();
522 Variable *const Hi = Arg64On32->getHi();
523 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
524 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
525 return;
526 }
527 assert(Ty != IceType_i64);
528
529 const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
530 *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
531
532 if (!Arg->hasReg()) {
533 Arg->setStackOffset(ArgStackOffset);
534 return;
535 }
536
537 // If the argument variable has been assigned a register, we need to copy the
538 // value from the stack slot.
539 Variable *Parameter = Func->makeVariable(Ty);
540 Parameter->setMustNotHaveReg();
541 Parameter->setStackOffset(ArgStackOffset);
542 _mov(Arg, Parameter);
543 }
544
484 void TargetMIPS32::addProlog(CfgNode *Node) { 545 void TargetMIPS32::addProlog(CfgNode *Node) {
485 (void)Node; 546 // Stack frame layout:
547 //
548 // +------------------------+
549 // | 1. preserved registers |
550 // +------------------------+
551 // | 2. padding |
552 // +------------------------+
553 // | 3. global spill area |
554 // +------------------------+
555 // | 4. padding |
556 // +------------------------+
557 // | 5. local spill area |
558 // +------------------------+
559 // | 6. padding |
560 // +------------------------+
561 // | 7. allocas |
562 // +------------------------+
563 // | 8. padding |
564 // +------------------------+
565 // | 9. out args |
566 // +------------------------+ <--- StackPointer
567 //
568 // The following variables record the size in bytes of the given areas:
569 // * PreservedRegsSizeBytes: area 1
570 // * SpillAreaPaddingBytes: area 2
571 // * GlobalsSize: area 3
572 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
573 // * LocalsSpillAreaSize: area 5
574 // * SpillAreaSizeBytes: areas 2 - 9
575 // * maxOutArgsSizeBytes(): area 9
576
577 Context.init(Node);
578 Context.setInsertPoint(Context.getCur());
579
580 SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
581 RegsUsed = SmallBitVector(CalleeSaves.size());
582
583 VarList SortedSpilledVariables;
584
585 size_t GlobalsSize = 0;
586 // If there is a separate locals area, this represents that area. Otherwise
587 // it counts any variable not counted by GlobalsSize.
588 SpillAreaSizeBytes = 0;
589 // If there is a separate locals area, this specifies the alignment for it.
590 uint32_t LocalsSlotsAlignmentBytes = 0;
591 // The entire spill locations area gets aligned to largest natural alignment
592 // of the variables that have a spill slot.
593 uint32_t SpillAreaAlignmentBytes = 0;
594 // For now, we don't have target-specific variables that need special
595 // treatment (no stack-slot-linked SpillVariable type).
596 std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
597 static constexpr bool AssignStackSlot = false;
598 static constexpr bool DontAssignStackSlot = !AssignStackSlot;
599 if (llvm::isa<Variable64On32>(Var)) {
600 return DontAssignStackSlot;
601 }
602 return AssignStackSlot;
603 };
604
605 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
606 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
607 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
608 &LocalsSlotsAlignmentBytes, TargetVarHook);
609 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
610 SpillAreaSizeBytes += GlobalsSize;
611
612 PreservedGPRs.reserve(CalleeSaves.size());
613
614 // Consider FP and RA as callee-save / used as needed.
615 if (UsesFramePointer) {
616 if (RegsUsed[RegMIPS32::Reg_FP]) {
617 llvm::report_fatal_error("Frame pointer has been used.");
618 }
619 CalleeSaves[RegMIPS32::Reg_FP] = true;
620 RegsUsed[RegMIPS32::Reg_FP] = true;
621 }
622 if (!MaybeLeafFunc) {
623 CalleeSaves[RegMIPS32::Reg_RA] = true;
624 RegsUsed[RegMIPS32::Reg_RA] = true;
625 }
626
627 // Make two passes over the used registers. The first pass records all the
628 // used registers -- and their aliases. Then, we figure out which GPR
629 // registers should be saved.
630 SmallBitVector ToPreserve(RegMIPS32::Reg_NUM);
631 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
632 if (CalleeSaves[i] && RegsUsed[i]) {
633 ToPreserve |= RegisterAliases[i];
634 }
635 }
636
637 uint32_t NumCallee = 0;
638 size_t PreservedRegsSizeBytes = 0;
639
640 // RegClasses is a tuple of
641 //
642 // <First Register in Class, Last Register in Class, Vector of Save Registers>
643 //
644 // We use this tuple to figure out which register we should save/restore
645 // during
646 // prolog/epilog.
647 using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
648 const RegClassType RegClass = RegClassType(
649 RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_GPR_Last, &PreservedGPRs);
650 const uint32_t FirstRegInClass = std::get<0>(RegClass);
651 const uint32_t LastRegInClass = std::get<1>(RegClass);
652 VarList *const PreservedRegsInClass = std::get<2>(RegClass);
653 for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) {
654 if (!ToPreserve[Reg]) {
655 continue;
656 }
657 ++NumCallee;
658 Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
659 PreservedRegsSizeBytes +=
660 typeWidthInBytesOnStack(PhysicalRegister->getType());
661 PreservedRegsInClass->push_back(PhysicalRegister);
662 }
663
664 Ctx->statsUpdateRegistersSaved(NumCallee);
665
666 // Align the variables area. SpillAreaPaddingBytes is the size of the region
667 // after the preserved registers and before the spill areas.
668 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
669 // locals area if they are separate.
670 assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES);
671 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
672 uint32_t SpillAreaPaddingBytes = 0;
673 uint32_t LocalsSlotsPaddingBytes = 0;
674 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
675 GlobalsSize, LocalsSlotsAlignmentBytes,
676 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
677 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
678 uint32_t GlobalsAndSubsequentPaddingSize =
679 GlobalsSize + LocalsSlotsPaddingBytes;
680
681 if (MaybeLeafFunc)
682 MaxOutArgsSizeBytes = 0;
683
684 // Adds the out args space to the stack, and align SP if necessary.
685 uint32_t TotalStackSizeBytes = PreservedRegsSizeBytes + SpillAreaSizeBytes;
686
687 // TODO(sagar.thakur): Combine fixed alloca and maximum out argument size with
688 // TotalStackSizeBytes once lowerAlloca is implemented and leaf function
689 // information is generated by lowerCall.
690
691 // Generate "addiu sp, sp, -TotalStackSizeBytes"
692 if (TotalStackSizeBytes) {
693 // Use the scratch register if needed to legalize the immediate.
694 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
695 _addiu(SP, SP, -(TotalStackSizeBytes));
696 }
697
698 Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);
699
700 if (!PreservedGPRs.empty()) {
701 uint32_t StackOffset = TotalStackSizeBytes;
702 for (Variable *Var : *PreservedRegsInClass) {
703 Variable *PhysicalRegister = getPhysicalRegister(Var->getRegNum());
704 StackOffset -= typeWidthInBytesOnStack(PhysicalRegister->getType());
705 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
706 OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
707 Func, IceType_i32, SP,
708 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
709 _sw(PhysicalRegister, MemoryLocation);
710 }
711 }
712
713 Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
714
715 // Generate "mov FP, SP" if needed.
716 if (UsesFramePointer) {
717 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
718 _mov(FP, SP);
719 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
720 Context.insert<InstFakeUse>(FP);
721 }
722
723 // Fill in stack offsets for stack args, and copy args into registers for
724 // those that were register-allocated. Args are pushed right to left, so
725 // Arg[0] is closest to the stack/frame pointer.
726 const VarList &Args = Func->getArgs();
727 size_t InArgsSizeBytes = 0;
728 TargetMIPS32::CallingConv CC;
729
730 for (Variable *Arg : Args) {
731 RegNumT DummyReg;
732 const Type Ty = Arg->getType();
733 // Skip arguments passed in registers.
734 if (CC.argInGPR(Ty, &DummyReg)) {
735 continue;
736 } else {
737 finishArgumentLowering(Arg, FP, TotalStackSizeBytes, &InArgsSizeBytes);
738 }
739 }
740
741 // Fill in stack offsets for locals.
742 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
743 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, UsesFramePointer);
Jim Stichnoth 2016/06/09 21:19:47 80-col
sagar.thakur 2016/06/10 09:40:35 Done.
744 this->HasComputedFrame = true;
745
746 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
747 OstreamLocker _(Func->getContext());
748 Ostream &Str = Func->getContext()->getStrDump();
749
750 Str << "Stack layout:\n";
751 uint32_t SPAdjustmentPaddingSize =
752 SpillAreaSizeBytes - LocalsSpillAreaSize -
753 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
754 MaxOutArgsSizeBytes;
755 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
756 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
757 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
758 << " globals spill area = " << GlobalsSize << " bytes\n"
759 << " globals-locals spill areas intermediate padding = "
760 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
761 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
762 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
763
764 Str << "Stack details:\n"
765 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
766 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
767 << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
768 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
769 << " bytes\n"
770 << " is FP based = " << 1 << "\n";
771 }
486 return; 772 return;
487 UnimplementedError(getFlags());
488 } 773 }
489 774
490 void TargetMIPS32::addEpilog(CfgNode *Node) { 775 void TargetMIPS32::addEpilog(CfgNode *Node) {
491 (void)Node; 776 (void)Node;
492 return; 777 return;
493 UnimplementedError(getFlags()); 778 UnimplementedError(getFlags());
494 } 779 }
495 780
496 Operand *TargetMIPS32::loOperand(Operand *Operand) { 781 Operand *TargetMIPS32::loOperand(Operand *Operand) {
497 assert(Operand->getType() == IceType_i64); 782 assert(Operand->getType() == IceType_i64);
(...skipping 1201 matching lines...) Expand 10 before | Expand all | Expand 10 after
1699 Str << "\t.set\t" 1984 Str << "\t.set\t"
1700 << "nomips16\n"; 1985 << "nomips16\n";
1701 } 1986 }
1702 1987
1703 SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM]; 1988 SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
1704 SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM]; 1989 SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
1705 SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM]; 1990 SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
1706 1991
1707 } // end of namespace MIPS32 1992 } // end of namespace MIPS32
1708 } // end of namespace Ice 1993 } // end of namespace Ice
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698