Chromium Code Reviews

Unified Diff: src/IceTargetLoweringARM32.cpp

Issue 1467473003: Subzero. ARM32. No more SP frobbing. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fixes the Offsetis typo. Created 5 years, 1 month ago
//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
(...skipping 215 matching lines...)
      continue;
    }
    Lo->setRegNum(FirstReg);
    Lo->setMustHaveReg();
    Hi->setRegNum(FirstReg + 1);
    Hi->setMustHaveReg();
  }
}
} // end of anonymous namespace

+uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
+  TargetARM32::CallingConv CC;
+  size_t OutArgsSizeBytes = 0;
+  for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
+    Operand *Arg = legalizeUndef(Call->getArg(i));
+    Type Ty = Arg->getType();
+    if (Ty == IceType_i64) {
+      std::pair<int32_t, int32_t> Regs;
+      if (CC.I64InRegs(&Regs)) {
+        continue;
+      }
+    } else if (isVectorType(Ty) || isFloatingType(Ty)) {
+      int32_t Reg;
+      if (CC.FPInReg(Ty, &Reg)) {
+        continue;
+      }
+    } else {
+      assert(Ty == IceType_i32);
+      int32_t Reg;
+      if (CC.I32InReg(&Reg)) {
+        continue;
+      }
+    }
+
+    OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
+    OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
+  }
+
+  return applyStackAlignment(OutArgsSizeBytes);
+}
+
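The accumulation above follows the AAPCS stack-argument rules: every argument that misses register assignment is first aligned to its type's stack alignment, then its on-stack width is added, and the whole area is rounded up to the stack alignment at the end. A minimal standalone sketch of that arithmetic (hypothetical names, not Subzero code; the 8-byte alignment is the vanilla AAPCS value, and the real target may use a larger one):

    #include <cstddef>

    constexpr size_t StackAlignment = 8; // assumed AAPCS value

    static size_t alignTo(size_t Value, size_t Align) {
      return (Value + Align - 1) & ~(Align - 1);
    }

    // One i32 and one i64 that both spilled to the stack: the i32 lands
    // at offset 0, padding brings the i64 to offset 8, 16 bytes total.
    static size_t exampleOutArgsBytes() {
      size_t Bytes = 0;
      Bytes = alignTo(Bytes, 4) + 4;         // i32
      Bytes = alignTo(Bytes, 8) + 8;         // i64 (8-byte aligned)
      return alignTo(Bytes, StackAlignment); // final rounding -> 16
    }
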
+void TargetARM32::findMaxStackOutArgsSize() {
+  // MinNeededOutArgsBytes should be updated if the Target ever creates an
Jim Stichnoth 2015/11/22 04:09:43 s/an/a/ at least for the way I pronounce "high"
John 2015/11/23 18:54:10 Done.
+  // high-level InstCall that requires more stack bytes.
+  constexpr size_t MinNeededOutArgsBytes = 0;
+  MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
+  for (CfgNode *Node : Func->getNodes()) {
+    Context.init(Node);
+    while (!Context.atEnd()) {
+      PostIncrLoweringContext PostIncrement(Context);
+      Inst *CurInstr = Context.getCur();
+      if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
+        SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
+        MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
+      }
+    }
+  }
+}
+
void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094
  genTargetHelperCalls();
+  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
(...skipping 87 matching lines...)
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?
  genTargetHelperCalls();
+  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
(...skipping 107 matching lines...)
    return;
  }
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error(
        "Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  int32_t BaseRegNum = Var->getBaseRegNum();
  if (BaseRegNum == Variable::NoRegister) {
    BaseRegNum = getFrameOrStackReg();
-    if (!hasFramePointer())
-      Offset += getStackAdjustment();
  }
  const Type VarTy = Var->getType();
  Str << "[" << getRegName(BaseRegNum, VarTy);
  if (Offset != 0) {
    Str << ", " << getConstantPrefix() << Offset;
  }
  Str << "]";
}

bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
(...skipping 175 matching lines...)
  // | 2. padding             |
  // +------------------------+ <--- FramePointer (if used)
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
-  // | 7. allocas             |
+  // | 7. allocas (variable)  |
+  // +------------------------+
+  // | 8. padding             |
+  // +------------------------+
+  // | 9. out args            |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes: area 2
  //  * GlobalsSize: area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize: area 5
-  //  * SpillAreaSizeBytes: areas 2 - 6
+  //  * SpillAreaSizeBytes: areas 2 - 6, and 9
+  //  * MaxOutArgsSizeBytes: area 9
+  //
  // Determine stack frame offsets for each Variable without a register
  // assignment. This can be done as one variable per stack slot. Or, do
  // coalescing by running the register allocator again with an infinite set of
  // registers (as a side effect, this gives variables a second chance at
  // physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one block of
  // space on the frame for globals (variables with multi-block lifetime), and
  // one block to share for locals (single-block lifetime).

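To make the area bookkeeping concrete, here is a hypothetical frame worked through the same arithmetic the prolog performs below (all sizes invented for illustration, and the stack alignment assumed to be 8 bytes):

    #include <cassert>
    #include <cstdint>

    static uint32_t alignTo(uint32_t V, uint32_t A) {
      return (V + A - 1) & ~(A - 1);
    }

    int main() {
      const uint32_t PreservedRegsSizeBytes = 8; // area 1: push {r11, lr}
      const uint32_t GlobalsSize = 12;           // area 3
      const uint32_t LocalsSpillAreaSize = 16;   // area 5
      const uint32_t MaxOutArgsSizeBytes = 16;   // area 9
      // Area 4 pads the locals to an 8-byte boundary after the globals.
      uint32_t SpillAreaSizeBytes =
          alignTo(GlobalsSize, 8) + LocalsSpillAreaSize;
      // The out args sit below everything else; keep the frame 8-aligned.
      uint32_t StackSize =
          alignTo(PreservedRegsSizeBytes + SpillAreaSizeBytes, 8);
      StackSize = alignTo(StackSize + MaxOutArgsSizeBytes, 8);
      SpillAreaSizeBytes = StackSize - PreservedRegsSizeBytes;
      assert(SpillAreaSizeBytes == 48); // what "sub sp, #imm" will use
      return 0;
    }
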
(...skipping 85 matching lines...)
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

-  // Align SP if necessary.
-  if (NeedsStackAlignment) {
+  // Adds the out args space to the stack, and align SP if necessary.
+  if (!NeedsStackAlignment) {
+    SpillAreaSizeBytes += MaxOutArgsSizeBytes;
+  } else {
    uint32_t StackOffset = PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
+    StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub sp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes) {
    // Use the scratch register if needed to legalize the immediate.
    Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                  Legal_Reg | Legal_Flex, getReservedTmpReg());
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _sub(SP, SP, SubAmount);
  }
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

-  resetStackAdjustment();
-
  // Fill in stack offsets for stack args, and copy args into registers for
  // those that were register-allocated. Args are pushed right to left, so
  // Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes;
  if (!UsesFramePointer)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
(...skipping 23 matching lines...)
                      UsesFramePointer);
  this->HasComputedFrame = true;

  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker _(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t SPAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
-        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
+        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
+        MaxOutArgsSizeBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

    Str << "Stack details:\n"
        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
+        << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is FP based = " << UsesFramePointer << "\n";
  }
}

void TargetARM32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
(...skipping 76 matching lines...)
  _ret(LR, RetValue);
  _bundle_unlock();
  RI->setDeleted();
}

bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const {
  constexpr bool ZeroExt = false;
  return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset);
}

-Variable *TargetARM32::newBaseRegister(int32_t OriginalOffset,
-                                       int32_t StackAdjust,
-                                       Variable *OrigBaseReg) {
-  int32_t Offset = OriginalOffset + StackAdjust;
+Variable *TargetARM32::newBaseRegister(int32_t Offset, Variable *OrigBaseReg) {
  // Legalize will likely need a movw/movt combination, but if the top bits are
  // all 0 from negating the offset and subtracting, we could use that instead.
  bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
  if (ShouldSub)
    Offset = -Offset;
  Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset),
                                Legal_Reg | Legal_Flex, getReservedTmpReg());
  Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg());
  if (ShouldSub)
    _sub(ScratchReg, OrigBaseReg, OffsetVal);
  else
    _add(ScratchReg, OrigBaseReg, OffsetVal);
  return ScratchReg;
}

OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
-                                               int32_t StackAdjust,
                                               Variable *OrigBaseReg,
                                               Variable **NewBaseReg,
                                               int32_t *NewBaseOffset) {
-  if (isLegalMemOffset(Ty, Offset + StackAdjust)) {
+  if (isLegalMemOffset(Ty, Offset)) {
    return OperandARM32Mem::create(
-        Func, Ty, OrigBaseReg, llvm::cast<ConstantInteger32>(
-                                   Ctx->getConstantInt32(Offset + StackAdjust)),
+        Func, Ty, OrigBaseReg,
+        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)),
        OperandARM32Mem::Offset);
  }

  if (*NewBaseReg == nullptr) {
-    *NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg);
-    *NewBaseOffset = Offset + StackAdjust;
+    *NewBaseReg = newBaseRegister(Offset, OrigBaseReg);
+    *NewBaseOffset = Offset;
  }

-  int32_t OffsetDiff = Offset + StackAdjust - *NewBaseOffset;
+  int32_t OffsetDiff = Offset - *NewBaseOffset;
  if (!isLegalMemOffset(Ty, OffsetDiff)) {
-    *NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg);
-    *NewBaseOffset = Offset + StackAdjust;
+    *NewBaseReg = newBaseRegister(Offset, OrigBaseReg);
+    *NewBaseOffset = Offset;
    OffsetDiff = 0;
  }

  return OperandARM32Mem::create(
      Func, Ty, *NewBaseReg,
      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetDiff)),
      OperandARM32Mem::Offset);
}

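Which offsets count as legal is decided by OperandARM32Mem::canHoldOffset(). The gist, assuming the usual ARM-mode immediate ranges, is that word and unsigned-byte loads/stores carry a 12-bit immediate while the halfword and signed-byte forms only carry 8 bits. A rough sketch of that rule (a simplification, not the real predicate):

    #include <cstdint>

    static bool sketchCanHoldOffset(bool HasWideImmediate, int32_t Offset) {
      // +/-4095 for ldr/str and ldrb/strb; +/-255 for ldrh/ldrsb/ldrsh.
      const int32_t Limit = HasWideImmediate ? 4095 : 255;
      return -Limit <= Offset && Offset <= Limit;
    }
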
-void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, int32_t StackAdjust,
-                              Variable *OrigBaseReg, Variable **NewBaseReg,
-                              int32_t *NewBaseOffset) {
+void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg,
+                              Variable **NewBaseReg, int32_t *NewBaseOffset) {
  Variable *Dest = MovInstr->getDest();
  assert(Dest != nullptr);
  Type DestTy = Dest->getType();
  assert(DestTy != IceType_i64);

  Operand *Src = MovInstr->getSrc(0);
  Type SrcTy = Src->getType();
  (void)SrcTy;
  assert(SrcTy != IceType_i64);

  if (MovInstr->isMultiDest() || MovInstr->isMultiSource())
    return;

  bool Legalized = false;
  if (!Dest->hasReg()) {
    auto *const SrcR = llvm::cast<Variable>(Src);
    assert(SrcR->hasReg());
    const int32_t Offset = Dest->getStackOffset();
    // This is a _mov(Mem(), Variable), i.e., a store.
-    _str(SrcR, createMemOperand(DestTy, Offset, StackAdjust, OrigBaseReg,
-                                NewBaseReg, NewBaseOffset),
+    _str(SrcR, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
+                                NewBaseOffset),
         MovInstr->getPredicate());
    // _str() does not have a Dest, so we add a fake-def(Dest).
    Context.insert(InstFakeDef::create(Func, Dest));
    Legalized = true;
  } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
    if (!Var->hasReg()) {
      const int32_t Offset = Var->getStackOffset();
-      _ldr(Dest, createMemOperand(DestTy, Offset, StackAdjust, OrigBaseReg,
-                                  NewBaseReg, NewBaseOffset),
+      _ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
+                                  NewBaseOffset),
           MovInstr->getPredicate());
      Legalized = true;
    }
  }

  if (Legalized) {
    if (MovInstr->isDestRedefined()) {
      _set_dest_redefined();
    }
    MovInstr->setDeleted();
  }
}

void TargetARM32::legalizeStackSlots() {
  // If a stack variable's frame offset doesn't fit, convert from:
  //   ldr X, OFF[SP]
  // to:
  //   movw/movt TMP, OFF_PART
  //   add TMP, TMP, SP
  //   ldr X, OFF_MORE[TMP]
  //
  // This is safe because we have reserved TMP, and add for ARM does not
  // clobber the flags register.
  Func->dump("Before legalizeStackSlots");
  assert(hasComputedFrame());
  Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
-  int32_t StackAdjust = 0;
  // Do a fairly naive greedy clustering for now. Pick the first stack slot
  // that's out of bounds and make a new base reg using the architecture's temp
  // register. If that works for the next slot, then great. Otherwise, create a
  // new base register, clobbering the previous base register. Never share a
  // base reg across different basic blocks. This isn't ideal if local and
  // multi-block variables are far apart and their references are interspersed.
  // It may help to be more coordinated about assign stack slot numbers and may
  // help to assign smaller offsets to higher-weight variables so that they
  // don't depend on this legalization.
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    Variable *NewBaseReg = nullptr;
    int32_t NewBaseOffset = 0;
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = Context.getCur();
      Variable *Dest = CurInstr->getDest();

      // Check if the previous NewBaseReg is clobbered, and reset if needed.
      if ((Dest && NewBaseReg && Dest->hasReg() &&
           Dest->getRegNum() == NewBaseReg->getBaseRegNum()) ||
          llvm::isa<InstFakeKill>(CurInstr)) {
        NewBaseReg = nullptr;
        NewBaseOffset = 0;
      }

-      // The stack adjustment only matters if we are using SP instead of FP.
-      if (!hasFramePointer()) {
-        if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) {
-          StackAdjust += AdjInst->getAmount();
-          NewBaseOffset += AdjInst->getAmount();
-          continue;
-        }
-        if (llvm::isa<InstARM32Call>(CurInstr)) {
-          NewBaseOffset -= StackAdjust;
-          StackAdjust = 0;
-          continue;
-        }
-      }
-
      if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
-        legalizeMov(MovInstr, StackAdjust, OrigBaseReg, &NewBaseReg,
-                    &NewBaseOffset);
+        legalizeMov(MovInstr, OrigBaseReg, &NewBaseReg, &NewBaseOffset);
      }
    }
  }
}

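As a made-up illustration of the clustering: two spill slots at SP+4100 and SP+4104 both miss the ldr/str immediate range, so the first access pays for the base-register setup and the second reuses it (ip standing in for the reserved TMP register):

    movw ip, #4100        @ materialize the out-of-range offset
    add  ip, ip, sp
    ldr  r0, [ip, #0]     @ was: ldr r0, [sp, #4100] (not encodable)
    ldr  r1, [ip, #4]     @ was: ldr r1, [sp, #4104] (reuses the base)
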
Operand *TargetARM32::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
(...skipping 141 matching lines...)
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
    TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
    _add(T, T, AddAmount);
    alignRegisterPow2(T, Alignment);
    _sub(SP, SP, T);
  }
-  _mov(Dest, SP);
+  Variable *T = SP;
+  if (MaxOutArgsSizeBytes != 0) {
+    T = makeReg(getPointerType());
+    Operand *OutArgsSizeRF = legalize(
+        Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
+    _add(T, SP, OutArgsSizeRF);
+  }
+  _mov(Dest, T);
}

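Note the changed tail above: because the out-args area now lives between SP and the alloca memory (area 9 below area 7 in the frame diagram), an alloca's result is SP plus MaxOutArgsSizeBytes rather than SP itself. A made-up example, assuming a 32-byte alloca and 16 bytes of out args:

    sub sp, sp, #32       @ carve out the alloca
    add r0, sp, #16       @ Dest points above the reserved out-args area
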
void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
  if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
    return;
  Variable *SrcLoReg = legalizeToReg(SrcLo);
  switch (Ty) {
  default:
    llvm::report_fatal_error("Unexpected type");
  case IceType_i8:
(...skipping 803 matching lines...)
  }
  case InstArithmetic::Xor: {
    Variable *Src0R = Srcs.src0R(this);
    Operand *Src1RF = Srcs.src1RF(this);
    _eor(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Sub: {
    if (Srcs.hasConstOperand()) {
+      // TODO(jpp): lowering Src0R here is wrong -- Src0R it is not guaranteed
+      // to be used.
      Variable *Src0R = Srcs.src0R(this);
      if (Srcs.immediateIsFlexEncodable()) {
        Operand *Src1RF = Srcs.src1RF(this);
        if (Srcs.swappedOperands()) {
          _rsb(T, Src0R, Src1RF);
        } else {
          _sub(T, Src0R, Src1RF);
        }
        _mov(Dest, T);
        return;
(...skipping 233 matching lines...)

  // Assign arguments to registers and stack. Also reserve stack.
  TargetARM32::CallingConv CC;
  // Pair of Arg Operand -> GPR number assignments.
  llvm::SmallVector<std::pair<Operand *, int32_t>,
                    TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
  llvm::SmallVector<std::pair<Operand *, int32_t>,
                    TargetARM32::CallingConv::ARM32_MAX_FP_REG_UNITS> FPArgs;
  // Pair of Arg Operand -> stack offset.
  llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
-  int32_t ParameterAreaSizeBytes = 0;
+  size_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Instr->getArg(i));
    Type Ty = Arg->getType();
    bool InRegs = false;
    if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> Regs;
      if (CC.I64InRegs(&Regs)) {
(...skipping 23 matching lines...)
          applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
      StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is assumed that
  // the stack is already aligned at the start of the calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

-  // Subtract the appropriate amount for the argument area. This also takes
-  // care of setting the stack adjustment during emission.
-  //
-  // TODO: If for some reason the call instruction gets dead-code eliminated
-  // after lowering, we would need to ensure that the pre-call and the
-  // post-call esp adjustment get eliminated as well.
-  if (ParameterAreaSizeBytes) {
-    Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
-                                  Legal_Reg | Legal_Flex);
-    _adjust_stack(ParameterAreaSizeBytes, SubAmount);
+  if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) {
+    llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max.");
  }

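The fatal-error guard above replaces the pre/post-call SP adjustment: since the prolog already reserved MaxOutArgsSizeBytes, no per-call SP frobbing is needed, only a check that the prediction really was an upper bound. A sketch of the difference for a call with 16 stack-arg bytes (illustrative, not emitted output):

    @ before: SP bracketed every call
    sub sp, sp, #16
    str r4, [sp, #0]
    bl  callee
    add sp, sp, #16

    @ after: stores target the preallocated out-args area directly
    str r4, [sp, #0]
    bl  callee
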
  // Copy arguments that are passed on the stack to the appropriate stack
  // locations.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  for (auto &StackArg : StackArgs) {
    ConstantInteger32 *Loc =
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
    Type Ty = StackArg.first->getType();
    OperandARM32Mem *Addr;
(...skipping 72 matching lines...)
    // Generate a FakeUse of register arguments so that they do not get dead
    // code eliminated as a result of the FakeKill of scratch registers after
    // the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }
  Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

-  // Add the appropriate offset to SP. The call instruction takes care of
-  // resetting the stack offset during emission.
-  if (ParameterAreaSizeBytes) {
-    Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
-                                  Legal_Reg | Legal_Flex);
-    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
-    _add(SP, SP, AddAmount);
-  }
-
  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
(...skipping 2869 matching lines...)
  // Technically R9 is used for TLS with Sandboxing, and we reserve it.
  // However, for compatibility with current NaCl LLVM, don't claim that.
  Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
}

llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
llvm::SmallBitVector TargetARM32::ScratchRegs;

} // end of namespace Ice