Chromium Code Reviews

Diff: src/IceTargetLoweringARM32.cpp

Issue 1467473003: Subzero. ARM32. No more SP frobbing. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Adds TODO() for fixing an unrelated bug. Created 5 years, 1 month ago
//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
(...skipping 215 matching lines...)
      continue;
    }
    Lo->setRegNum(FirstReg);
    Lo->setMustHaveReg();
    Hi->setRegNum(FirstReg + 1);
    Hi->setMustHaveReg();
  }
}
} // end of anonymous namespace

+void TargetARM32::findMaxStackOutArgsSize() {
+  // MinNeededOutArgsBytes should be updated if the Target ever creates a
+  // high-level InstCall that requires more stack bytes.
+  constexpr size_t MinNeededOutArgsBytes = 0;
+  MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
+  for (CfgNode *Node : Func->getNodes()) {
+    Context.init(Node);
+    while (!Context.atEnd()) {

sehr 2015/11/20 20:07:35: Again, factor to call the virtual method here.
John 2015/11/20 22:17:17: Done.
+      PostIncrLoweringContext PostIncrement(Context);
+      Inst *CurInstr = Context.getCur();
+      if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
+        TargetARM32::CallingConv CC;
+        size_t OutArgsSizeBytes = 0;
+        for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
+          Operand *Arg = legalizeUndef(Call->getArg(i));
+          Type Ty = Arg->getType();
+          if (Ty == IceType_i64) {
+            std::pair<int32_t, int32_t> Regs;
+            if (CC.I64InRegs(&Regs)) {
+              continue;
+            }
+          } else if (isVectorType(Ty) || isFloatingType(Ty)) {
+            int32_t Reg;
+            if (CC.FPInReg(Ty, &Reg)) {
+              continue;
+            }
+          } else {
+            assert(Ty == IceType_i32);
+            int32_t Reg;
+            if (CC.I32InReg(&Reg)) {
+              continue;
+            }
+          }
+
+          OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
+          OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
+        }
+        OutArgsSizeBytes = applyStackAlignment(OutArgsSizeBytes);
+        MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
+      }
+    }
+  }
+}
+
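For intuition, a minimal standalone sketch of the per-call scan above (not part of the patch), assuming an AAPCS-style convention: r0-r3 for i32, an even-aligned register pair for i64, 16 single-precision VFP units for FP args, and a 16-byte final alignment. All names are illustrative, not Subzero's CallingConv API.

// Standalone sketch of findMaxStackOutArgsSize's per-call accounting.
// Assumptions (not from the patch): r0-r3 for i32, an even/odd pair for
// i64, 16 VFP units, 16-byte final alignment, no register back-filling.
#include <cstdint>
#include <cstdio>
#include <vector>

enum class ArgTy { I32, I64, F32 };

static uint32_t alignUp(uint32_t X, uint32_t A) { return (X + A - 1) & ~(A - 1); }

static uint32_t outArgsBytes(const std::vector<ArgTy> &Args) {
  uint32_t NextGPR = 0, NextFPR = 0, Bytes = 0;
  for (ArgTy Ty : Args) {
    switch (Ty) {
    case ArgTy::I32:
      if (NextGPR < 4) { ++NextGPR; continue; } // passed in a register
      break;
    case ArgTy::I64:
      NextGPR = alignUp(NextGPR, 2); // needs an even/odd pair (r0:r1, r2:r3)
      if (NextGPR + 1 < 4) { NextGPR += 2; continue; }
      break;
    case ArgTy::F32:
      if (NextFPR < 16) { ++NextFPR; continue; }
      break;
    }
    const uint32_t Size = (Ty == ArgTy::I64) ? 8 : 4;
    Bytes = alignUp(Bytes, Size) + Size; // applyStackAlignmentTy analogue
  }
  return alignUp(Bytes, 16); // applyStackAlignment analogue
}

int main() {
  // Six i32 args: four land in r0-r3, two spill to the out-args area.
  const std::vector<ArgTy> Call(6, ArgTy::I32);
  std::printf("out-args bytes: %u\n", outArgsBytes(Call)); // prints 16
  return 0;
}

For a call with six i32 arguments, four go in r0-r3 and two spill, so the call contributes 8 bytes, rounded up to 16 by the final alignment, toward MaxOutArgsSizeBytes.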
void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094
+  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
(...skipping 86 matching lines...)
  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?
+  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
(...skipping 107 matching lines...)
    return;
  }
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error(
        "Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  int32_t BaseRegNum = Var->getBaseRegNum();
  if (BaseRegNum == Variable::NoRegister) {
    BaseRegNum = getFrameOrStackReg();
-    if (!hasFramePointer())
-      Offset += getStackAdjustment();
  }
  const Type VarTy = Var->getType();
  Str << "[" << getRegName(BaseRegNum, VarTy);
  if (Offset != 0) {
    Str << ", " << getConstantPrefix() << Offset;
  }
  Str << "]";
}

bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
(...skipping 175 matching lines...)
  // | 2. padding             |
  // +------------------------+ <--- FramePointer (if used)
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
-  // | 7. allocas             |
+  // | 7. allocas (variable)  |
+  // +------------------------+
+  // | 8. padding             |
+  // +------------------------+
+  // | 9. out args            |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes: area 2
  //  * GlobalsSize: area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize: area 5
-  //  * SpillAreaSizeBytes: areas 2 - 6
+  //  * SpillAreaSizeBytes: areas 2 - 6, and 9
+  //  * MaxOutArgsSizeBytes: area 9
  //
  // Determine stack frame offsets for each Variable without a register
  // assignment. This can be done as one variable per stack slot. Or, do
  // coalescing by running the register allocator again with an infinite set of
  // registers (as a side effect, this gives variables a second chance at
  // physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one block of
  // space on the frame for globals (variables with multi-block lifetime), and
  // one block to share for locals (single-block lifetime).

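As a back-of-the-envelope check of this bookkeeping (not part of the patch; every value is made up, and a 16-byte stack alignment is assumed — the real sizes come from the register allocator and findMaxStackOutArgsSize()):

// Illustrative prologue arithmetic for the frame layout above.
#include <cstdint>
#include <cstdio>

static uint32_t alignUp(uint32_t X, uint32_t A) { return (X + A - 1) & ~(A - 1); }

int main() {
  const uint32_t PreservedRegsSizeBytes = 8; // area 1, e.g. push {fp, lr}
  uint32_t SpillAreaSizeBytes = 20;          // areas 2-6, before alignment
  const uint32_t MaxOutArgsSizeBytes = 16;   // area 9
  // Mirror of the aligned path below: align the spill areas, then append
  // the out-args area while keeping SP 16-byte aligned.
  uint32_t StackSize = alignUp(PreservedRegsSizeBytes + SpillAreaSizeBytes, 16);
  StackSize = alignUp(StackSize + MaxOutArgsSizeBytes, 16);
  SpillAreaSizeBytes = StackSize - PreservedRegsSizeBytes;
  std::printf("sub sp, sp, #%u\n", SpillAreaSizeBytes); // prints 40
  return 0;
}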
(...skipping 85 matching lines...)
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

-  // Align SP if necessary.
-  if (NeedsStackAlignment) {
+  // Add the out-args space to the stack, and align SP if necessary.
+  if (!NeedsStackAlignment) {
+    SpillAreaSizeBytes += MaxOutArgsSizeBytes;
+  } else {
    uint32_t StackOffset = PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
+    StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub sp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes) {
    // Use the scratch register if needed to legalize the immediate.
    Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                  Legal_Reg | Legal_Flex, getReservedTmpReg());
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _sub(SP, SP, SubAmount);
  }
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

-  resetStackAdjustment();
-
  // Fill in stack offsets for stack args, and copy args into registers for
  // those that were register-allocated. Args are pushed right to left, so
  // Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes;
  if (!UsesFramePointer)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
(...skipping 23 matching lines...)
                      UsesFramePointer);
  this->HasComputedFrame = true;

  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker _(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t SPAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
-        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
+        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
+        MaxOutArgsSizeBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

    Str << "Stack details:\n"
        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
+        << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is FP based = " << UsesFramePointer << "\n";
  }
}

void TargetARM32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
(...skipping 76 matching lines...)
  _ret(LR, RetValue);
  _bundle_unlock();
  RI->setDeleted();
}

bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const {
  constexpr bool ZeroExt = false;
  return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset);
}

-Variable *TargetARM32::newBaseRegister(int32_t OriginalOffset,
-                                       int32_t StackAdjust,
-                                       Variable *OrigBaseReg) {
-  int32_t Offset = OriginalOffset + StackAdjust;
+Variable *TargetARM32::newBaseRegister(int32_t Offset, Variable *OrigBaseReg) {
  // Legalize will likely need a movw/movt combination, but if the top bits are
  // all 0 from negating the offset and subtracting, we could use that instead.
  bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
  if (ShouldSub)
    Offset = -Offset;
  Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset),
                                Legal_Reg | Legal_Flex, getReservedTmpReg());
  Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg());
  if (ShouldSub)
    _sub(ScratchReg, OrigBaseReg, OffsetVal);
  else
    _add(ScratchReg, OrigBaseReg, OffsetVal);
  return ScratchReg;
}

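The negate-and-test trick above chooses between materializing the offset with a single movw followed by a sub, and a movw/movt pair followed by an add. A tiny demonstration (not part of the patch) with made-up offsets:

// Demonstration of the ShouldSub test in newBaseRegister: if -Offset fits
// in 16 bits, one movw plus a sub suffices; otherwise materializing the
// offset may need movw+movt before the add.
#include <cstdint>
#include <cstdio>

static bool shouldSub(int32_t Offset) {
  return (-Offset & 0xFFFF0000) == 0;
}

int main() {
  // -4096 negates to 4096, which fits in 16 bits: movw, then sub.
  // -70000 negates to 70000, which needs 17 bits: movw/movt, then add.
  for (int32_t Offset : {-4096, -70000, 4096}) {
    std::printf("Offset %d -> %s\n", Offset,
                shouldSub(Offset) ? "movw; sub" : "movw/movt; add");
  }
  return 0;
}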
OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
-                                               int32_t StackAdjust,
                                               Variable *OrigBaseReg,
                                               Variable **NewBaseReg,
                                               int32_t *NewBaseOffset) {
-  if (isLegalMemOffset(Ty, Offset + StackAdjust)) {
+  if (isLegalMemOffset(Ty, Offset)) {
    return OperandARM32Mem::create(
-        Func, Ty, OrigBaseReg, llvm::cast<ConstantInteger32>(
-                                   Ctx->getConstantInt32(Offset + StackAdjust)),
+        Func, Ty, OrigBaseReg,
+        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)),
        OperandARM32Mem::Offset);
  }

  if (*NewBaseReg == nullptr) {
-    *NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg);
-    *NewBaseOffset = Offset + StackAdjust;
+    *NewBaseReg = newBaseRegister(Offset, OrigBaseReg);
+    *NewBaseOffset = Offset;
  }

-  int32_t OffsetDiff = Offset + StackAdjust - *NewBaseOffset;
+  int32_t OffsetDiff = Offset - *NewBaseOffset;
  if (!isLegalMemOffset(Ty, OffsetDiff)) {
-    *NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg);
-    *NewBaseOffset = Offset + StackAdjust;
+    *NewBaseReg = newBaseRegister(Offset, OrigBaseReg);
+    *NewBaseOffset = Offset;
    OffsetDiff = 0;
  }

  return OperandARM32Mem::create(
      Func, Ty, *NewBaseReg,
      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetDiff)),
      OperandARM32Mem::Offset);
}

-void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, int32_t StackAdjust,
-                              Variable *OrigBaseReg, Variable **NewBaseReg,
-                              int32_t *NewBaseOffset) {
+void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg,
+                              Variable **NewBaseReg, int32_t *NewBaseOffset) {
  Variable *Dest = MovInstr->getDest();
  assert(Dest != nullptr);
  Type DestTy = Dest->getType();
  assert(DestTy != IceType_i64);

  Operand *Src = MovInstr->getSrc(0);
  Type SrcTy = Src->getType();
  (void)SrcTy;
  assert(SrcTy != IceType_i64);

  if (MovInstr->isMultiDest() || MovInstr->isMultiSource())
    return;

  bool Legalized = false;
  if (!Dest->hasReg()) {
    auto *const SrcR = llvm::cast<Variable>(Src);
    assert(SrcR->hasReg());
    const int32_t Offset = Dest->getStackOffset();
    // This is a _mov(Mem(), Variable), i.e., a store.
-    _str(SrcR, createMemOperand(DestTy, Offset, StackAdjust, OrigBaseReg,
-                                NewBaseReg, NewBaseOffset),
+    _str(SrcR, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
+                                NewBaseOffset),
         MovInstr->getPredicate());
    // _str() does not have a Dest, so we add a fake-def(Dest).
    Context.insert(InstFakeDef::create(Func, Dest));
    Legalized = true;
  } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
    if (!Var->hasReg()) {
      const int32_t Offset = Var->getStackOffset();
-      _ldr(Dest, createMemOperand(DestTy, Offset, StackAdjust, OrigBaseReg,
-                                  NewBaseReg, NewBaseOffset),
+      _ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
+                                  NewBaseOffset),
           MovInstr->getPredicate());
      Legalized = true;
    }
  }

  if (Legalized) {
    if (MovInstr->isDestRedefined()) {
      _set_dest_redefined();
    }
    MovInstr->setDeleted();
  }
}

void TargetARM32::legalizeStackSlots() {
  // If a stack variable's frame offset doesn't fit, convert from:
  //   ldr X, OFF[SP]
  // to:
  //   movw/movt TMP, OFF_PART
  //   add TMP, TMP, SP
  //   ldr X, OFF_MORE[TMP]
  //
  // This is safe because we have reserved TMP, and add for ARM does not
  // clobber the flags register.
  Func->dump("Before legalizeStackSlots");
  assert(hasComputedFrame());
  Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
-  int32_t StackAdjust = 0;
  // Do a fairly naive greedy clustering for now. Pick the first stack slot
  // that's out of bounds and make a new base reg using the architecture's temp
  // register. If that works for the next slot, then great. Otherwise, create a
  // new base register, clobbering the previous base register. Never share a
  // base reg across different basic blocks. This isn't ideal if local and
  // multi-block variables are far apart and their references are interspersed.
  // It may help to be more coordinated about assigning stack slot numbers and
  // may help to assign smaller offsets to higher-weight variables so that they
  // don't depend on this legalization.
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    Variable *NewBaseReg = nullptr;
    int32_t NewBaseOffset = 0;
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = Context.getCur();
      Variable *Dest = CurInstr->getDest();

      // Check if the previous NewBaseReg is clobbered, and reset if needed.
      if ((Dest && NewBaseReg && Dest->hasReg() &&
           Dest->getRegNum() == NewBaseReg->getBaseRegNum()) ||
          llvm::isa<InstFakeKill>(CurInstr)) {
        NewBaseReg = nullptr;
        NewBaseOffset = 0;
      }

-      // The stack adjustment only matters if we are using SP instead of FP.
-      if (!hasFramePointer()) {
-        if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) {
-          StackAdjust += AdjInst->getAmount();
-          NewBaseOffset += AdjInst->getAmount();
-          continue;
-        }
-        if (llvm::isa<InstARM32Call>(CurInstr)) {
-          NewBaseOffset -= StackAdjust;
-          StackAdjust = 0;
-          continue;
-        }
-      }
-
      if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
-        legalizeMov(MovInstr, StackAdjust, OrigBaseReg, &NewBaseReg,
-                    &NewBaseOffset);
+        legalizeMov(MovInstr, OrigBaseReg, &NewBaseReg, &NewBaseOffset);
      }
    }
  }
}
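A toy model of this greedy clustering (not part of the patch), assuming the word-sized ldr/str immediate range of +/-4095; the real per-type ranges live in OperandARM32Mem::canHoldOffset, and halfword/VFP accesses are narrower. It counts how often the reserved temporary must be re-materialized for a made-up stream of frame offsets:

// Toy model of legalizeStackSlots' base-register reuse. The offset stream
// and the imm12 range assumption are illustrative only.
#include <cstdint>
#include <cstdio>
#include <vector>

static bool isLegalWordOffset(int32_t Offset) {
  return Offset >= -4095 && Offset <= 4095;
}

int main() {
  const std::vector<int32_t> Offsets = {4200, 4204, 9000, 4208};
  int32_t NewBaseOffset = 0;
  bool HaveNewBase = false;
  int Materializations = 0;
  for (int32_t Offset : Offsets) {
    if (isLegalWordOffset(Offset))
      continue; // Use SP/FP directly.
    if (!HaveNewBase || !isLegalWordOffset(Offset - NewBaseOffset)) {
      // movw/movt TMP, Offset; add TMP, TMP, SP -- a new base register.
      NewBaseOffset = Offset;
      HaveNewBase = true;
      ++Materializations;
    }
    // ldr X, [TMP, #(Offset - NewBaseOffset)]
  }
  // 4200/4204 share one base; 9000 forces a second; 4208 a third.
  std::printf("base registers materialized: %d\n", Materializations);
  return 0;
}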

Operand *TargetARM32::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
(...skipping 141 matching lines...)
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
    TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
    _add(T, T, AddAmount);
    alignRegisterPow2(T, Alignment);
    _sub(SP, SP, T);
  }
-  _mov(Dest, SP);
+  Variable *T = SP;
+  if (MaxOutArgsSizeBytes != 0) {
+    T = makeReg(getPointerType());
+    Operand *OutArgsSizeRF = legalize(
+        Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
+    _add(T, SP, OutArgsSizeRF);
+  }
+  _mov(Dest, T);
}
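Because the out-args area (area 9) now sits between SP and the rest of the frame, the pointer an alloca produces must skip over it, or a later call's argument stores would clobber the allocation; that is what the _add of MaxOutArgsSizeBytes above does. A sketch of the address math (not part of the patch), with made-up numbers:

// Illustrative address math for the alloca change: after "sub sp, T", the
// usable allocation starts MaxOutArgsSizeBytes above the new SP, since
// [SP, SP + MaxOutArgsSizeBytes) stays reserved for outgoing arguments.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t SP = 0x1000;                  // SP before the alloca (made up).
  const uint32_t AllocSize = 32;         // Already aligned by the lowering.
  const uint32_t MaxOutArgsSizeBytes = 16;
  SP -= AllocSize;                       // _sub(SP, SP, T)
  const uint32_t Dest = SP + MaxOutArgsSizeBytes; // _add(T, SP, OutArgsSizeRF)
  std::printf("SP=0x%x, alloca result=0x%x\n", SP, Dest);
  return 0;
}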

void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
  if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
    return;
  Variable *SrcLoReg = legalizeToReg(SrcLo);
  switch (Ty) {
  default:
    llvm::report_fatal_error("Unexpected type");
  case IceType_i8:
(...skipping 803 matching lines...)
  }
  case InstArithmetic::Xor: {
    Variable *Src0R = Srcs.src0R(this);
    Operand *Src1RF = Srcs.src1RF(this);
    _eor(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Sub: {
    if (Srcs.hasConstOperand()) {
+      // TODO(jpp): lowering Src0R here is wrong -- Src0R is not guaranteed
+      // to be used.
      Variable *Src0R = Srcs.src0R(this);
      if (Srcs.immediateIsFlexEncodable()) {
        Operand *Src1RF = Srcs.src1RF(this);
        if (Srcs.swappedOperands()) {
          _rsb(T, Src0R, Src1RF);
        } else {
          _sub(T, Src0R, Src1RF);
        }
        _mov(Dest, T);
        return;
(...skipping 233 matching lines...)

  // Assign arguments to registers and stack. Also reserve stack.
  TargetARM32::CallingConv CC;
  // Pair of Arg Operand -> GPR number assignments.
  llvm::SmallVector<std::pair<Operand *, int32_t>,
                    TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
  llvm::SmallVector<std::pair<Operand *, int32_t>,
                    TargetARM32::CallingConv::ARM32_MAX_FP_REG_UNITS> FPArgs;
  // Pair of Arg Operand -> stack offset.
  llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
-  int32_t ParameterAreaSizeBytes = 0;
+  size_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Instr->getArg(i));
    Type Ty = Arg->getType();
    bool InRegs = false;
    if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> Regs;
      if (CC.I64InRegs(&Regs)) {
(...skipping 23 matching lines...)
          applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
      StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is assumed that
  // the stack is already aligned at the start of the calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

-  // Subtract the appropriate amount for the argument area. This also takes
-  // care of setting the stack adjustment during emission.
-  //
-  // TODO: If for some reason the call instruction gets dead-code eliminated
-  // after lowering, we would need to ensure that the pre-call and the
-  // post-call esp adjustment get eliminated as well.
-  if (ParameterAreaSizeBytes) {
-    Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
-                                  Legal_Reg | Legal_Flex);
-    _adjust_stack(ParameterAreaSizeBytes, SubAmount);
+  if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) {
+    llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max.");
  }

  // Copy arguments that are passed on the stack to the appropriate stack
  // locations.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  for (auto &StackArg : StackArgs) {
    ConstantInteger32 *Loc =
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
    Type Ty = StackArg.first->getType();
    OperandARM32Mem *Addr;
(...skipping 72 matching lines...)
    // Generate a FakeUse of register arguments so that they do not get dead
    // code eliminated as a result of the FakeKill of scratch registers after
    // the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }
  Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

-  // Add the appropriate offset to SP. The call instruction takes care of
-  // resetting the stack offset during emission.
-  if (ParameterAreaSizeBytes) {
-    Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
-                                  Legal_Reg | Legal_Flex);
-    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
-    _add(SP, SP, AddAmount);
-  }
-
  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
(...skipping 2869 matching lines...)
  // Technically R9 is used for TLS with Sandboxing, and we reserve it.
  // However, for compatibility with current NaCl LLVM, don't claim that.
  Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
}

llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
llvm::SmallBitVector TargetARM32::ScratchRegs;

} // end of namespace Ice