Chromium Code Reviews

Diff: src/IceTargetLoweringARM32.cpp

Issue 1467473003: Subzero. ARM32. No more SP frobbing. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Adds TODO() for fixing an unrelated bug. Created 5 years, 1 month ago
//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
(...skipping 215 matching lines...)
      continue;
    }
    Lo->setRegNum(FirstReg);
    Lo->setMustHaveReg();
    Hi->setRegNum(FirstReg + 1);
    Hi->setMustHaveReg();
  }
}
} // end of anonymous namespace

+void TargetARM32::findMaxStackOutArgsSize() {
+  // MinNeededOutArgsBytes should be updated if the Target ever creates a
+  // high-level InstCall that requires more stack bytes.
+  constexpr size_t MinNeededOutArgsBytes = 0;
+  MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
+  for (CfgNode *Node : Func->getNodes()) {
+    Context.init(Node);
+    while (!Context.atEnd()) {

sehr 2015/11/20 20:07:35: Again, factor to call the virtual method here.
John 2015/11/20 22:17:17: Done.
+      PostIncrLoweringContext PostIncrement(Context);
+      Inst *CurInstr = Context.getCur();
+      if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
+        TargetARM32::CallingConv CC;
+        size_t OutArgsSizeBytes = 0;
+        for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
+          Operand *Arg = legalizeUndef(Call->getArg(i));
+          Type Ty = Arg->getType();
+          if (Ty == IceType_i64) {
+            std::pair<int32_t, int32_t> Regs;
+            if (CC.I64InRegs(&Regs)) {
+              continue;
+            }
+          } else if (isVectorType(Ty) || isFloatingType(Ty)) {
+            int32_t Reg;
+            if (CC.FPInReg(Ty, &Reg)) {
+              continue;
+            }
+          } else {
+            assert(Ty == IceType_i32);
+            int32_t Reg;
+            if (CC.I32InReg(&Reg)) {
+              continue;
+            }
+          }
+
+          OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
+          OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
+        }
+        OutArgsSizeBytes = applyStackAlignment(OutArgsSizeBytes);
+        MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
+      }
+    }
+  }
+}
+
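For intuition, a minimal standalone sketch of the per-call scan above (not part of the patch), assuming an AAPCS-style convention: r0-r3 for i32, an even-aligned register pair for i64, 16 single-precision VFP units for FP args, and a 16-byte final alignment. All names are illustrative, not Subzero's CallingConv API.

// Standalone sketch of findMaxStackOutArgsSize's per-call accounting.
// Assumptions (not from the patch): r0-r3 for i32, an even/odd pair for
// i64, 16 VFP units, 16-byte final alignment, no register back-filling.
#include <cstdint>
#include <cstdio>
#include <vector>

enum class ArgTy { I32, I64, F32 };

static uint32_t alignUp(uint32_t X, uint32_t A) { return (X + A - 1) & ~(A - 1); }

static uint32_t outArgsBytes(const std::vector<ArgTy> &Args) {
  uint32_t NextGPR = 0, NextFPR = 0, Bytes = 0;
  for (ArgTy Ty : Args) {
    switch (Ty) {
    case ArgTy::I32:
      if (NextGPR < 4) { ++NextGPR; continue; } // passed in a register
      break;
    case ArgTy::I64:
      NextGPR = alignUp(NextGPR, 2); // needs an even/odd pair (r0:r1, r2:r3)
      if (NextGPR + 1 < 4) { NextGPR += 2; continue; }
      break;
    case ArgTy::F32:
      if (NextFPR < 16) { ++NextFPR; continue; }
      break;
    }
    const uint32_t Size = (Ty == ArgTy::I64) ? 8 : 4;
    Bytes = alignUp(Bytes, Size) + Size; // applyStackAlignmentTy analogue
  }
  return alignUp(Bytes, 16); // applyStackAlignment analogue
}

int main() {
  // Six i32 args: four land in r0-r3, two spill to the out-args area.
  const std::vector<ArgTy> Call(6, ArgTy::I32);
  std::printf("out-args bytes: %u\n", outArgsBytes(Call)); // prints 16
  return 0;
}

For a call with six i32 arguments, four go in r0-r3 and two spill, so the call contributes 8 bytes, rounded up to 16 by the final alignment, toward MaxOutArgsSizeBytes.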
void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094
+  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!Ctx->getFlags().getPhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
(...skipping 86 matching lines...)
  // Nop insertion
  if (Ctx->getFlags().shouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?
+  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
(...skipping 107 matching lines...)
    return;
  }
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error(
        "Infinite-weight Variable has no register assigned");
  }
  int32_t Offset = Var->getStackOffset();
  int32_t BaseRegNum = Var->getBaseRegNum();
  if (BaseRegNum == Variable::NoRegister) {
    BaseRegNum = getFrameOrStackReg();
-    if (!hasFramePointer())
-      Offset += getStackAdjustment();
  }
  const Type VarTy = Var->getType();
  Str << "[" << getRegName(BaseRegNum, VarTy);
  if (Offset != 0) {
    Str << ", " << getConstantPrefix() << Offset;
  }
  Str << "]";
}

bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
(...skipping 175 matching lines...)
  // | 2. padding             |
  // +------------------------+ <--- FramePointer (if used)
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
-  // | 7. allocas             |
+  // | 7. allocas (variable)  |
+  // +------------------------+
+  // | 8. padding             |
+  // +------------------------+
+  // | 9. out args            |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes: area 2
  //  * GlobalsSize: area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize: area 5
-  //  * SpillAreaSizeBytes: areas 2 - 6
+  //  * SpillAreaSizeBytes: areas 2 - 6, and 9
+  //  * MaxOutArgsSizeBytes: area 9
  //
  // Determine stack frame offsets for each Variable without a register
  // assignment. This can be done as one variable per stack slot. Or, do
  // coalescing by running the register allocator again with an infinite set of
  // registers (as a side effect, this gives variables a second chance at
  // physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one block of
  // space on the frame for globals (variables with multi-block lifetime), and
  // one block to share for locals (single-block lifetime).

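As a back-of-the-envelope check of this bookkeeping (not part of the patch; every value is made up, and a 16-byte stack alignment is assumed — the real sizes come from the register allocator and findMaxStackOutArgsSize()):

// Illustrative prologue arithmetic for the frame layout above.
#include <cstdint>
#include <cstdio>

static uint32_t alignUp(uint32_t X, uint32_t A) { return (X + A - 1) & ~(A - 1); }

int main() {
  const uint32_t PreservedRegsSizeBytes = 8; // area 1, e.g. push {fp, lr}
  uint32_t SpillAreaSizeBytes = 20;          // areas 2-6, before alignment
  const uint32_t MaxOutArgsSizeBytes = 16;   // area 9
  // Mirror of the aligned path below: align the spill areas, then append
  // the out-args area while keeping SP 16-byte aligned.
  uint32_t StackSize = alignUp(PreservedRegsSizeBytes + SpillAreaSizeBytes, 16);
  StackSize = alignUp(StackSize + MaxOutArgsSizeBytes, 16);
  SpillAreaSizeBytes = StackSize - PreservedRegsSizeBytes;
  std::printf("sub sp, sp, #%u\n", SpillAreaSizeBytes); // prints 40
  return 0;
}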
(...skipping 85 matching lines...)
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

-  // Align SP if necessary.
-  if (NeedsStackAlignment) {
+  // Add the out-args space to the stack, and align SP if necessary.
+  if (!NeedsStackAlignment) {
+    SpillAreaSizeBytes += MaxOutArgsSizeBytes;
+  } else {
    uint32_t StackOffset = PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
+    StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Generate "sub sp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes) {
    // Use the scratch register if needed to legalize the immediate.
    Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                  Legal_Reg | Legal_Flex, getReservedTmpReg());
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _sub(SP, SP, SubAmount);
  }
  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

-  resetStackAdjustment();
-
  // Fill in stack offsets for stack args, and copy args into registers for
  // those that were register-allocated. Args are pushed right to left, so
  // Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes;
  if (!UsesFramePointer)
    BasicFrameOffset += SpillAreaSizeBytes;

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
(...skipping 23 matching lines...)
                      UsesFramePointer);
  this->HasComputedFrame = true;

  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker _(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t SPAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
-        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
+        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
+        MaxOutArgsSizeBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

    Str << "Stack details:\n"
        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
+        << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is FP based = " << UsesFramePointer << "\n";
  }
}

void TargetARM32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
(...skipping 76 matching lines...)
  _ret(LR, RetValue);
  _bundle_unlock();
  RI->setDeleted();
}

bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const {
  constexpr bool ZeroExt = false;
  return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset);
}

-Variable *TargetARM32::newBaseRegister(int32_t OriginalOffset,
-                                       int32_t StackAdjust,
-                                       Variable *OrigBaseReg) {
-  int32_t Offset = OriginalOffset + StackAdjust;
+Variable *TargetARM32::newBaseRegister(int32_t Offset, Variable *OrigBaseReg) {
  // Legalize will likely need a movw/movt combination, but if the top bits are
  // all 0 from negating the offset and subtracting, we could use that instead.
  bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
  if (ShouldSub)
    Offset = -Offset;
  Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset),
                                Legal_Reg | Legal_Flex, getReservedTmpReg());
  Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg());
  if (ShouldSub)
    _sub(ScratchReg, OrigBaseReg, OffsetVal);
  else
    _add(ScratchReg, OrigBaseReg, OffsetVal);
  return ScratchReg;
}

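The negate-and-test trick above chooses between materializing the offset with a single movw followed by a sub, and a movw/movt pair followed by an add. A tiny demonstration (not part of the patch) with made-up offsets:

// Demonstration of the ShouldSub test in newBaseRegister: if -Offset fits
// in 16 bits, one movw plus a sub suffices; otherwise materializing the
// offset may need movw+movt before the add.
#include <cstdint>
#include <cstdio>

static bool shouldSub(int32_t Offset) {
  return (-Offset & 0xFFFF0000) == 0;
}

int main() {
  // -4096 negates to 4096, which fits in 16 bits: movw, then sub.
  // -70000 negates to 70000, which needs 17 bits: movw/movt, then add.
  for (int32_t Offset : {-4096, -70000, 4096}) {
    std::printf("Offset %d -> %s\n", Offset,
                shouldSub(Offset) ? "movw; sub" : "movw/movt; add");
  }
  return 0;
}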
OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
-                                               int32_t StackAdjust,
                                               Variable *OrigBaseReg,
                                               Variable **NewBaseReg,
                                               int32_t *NewBaseOffset) {
-  if (isLegalMemOffset(Ty, Offset + StackAdjust)) {
+  if (isLegalMemOffset(Ty, Offset)) {
    return OperandARM32Mem::create(
-        Func, Ty, OrigBaseReg, llvm::cast<ConstantInteger32>(
-                                   Ctx->getConstantInt32(Offset + StackAdjust)),
+        Func, Ty, OrigBaseReg,
+        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)),
        OperandARM32Mem::Offset);
  }

  if (*NewBaseReg == nullptr) {
-    *NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg);
-    *NewBaseOffset = Offset + StackAdjust;
+    *NewBaseReg = newBaseRegister(Offset, OrigBaseReg);
+    *NewBaseOffset = Offset;
  }

-  int32_t OffsetDiff = Offset + StackAdjust - *NewBaseOffset;
+  int32_t OffsetDiff = Offset - *NewBaseOffset;
  if (!isLegalMemOffset(Ty, OffsetDiff)) {
-    *NewBaseReg = newBaseRegister(Offset, StackAdjust, OrigBaseReg);
-    *NewBaseOffset = Offset + StackAdjust;
+    *NewBaseReg = newBaseRegister(Offset, OrigBaseReg);
+    *NewBaseOffset = Offset;
    OffsetDiff = 0;
  }

  return OperandARM32Mem::create(
      Func, Ty, *NewBaseReg,
      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetDiff)),
      OperandARM32Mem::Offset);
}

-void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, int32_t StackAdjust,
-                              Variable *OrigBaseReg, Variable **NewBaseReg,
-                              int32_t *NewBaseOffset) {
+void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg,
+                              Variable **NewBaseReg, int32_t *NewBaseOffset) {
  Variable *Dest = MovInstr->getDest();
  assert(Dest != nullptr);
  Type DestTy = Dest->getType();
  assert(DestTy != IceType_i64);

  Operand *Src = MovInstr->getSrc(0);
  Type SrcTy = Src->getType();
  (void)SrcTy;
  assert(SrcTy != IceType_i64);

  if (MovInstr->isMultiDest() || MovInstr->isMultiSource())
    return;

  bool Legalized = false;
  if (!Dest->hasReg()) {
    auto *const SrcR = llvm::cast<Variable>(Src);
    assert(SrcR->hasReg());
    const int32_t Offset = Dest->getStackOffset();
    // This is a _mov(Mem(), Variable), i.e., a store.
-    _str(SrcR, createMemOperand(DestTy, Offset, StackAdjust, OrigBaseReg,
-                                NewBaseReg, NewBaseOffset),
+    _str(SrcR, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
+                                NewBaseOffset),
         MovInstr->getPredicate());
    // _str() does not have a Dest, so we add a fake-def(Dest).
    Context.insert(InstFakeDef::create(Func, Dest));
    Legalized = true;
  } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
    if (!Var->hasReg()) {
      const int32_t Offset = Var->getStackOffset();
-      _ldr(Dest, createMemOperand(DestTy, Offset, StackAdjust, OrigBaseReg,
-                                  NewBaseReg, NewBaseOffset),
+      _ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
+                                  NewBaseOffset),
           MovInstr->getPredicate());
      Legalized = true;
    }
  }

  if (Legalized) {
    if (MovInstr->isDestRedefined()) {
      _set_dest_redefined();
    }
    MovInstr->setDeleted();
  }
}

void TargetARM32::legalizeStackSlots() {
  // If a stack variable's frame offset doesn't fit, convert from:
  //   ldr X, OFF[SP]
  // to:
  //   movw/movt TMP, OFF_PART
  //   add TMP, TMP, SP
  //   ldr X, OFF_MORE[TMP]
  //
  // This is safe because we have reserved TMP, and add for ARM does not
  // clobber the flags register.
  Func->dump("Before legalizeStackSlots");
  assert(hasComputedFrame());
  Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
-  int32_t StackAdjust = 0;
  // Do a fairly naive greedy clustering for now. Pick the first stack slot
  // that's out of bounds and make a new base reg using the architecture's temp
  // register. If that works for the next slot, then great. Otherwise, create a
  // new base register, clobbering the previous base register. Never share a
  // base reg across different basic blocks. This isn't ideal if local and
  // multi-block variables are far apart and their references are interspersed.
  // It may help to be more coordinated about assigning stack slot numbers and
  // may help to assign smaller offsets to higher-weight variables so that they
  // don't depend on this legalization.
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    Variable *NewBaseReg = nullptr;
    int32_t NewBaseOffset = 0;
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = Context.getCur();
      Variable *Dest = CurInstr->getDest();

      // Check if the previous NewBaseReg is clobbered, and reset if needed.
      if ((Dest && NewBaseReg && Dest->hasReg() &&
           Dest->getRegNum() == NewBaseReg->getBaseRegNum()) ||
          llvm::isa<InstFakeKill>(CurInstr)) {
        NewBaseReg = nullptr;
        NewBaseOffset = 0;
      }

-      // The stack adjustment only matters if we are using SP instead of FP.
-      if (!hasFramePointer()) {
-        if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) {
-          StackAdjust += AdjInst->getAmount();
-          NewBaseOffset += AdjInst->getAmount();
-          continue;
-        }
-        if (llvm::isa<InstARM32Call>(CurInstr)) {
-          NewBaseOffset -= StackAdjust;
-          StackAdjust = 0;
-          continue;
-        }
-      }
-
      if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
-        legalizeMov(MovInstr, StackAdjust, OrigBaseReg, &NewBaseReg,
-                    &NewBaseOffset);
+        legalizeMov(MovInstr, OrigBaseReg, &NewBaseReg, &NewBaseOffset);
      }
    }
  }
}
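A toy model of this greedy clustering (not part of the patch), assuming the word-sized ldr/str immediate range of +/-4095; the real per-type ranges live in OperandARM32Mem::canHoldOffset, and halfword/VFP accesses are narrower. It counts how often the reserved temporary must be re-materialized for a made-up stream of frame offsets:

// Toy model of legalizeStackSlots' base-register reuse. The offset stream
// and the imm12 range assumption are illustrative only.
#include <cstdint>
#include <cstdio>
#include <vector>

static bool isLegalWordOffset(int32_t Offset) {
  return Offset >= -4095 && Offset <= 4095;
}

int main() {
  const std::vector<int32_t> Offsets = {4200, 4204, 9000, 4208};
  int32_t NewBaseOffset = 0;
  bool HaveNewBase = false;
  int Materializations = 0;
  for (int32_t Offset : Offsets) {
    if (isLegalWordOffset(Offset))
      continue; // Use SP/FP directly.
    if (!HaveNewBase || !isLegalWordOffset(Offset - NewBaseOffset)) {
      // movw/movt TMP, Offset; add TMP, TMP, SP -- a new base register.
      NewBaseOffset = Offset;
      HaveNewBase = true;
      ++Materializations;
    }
    // ldr X, [TMP, #(Offset - NewBaseOffset)]
  }
  // 4200/4204 share one base; 9000 forces a second; 4208 a third.
  std::printf("base registers materialized: %d\n", Materializations);
  return 0;
}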

Operand *TargetARM32::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
(...skipping 141 matching lines...)
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
    TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
    _add(T, T, AddAmount);
    alignRegisterPow2(T, Alignment);
    _sub(SP, SP, T);
  }
-  _mov(Dest, SP);
+  Variable *T = SP;
+  if (MaxOutArgsSizeBytes != 0) {
+    T = makeReg(getPointerType());
+    Operand *OutArgsSizeRF = legalize(
+        Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
+    _add(T, SP, OutArgsSizeRF);
+  }
+  _mov(Dest, T);
}
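Because the out-args area (area 9) now sits between SP and the rest of the frame, the pointer an alloca produces must skip over it, or a later call's argument stores would clobber the allocation; that is what the _add of MaxOutArgsSizeBytes above does. A sketch of the address math (not part of the patch), with made-up numbers:

// Illustrative address math for the alloca change: after "sub sp, T", the
// usable allocation starts MaxOutArgsSizeBytes above the new SP, since
// [SP, SP + MaxOutArgsSizeBytes) stays reserved for outgoing arguments.
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t SP = 0x1000;                  // SP before the alloca (made up).
  const uint32_t AllocSize = 32;         // Already aligned by the lowering.
  const uint32_t MaxOutArgsSizeBytes = 16;
  SP -= AllocSize;                       // _sub(SP, SP, T)
  const uint32_t Dest = SP + MaxOutArgsSizeBytes; // _add(T, SP, OutArgsSizeRF)
  std::printf("SP=0x%x, alloca result=0x%x\n", SP, Dest);
  return 0;
}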

void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
  if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
    return;
  Variable *SrcLoReg = legalizeToReg(SrcLo);
  switch (Ty) {
  default:
    llvm::report_fatal_error("Unexpected type");
  case IceType_i8:
(...skipping 803 matching lines...)
  }
  case InstArithmetic::Xor: {
    Variable *Src0R = Srcs.src0R(this);
    Operand *Src1RF = Srcs.src1RF(this);
    _eor(T, Src0R, Src1RF);
    _mov(Dest, T);
    return;
  }
  case InstArithmetic::Sub: {
    if (Srcs.hasConstOperand()) {
+      // TODO(jpp): lowering Src0R here is wrong -- Src0R is not guaranteed
+      // to be used.
      Variable *Src0R = Srcs.src0R(this);
      if (Srcs.immediateIsFlexEncodable()) {
        Operand *Src1RF = Srcs.src1RF(this);
        if (Srcs.swappedOperands()) {
          _rsb(T, Src0R, Src1RF);
        } else {
          _sub(T, Src0R, Src1RF);
        }
        _mov(Dest, T);
        return;
(...skipping 233 matching lines...)

  // Assign arguments to registers and stack. Also reserve stack.
  TargetARM32::CallingConv CC;
  // Pair of Arg Operand -> GPR number assignments.
  llvm::SmallVector<std::pair<Operand *, int32_t>,
                    TargetARM32::CallingConv::ARM32_MAX_GPR_ARG> GPRArgs;
  llvm::SmallVector<std::pair<Operand *, int32_t>,
                    TargetARM32::CallingConv::ARM32_MAX_FP_REG_UNITS> FPArgs;
  // Pair of Arg Operand -> stack offset.
  llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
-  int32_t ParameterAreaSizeBytes = 0;
+  size_t ParameterAreaSizeBytes = 0;

  // Classify each argument operand according to the location where the
  // argument is passed.
  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Instr->getArg(i));
    Type Ty = Arg->getType();
    bool InRegs = false;
    if (Ty == IceType_i64) {
      std::pair<int32_t, int32_t> Regs;
      if (CC.I64InRegs(&Regs)) {
(...skipping 23 matching lines...)
          applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
      StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
    }
  }

  // Adjust the parameter area so that the stack is aligned. It is assumed that
  // the stack is already aligned at the start of the calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

-  // Subtract the appropriate amount for the argument area. This also takes
-  // care of setting the stack adjustment during emission.
-  //
-  // TODO: If for some reason the call instruction gets dead-code eliminated
-  // after lowering, we would need to ensure that the pre-call and the
-  // post-call esp adjustment get eliminated as well.
-  if (ParameterAreaSizeBytes) {
-    Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
-                                  Legal_Reg | Legal_Flex);
-    _adjust_stack(ParameterAreaSizeBytes, SubAmount);
+  if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) {
+    llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max.");
  }

  // Copy arguments that are passed on the stack to the appropriate stack
  // locations.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  for (auto &StackArg : StackArgs) {
    ConstantInteger32 *Loc =
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
    Type Ty = StackArg.first->getType();
    OperandARM32Mem *Addr;
(...skipping 72 matching lines...)
    // Generate a FakeUse of register arguments so that they do not get dead
    // code eliminated as a result of the FakeKill of scratch registers after
    // the call.
    Context.insert(InstFakeUse::create(Func, Reg));
  }
  Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
  Context.insert(NewCall);
  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

-  // Add the appropriate offset to SP. The call instruction takes care of
-  // resetting the stack offset during emission.
-  if (ParameterAreaSizeBytes) {
-    Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
-                                  Legal_Reg | Legal_Flex);
-    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
-    _add(SP, SP, AddAmount);
-  }
-
  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
    Context.insert(FakeUse);
  }

  if (!Dest)
(...skipping 2869 matching lines...)
  // Technically R9 is used for TLS with Sandboxing, and we reserve it.
  // However, for compatibility with current NaCl LLVM, don't claim that.
  Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
}

llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
llvm::SmallBitVector TargetARM32::ScratchRegs;

} // end of namespace Ice