OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
45 return ::Ice::ARM32::TargetDataARM32::create(Ctx); | 45 return ::Ice::ARM32::TargetDataARM32::create(Ctx); |
46 } | 46 } |
47 | 47 |
48 std::unique_ptr<::Ice::TargetHeaderLowering> | 48 std::unique_ptr<::Ice::TargetHeaderLowering> |
49 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) { | 49 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) { |
50 return ::Ice::ARM32::TargetHeaderARM32::create(Ctx); | 50 return ::Ice::ARM32::TargetHeaderARM32::create(Ctx); |
51 } | 51 } |
52 | 52 |
53 void staticInit(::Ice::GlobalContext *Ctx) { | 53 void staticInit(::Ice::GlobalContext *Ctx) { |
54 ::Ice::ARM32::TargetARM32::staticInit(Ctx); | 54 ::Ice::ARM32::TargetARM32::staticInit(Ctx); |
55 if (Ctx->getFlags().getUseNonsfi()) { | |
56 // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing | |
57 // globals. The GOT is an external symbol (i.e., it is not defined in the | |
58 // pexe) so we need to register it as such so that ELF emission won't barf | |
59 // on an "unknown" symbol. The GOT is added to the External symbols list | |
60 // here because staticInit() is invoked in a single-thread context. | |
61 Ctx->getConstantExternSym(::Ice::GlobalOffsetTable); | |
62 } | |
55 } | 63 } |
56 | 64 |
57 } // end of namespace ARM32 | 65 } // end of namespace ARM32 |
58 | 66 |
59 namespace Ice { | 67 namespace Ice { |
60 namespace ARM32 { | 68 namespace ARM32 { |
61 | 69 |
62 namespace { | 70 namespace { |
63 | 71 |
64 /// SizeOf is used to obtain the size of an initializer list as a constexpr | 72 /// SizeOf is used to obtain the size of an initializer list as a constexpr |
(...skipping 615 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
680 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_memset); | 688 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_memset); |
681 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper, | 689 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper, |
682 NoTailCall, IsTargetHelperCall); | 690 NoTailCall, IsTargetHelperCall); |
683 Call->addArg(IntrinsicCall->getArg(0)); | 691 Call->addArg(IntrinsicCall->getArg(0)); |
684 Call->addArg(ValExt); | 692 Call->addArg(ValExt); |
685 Call->addArg(IntrinsicCall->getArg(2)); | 693 Call->addArg(IntrinsicCall->getArg(2)); |
686 Instr->setDeleted(); | 694 Instr->setDeleted(); |
687 return; | 695 return; |
688 } | 696 } |
689 case Intrinsics::NaClReadTP: { | 697 case Intrinsics::NaClReadTP: { |
690 if (NeedSandboxing) { | 698 if (SandboxingType == ST_NaCl) { |
691 return; | 699 return; |
692 } | 700 } |
693 static constexpr SizeT MaxArgs = 0; | 701 static constexpr SizeT MaxArgs = 0; |
694 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_read_tp); | 702 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_read_tp); |
695 Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall, | 703 Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall, |
696 IsTargetHelperCall); | 704 IsTargetHelperCall); |
697 Instr->setDeleted(); | 705 Instr->setDeleted(); |
698 return; | 706 return; |
699 } | 707 } |
700 case Intrinsics::Setjmp: { | 708 case Intrinsics::Setjmp: { |
(...skipping 22 matching lines...) Expand all Loading... | |
723 PostIncrLoweringContext PostIncrement(Context); | 731 PostIncrLoweringContext PostIncrement(Context); |
724 Inst *CurInstr = Context.getCur(); | 732 Inst *CurInstr = Context.getCur(); |
725 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { | 733 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { |
726 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); | 734 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); |
727 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); | 735 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); |
728 } | 736 } |
729 } | 737 } |
730 } | 738 } |
731 } | 739 } |
732 | 740 |
741 void TargetARM32::createGotPtr() { | |
742 if (SandboxingType != ST_Nonsfi) { | |
743 return; | |
744 } | |
745 GotPtr = Func->makeVariable(IceType_i32); | |
746 } | |
747 | |
748 void TargetARM32::initGotPtr() { | |
749 if (SandboxingType != ST_Nonsfi) { | |
750 return; | |
751 } | |
752 assert(GotPtr != nullptr); | |
753 // TODO(jpp): explain fake def. | |
Jim Stichnoth
2016/02/10 06:36:00
yes, I'm curious...
edit: I see, reading ahead in
John
2016/02/10 15:41:13
And I missed this comment. :-/
I wanted to leave
| |
754 Variable *T = makeReg(IceType_i32); | |
755 Context.insert<InstFakeDef>(T); | |
756 Context.insert<InstFakeDef>(GotPtr, T); | |
757 } | |
758 | |
759 IceString TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) { | |
760 const IceString CRName = CR->getName(); | |
Jim Stichnoth
2016/02/10 06:36:00
maybe const IceString &CRName
John
2016/02/10 15:41:14
In general I don't like saving a return value by r
| |
761 const IceString CRGotoffName = | |
762 "GOTOFF$" + Func->getFunctionName() + "$" + CRName; | |
763 if (KnownGotoffs.count(CRGotoffName) == 0) { | |
764 auto *Global = VariableDeclaration::create(Ctx); | |
765 Global->setIsConstant(true); | |
766 Global->setName(CRName); | |
767 Global->setSuppressMangling(); | |
768 | |
769 auto *Gotoff = VariableDeclaration::create(Ctx); | |
770 constexpr auto GotFixup = R_ARM_GOTOFF32; | |
771 Gotoff->setIsConstant(true); | |
772 Gotoff->setName(CRGotoffName); | |
773 Gotoff->setSuppressMangling(); | |
774 Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create( | |
775 Global, {RelocOffset::create(Ctx, 0)}, GotFixup)); | |
776 Func->addGlobal(Gotoff); | |
777 KnownGotoffs.emplace(CRGotoffName); | |
778 } | |
779 return CRGotoffName; | |
780 } | |
781 | |
782 void TargetARM32::materializeGotAddr(CfgNode *Node) { | |
783 if (SandboxingType != ST_Nonsfi) { | |
784 return; | |
785 } | |
786 | |
787 // First, we try to find the | |
788 // GotPtr = def T | |
789 // pseudo-instruction that we placed for defining the got ptr. That | |
790 // instruction is not just a place-holder for defining the GotPtr (thus | |
791 // keeping liveness consistent), but it is also located at a point where it is | |
792 // safe to materialize the got addr -- i.e., before loading parameters to | |
793 // registers, but after moving register parameters from their home location. | |
794 InstFakeDef *DefGotPtr = nullptr; | |
795 for (auto &Inst : Node->getInsts()) { | |
796 auto *FakeDef = llvm::dyn_cast<InstFakeDef>(&Inst); | |
797 if (FakeDef != nullptr && FakeDef->getDest() == GotPtr) { | |
798 DefGotPtr = FakeDef; | |
799 break; | |
800 } | |
801 } | |
802 | |
803 if (DefGotPtr == nullptr || DefGotPtr->isDeleted()) { | |
804 return; | |
805 } | |
806 | |
807 // The got addr needs to be materialized at the same point where DefGotPtr | |
808 // lives. | |
809 Context.setInsertPoint(DefGotPtr); | |
810 assert(DefGotPtr->getSrcSize() == 1); | |
811 auto *T = llvm::cast<Variable>(DefGotPtr->getSrc(0)); | |
812 loadNamedConstantRelocatablePIC(GlobalOffsetTable, T, | |
813 [this, T](Variable *PC) { _add(T, PC, T); }); | |
814 _mov(GotPtr, T); | |
815 DefGotPtr->setDeleted(); | |
816 } | |
817 | |
818 void TargetARM32::loadNamedConstantRelocatablePIC( | |
819 const IceString &Name, Variable *Register, | |
820 std::function<void(Variable *PC)> Finish, bool SuppressMangling) { | |
821 assert(SandboxingType == ST_Nonsfi); | |
822 // We makeReg() here instead of getPhysicalRegister() because the latter ends | |
823 // up creating multi-blocks temporaries that liveness fails to validate. | |
824 auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc); | |
825 | |
826 auto *AddPcReloc = RelocOffset::create(Ctx); | |
827 AddPcReloc->setSubtract(true); | |
828 auto *AddPcLabel = InstARM32Label::create(Func, this); | |
829 AddPcLabel->setRelocOffset(AddPcReloc); | |
830 | |
831 const IceString EmitText = Name; | |
832 // We need a -8 in the relocation expression to account for the pc's value | |
833 // read by the first instruction emitted in Finish(PC). | |
834 auto *Imm8 = RelocOffset::create(Ctx, -8); | |
835 | |
836 auto *MovwReloc = RelocOffset::create(Ctx); | |
837 auto *MovwLabel = InstARM32Label::create(Func, this); | |
838 MovwLabel->setRelocOffset(MovwReloc); | |
839 | |
840 auto *MovtReloc = RelocOffset::create(Ctx); | |
841 auto *MovtLabel = InstARM32Label::create(Func, this); | |
842 MovtLabel->setRelocOffset(MovtReloc); | |
843 | |
844 // The EmitString for these constant relocatables have hardcoded offsets | |
845 // attached to them. This could be dangerous if, e.g., we ever implemented | |
846 // instruction scheduling, but llvm-mc currently does not support | |
847 // | |
848 // movw reg, #:lower16:(Symbol - Label - Number) | |
849 // movt reg, #:upper16:(Symbol - Label - Number) | |
850 // | |
851 // relocations. | |
852 auto *CRLower = Ctx->getConstantSym({MovwReloc, AddPcReloc, Imm8}, Name, | |
853 EmitText + " -16", SuppressMangling); | |
854 auto *CRUpper = Ctx->getConstantSym({MovtReloc, AddPcReloc, Imm8}, Name, | |
855 EmitText + " -12", SuppressMangling); | |
856 | |
857 Context.insert(MovwLabel); | |
858 _movw(Register, CRLower); | |
859 Context.insert(MovtLabel); | |
860 _movt(Register, CRUpper); | |
861 // PC = fake-def to keep liveness consistent. | |
862 Context.insert<InstFakeDef>(PC); | |
863 Context.insert(AddPcLabel); | |
864 Finish(PC); | |
865 } | |
866 | |
733 void TargetARM32::translateO2() { | 867 void TargetARM32::translateO2() { |
734 TimerMarker T(TimerStack::TT_O2, Func); | 868 TimerMarker T(TimerStack::TT_O2, Func); |
735 | 869 |
736 // TODO(stichnot): share passes with X86? | 870 // TODO(stichnot): share passes with other targets? |
737 // https://code.google.com/p/nativeclient/issues/detail?id=4094 | 871 // https://code.google.com/p/nativeclient/issues/detail?id=4094 |
872 if (SandboxingType == ST_Nonsfi) { | |
873 createGotPtr(); | |
874 } | |
738 genTargetHelperCalls(); | 875 genTargetHelperCalls(); |
739 findMaxStackOutArgsSize(); | 876 findMaxStackOutArgsSize(); |
740 | 877 |
741 // Do not merge Alloca instructions, and lay out the stack. | 878 // Do not merge Alloca instructions, and lay out the stack. |
742 static constexpr bool SortAndCombineAllocas = true; | 879 static constexpr bool SortAndCombineAllocas = true; |
743 Func->processAllocas(SortAndCombineAllocas); | 880 Func->processAllocas(SortAndCombineAllocas); |
744 Func->dump("After Alloca processing"); | 881 Func->dump("After Alloca processing"); |
745 | 882 |
746 if (!Ctx->getFlags().getPhiEdgeSplit()) { | 883 if (!Ctx->getFlags().getPhiEdgeSplit()) { |
747 // Lower Phi instructions. | 884 // Lower Phi instructions. |
(...skipping 26 matching lines...) Expand all Loading... | |
774 return; | 911 return; |
775 | 912 |
776 // TODO: It should be sufficient to use the fastest liveness calculation, | 913 // TODO: It should be sufficient to use the fastest liveness calculation, |
777 // i.e. livenessLightweight(). However, for some reason that slows down the | 914 // i.e. livenessLightweight(). However, for some reason that slows down the |
778 // rest of the translation. Investigate. | 915 // rest of the translation. Investigate. |
779 Func->liveness(Liveness_Basic); | 916 Func->liveness(Liveness_Basic); |
780 if (Func->hasError()) | 917 if (Func->hasError()) |
781 return; | 918 return; |
782 Func->dump("After ARM32 address mode opt"); | 919 Func->dump("After ARM32 address mode opt"); |
783 | 920 |
921 if (SandboxingType == ST_Nonsfi) { | |
922 initGotPtr(); | |
923 } | |
784 Func->genCode(); | 924 Func->genCode(); |
785 if (Func->hasError()) | 925 if (Func->hasError()) |
786 return; | 926 return; |
787 Func->dump("After ARM32 codegen"); | 927 Func->dump("After ARM32 codegen"); |
788 | 928 |
789 // Register allocation. This requires instruction renumbering and full | 929 // Register allocation. This requires instruction renumbering and full |
790 // liveness analysis. | 930 // liveness analysis. |
791 Func->renumberInstructions(); | 931 Func->renumberInstructions(); |
792 if (Func->hasError()) | 932 if (Func->hasError()) |
793 return; | 933 return; |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
838 | 978 |
839 // Nop insertion | 979 // Nop insertion |
840 if (Ctx->getFlags().shouldDoNopInsertion()) { | 980 if (Ctx->getFlags().shouldDoNopInsertion()) { |
841 Func->doNopInsertion(); | 981 Func->doNopInsertion(); |
842 } | 982 } |
843 } | 983 } |
844 | 984 |
845 void TargetARM32::translateOm1() { | 985 void TargetARM32::translateOm1() { |
846 TimerMarker T(TimerStack::TT_Om1, Func); | 986 TimerMarker T(TimerStack::TT_Om1, Func); |
847 | 987 |
848 // TODO: share passes with X86? | 988 // TODO(stichnot): share passes with other targets? |
989 if (SandboxingType == ST_Nonsfi) { | |
990 createGotPtr(); | |
991 } | |
992 | |
849 genTargetHelperCalls(); | 993 genTargetHelperCalls(); |
850 findMaxStackOutArgsSize(); | 994 findMaxStackOutArgsSize(); |
851 | 995 |
852 // Do not merge Alloca instructions, and lay out the stack. | 996 // Do not merge Alloca instructions, and lay out the stack. |
853 static constexpr bool DontSortAndCombineAllocas = false; | 997 static constexpr bool DontSortAndCombineAllocas = false; |
854 Func->processAllocas(DontSortAndCombineAllocas); | 998 Func->processAllocas(DontSortAndCombineAllocas); |
855 Func->dump("After Alloca processing"); | 999 Func->dump("After Alloca processing"); |
856 | 1000 |
857 Func->placePhiLoads(); | 1001 Func->placePhiLoads(); |
858 if (Func->hasError()) | 1002 if (Func->hasError()) |
859 return; | 1003 return; |
860 Func->placePhiStores(); | 1004 Func->placePhiStores(); |
861 if (Func->hasError()) | 1005 if (Func->hasError()) |
862 return; | 1006 return; |
863 Func->deletePhis(); | 1007 Func->deletePhis(); |
864 if (Func->hasError()) | 1008 if (Func->hasError()) |
865 return; | 1009 return; |
866 Func->dump("After Phi lowering"); | 1010 Func->dump("After Phi lowering"); |
867 | 1011 |
868 Func->doArgLowering(); | 1012 Func->doArgLowering(); |
869 | 1013 |
1014 if (SandboxingType == ST_Nonsfi) { | |
1015 initGotPtr(); | |
1016 } | |
870 Func->genCode(); | 1017 Func->genCode(); |
871 if (Func->hasError()) | 1018 if (Func->hasError()) |
872 return; | 1019 return; |
873 Func->dump("After initial ARM32 codegen"); | 1020 Func->dump("After initial ARM32 codegen"); |
874 | 1021 |
875 regAlloc(RAK_InfOnly); | 1022 regAlloc(RAK_InfOnly); |
876 if (Func->hasError()) | 1023 if (Func->hasError()) |
877 return; | 1024 return; |
878 | 1025 |
879 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); | 1026 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); |
(...skipping 477 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1357 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 1504 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
1358 | 1505 |
1359 // Fill in stack offsets for stack args, and copy args into registers for | 1506 // Fill in stack offsets for stack args, and copy args into registers for |
1360 // those that were register-allocated. Args are pushed right to left, so | 1507 // those that were register-allocated. Args are pushed right to left, so |
1361 // Arg[0] is closest to the stack/frame pointer. | 1508 // Arg[0] is closest to the stack/frame pointer. |
1362 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 1509 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
1363 size_t BasicFrameOffset = PreservedRegsSizeBytes; | 1510 size_t BasicFrameOffset = PreservedRegsSizeBytes; |
1364 if (!UsesFramePointer) | 1511 if (!UsesFramePointer) |
1365 BasicFrameOffset += SpillAreaSizeBytes; | 1512 BasicFrameOffset += SpillAreaSizeBytes; |
1366 | 1513 |
1514 materializeGotAddr(Node); | |
1515 | |
1367 const VarList &Args = Func->getArgs(); | 1516 const VarList &Args = Func->getArgs(); |
1368 size_t InArgsSizeBytes = 0; | 1517 size_t InArgsSizeBytes = 0; |
1369 TargetARM32::CallingConv CC; | 1518 TargetARM32::CallingConv CC; |
1370 for (Variable *Arg : Args) { | 1519 for (Variable *Arg : Args) { |
1371 int32_t DummyReg; | 1520 int32_t DummyReg; |
1372 const Type Ty = Arg->getType(); | 1521 const Type Ty = Arg->getType(); |
1373 | 1522 |
1374 // Skip arguments passed in registers. | 1523 // Skip arguments passed in registers. |
1375 if (isScalarIntegerType(Ty)) { | 1524 if (isScalarIntegerType(Ty)) { |
1376 if (CC.argInGPR(Ty, &DummyReg)) { | 1525 if (CC.argInGPR(Ty, &DummyReg)) { |
(...skipping 2103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3480 } | 3629 } |
3481 | 3630 |
3482 // Allow ConstantRelocatable to be left alone as a direct call, but force | 3631 // Allow ConstantRelocatable to be left alone as a direct call, but force |
3483 // other constants like ConstantInteger32 to be in a register and make it an | 3632 // other constants like ConstantInteger32 to be in a register and make it an |
3484 // indirect call. | 3633 // indirect call. |
3485 if (!llvm::isa<ConstantRelocatable>(CallTarget)) { | 3634 if (!llvm::isa<ConstantRelocatable>(CallTarget)) { |
3486 CallTarget = legalize(CallTarget, Legal_Reg); | 3635 CallTarget = legalize(CallTarget, Legal_Reg); |
3487 } | 3636 } |
3488 | 3637 |
3489 // Copy arguments to be passed in registers to the appropriate registers. | 3638 // Copy arguments to be passed in registers to the appropriate registers. |
3639 CfgVector<Variable *> RegArgs; | |
3490 for (auto &FPArg : FPArgs) { | 3640 for (auto &FPArg : FPArgs) { |
3491 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second); | 3641 RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second)); |
3492 Context.insert<InstFakeUse>(Reg); | |
3493 } | 3642 } |
3494 for (auto &GPRArg : GPRArgs) { | 3643 for (auto &GPRArg : GPRArgs) { |
3495 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); | 3644 RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second)); |
3496 // Generate a FakeUse of register arguments so that they do not get dead | 3645 } |
3497 // code eliminated as a result of the FakeKill of scratch registers after | 3646 |
3498 // the call. | 3647 // Generate a FakeUse of register arguments so that they do not get dead code |
3499 Context.insert<InstFakeUse>(Reg); | 3648 // eliminated as a result of the FakeKill of scratch registers after the call. |
3649 // These fake-uses need to be placed here to prevent argument registers | |
3650 // from being used during the legalizeToReg() calls above. | |
3651 for (auto *RegArg : RegArgs) { | |
3652 Context.insert<InstFakeUse>(RegArg); | |
3500 } | 3653 } |
3501 | 3654 |
3502 InstARM32Call *NewCall = | 3655 InstARM32Call *NewCall = |
3503 Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget); | 3656 Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget); |
3504 | 3657 |
3505 if (ReturnRegHi) | 3658 if (ReturnRegHi) |
3506 Context.insert<InstFakeDef>(ReturnRegHi); | 3659 Context.insert<InstFakeDef>(ReturnRegHi); |
3507 | 3660 |
3508 // Insert a register-kill pseudo instruction. | 3661 // Insert a register-kill pseudo instruction. |
3509 Context.insert<InstFakeKill>(NewCall); | 3662 Context.insert<InstFakeKill>(NewCall); |
(...skipping 1324 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4834 case Intrinsics::Memcpy: { | 4987 case Intrinsics::Memcpy: { |
4835 llvm::report_fatal_error("memcpy should have been prelowered."); | 4988 llvm::report_fatal_error("memcpy should have been prelowered."); |
4836 } | 4989 } |
4837 case Intrinsics::Memmove: { | 4990 case Intrinsics::Memmove: { |
4838 llvm::report_fatal_error("memmove should have been prelowered."); | 4991 llvm::report_fatal_error("memmove should have been prelowered."); |
4839 } | 4992 } |
4840 case Intrinsics::Memset: { | 4993 case Intrinsics::Memset: { |
4841 llvm::report_fatal_error("memmove should have been prelowered."); | 4994 llvm::report_fatal_error("memset should have been prelowered."); |
4842 } | 4995 } |
4843 case Intrinsics::NaClReadTP: { | 4996 case Intrinsics::NaClReadTP: { |
4844 if (!NeedSandboxing) { | 4997 if (SandboxingType != ST_NaCl) { |
4845 llvm::report_fatal_error("nacl-read-tp should have been prelowered."); | 4998 llvm::report_fatal_error("nacl-read-tp should have been prelowered."); |
4846 } | 4999 } |
4847 Variable *TP = legalizeToReg(OperandARM32Mem::create( | 5000 Variable *TP = legalizeToReg(OperandARM32Mem::create( |
4848 Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9), | 5001 Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9), |
4849 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)))); | 5002 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)))); |
4850 _mov(Dest, TP); | 5003 _mov(Dest, TP); |
4851 return; | 5004 return; |
4852 } | 5005 } |
4853 case Intrinsics::Setjmp: { | 5006 case Intrinsics::Setjmp: { |
4854 llvm::report_fatal_error("setjmp should have been prelowered."); | 5007 llvm::report_fatal_error("setjmp should have been prelowered."); |
(...skipping 639 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5494 _cmp(Src0Var, Value); | 5647 _cmp(Src0Var, Value); |
5495 _br(Instr->getLabel(I), CondARM32::EQ); | 5648 _br(Instr->getLabel(I), CondARM32::EQ); |
5496 } | 5649 } |
5497 _br(Instr->getLabelDefault()); | 5650 _br(Instr->getLabelDefault()); |
5498 } | 5651 } |
5499 | 5652 |
5500 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) { | 5653 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) { |
5501 _trap(); | 5654 _trap(); |
5502 } | 5655 } |
5503 | 5656 |
5657 namespace { | |
5658 // Returns whether Opnd needs the GOT address. Currently, ConstantRelocatables | |
5659 // and fp constants will need access to the GOT address. | |
5660 bool operandNeedsGot(const Operand *Opnd) { | |
5661 return llvm::isa<ConstantRelocatable>(Opnd) || | |
5662 llvm::isa<ConstantFloat>(Opnd) || llvm::isa<ConstantDouble>(Opnd); | |
Jim Stichnoth
2016/02/10 06:36:00
Can/should this be more precise? I thought that +
John
2016/02/10 15:41:14
You spotted my laziness. Done.
| |
5663 } | |
5664 | |
5665 // Returns whether Phi needs the GOT address (which it does if any of its | |
5666 // operands needs the GOT address). | |
5667 bool phiNeedsGot(const InstPhi *Phi) { | |
5668 if (Phi->isDeleted()) { | |
5669 return false; | |
5670 } | |
5671 | |
5672 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) { | |
5673 if (operandNeedsGot(Phi->getSrc(I))) { | |
5674 return true; | |
5675 } | |
5676 } | |
5677 | |
5678 return false; | |
5679 } | |
5680 | |
5681 // Returns whether **any** phi in Node needs the GOT address. | |
5682 bool anyPhiInNodeNeedsGot(CfgNode *Node) { | |
5683 for (auto &Inst : Node->getPhis()) { | |
5684 if (phiNeedsGot(llvm::cast<InstPhi>(&Inst))) { | |
5685 return true; | |
5686 } | |
5687 } | |
5688 return false; | |
5689 } | |
5690 | |
5691 } // end of anonymous namespace | |
5692 | |
5504 void TargetARM32::prelowerPhis() { | 5693 void TargetARM32::prelowerPhis() { |
5505 PhiLowering::prelowerPhis32Bit<TargetARM32>(this, Context.getNode(), Func); | 5694 CfgNode *Node = Context.getNode(); |
5695 | |
5696 if (SandboxingType == ST_Nonsfi) { | |
5697 assert(GotPtr != nullptr); | |
5698 if (anyPhiInNodeNeedsGot(Node)) { | |
5699 // If any phi instruction needs the GOT address, we place a | |
5700 // fake-use GotPtr | |
5701 // in Node to prevent the GotPtr's initialization from being dead code | |
5702 // eliminated. | |
5703 Node->getInsts().push_front(InstFakeUse::create(Func, GotPtr)); | |
5704 } | |
5705 } | |
5706 | |
5707 PhiLowering::prelowerPhis32Bit(this, Node, Func); | |
5506 } | 5708 } |
5507 | 5709 |
5508 Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) { | 5710 Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
5509 Variable *Reg = makeReg(Ty, RegNum); | 5711 Variable *Reg = makeReg(Ty, RegNum); |
5510 Context.insert<InstFakeDef>(Reg); | 5712 Context.insert<InstFakeDef>(Reg); |
5511 UnimplementedError(Func->getContext()->getFlags()); | 5713 UnimplementedError(Func->getContext()->getFlags()); |
5512 return Reg; | 5714 return Reg; |
5513 } | 5715 } |
5514 | 5716 |
5515 // Helper for legalize() to emit the right code to lower an operand to a | 5717 // Helper for legalize() to emit the right code to lower an operand to a |
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5657 uint32_t UpperBits = (Value >> 16) & 0xFFFF; | 5859 uint32_t UpperBits = (Value >> 16) & 0xFFFF; |
5658 _movw(Reg, | 5860 _movw(Reg, |
5659 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); | 5861 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); |
5660 if (UpperBits != 0) { | 5862 if (UpperBits != 0) { |
5661 _movt(Reg, Ctx->getConstantInt32(UpperBits)); | 5863 _movt(Reg, Ctx->getConstantInt32(UpperBits)); |
5662 } | 5864 } |
5663 return Reg; | 5865 return Reg; |
5664 } | 5866 } |
5665 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { | 5867 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { |
5666 Variable *Reg = makeReg(Ty, RegNum); | 5868 Variable *Reg = makeReg(Ty, RegNum); |
5667 _movw(Reg, C); | 5869 if (SandboxingType != ST_Nonsfi) { |
5668 _movt(Reg, C); | 5870 _movw(Reg, C); |
5871 _movt(Reg, C); | |
5872 } else { | |
5873 auto *GotAddr = legalizeToReg(GotPtr); | |
5874 const IceString CGotoffName = createGotoffRelocation(C); | |
5875 loadNamedConstantRelocatablePIC( | |
5876 CGotoffName, Reg, [this, Reg](Variable *PC) { | |
5877 _ldr(Reg, OperandARM32Mem::create(Func, IceType_i32, PC, Reg)); | |
5878 }); | |
5879 _add(Reg, GotAddr, Reg); | |
5880 } | |
5669 return Reg; | 5881 return Reg; |
5670 } else { | 5882 } else { |
5671 assert(isScalarFloatingType(Ty)); | 5883 assert(isScalarFloatingType(Ty)); |
5672 uint32_t ModifiedImm; | 5884 uint32_t ModifiedImm; |
5673 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { | 5885 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { |
5674 Variable *T = makeReg(Ty, RegNum); | 5886 Variable *T = makeReg(Ty, RegNum); |
5675 _mov(T, | 5887 _mov(T, |
5676 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); | 5888 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); |
5677 return T; | 5889 return T; |
5678 } | 5890 } |
5679 | 5891 |
5680 if (Ty == IceType_f64 && isFloatingPointZero(From)) { | 5892 if (Ty == IceType_f64 && isFloatingPointZero(From)) { |
5681 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32 | 5893 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32 |
5682 // because ARM does not have a veor instruction with S registers. | 5894 // because ARM does not have a veor instruction with S registers. |
5683 Variable *T = makeReg(IceType_f64, RegNum); | 5895 Variable *T = makeReg(IceType_f64, RegNum); |
5684 Context.insert<InstFakeDef>(T); | 5896 Context.insert<InstFakeDef>(T); |
5685 _veor(T, T, T); | 5897 _veor(T, T, T); |
5686 return T; | 5898 return T; |
5687 } | 5899 } |
5688 | 5900 |
5689 // Load floats/doubles from literal pool. | 5901 // Load floats/doubles from literal pool. |
5690 std::string Buffer; | 5902 std::string Buffer; |
5691 llvm::raw_string_ostream StrBuf(Buffer); | 5903 llvm::raw_string_ostream StrBuf(Buffer); |
5692 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); | 5904 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); |
5693 llvm::cast<Constant>(From)->setShouldBePooled(true); | 5905 llvm::cast<Constant>(From)->setShouldBePooled(true); |
5694 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); | 5906 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); |
5695 Variable *BaseReg = makeReg(getPointerType()); | 5907 Variable *BaseReg = nullptr; |
5696 _movw(BaseReg, Offset); | 5908 if (SandboxingType == ST_Nonsfi) { |
5697 _movt(BaseReg, Offset); | 5909 // vldr does not support the [base, index] addressing mode, so we need |
5910 // to legalize Offset to a register. Otherwise, we could simply | |
5911 // vldr dest, [got, reg(Offset)] | |
5912 BaseReg = legalizeToReg(Offset); | |
5913 } else { | |
5914 BaseReg = makeReg(getPointerType()); | |
5915 _movw(BaseReg, Offset); | |
5916 _movt(BaseReg, Offset); | |
5917 } | |
5698 From = formMemoryOperand(BaseReg, Ty); | 5918 From = formMemoryOperand(BaseReg, Ty); |
5699 return copyToReg(From, RegNum); | 5919 return copyToReg(From, RegNum); |
5700 } | 5920 } |
5701 } | 5921 } |
5702 | 5922 |
5703 if (auto *Var = llvm::dyn_cast<Variable>(From)) { | 5923 if (auto *Var = llvm::dyn_cast<Variable>(From)) { |
5704 if (Var->isRematerializable()) { | 5924 if (Var->isRematerializable()) { |
5705 if (Allowed & Legal_Rematerializable) { | 5925 if (Allowed & Legal_Rematerializable) { |
5706 return From; | 5926 return From; |
5707 } | 5927 } |
(...skipping 863 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6571 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 6791 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
6572 } | 6792 } |
6573 | 6793 |
6574 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; | 6794 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; |
6575 llvm::SmallBitVector | 6795 llvm::SmallBitVector |
6576 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; | 6796 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; |
6577 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; | 6797 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; |
6578 | 6798 |
6579 } // end of namespace ARM32 | 6799 } // end of namespace ARM32 |
6580 } // end of namespace Ice | 6800 } // end of namespace Ice |
OLD | NEW |