OLD | NEW |
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
45 return ::Ice::ARM32::TargetDataARM32::create(Ctx); | 45 return ::Ice::ARM32::TargetDataARM32::create(Ctx); |
46 } | 46 } |
47 | 47 |
// Factory for the ARM32 header lowering, which emits file-level preamble
// (e.g. target attributes) ahead of any function/data emission.
std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::ARM32::TargetHeaderARM32::create(Ctx);
}
52 | 52 |
// One-time, single-threaded initialization for the ARM32 target. Also
// registers the GOT as an external symbol when Non-SFI mode is enabled.
void staticInit(::Ice::GlobalContext *Ctx) {
  ::Ice::ARM32::TargetARM32::staticInit(Ctx);
  if (Ctx->getFlags().getUseNonsfi()) {
    // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing
    // globals. The GOT is an external symbol (i.e., it is not defined in the
    // pexe) so we need to register it as such so that ELF emission won't barf
    // on an "unknown" symbol. The GOT is added to the External symbols list
    // here because staticInit() is invoked in a single-thread context.
    Ctx->getConstantExternSym(::Ice::GlobalOffsetTable);
  }
}
56 | 64 |
57 } // end of namespace ARM32 | 65 } // end of namespace ARM32 |
58 | 66 |
59 namespace Ice { | 67 namespace Ice { |
60 namespace ARM32 { | 68 namespace ARM32 { |
61 | 69 |
62 namespace { | 70 namespace { |
63 | 71 |
64 /// SizeOf is used to obtain the size of an initializer list as a constexpr | 72 /// SizeOf is used to obtain the size of an initializer list as a constexpr |
(...skipping 641 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
706 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_memset); | 714 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_memset); |
707 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper, | 715 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper, |
708 NoTailCall, IsTargetHelperCall); | 716 NoTailCall, IsTargetHelperCall); |
709 Call->addArg(IntrinsicCall->getArg(0)); | 717 Call->addArg(IntrinsicCall->getArg(0)); |
710 Call->addArg(ValExt); | 718 Call->addArg(ValExt); |
711 Call->addArg(IntrinsicCall->getArg(2)); | 719 Call->addArg(IntrinsicCall->getArg(2)); |
712 Instr->setDeleted(); | 720 Instr->setDeleted(); |
713 return; | 721 return; |
714 } | 722 } |
715 case Intrinsics::NaClReadTP: { | 723 case Intrinsics::NaClReadTP: { |
716 if (NeedSandboxing) { | 724 if (SandboxingType == ST_NaCl) { |
717 return; | 725 return; |
718 } | 726 } |
719 static constexpr SizeT MaxArgs = 0; | 727 static constexpr SizeT MaxArgs = 0; |
720 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_read_tp); | 728 const char *ReadTP = |
| 729 SandboxingType == ST_Nonsfi ? "__aeabi_read_tp" : H_call_read_tp; |
| 730 Operand *TargetHelper = Ctx->getConstantExternSym(ReadTP); |
721 Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall, | 731 Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall, |
722 IsTargetHelperCall); | 732 IsTargetHelperCall); |
723 Instr->setDeleted(); | 733 Instr->setDeleted(); |
724 return; | 734 return; |
725 } | 735 } |
726 case Intrinsics::Setjmp: { | 736 case Intrinsics::Setjmp: { |
727 static constexpr SizeT MaxArgs = 1; | 737 static constexpr SizeT MaxArgs = 1; |
728 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_setjmp); | 738 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_setjmp); |
729 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, | 739 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, |
730 NoTailCall, IsTargetHelperCall); | 740 NoTailCall, IsTargetHelperCall); |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
779 PostIncrLoweringContext PostIncrement(Context); | 789 PostIncrLoweringContext PostIncrement(Context); |
780 Inst *CurInstr = Context.getCur(); | 790 Inst *CurInstr = Context.getCur(); |
781 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { | 791 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { |
782 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); | 792 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); |
783 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); | 793 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); |
784 } | 794 } |
785 } | 795 } |
786 } | 796 } |
787 } | 797 } |
788 | 798 |
| 799 void TargetARM32::createGotPtr() { |
| 800 if (SandboxingType != ST_Nonsfi) { |
| 801 return; |
| 802 } |
| 803 GotPtr = Func->makeVariable(IceType_i32); |
| 804 } |
| 805 |
| 806 void TargetARM32::insertGotPtrInitPlaceholder() { |
| 807 if (SandboxingType != ST_Nonsfi) { |
| 808 return; |
| 809 } |
| 810 assert(GotPtr != nullptr); |
| 811 // We add the two placeholder instructions here. The first fakedefs T, an |
| 812 // infinite-weight temporary, while the second fakedefs the GotPtr "using" T. |
| 813 // This is needed because the GotPtr initialization, if needed, will require |
| 814 // a register: |
| 815 // |
| 816 // movw reg, _GLOBAL_OFFSET_TABLE_ - 16 - . |
| 817 // movt reg, _GLOBAL_OFFSET_TABLE_ - 12 - . |
| 818 // add reg, pc, reg |
| 819 // mov GotPtr, reg |
| 820 // |
| 821 // If GotPtr is not used, then both these pseudo-instructions are dce'd. |
| 822 Variable *T = makeReg(IceType_i32); |
| 823 Context.insert<InstFakeDef>(T); |
| 824 Context.insert<InstFakeDef>(GotPtr, T); |
| 825 } |
| 826 |
| 827 IceString TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) { |
| 828 const IceString &CRName = CR->getName(); |
| 829 const IceString CRGotoffName = |
| 830 "GOTOFF$" + Func->getFunctionName() + "$" + CRName; |
| 831 if (KnownGotoffs.count(CRGotoffName) == 0) { |
| 832 auto *Global = VariableDeclaration::create(Ctx); |
| 833 Global->setIsConstant(true); |
| 834 Global->setName(CRName); |
| 835 Global->setSuppressMangling(); |
| 836 |
| 837 auto *Gotoff = VariableDeclaration::create(Ctx); |
| 838 constexpr auto GotFixup = R_ARM_GOTOFF32; |
| 839 Gotoff->setIsConstant(true); |
| 840 Gotoff->setName(CRGotoffName); |
| 841 Gotoff->setSuppressMangling(); |
| 842 Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create( |
| 843 Global, {RelocOffset::create(Ctx, 0)}, GotFixup)); |
| 844 Func->addGlobal(Gotoff); |
| 845 KnownGotoffs.emplace(CRGotoffName); |
| 846 } |
| 847 return CRGotoffName; |
| 848 } |
| 849 |
| 850 void TargetARM32::materializeGotAddr(CfgNode *Node) { |
| 851 if (SandboxingType != ST_Nonsfi) { |
| 852 return; |
| 853 } |
| 854 |
| 855 // At first, we try to find the |
| 856 // GotPtr = def T |
| 857 // pseudo-instruction that we placed for defining the got ptr. That |
| 858 // instruction is not just a place-holder for defining the GotPtr (thus |
| 859 // keeping liveness consistent), but it is also located at a point where it is |
| 860 // safe to materialize the got addr -- i.e., before loading parameters to |
| 861 // registers, but after moving register parameters from their home location. |
| 862 InstFakeDef *DefGotPtr = nullptr; |
| 863 for (auto &Inst : Node->getInsts()) { |
| 864 auto *FakeDef = llvm::dyn_cast<InstFakeDef>(&Inst); |
| 865 if (FakeDef != nullptr && FakeDef->getDest() == GotPtr) { |
| 866 DefGotPtr = FakeDef; |
| 867 break; |
| 868 } |
| 869 } |
| 870 |
| 871 if (DefGotPtr == nullptr || DefGotPtr->isDeleted()) { |
| 872 return; |
| 873 } |
| 874 |
| 875 // The got addr needs to be materialized at the same point where DefGotPtr |
| 876 // lives. |
| 877 Context.setInsertPoint(DefGotPtr); |
| 878 assert(DefGotPtr->getSrcSize() == 1); |
| 879 auto *T = llvm::cast<Variable>(DefGotPtr->getSrc(0)); |
| 880 loadNamedConstantRelocatablePIC(GlobalOffsetTable, T, |
| 881 [this, T](Variable *PC) { _add(T, PC, T); }); |
| 882 _mov(GotPtr, T); |
| 883 DefGotPtr->setDeleted(); |
| 884 } |
| 885 |
// Emits the PIC sequence that loads the address of the symbol Name into
// Register:
//
//   MovwLabel:  movw Register, #:lower16:(Name - AddPcLabel - 8)
//   MovtLabel:  movt Register, #:upper16:(Name - AddPcLabel - 8)
//   AddPcLabel: <first instruction emitted by Finish(PC)>
//
// Finish is expected to consume PC (typically adding it to Register) so the
// pc-relative offsets above resolve to the symbol's absolute address.
// Non-SFI only.
void TargetARM32::loadNamedConstantRelocatablePIC(
    const IceString &Name, Variable *Register,
    std::function<void(Variable *PC)> Finish, bool SuppressMangling) {
  assert(SandboxingType == ST_Nonsfi);
  // We makeReg() here instead of getPhysicalRegister() because the latter ends
  // up creating multi-blocks temporaries that liveness fails to validate.
  auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);

  // AddPcReloc is subtracted in the relocation expression (the "- ." part).
  auto *AddPcReloc = RelocOffset::create(Ctx);
  AddPcReloc->setSubtract(true);
  auto *AddPcLabel = InstARM32Label::create(Func, this);
  AddPcLabel->setRelocOffset(AddPcReloc);

  const IceString EmitText = Name;
  // We need a -8 in the relocation expression to account for the pc's value
  // read by the first instruction emitted in Finish(PC).
  auto *Imm8 = RelocOffset::create(Ctx, -8);

  auto *MovwReloc = RelocOffset::create(Ctx);
  auto *MovwLabel = InstARM32Label::create(Func, this);
  MovwLabel->setRelocOffset(MovwReloc);

  auto *MovtReloc = RelocOffset::create(Ctx);
  auto *MovtLabel = InstARM32Label::create(Func, this);
  MovtLabel->setRelocOffset(MovtReloc);

  // The EmitString for these constant relocatables have hardcoded offsets
  // attached to them. This could be dangerous if, e.g., we ever implemented
  // instruction scheduling but llvm-mc currently does not support
  //
  // movw reg, #:lower16:(Symbol - Label - Number)
  // movt reg, #:upper16:(Symbol - Label - Number)
  //
  // relocations.
  auto *CRLower = Ctx->getConstantSym({MovwReloc, AddPcReloc, Imm8}, Name,
                                      EmitText + " -16", SuppressMangling);
  auto *CRUpper = Ctx->getConstantSym({MovtReloc, AddPcReloc, Imm8}, Name,
                                      EmitText + " -12", SuppressMangling);

  Context.insert(MovwLabel);
  _movw(Register, CRLower);
  Context.insert(MovtLabel);
  _movt(Register, CRUpper);
  // PC = fake-def to keep liveness consistent.
  Context.insert<InstFakeDef>(PC);
  Context.insert(AddPcLabel);
  Finish(PC);
}
| 934 |
789 void TargetARM32::translateO2() { | 935 void TargetARM32::translateO2() { |
790 TimerMarker T(TimerStack::TT_O2, Func); | 936 TimerMarker T(TimerStack::TT_O2, Func); |
791 | 937 |
792 // TODO(stichnot): share passes with X86? | 938 // TODO(stichnot): share passes with other targets? |
793 // https://code.google.com/p/nativeclient/issues/detail?id=4094 | 939 // https://code.google.com/p/nativeclient/issues/detail?id=4094 |
| 940 if (SandboxingType == ST_Nonsfi) { |
| 941 createGotPtr(); |
| 942 } |
794 genTargetHelperCalls(); | 943 genTargetHelperCalls(); |
795 findMaxStackOutArgsSize(); | 944 findMaxStackOutArgsSize(); |
796 | 945 |
797 // Do not merge Alloca instructions, and lay out the stack. | 946 // Do not merge Alloca instructions, and lay out the stack. |
798 static constexpr bool SortAndCombineAllocas = true; | 947 static constexpr bool SortAndCombineAllocas = true; |
799 Func->processAllocas(SortAndCombineAllocas); | 948 Func->processAllocas(SortAndCombineAllocas); |
800 Func->dump("After Alloca processing"); | 949 Func->dump("After Alloca processing"); |
801 | 950 |
802 if (!Ctx->getFlags().getPhiEdgeSplit()) { | 951 if (!Ctx->getFlags().getPhiEdgeSplit()) { |
803 // Lower Phi instructions. | 952 // Lower Phi instructions. |
(...skipping 26 matching lines...) Expand all Loading... |
830 return; | 979 return; |
831 | 980 |
832 // TODO: It should be sufficient to use the fastest liveness calculation, | 981 // TODO: It should be sufficient to use the fastest liveness calculation, |
833 // i.e. livenessLightweight(). However, for some reason that slows down the | 982 // i.e. livenessLightweight(). However, for some reason that slows down the |
834 // rest of the translation. Investigate. | 983 // rest of the translation. Investigate. |
835 Func->liveness(Liveness_Basic); | 984 Func->liveness(Liveness_Basic); |
836 if (Func->hasError()) | 985 if (Func->hasError()) |
837 return; | 986 return; |
838 Func->dump("After ARM32 address mode opt"); | 987 Func->dump("After ARM32 address mode opt"); |
839 | 988 |
| 989 if (SandboxingType == ST_Nonsfi) { |
| 990 insertGotPtrInitPlaceholder(); |
| 991 } |
840 Func->genCode(); | 992 Func->genCode(); |
841 if (Func->hasError()) | 993 if (Func->hasError()) |
842 return; | 994 return; |
843 Func->dump("After ARM32 codegen"); | 995 Func->dump("After ARM32 codegen"); |
844 | 996 |
845 // Register allocation. This requires instruction renumbering and full | 997 // Register allocation. This requires instruction renumbering and full |
846 // liveness analysis. | 998 // liveness analysis. |
847 Func->renumberInstructions(); | 999 Func->renumberInstructions(); |
848 if (Func->hasError()) | 1000 if (Func->hasError()) |
849 return; | 1001 return; |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
894 | 1046 |
895 // Nop insertion | 1047 // Nop insertion |
896 if (Ctx->getFlags().shouldDoNopInsertion()) { | 1048 if (Ctx->getFlags().shouldDoNopInsertion()) { |
897 Func->doNopInsertion(); | 1049 Func->doNopInsertion(); |
898 } | 1050 } |
899 } | 1051 } |
900 | 1052 |
901 void TargetARM32::translateOm1() { | 1053 void TargetARM32::translateOm1() { |
902 TimerMarker T(TimerStack::TT_Om1, Func); | 1054 TimerMarker T(TimerStack::TT_Om1, Func); |
903 | 1055 |
904 // TODO: share passes with X86? | 1056 // TODO(stichnot): share passes with other targets? |
| 1057 if (SandboxingType == ST_Nonsfi) { |
| 1058 createGotPtr(); |
| 1059 } |
| 1060 |
905 genTargetHelperCalls(); | 1061 genTargetHelperCalls(); |
906 findMaxStackOutArgsSize(); | 1062 findMaxStackOutArgsSize(); |
907 | 1063 |
908 // Do not merge Alloca instructions, and lay out the stack. | 1064 // Do not merge Alloca instructions, and lay out the stack. |
909 static constexpr bool DontSortAndCombineAllocas = false; | 1065 static constexpr bool DontSortAndCombineAllocas = false; |
910 Func->processAllocas(DontSortAndCombineAllocas); | 1066 Func->processAllocas(DontSortAndCombineAllocas); |
911 Func->dump("After Alloca processing"); | 1067 Func->dump("After Alloca processing"); |
912 | 1068 |
913 Func->placePhiLoads(); | 1069 Func->placePhiLoads(); |
914 if (Func->hasError()) | 1070 if (Func->hasError()) |
915 return; | 1071 return; |
916 Func->placePhiStores(); | 1072 Func->placePhiStores(); |
917 if (Func->hasError()) | 1073 if (Func->hasError()) |
918 return; | 1074 return; |
919 Func->deletePhis(); | 1075 Func->deletePhis(); |
920 if (Func->hasError()) | 1076 if (Func->hasError()) |
921 return; | 1077 return; |
922 Func->dump("After Phi lowering"); | 1078 Func->dump("After Phi lowering"); |
923 | 1079 |
924 Func->doArgLowering(); | 1080 Func->doArgLowering(); |
925 | 1081 |
| 1082 if (SandboxingType == ST_Nonsfi) { |
| 1083 insertGotPtrInitPlaceholder(); |
| 1084 } |
926 Func->genCode(); | 1085 Func->genCode(); |
927 if (Func->hasError()) | 1086 if (Func->hasError()) |
928 return; | 1087 return; |
929 Func->dump("After initial ARM32 codegen"); | 1088 Func->dump("After initial ARM32 codegen"); |
930 | 1089 |
931 regAlloc(RAK_InfOnly); | 1090 regAlloc(RAK_InfOnly); |
932 if (Func->hasError()) | 1091 if (Func->hasError()) |
933 return; | 1092 return; |
934 | 1093 |
935 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); | 1094 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); |
(...skipping 475 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1411 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 1570 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
1412 | 1571 |
1413 // Fill in stack offsets for stack args, and copy args into registers for | 1572 // Fill in stack offsets for stack args, and copy args into registers for |
1414 // those that were register-allocated. Args are pushed right to left, so | 1573 // those that were register-allocated. Args are pushed right to left, so |
1415 // Arg[0] is closest to the stack/frame pointer. | 1574 // Arg[0] is closest to the stack/frame pointer. |
1416 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 1575 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
1417 size_t BasicFrameOffset = PreservedRegsSizeBytes; | 1576 size_t BasicFrameOffset = PreservedRegsSizeBytes; |
1418 if (!UsesFramePointer) | 1577 if (!UsesFramePointer) |
1419 BasicFrameOffset += SpillAreaSizeBytes; | 1578 BasicFrameOffset += SpillAreaSizeBytes; |
1420 | 1579 |
| 1580 materializeGotAddr(Node); |
| 1581 |
1421 const VarList &Args = Func->getArgs(); | 1582 const VarList &Args = Func->getArgs(); |
1422 size_t InArgsSizeBytes = 0; | 1583 size_t InArgsSizeBytes = 0; |
1423 TargetARM32::CallingConv CC; | 1584 TargetARM32::CallingConv CC; |
1424 for (Variable *Arg : Args) { | 1585 for (Variable *Arg : Args) { |
1425 RegNumT DummyReg; | 1586 RegNumT DummyReg; |
1426 const Type Ty = Arg->getType(); | 1587 const Type Ty = Arg->getType(); |
1427 | 1588 |
1428 // Skip arguments passed in registers. | 1589 // Skip arguments passed in registers. |
1429 if (isScalarIntegerType(Ty)) { | 1590 if (isScalarIntegerType(Ty)) { |
1430 if (CC.argInGPR(Ty, &DummyReg)) { | 1591 if (CC.argInGPR(Ty, &DummyReg)) { |
(...skipping 2102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3533 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); | 3694 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); |
3534 break; | 3695 break; |
3535 } | 3696 } |
3536 } | 3697 } |
3537 | 3698 |
3538 // Note: To allow far calls, even for constant relocatables, we force | 3699 // Note: To allow far calls, even for constant relocatables, we force |
3539 // the call target into a register, and make an indirect call. | 3700 // the call target into a register, and make an indirect call. |
3540 CallTarget = legalizeToReg(CallTarget); | 3701 CallTarget = legalizeToReg(CallTarget); |
3541 | 3702 |
3542 // Copy arguments to be passed in registers to the appropriate registers. | 3703 // Copy arguments to be passed in registers to the appropriate registers. |
| 3704 CfgVector<Variable *> RegArgs; |
3543 for (auto &FPArg : FPArgs) { | 3705 for (auto &FPArg : FPArgs) { |
3544 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second); | 3706 RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second)); |
3545 Context.insert<InstFakeUse>(Reg); | |
3546 } | 3707 } |
3547 for (auto &GPRArg : GPRArgs) { | 3708 for (auto &GPRArg : GPRArgs) { |
3548 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); | 3709 RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second)); |
3549 // Generate a FakeUse of register arguments so that they do not get dead | 3710 } |
3550 // code eliminated as a result of the FakeKill of scratch registers after | 3711 |
3551 // the call. | 3712 // Generate a FakeUse of register arguments so that they do not get dead code |
3552 Context.insert<InstFakeUse>(Reg); | 3713 // eliminated as a result of the FakeKill of scratch registers after the call. |
| 3714 // These fake-uses need to be placed here to avoid argument registers from |
| 3715 // being used during the legalizeToReg() calls above. |
| 3716 for (auto *RegArg : RegArgs) { |
| 3717 Context.insert<InstFakeUse>(RegArg); |
3553 } | 3718 } |
3554 | 3719 |
3555 InstARM32Call *NewCall = | 3720 InstARM32Call *NewCall = |
3556 Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget); | 3721 Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget); |
3557 | 3722 |
3558 if (ReturnRegHi) | 3723 if (ReturnRegHi) |
3559 Context.insert<InstFakeDef>(ReturnRegHi); | 3724 Context.insert<InstFakeDef>(ReturnRegHi); |
3560 | 3725 |
3561 // Insert a register-kill pseudo instruction. | 3726 // Insert a register-kill pseudo instruction. |
3562 Context.insert<InstFakeKill>(NewCall); | 3727 Context.insert<InstFakeKill>(NewCall); |
(...skipping 384 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3947 CondARM32::Cond CC0; | 4112 CondARM32::Cond CC0; |
3948 CondARM32::Cond CC1; | 4113 CondARM32::Cond CC1; |
3949 } TableFcmp[] = { | 4114 } TableFcmp[] = { |
3950 #define X(val, CC0, CC1) \ | 4115 #define X(val, CC0, CC1) \ |
3951 { CondARM32::CC0, CondARM32::CC1 } \ | 4116 { CondARM32::CC0, CondARM32::CC1 } \ |
3952 , | 4117 , |
3953 FCMPARM32_TABLE | 4118 FCMPARM32_TABLE |
3954 #undef X | 4119 #undef X |
3955 }; | 4120 }; |
3956 | 4121 |
3957 bool isFloatingPointZero(Operand *Src) { | 4122 bool isFloatingPointZero(const Operand *Src) { |
3958 if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) { | 4123 if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) { |
3959 return Utils::isPositiveZero(F32->getValue()); | 4124 return Utils::isPositiveZero(F32->getValue()); |
3960 } | 4125 } |
3961 | 4126 |
3962 if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) { | 4127 if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) { |
3963 return Utils::isPositiveZero(F64->getValue()); | 4128 return Utils::isPositiveZero(F64->getValue()); |
3964 } | 4129 } |
3965 | 4130 |
3966 return false; | 4131 return false; |
3967 } | 4132 } |
3968 } // end of anonymous namespace | 4133 } // end of anonymous namespace |
3969 | 4134 |
3970 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { | 4135 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { |
3971 InstFcmp::FCond Condition = Instr->getCondition(); | 4136 InstFcmp::FCond Condition = Instr->getCondition(); |
3972 switch (Condition) { | 4137 switch (Condition) { |
(...skipping 912 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4885 case Intrinsics::Memcpy: { | 5050 case Intrinsics::Memcpy: { |
4886 llvm::report_fatal_error("memcpy should have been prelowered."); | 5051 llvm::report_fatal_error("memcpy should have been prelowered."); |
4887 } | 5052 } |
4888 case Intrinsics::Memmove: { | 5053 case Intrinsics::Memmove: { |
4889 llvm::report_fatal_error("memmove should have been prelowered."); | 5054 llvm::report_fatal_error("memmove should have been prelowered."); |
4890 } | 5055 } |
4891 case Intrinsics::Memset: { | 5056 case Intrinsics::Memset: { |
4892 llvm::report_fatal_error("memmove should have been prelowered."); | 5057 llvm::report_fatal_error("memmove should have been prelowered."); |
4893 } | 5058 } |
4894 case Intrinsics::NaClReadTP: { | 5059 case Intrinsics::NaClReadTP: { |
4895 if (!NeedSandboxing) { | 5060 if (SandboxingType != ST_NaCl) { |
4896 llvm::report_fatal_error("nacl-read-tp should have been prelowered."); | 5061 llvm::report_fatal_error("nacl-read-tp should have been prelowered."); |
4897 } | 5062 } |
4898 Variable *TP = legalizeToReg(OperandARM32Mem::create( | 5063 Variable *TP = legalizeToReg(OperandARM32Mem::create( |
4899 Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9), | 5064 Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9), |
4900 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)))); | 5065 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)))); |
4901 _mov(Dest, TP); | 5066 _mov(Dest, TP); |
4902 return; | 5067 return; |
4903 } | 5068 } |
4904 case Intrinsics::Setjmp: { | 5069 case Intrinsics::Setjmp: { |
4905 llvm::report_fatal_error("setjmp should have been prelowered."); | 5070 llvm::report_fatal_error("setjmp should have been prelowered."); |
(...skipping 639 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5545 _cmp(Src0Var, Value); | 5710 _cmp(Src0Var, Value); |
5546 _br(Instr->getLabel(I), CondARM32::EQ); | 5711 _br(Instr->getLabel(I), CondARM32::EQ); |
5547 } | 5712 } |
5548 _br(Instr->getLabelDefault()); | 5713 _br(Instr->getLabelDefault()); |
5549 } | 5714 } |
5550 | 5715 |
// Lowers an unreachable instruction to a trap.
void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) {
  _trap();
}
5554 | 5719 |
| 5720 namespace { |
| 5721 // Returns whether Opnd needs the GOT address. Currently, ConstantRelocatables, |
| 5722 // and fp constants will need access to the GOT address. |
| 5723 bool operandNeedsGot(const Operand *Opnd) { |
| 5724 if (llvm::isa<ConstantRelocatable>(Opnd)) { |
| 5725 return true; |
| 5726 } |
| 5727 |
| 5728 if (llvm::isa<ConstantFloat>(Opnd)) { |
| 5729 uint32_t _; |
| 5730 return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_); |
| 5731 } |
| 5732 |
| 5733 const auto *F64 = llvm::dyn_cast<ConstantDouble>(Opnd); |
| 5734 if (F64 != nullptr) { |
| 5735 uint32_t _; |
| 5736 return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_) && |
| 5737 !isFloatingPointZero(F64); |
| 5738 } |
| 5739 |
| 5740 return false; |
| 5741 } |
| 5742 |
| 5743 // Returns whether Phi needs the GOT address (which it does if any of its |
| 5744 // operands needs the GOT address.) |
| 5745 bool phiNeedsGot(const InstPhi *Phi) { |
| 5746 if (Phi->isDeleted()) { |
| 5747 return false; |
| 5748 } |
| 5749 |
| 5750 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) { |
| 5751 if (operandNeedsGot(Phi->getSrc(I))) { |
| 5752 return true; |
| 5753 } |
| 5754 } |
| 5755 |
| 5756 return false; |
| 5757 } |
| 5758 |
| 5759 // Returns whether **any** phi in Node needs the GOT address. |
| 5760 bool anyPhiInNodeNeedsGot(CfgNode *Node) { |
| 5761 for (auto &Inst : Node->getPhis()) { |
| 5762 if (phiNeedsGot(llvm::cast<InstPhi>(&Inst))) { |
| 5763 return true; |
| 5764 } |
| 5765 } |
| 5766 return false; |
| 5767 } |
| 5768 |
| 5769 } // end of anonymous namespace |
| 5770 |
5555 void TargetARM32::prelowerPhis() { | 5771 void TargetARM32::prelowerPhis() { |
5556 PhiLowering::prelowerPhis32Bit<TargetARM32>(this, Context.getNode(), Func); | 5772 CfgNode *Node = Context.getNode(); |
| 5773 |
| 5774 if (SandboxingType == ST_Nonsfi) { |
| 5775 assert(GotPtr != nullptr); |
| 5776 if (anyPhiInNodeNeedsGot(Node)) { |
| 5777 // If any phi instruction needs the GOT address, we place a |
| 5778 // fake-use GotPtr |
| 5779 // in Node to prevent the GotPtr's initialization from being dead code |
| 5780 // eliminated. |
| 5781 Node->getInsts().push_front(InstFakeUse::create(Func, GotPtr)); |
| 5782 } |
| 5783 } |
| 5784 |
| 5785 PhiLowering::prelowerPhis32Bit(this, Node, Func); |
5557 } | 5786 } |
5558 | 5787 |
// Materializes an all-zeros vector in a register (RegNum, or any register if
// unspecified) by xor'ing the register with itself.
Variable *TargetARM32::makeVectorOfZeros(Type Ty, RegNumT RegNum) {
  Variable *Reg = makeReg(Ty, RegNum);
  // Fake-def Reg first so liveness does not see the self-veor below as a
  // read of an undefined register.
  Context.insert<InstFakeDef>(Reg);
  assert(isVectorType(Ty));
  _veor(Reg, Reg, Reg);
  return Reg;
}
5566 | 5795 |
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5709 uint32_t UpperBits = (Value >> 16) & 0xFFFF; | 5938 uint32_t UpperBits = (Value >> 16) & 0xFFFF; |
5710 _movw(Reg, | 5939 _movw(Reg, |
5711 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); | 5940 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); |
5712 if (UpperBits != 0) { | 5941 if (UpperBits != 0) { |
5713 _movt(Reg, Ctx->getConstantInt32(UpperBits)); | 5942 _movt(Reg, Ctx->getConstantInt32(UpperBits)); |
5714 } | 5943 } |
5715 return Reg; | 5944 return Reg; |
5716 } | 5945 } |
5717 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { | 5946 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { |
5718 Variable *Reg = makeReg(Ty, RegNum); | 5947 Variable *Reg = makeReg(Ty, RegNum); |
5719 _movw(Reg, C); | 5948 if (SandboxingType != ST_Nonsfi) { |
5720 _movt(Reg, C); | 5949 _movw(Reg, C); |
| 5950 _movt(Reg, C); |
| 5951 } else { |
| 5952 auto *GotAddr = legalizeToReg(GotPtr); |
| 5953 const IceString CGotoffName = createGotoffRelocation(C); |
| 5954 loadNamedConstantRelocatablePIC( |
| 5955 CGotoffName, Reg, [this, Reg](Variable *PC) { |
| 5956 _ldr(Reg, OperandARM32Mem::create(Func, IceType_i32, PC, Reg)); |
| 5957 }); |
| 5958 _add(Reg, GotAddr, Reg); |
| 5959 } |
5721 return Reg; | 5960 return Reg; |
5722 } else { | 5961 } else { |
5723 assert(isScalarFloatingType(Ty)); | 5962 assert(isScalarFloatingType(Ty)); |
5724 uint32_t ModifiedImm; | 5963 uint32_t ModifiedImm; |
5725 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { | 5964 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { |
5726 Variable *T = makeReg(Ty, RegNum); | 5965 Variable *T = makeReg(Ty, RegNum); |
5727 _mov(T, | 5966 _mov(T, |
5728 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); | 5967 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); |
5729 return T; | 5968 return T; |
5730 } | 5969 } |
5731 | 5970 |
5732 if (Ty == IceType_f64 && isFloatingPointZero(From)) { | 5971 if (Ty == IceType_f64 && isFloatingPointZero(From)) { |
5733 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32 | 5972 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32 |
5734 // because ARM does not have a veor instruction with S registers. | 5973 // because ARM does not have a veor instruction with S registers. |
5735 Variable *T = makeReg(IceType_f64, RegNum); | 5974 Variable *T = makeReg(IceType_f64, RegNum); |
5736 Context.insert<InstFakeDef>(T); | 5975 Context.insert<InstFakeDef>(T); |
5737 _veor(T, T, T); | 5976 _veor(T, T, T); |
5738 return T; | 5977 return T; |
5739 } | 5978 } |
5740 | 5979 |
5741 // Load floats/doubles from literal pool. | 5980 // Load floats/doubles from literal pool. |
5742 std::string Buffer; | 5981 std::string Buffer; |
5743 llvm::raw_string_ostream StrBuf(Buffer); | 5982 llvm::raw_string_ostream StrBuf(Buffer); |
5744 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); | 5983 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); |
5745 llvm::cast<Constant>(From)->setShouldBePooled(true); | 5984 llvm::cast<Constant>(From)->setShouldBePooled(true); |
5746 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); | 5985 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); |
5747 Variable *BaseReg = makeReg(getPointerType()); | 5986 Variable *BaseReg = nullptr; |
5748 _movw(BaseReg, Offset); | 5987 if (SandboxingType == ST_Nonsfi) { |
5749 _movt(BaseReg, Offset); | 5988 // vldr does not support the [base, index] addressing mode, so we need |
| 5989 // to legalize Offset to a register. Otherwise, we could simply |
| 5990 // vldr dest, [got, reg(Offset)] |
| 5991 BaseReg = legalizeToReg(Offset); |
| 5992 } else { |
| 5993 BaseReg = makeReg(getPointerType()); |
| 5994 _movw(BaseReg, Offset); |
| 5995 _movt(BaseReg, Offset); |
| 5996 } |
5750 From = formMemoryOperand(BaseReg, Ty); | 5997 From = formMemoryOperand(BaseReg, Ty); |
5751 return copyToReg(From, RegNum); | 5998 return copyToReg(From, RegNum); |
5752 } | 5999 } |
5753 } | 6000 } |
5754 | 6001 |
5755 if (auto *Var = llvm::dyn_cast<Variable>(From)) { | 6002 if (auto *Var = llvm::dyn_cast<Variable>(From)) { |
5756 if (Var->isRematerializable()) { | 6003 if (Var->isRematerializable()) { |
5757 if (Allowed & Legal_Rematerializable) { | 6004 if (Allowed & Legal_Rematerializable) { |
5758 return From; | 6005 return From; |
5759 } | 6006 } |
(...skipping 863 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6623 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 6870 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
6624 } | 6871 } |
6625 | 6872 |
6626 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; | 6873 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; |
6627 llvm::SmallBitVector | 6874 llvm::SmallBitVector |
6628 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; | 6875 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; |
6629 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; | 6876 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; |
6630 | 6877 |
6631 } // end of namespace ARM32 | 6878 } // end of namespace ARM32 |
6632 } // end of namespace Ice | 6879 } // end of namespace Ice |
OLD | NEW |