Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(30)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1665263003: Subzero. ARM32. Nonsfi. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | tests_lit/llvm2ice_tests/nonsfi.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
45 return ::Ice::ARM32::TargetDataARM32::create(Ctx); 45 return ::Ice::ARM32::TargetDataARM32::create(Ctx);
46 } 46 }
47 47
48 std::unique_ptr<::Ice::TargetHeaderLowering> 48 std::unique_ptr<::Ice::TargetHeaderLowering>
49 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) { 49 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
50 return ::Ice::ARM32::TargetHeaderARM32::create(Ctx); 50 return ::Ice::ARM32::TargetHeaderARM32::create(Ctx);
51 } 51 }
52 52
53 void staticInit(::Ice::GlobalContext *Ctx) { 53 void staticInit(::Ice::GlobalContext *Ctx) {
54 ::Ice::ARM32::TargetARM32::staticInit(Ctx); 54 ::Ice::ARM32::TargetARM32::staticInit(Ctx);
55 if (Ctx->getFlags().getUseNonsfi()) {
56 // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing
57 // globals. The GOT is an external symbol (i.e., it is not defined in the
58 // pexe) so we need to register it as such so that ELF emission won't barf
59 // on an "unknown" symbol. The GOT is added to the External symbols list
60 // here because staticInit() is invoked in a single-thread context.
61 Ctx->getConstantExternSym(::Ice::GlobalOffsetTable);
62 }
55 } 63 }
56 64
57 } // end of namespace ARM32 65 } // end of namespace ARM32
58 66
59 namespace Ice { 67 namespace Ice {
60 namespace ARM32 { 68 namespace ARM32 {
61 69
62 namespace { 70 namespace {
63 71
64 /// SizeOf is used to obtain the size of an initializer list as a constexpr 72 /// SizeOf is used to obtain the size of an initializer list as a constexpr
(...skipping 641 matching lines...) Expand 10 before | Expand all | Expand 10 after
706 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_memset); 714 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_memset);
707 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper, 715 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
708 NoTailCall, IsTargetHelperCall); 716 NoTailCall, IsTargetHelperCall);
709 Call->addArg(IntrinsicCall->getArg(0)); 717 Call->addArg(IntrinsicCall->getArg(0));
710 Call->addArg(ValExt); 718 Call->addArg(ValExt);
711 Call->addArg(IntrinsicCall->getArg(2)); 719 Call->addArg(IntrinsicCall->getArg(2));
712 Instr->setDeleted(); 720 Instr->setDeleted();
713 return; 721 return;
714 } 722 }
715 case Intrinsics::NaClReadTP: { 723 case Intrinsics::NaClReadTP: {
716 if (NeedSandboxing) { 724 if (SandboxingType == ST_NaCl) {
717 return; 725 return;
718 } 726 }
719 static constexpr SizeT MaxArgs = 0; 727 static constexpr SizeT MaxArgs = 0;
720 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_read_tp); 728 const char *ReadTP =
729 SandboxingType == ST_Nonsfi ? "__aeabi_read_tp" : H_call_read_tp;
730 Operand *TargetHelper = Ctx->getConstantExternSym(ReadTP);
721 Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall, 731 Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
722 IsTargetHelperCall); 732 IsTargetHelperCall);
723 Instr->setDeleted(); 733 Instr->setDeleted();
724 return; 734 return;
725 } 735 }
726 case Intrinsics::Setjmp: { 736 case Intrinsics::Setjmp: {
727 static constexpr SizeT MaxArgs = 1; 737 static constexpr SizeT MaxArgs = 1;
728 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_setjmp); 738 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_setjmp);
729 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, 739 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
730 NoTailCall, IsTargetHelperCall); 740 NoTailCall, IsTargetHelperCall);
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
779 PostIncrLoweringContext PostIncrement(Context); 789 PostIncrLoweringContext PostIncrement(Context);
780 Inst *CurInstr = Context.getCur(); 790 Inst *CurInstr = Context.getCur();
781 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { 791 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
782 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); 792 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
783 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); 793 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
784 } 794 }
785 } 795 }
786 } 796 }
787 } 797 }
788 798
// Creates the virtual register (an i32 Variable) that will hold the GOT
// address for the current function. Only meaningful for Nonsfi sandboxing;
// for any other SandboxingType this is a no-op and GotPtr stays unset.
799 void TargetARM32::createGotPtr() {
800 if (SandboxingType != ST_Nonsfi) {
801 return;
802 }
803 GotPtr = Func->makeVariable(IceType_i32);
804 }
805
// Inserts the placeholder (fake-def) instructions that later get replaced by
// the real GOT-address materialization (see materializeGotAddr()). Requires
// createGotPtr() to have run first (asserted below). No-op unless Nonsfi.
806 void TargetARM32::insertGotPtrInitPlaceholder() {
807 if (SandboxingType != ST_Nonsfi) {
808 return;
809 }
810 assert(GotPtr != nullptr);
811 // We add the two placeholder instructions here. The first fakedefs T, an
812 // infinite-weight temporary, while the second fakedefs the GotPtr "using" T.
813 // This is needed because the GotPtr initialization, if needed, will require
814 // a register:
815 //
816 // movw reg, _GLOBAL_OFFSET_TABLE_ - 16 - .
817 // movt reg, _GLOBAL_OFFSET_TABLE_ - 12 - .
818 // add reg, pc, reg
819 // mov GotPtr, reg
820 //
821 // If GotPtr is not used, then both these pseudo-instructions are dce'd.
822 Variable *T = makeReg(IceType_i32);
823 Context.insert<InstFakeDef>(T);
824 Context.insert<InstFakeDef>(GotPtr, T);
825 }
826
// Creates (once per function) a constant global named
//   GOTOFF$<function-name>$<CR-name>
// whose initializer is CR's symbol with an R_ARM_GOTOFF32 fixup, so that CR
// can be addressed GOT-relative in nonsfi mode. Returns the GOTOFF symbol
// name; KnownGotoffs memoizes which ones were already emitted so repeated
// references to the same CR in the same function reuse one global.
827 IceString TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) {
828 const IceString &CRName = CR->getName();
829 const IceString CRGotoffName =
830 "GOTOFF$" + Func->getFunctionName() + "$" + CRName;
831 if (KnownGotoffs.count(CRGotoffName) == 0) {
// A declaration for the referenced symbol itself, used as the target of the
// GOTOFF initializer below.
832 auto *Global = VariableDeclaration::create(Ctx);
833 Global->setIsConstant(true);
834 Global->setName(CRName);
835 Global->setSuppressMangling();
836
837 auto *Gotoff = VariableDeclaration::create(Ctx);
838 constexpr auto GotFixup = R_ARM_GOTOFF32;
839 Gotoff->setIsConstant(true);
840 Gotoff->setName(CRGotoffName);
841 Gotoff->setSuppressMangling();
// Initializer: the address of Global at offset 0, relocated GOT-relative.
842 Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create(
843 Global, {RelocOffset::create(Ctx, 0)}, GotFixup));
844 Func->addGlobal(Gotoff);
845 KnownGotoffs.emplace(CRGotoffName);
846 }
847 return CRGotoffName;
848 }
849
// Replaces the placeholder "GotPtr = fakedef T" inserted by
// insertGotPtrInitPlaceholder() with the real pc-relative GOT-address
// computation. If the placeholder is absent or was dead-code eliminated
// (i.e., GotPtr ended up unused), nothing is emitted. Nonsfi-only.
850 void TargetARM32::materializeGotAddr(CfgNode *Node) {
851 if (SandboxingType != ST_Nonsfi) {
852 return;
853 }
854
855 // At first, we try to find the
856 // GotPtr = def T
857 // pseudo-instruction that we placed for defining the got ptr. That
858 // instruction is not just a place-holder for defining the GotPtr (thus
859 // keeping liveness consistent), but it is also located at a point where it is
860 // safe to materialize the got addr -- i.e., before loading parameters to
861 // registers, but after moving register parameters from their home location.
862 InstFakeDef *DefGotPtr = nullptr;
863 for (auto &Inst : Node->getInsts()) {
864 auto *FakeDef = llvm::dyn_cast<InstFakeDef>(&Inst);
865 if (FakeDef != nullptr && FakeDef->getDest() == GotPtr) {
866 DefGotPtr = FakeDef;
867 break;
868 }
869 }
870
// A deleted placeholder means the GotPtr was never actually needed.
871 if (DefGotPtr == nullptr || DefGotPtr->isDeleted()) {
872 return;
873 }
874
875 // The got addr needs to be materialized at the same point where DefGotPtr
876 // lives.
877 Context.setInsertPoint(DefGotPtr);
878 assert(DefGotPtr->getSrcSize() == 1);
879 auto *T = llvm::cast<Variable>(DefGotPtr->getSrc(0));
// Emit movw/movt of _GLOBAL_OFFSET_TABLE_ into T, then "add T, pc, T" via
// the Finish callback, and finally copy the result into GotPtr.
880 loadNamedConstantRelocatablePIC(GlobalOffsetTable, T,
881 [this, T](Variable *PC) { _add(T, PC, T); });
882 _mov(GotPtr, T);
883 DefGotPtr->setDeleted();
884 }
885
// Emits the position-independent sequence
//   MovwLabel:  movw Register, #:lower16:(Name - AddPcLabel - 8)
//   MovtLabel:  movt Register, #:upper16:(Name - AddPcLabel - 8)
//   AddPcLabel: <Finish(pc)>   ; typically "add Register, pc, Register"
// i.e., it loads the pc-relative offset of symbol Name into Register and then
// invokes Finish with the pc register so the caller can complete the address
// computation (or a load through it). Nonsfi-only (asserted).
// NOTE(review): the -8 constant assumes ARM-mode pc-read semantics (pc reads
// as the instruction address + 8) for the first instruction Finish emits.
886 void TargetARM32::loadNamedConstantRelocatablePIC(
887 const IceString &Name, Variable *Register,
888 std::function<void(Variable *PC)> Finish, bool SuppressMangling) {
889 assert(SandboxingType == ST_Nonsfi);
890 // We makeReg() here instead of getPhysicalRegister() because the latter ends
891 // up creating multi-blocks temporaries that liveness fails to validate.
892 auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);
893
// AddPcReloc is subtracted in the relocation expressions below: the offsets
// are computed relative to AddPcLabel, where the pc is consumed.
894 auto *AddPcReloc = RelocOffset::create(Ctx);
895 AddPcReloc->setSubtract(true);
896 auto *AddPcLabel = InstARM32Label::create(Func, this);
897 AddPcLabel->setRelocOffset(AddPcReloc);
898
899 const IceString EmitText = Name;
900 // We need a -8 in the relocation expression to account for the pc's value
901 // read by the first instruction emitted in Finish(PC).
902 auto *Imm8 = RelocOffset::create(Ctx, -8);
903
904 auto *MovwReloc = RelocOffset::create(Ctx);
905 auto *MovwLabel = InstARM32Label::create(Func, this);
906 MovwLabel->setRelocOffset(MovwReloc);
907
908 auto *MovtReloc = RelocOffset::create(Ctx);
909 auto *MovtLabel = InstARM32Label::create(Func, this);
910 MovtLabel->setRelocOffset(MovtReloc);
911
912 // The EmitString for these constant relocatables have hardcoded offsets
913 // attached to them. This could be dangerous if, e.g., we ever implemented
914 // instruction scheduling but llvm-mc currently does not support
915 //
916 // movw reg, #:lower16:(Symbol - Label - Number)
917 // movt reg, #:upper16:(Symbol - Label - Number)
918 //
919 // relocations.
// " -16"/" -12" encode (movw/movt address - AddPcLabel - 8) textually; they
// must match the fixed 3-instruction distance of the emitted sequence.
920 auto *CRLower = Ctx->getConstantSym({MovwReloc, AddPcReloc, Imm8}, Name,
921 EmitText + " -16", SuppressMangling);
922 auto *CRUpper = Ctx->getConstantSym({MovtReloc, AddPcReloc, Imm8}, Name,
923 EmitText + " -12", SuppressMangling);
924
925 Context.insert(MovwLabel);
926 _movw(Register, CRLower);
927 Context.insert(MovtLabel);
928 _movt(Register, CRUpper);
929 // PC = fake-def to keep liveness consistent.
930 Context.insert<InstFakeDef>(PC);
931 Context.insert(AddPcLabel);
932 Finish(PC);
933 }
934
789 void TargetARM32::translateO2() { 935 void TargetARM32::translateO2() {
790 TimerMarker T(TimerStack::TT_O2, Func); 936 TimerMarker T(TimerStack::TT_O2, Func);
791 937
792 // TODO(stichnot): share passes with X86? 938 // TODO(stichnot): share passes with other targets?
793 // https://code.google.com/p/nativeclient/issues/detail?id=4094 939 // https://code.google.com/p/nativeclient/issues/detail?id=4094
940 if (SandboxingType == ST_Nonsfi) {
941 createGotPtr();
942 }
794 genTargetHelperCalls(); 943 genTargetHelperCalls();
795 findMaxStackOutArgsSize(); 944 findMaxStackOutArgsSize();
796 945
797 // Do not merge Alloca instructions, and lay out the stack. 946 // Do not merge Alloca instructions, and lay out the stack.
798 static constexpr bool SortAndCombineAllocas = true; 947 static constexpr bool SortAndCombineAllocas = true;
799 Func->processAllocas(SortAndCombineAllocas); 948 Func->processAllocas(SortAndCombineAllocas);
800 Func->dump("After Alloca processing"); 949 Func->dump("After Alloca processing");
801 950
802 if (!Ctx->getFlags().getPhiEdgeSplit()) { 951 if (!Ctx->getFlags().getPhiEdgeSplit()) {
803 // Lower Phi instructions. 952 // Lower Phi instructions.
(...skipping 26 matching lines...) Expand all
830 return; 979 return;
831 980
832 // TODO: It should be sufficient to use the fastest liveness calculation, 981 // TODO: It should be sufficient to use the fastest liveness calculation,
833 // i.e. livenessLightweight(). However, for some reason that slows down the 982 // i.e. livenessLightweight(). However, for some reason that slows down the
834 // rest of the translation. Investigate. 983 // rest of the translation. Investigate.
835 Func->liveness(Liveness_Basic); 984 Func->liveness(Liveness_Basic);
836 if (Func->hasError()) 985 if (Func->hasError())
837 return; 986 return;
838 Func->dump("After ARM32 address mode opt"); 987 Func->dump("After ARM32 address mode opt");
839 988
989 if (SandboxingType == ST_Nonsfi) {
990 insertGotPtrInitPlaceholder();
991 }
840 Func->genCode(); 992 Func->genCode();
841 if (Func->hasError()) 993 if (Func->hasError())
842 return; 994 return;
843 Func->dump("After ARM32 codegen"); 995 Func->dump("After ARM32 codegen");
844 996
845 // Register allocation. This requires instruction renumbering and full 997 // Register allocation. This requires instruction renumbering and full
846 // liveness analysis. 998 // liveness analysis.
847 Func->renumberInstructions(); 999 Func->renumberInstructions();
848 if (Func->hasError()) 1000 if (Func->hasError())
849 return; 1001 return;
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
894 1046
895 // Nop insertion 1047 // Nop insertion
896 if (Ctx->getFlags().shouldDoNopInsertion()) { 1048 if (Ctx->getFlags().shouldDoNopInsertion()) {
897 Func->doNopInsertion(); 1049 Func->doNopInsertion();
898 } 1050 }
899 } 1051 }
900 1052
901 void TargetARM32::translateOm1() { 1053 void TargetARM32::translateOm1() {
902 TimerMarker T(TimerStack::TT_Om1, Func); 1054 TimerMarker T(TimerStack::TT_Om1, Func);
903 1055
904 // TODO: share passes with X86? 1056 // TODO(stichnot): share passes with other targets?
1057 if (SandboxingType == ST_Nonsfi) {
1058 createGotPtr();
1059 }
1060
905 genTargetHelperCalls(); 1061 genTargetHelperCalls();
906 findMaxStackOutArgsSize(); 1062 findMaxStackOutArgsSize();
907 1063
908 // Do not merge Alloca instructions, and lay out the stack. 1064 // Do not merge Alloca instructions, and lay out the stack.
909 static constexpr bool DontSortAndCombineAllocas = false; 1065 static constexpr bool DontSortAndCombineAllocas = false;
910 Func->processAllocas(DontSortAndCombineAllocas); 1066 Func->processAllocas(DontSortAndCombineAllocas);
911 Func->dump("After Alloca processing"); 1067 Func->dump("After Alloca processing");
912 1068
913 Func->placePhiLoads(); 1069 Func->placePhiLoads();
914 if (Func->hasError()) 1070 if (Func->hasError())
915 return; 1071 return;
916 Func->placePhiStores(); 1072 Func->placePhiStores();
917 if (Func->hasError()) 1073 if (Func->hasError())
918 return; 1074 return;
919 Func->deletePhis(); 1075 Func->deletePhis();
920 if (Func->hasError()) 1076 if (Func->hasError())
921 return; 1077 return;
922 Func->dump("After Phi lowering"); 1078 Func->dump("After Phi lowering");
923 1079
924 Func->doArgLowering(); 1080 Func->doArgLowering();
925 1081
1082 if (SandboxingType == ST_Nonsfi) {
1083 insertGotPtrInitPlaceholder();
1084 }
926 Func->genCode(); 1085 Func->genCode();
927 if (Func->hasError()) 1086 if (Func->hasError())
928 return; 1087 return;
929 Func->dump("After initial ARM32 codegen"); 1088 Func->dump("After initial ARM32 codegen");
930 1089
931 regAlloc(RAK_InfOnly); 1090 regAlloc(RAK_InfOnly);
932 if (Func->hasError()) 1091 if (Func->hasError())
933 return; 1092 return;
934 1093
935 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); 1094 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
(...skipping 475 matching lines...) Expand 10 before | Expand all | Expand 10 after
1411 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 1570 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
1412 1571
1413 // Fill in stack offsets for stack args, and copy args into registers for 1572 // Fill in stack offsets for stack args, and copy args into registers for
1414 // those that were register-allocated. Args are pushed right to left, so 1573 // those that were register-allocated. Args are pushed right to left, so
1415 // Arg[0] is closest to the stack/frame pointer. 1574 // Arg[0] is closest to the stack/frame pointer.
1416 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 1575 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
1417 size_t BasicFrameOffset = PreservedRegsSizeBytes; 1576 size_t BasicFrameOffset = PreservedRegsSizeBytes;
1418 if (!UsesFramePointer) 1577 if (!UsesFramePointer)
1419 BasicFrameOffset += SpillAreaSizeBytes; 1578 BasicFrameOffset += SpillAreaSizeBytes;
1420 1579
1580 materializeGotAddr(Node);
1581
1421 const VarList &Args = Func->getArgs(); 1582 const VarList &Args = Func->getArgs();
1422 size_t InArgsSizeBytes = 0; 1583 size_t InArgsSizeBytes = 0;
1423 TargetARM32::CallingConv CC; 1584 TargetARM32::CallingConv CC;
1424 for (Variable *Arg : Args) { 1585 for (Variable *Arg : Args) {
1425 RegNumT DummyReg; 1586 RegNumT DummyReg;
1426 const Type Ty = Arg->getType(); 1587 const Type Ty = Arg->getType();
1427 1588
1428 // Skip arguments passed in registers. 1589 // Skip arguments passed in registers.
1429 if (isScalarIntegerType(Ty)) { 1590 if (isScalarIntegerType(Ty)) {
1430 if (CC.argInGPR(Ty, &DummyReg)) { 1591 if (CC.argInGPR(Ty, &DummyReg)) {
(...skipping 2102 matching lines...) Expand 10 before | Expand all | Expand 10 after
3533 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); 3694 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
3534 break; 3695 break;
3535 } 3696 }
3536 } 3697 }
3537 3698
3538 // Note: To allow far calls, even for constant relocatables, we force 3699 // Note: To allow far calls, even for constant relocatables, we force
3539 // the call target into a register, and make an indirect call. 3700 // the call target into a register, and make an indirect call.
3540 CallTarget = legalizeToReg(CallTarget); 3701 CallTarget = legalizeToReg(CallTarget);
3541 3702
3542 // Copy arguments to be passed in registers to the appropriate registers. 3703 // Copy arguments to be passed in registers to the appropriate registers.
3704 CfgVector<Variable *> RegArgs;
3543 for (auto &FPArg : FPArgs) { 3705 for (auto &FPArg : FPArgs) {
3544 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second); 3706 RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3545 Context.insert<InstFakeUse>(Reg);
3546 } 3707 }
3547 for (auto &GPRArg : GPRArgs) { 3708 for (auto &GPRArg : GPRArgs) {
3548 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); 3709 RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3549 // Generate a FakeUse of register arguments so that they do not get dead 3710 }
3550 // code eliminated as a result of the FakeKill of scratch registers after 3711
3551 // the call. 3712 // Generate a FakeUse of register arguments so that they do not get dead code
3552 Context.insert<InstFakeUse>(Reg); 3713 // eliminated as a result of the FakeKill of scratch registers after the call.
3714 // These fake-uses need to be placed here to avoid argument registers from
3715 // being used during the legalizeToReg() calls above.
3716 for (auto *RegArg : RegArgs) {
3717 Context.insert<InstFakeUse>(RegArg);
3553 } 3718 }
3554 3719
3555 InstARM32Call *NewCall = 3720 InstARM32Call *NewCall =
3556 Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget); 3721 Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget);
3557 3722
3558 if (ReturnRegHi) 3723 if (ReturnRegHi)
3559 Context.insert<InstFakeDef>(ReturnRegHi); 3724 Context.insert<InstFakeDef>(ReturnRegHi);
3560 3725
3561 // Insert a register-kill pseudo instruction. 3726 // Insert a register-kill pseudo instruction.
3562 Context.insert<InstFakeKill>(NewCall); 3727 Context.insert<InstFakeKill>(NewCall);
(...skipping 384 matching lines...) Expand 10 before | Expand all | Expand 10 after
3947 CondARM32::Cond CC0; 4112 CondARM32::Cond CC0;
3948 CondARM32::Cond CC1; 4113 CondARM32::Cond CC1;
3949 } TableFcmp[] = { 4114 } TableFcmp[] = {
3950 #define X(val, CC0, CC1) \ 4115 #define X(val, CC0, CC1) \
3951 { CondARM32::CC0, CondARM32::CC1 } \ 4116 { CondARM32::CC0, CondARM32::CC1 } \
3952 , 4117 ,
3953 FCMPARM32_TABLE 4118 FCMPARM32_TABLE
3954 #undef X 4119 #undef X
3955 }; 4120 };
3956 4121
3957 bool isFloatingPointZero(Operand *Src) { 4122 bool isFloatingPointZero(const Operand *Src) {
3958 if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) { 4123 if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) {
3959 return Utils::isPositiveZero(F32->getValue()); 4124 return Utils::isPositiveZero(F32->getValue());
3960 } 4125 }
3961 4126
3962 if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) { 4127 if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) {
3963 return Utils::isPositiveZero(F64->getValue()); 4128 return Utils::isPositiveZero(F64->getValue());
3964 } 4129 }
3965 4130
3966 return false; 4131 return false;
3967 } 4132 }
3968 } // end of anonymous namespace 4133 } // end of anonymous namespace
3969 4134
3970 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { 4135 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
3971 InstFcmp::FCond Condition = Instr->getCondition(); 4136 InstFcmp::FCond Condition = Instr->getCondition();
3972 switch (Condition) { 4137 switch (Condition) {
(...skipping 912 matching lines...) Expand 10 before | Expand all | Expand 10 after
4885 case Intrinsics::Memcpy: { 5050 case Intrinsics::Memcpy: {
4886 llvm::report_fatal_error("memcpy should have been prelowered."); 5051 llvm::report_fatal_error("memcpy should have been prelowered.");
4887 } 5052 }
4888 case Intrinsics::Memmove: { 5053 case Intrinsics::Memmove: {
4889 llvm::report_fatal_error("memmove should have been prelowered."); 5054 llvm::report_fatal_error("memmove should have been prelowered.");
4890 } 5055 }
4891 case Intrinsics::Memset: { 5056 case Intrinsics::Memset: {
4892 llvm::report_fatal_error("memmove should have been prelowered."); 5057 llvm::report_fatal_error("memmove should have been prelowered.");
4893 } 5058 }
4894 case Intrinsics::NaClReadTP: { 5059 case Intrinsics::NaClReadTP: {
4895 if (!NeedSandboxing) { 5060 if (SandboxingType != ST_NaCl) {
4896 llvm::report_fatal_error("nacl-read-tp should have been prelowered."); 5061 llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
4897 } 5062 }
4898 Variable *TP = legalizeToReg(OperandARM32Mem::create( 5063 Variable *TP = legalizeToReg(OperandARM32Mem::create(
4899 Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9), 5064 Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9),
4900 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)))); 5065 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
4901 _mov(Dest, TP); 5066 _mov(Dest, TP);
4902 return; 5067 return;
4903 } 5068 }
4904 case Intrinsics::Setjmp: { 5069 case Intrinsics::Setjmp: {
4905 llvm::report_fatal_error("setjmp should have been prelowered."); 5070 llvm::report_fatal_error("setjmp should have been prelowered.");
(...skipping 639 matching lines...) Expand 10 before | Expand all | Expand 10 after
5545 _cmp(Src0Var, Value); 5710 _cmp(Src0Var, Value);
5546 _br(Instr->getLabel(I), CondARM32::EQ); 5711 _br(Instr->getLabel(I), CondARM32::EQ);
5547 } 5712 }
5548 _br(Instr->getLabelDefault()); 5713 _br(Instr->getLabelDefault());
5549 } 5714 }
5550 5715
5551 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) { 5716 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) {
5552 _trap(); 5717 _trap();
5553 } 5718 }
5554 5719
// Helpers used by prelowerPhis() to decide whether a basic block's phi
// instructions force the GOT address to stay live (nonsfi mode).
5720 namespace {
5721 // Returns whether Opnd needs the GOT address. Currently, ConstantRelocatables,
5722 // and fp constants will need access to the GOT address.
// (An fp constant needs the GOT only when it cannot be encoded as a flexible
// fp immediate; a +0.0 double is additionally exempt because it is
// synthesized with veor instead of a constant-pool load.)
5723 bool operandNeedsGot(const Operand *Opnd) {
5724 if (llvm::isa<ConstantRelocatable>(Opnd)) {
5725 return true;
5726 }
5727
5728 if (llvm::isa<ConstantFloat>(Opnd)) {
// The encoded immediate value itself is not needed here, only encodability.
5729 uint32_t _;
5730 return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_);
5731 }
5732
5733 const auto *F64 = llvm::dyn_cast<ConstantDouble>(Opnd);
5734 if (F64 != nullptr) {
5735 uint32_t _;
5736 return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_) &&
5737 !isFloatingPointZero(F64);
5738 }
5739
5740 return false;
5741 }
5742
5743 // Returns whether Phi needs the GOT address (which it does if any of its
5744 // operands needs the GOT address.)
5745 bool phiNeedsGot(const InstPhi *Phi) {
5746 if (Phi->isDeleted()) {
5747 return false;
5748 }
5749
5750 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
5751 if (operandNeedsGot(Phi->getSrc(I))) {
5752 return true;
5753 }
5754 }
5755
5756 return false;
5757 }
5758
5759 // Returns whether **any** phi in Node needs the GOT address.
5760 bool anyPhiInNodeNeedsGot(CfgNode *Node) {
5761 for (auto &Inst : Node->getPhis()) {
5762 if (phiNeedsGot(llvm::cast<InstPhi>(&Inst))) {
5763 return true;
5764 }
5765 }
5766 return false;
5767 }
5768
5769 } // end of anonymous namespace
5770
5555 void TargetARM32::prelowerPhis() { 5771 void TargetARM32::prelowerPhis() {
5556 PhiLowering::prelowerPhis32Bit<TargetARM32>(this, Context.getNode(), Func); 5772 CfgNode *Node = Context.getNode();
5773
5774 if (SandboxingType == ST_Nonsfi) {
5775 assert(GotPtr != nullptr);
5776 if (anyPhiInNodeNeedsGot(Node)) {
5777 // If any phi instruction needs the GOT address, we place a
5778 // fake-use GotPtr
5779 // in Node to prevent the GotPtr's initialization from being dead code
5780 // eliminated.
5781 Node->getInsts().push_front(InstFakeUse::create(Func, GotPtr));
5782 }
5783 }
5784
5785 PhiLowering::prelowerPhis32Bit(this, Node, Func);
5557 } 5786 }
5558 5787
5559 Variable *TargetARM32::makeVectorOfZeros(Type Ty, RegNumT RegNum) { 5788 Variable *TargetARM32::makeVectorOfZeros(Type Ty, RegNumT RegNum) {
5560 Variable *Reg = makeReg(Ty, RegNum); 5789 Variable *Reg = makeReg(Ty, RegNum);
5561 Context.insert<InstFakeDef>(Reg); 5790 Context.insert<InstFakeDef>(Reg);
5562 assert(isVectorType(Ty)); 5791 assert(isVectorType(Ty));
5563 _veor(Reg, Reg, Reg); 5792 _veor(Reg, Reg, Reg);
5564 return Reg; 5793 return Reg;
5565 } 5794 }
5566 5795
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after
5709 uint32_t UpperBits = (Value >> 16) & 0xFFFF; 5938 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
5710 _movw(Reg, 5939 _movw(Reg,
5711 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); 5940 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
5712 if (UpperBits != 0) { 5941 if (UpperBits != 0) {
5713 _movt(Reg, Ctx->getConstantInt32(UpperBits)); 5942 _movt(Reg, Ctx->getConstantInt32(UpperBits));
5714 } 5943 }
5715 return Reg; 5944 return Reg;
5716 } 5945 }
5717 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { 5946 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
5718 Variable *Reg = makeReg(Ty, RegNum); 5947 Variable *Reg = makeReg(Ty, RegNum);
5719 _movw(Reg, C); 5948 if (SandboxingType != ST_Nonsfi) {
5720 _movt(Reg, C); 5949 _movw(Reg, C);
5950 _movt(Reg, C);
5951 } else {
5952 auto *GotAddr = legalizeToReg(GotPtr);
5953 const IceString CGotoffName = createGotoffRelocation(C);
5954 loadNamedConstantRelocatablePIC(
5955 CGotoffName, Reg, [this, Reg](Variable *PC) {
5956 _ldr(Reg, OperandARM32Mem::create(Func, IceType_i32, PC, Reg));
5957 });
5958 _add(Reg, GotAddr, Reg);
5959 }
5721 return Reg; 5960 return Reg;
5722 } else { 5961 } else {
5723 assert(isScalarFloatingType(Ty)); 5962 assert(isScalarFloatingType(Ty));
5724 uint32_t ModifiedImm; 5963 uint32_t ModifiedImm;
5725 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { 5964 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
5726 Variable *T = makeReg(Ty, RegNum); 5965 Variable *T = makeReg(Ty, RegNum);
5727 _mov(T, 5966 _mov(T,
5728 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); 5967 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
5729 return T; 5968 return T;
5730 } 5969 }
5731 5970
5732 if (Ty == IceType_f64 && isFloatingPointZero(From)) { 5971 if (Ty == IceType_f64 && isFloatingPointZero(From)) {
5733 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32 5972 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
5734 // because ARM does not have a veor instruction with S registers. 5973 // because ARM does not have a veor instruction with S registers.
5735 Variable *T = makeReg(IceType_f64, RegNum); 5974 Variable *T = makeReg(IceType_f64, RegNum);
5736 Context.insert<InstFakeDef>(T); 5975 Context.insert<InstFakeDef>(T);
5737 _veor(T, T, T); 5976 _veor(T, T, T);
5738 return T; 5977 return T;
5739 } 5978 }
5740 5979
5741 // Load floats/doubles from literal pool. 5980 // Load floats/doubles from literal pool.
5742 std::string Buffer; 5981 std::string Buffer;
5743 llvm::raw_string_ostream StrBuf(Buffer); 5982 llvm::raw_string_ostream StrBuf(Buffer);
5744 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); 5983 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx);
5745 llvm::cast<Constant>(From)->setShouldBePooled(true); 5984 llvm::cast<Constant>(From)->setShouldBePooled(true);
5746 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); 5985 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
5747 Variable *BaseReg = makeReg(getPointerType()); 5986 Variable *BaseReg = nullptr;
5748 _movw(BaseReg, Offset); 5987 if (SandboxingType == ST_Nonsfi) {
5749 _movt(BaseReg, Offset); 5988 // vldr does not support the [base, index] addressing mode, so we need
5989 // to legalize Offset to a register. Otherwise, we could simply
5990 // vldr dest, [got, reg(Offset)]
5991 BaseReg = legalizeToReg(Offset);
5992 } else {
5993 BaseReg = makeReg(getPointerType());
5994 _movw(BaseReg, Offset);
5995 _movt(BaseReg, Offset);
5996 }
5750 From = formMemoryOperand(BaseReg, Ty); 5997 From = formMemoryOperand(BaseReg, Ty);
5751 return copyToReg(From, RegNum); 5998 return copyToReg(From, RegNum);
5752 } 5999 }
5753 } 6000 }
5754 6001
5755 if (auto *Var = llvm::dyn_cast<Variable>(From)) { 6002 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
5756 if (Var->isRematerializable()) { 6003 if (Var->isRematerializable()) {
5757 if (Allowed & Legal_Rematerializable) { 6004 if (Allowed & Legal_Rematerializable) {
5758 return From; 6005 return From;
5759 } 6006 }
(...skipping 863 matching lines...) Expand 10 before | Expand all | Expand 10 after
6623 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 6870 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
6624 } 6871 }
6625 6872
6626 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; 6873 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
6627 llvm::SmallBitVector 6874 llvm::SmallBitVector
6628 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; 6875 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
6629 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; 6876 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
6630 6877
6631 } // end of namespace ARM32 6878 } // end of namespace ARM32
6632 } // end of namespace Ice 6879 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | tests_lit/llvm2ice_tests/nonsfi.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698