OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
45 return ::Ice::ARM32::TargetDataARM32::create(Ctx); | 45 return ::Ice::ARM32::TargetDataARM32::create(Ctx); |
46 } | 46 } |
47 | 47 |
// Factory hook returning the ARM32 implementation of TargetHeaderLowering.
std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::ARM32::TargetHeaderARM32::create(Ctx);
}
52 | 52 |
53 void staticInit(::Ice::GlobalContext *Ctx) { | 53 void staticInit(::Ice::GlobalContext *Ctx) { |
54 ::Ice::ARM32::TargetARM32::staticInit(Ctx); | 54 ::Ice::ARM32::TargetARM32::staticInit(Ctx); |
55 if (Ctx->getFlags().getUseNonsfi()) { | |
56 // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing | |
57 // globals. The GOT is an external symbol (i.e., it is not defined in the | |
58 // pexe) so we need to register it as such so that ELF emission won't barf | |
59 // on an "unknown" symbol. The GOT is added to the External symbols list | |
60 // here because staticInit() is invoked in a single-thread context. | |
61 Ctx->getConstantExternSym(::Ice::GlobalOffsetTable); | |
62 } | |
55 } | 63 } |
56 | 64 |
57 } // end of namespace ARM32 | 65 } // end of namespace ARM32 |
58 | 66 |
59 namespace Ice { | 67 namespace Ice { |
60 namespace ARM32 { | 68 namespace ARM32 { |
61 | 69 |
62 namespace { | 70 namespace { |
63 | 71 |
64 /// SizeOf is used to obtain the size of an initializer list as a constexpr | 72 /// SizeOf is used to obtain the size of an initializer list as a constexpr |
(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
271 ; | 279 ; |
272 std::array<uint32_t, NumVec128Args> Vec128ArgInitializer; | 280 std::array<uint32_t, NumVec128Args> Vec128ArgInitializer; |
273 | 281 |
274 IceString getRegClassName(RegClass C) { | 282 IceString getRegClassName(RegClass C) { |
275 auto ClassNum = static_cast<RegARM32::RegClassARM32>(C); | 283 auto ClassNum = static_cast<RegARM32::RegClassARM32>(C); |
276 assert(ClassNum < RegARM32::RCARM32_NUM); | 284 assert(ClassNum < RegARM32::RCARM32_NUM); |
277 switch (ClassNum) { | 285 switch (ClassNum) { |
278 default: | 286 default: |
279 assert(C < RC_Target); | 287 assert(C < RC_Target); |
280 return regClassString(C); | 288 return regClassString(C); |
281 // Add handling of new register classes below. | 289 // Add handling of new register classes below. |
290 case RegARM32::RCARM32_QtoS: | |
291 return "QtoS"; | |
282 } | 292 } |
283 } | 293 } |
284 | 294 |
285 } // end of anonymous namespace | 295 } // end of anonymous namespace |
286 | 296 |
// Constructs the ARM32 lowering for Func. NeedSandboxing records whether
// SandboxingType is ST_NaCl; CPUFeatures is initialized from the global
// compilation flags.
TargetARM32::TargetARM32(Cfg *Func)
    : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl),
      CPUFeatures(Func->getContext()->getFlags()) {}
290 | 300 |
291 void TargetARM32::staticInit(GlobalContext *Ctx) { | 301 void TargetARM32::staticInit(GlobalContext *Ctx) { |
(...skipping 400 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
692 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_memset); | 702 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_memset); |
693 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper, | 703 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper, |
694 NoTailCall, IsTargetHelperCall); | 704 NoTailCall, IsTargetHelperCall); |
695 Call->addArg(IntrinsicCall->getArg(0)); | 705 Call->addArg(IntrinsicCall->getArg(0)); |
696 Call->addArg(ValExt); | 706 Call->addArg(ValExt); |
697 Call->addArg(IntrinsicCall->getArg(2)); | 707 Call->addArg(IntrinsicCall->getArg(2)); |
698 Instr->setDeleted(); | 708 Instr->setDeleted(); |
699 return; | 709 return; |
700 } | 710 } |
701 case Intrinsics::NaClReadTP: { | 711 case Intrinsics::NaClReadTP: { |
702 if (NeedSandboxing) { | 712 if (SandboxingType == ST_NaCl) { |
703 return; | 713 return; |
704 } | 714 } |
705 static constexpr SizeT MaxArgs = 0; | 715 static constexpr SizeT MaxArgs = 0; |
706 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_read_tp); | 716 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_read_tp); |
707 Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall, | 717 Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall, |
708 IsTargetHelperCall); | 718 IsTargetHelperCall); |
709 Instr->setDeleted(); | 719 Instr->setDeleted(); |
710 return; | 720 return; |
711 } | 721 } |
712 case Intrinsics::Setjmp: { | 722 case Intrinsics::Setjmp: { |
(...skipping 22 matching lines...) Expand all Loading... | |
735 PostIncrLoweringContext PostIncrement(Context); | 745 PostIncrLoweringContext PostIncrement(Context); |
736 Inst *CurInstr = Context.getCur(); | 746 Inst *CurInstr = Context.getCur(); |
737 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { | 747 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { |
738 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); | 748 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); |
739 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); | 749 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); |
740 } | 750 } |
741 } | 751 } |
742 } | 752 } |
743 } | 753 } |
744 | 754 |
755 void TargetARM32::createGotPtr() { | |
756 if (SandboxingType != ST_Nonsfi) { | |
757 return; | |
758 } | |
759 GotPtr = Func->makeVariable(IceType_i32); | |
760 } | |
761 | |
// Inserts the placeholder pseudo-instructions that reserve a spot for the
// GotPtr initialization (nonsfi only).
void TargetARM32::insertGotPtrInitPlaceholder() {
  if (SandboxingType != ST_Nonsfi) {
    return;
  }
  assert(GotPtr != nullptr);
  // We add the two placeholder instructions here. The first fakedefs T, an
  // infinite-weight temporary, while the second fakedefs the GotPtr "using" T.
  // This is needed because the GotPtr initialization, if needed, will require
  // a register:
  //
  // movw reg, _GLOBAL_OFFSET_TABLE_ - 16 - .
  // movt reg, _GLOBAL_OFFSET_TABLE_ - 12 - .
  // add reg, pc, reg
  // mov GotPtr, reg
  //
  // If GotPtr is not used, then both these pseudo-instructions are dce'd.
  Variable *T = makeReg(IceType_i32);
  Context.insert<InstFakeDef>(T);
  Context.insert<InstFakeDef>(GotPtr, T);
}
782 | |
783 IceString TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) { | |
784 const IceString &CRName = CR->getName(); | |
785 const IceString CRGotoffName = | |
786 "GOTOFF$" + Func->getFunctionName() + "$" + CRName; | |
787 if (KnownGotoffs.count(CRGotoffName) == 0) { | |
788 auto *Global = VariableDeclaration::create(Ctx); | |
789 Global->setIsConstant(true); | |
790 Global->setName(CRName); | |
791 Global->setSuppressMangling(); | |
792 | |
793 auto *Gotoff = VariableDeclaration::create(Ctx); | |
794 constexpr auto GotFixup = R_ARM_GOTOFF32; | |
795 Gotoff->setIsConstant(true); | |
796 Gotoff->setName(CRGotoffName); | |
797 Gotoff->setSuppressMangling(); | |
798 Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create( | |
799 Global, {RelocOffset::create(Ctx, 0)}, GotFixup)); | |
800 Func->addGlobal(Gotoff); | |
801 KnownGotoffs.emplace(CRGotoffName); | |
802 } | |
803 return CRGotoffName; | |
804 } | |
805 | |
806 void TargetARM32::materializeGotAddr(CfgNode *Node) { | |
807 if (SandboxingType != ST_Nonsfi) { | |
808 return; | |
809 } | |
810 | |
811 // At first, we try to find the | |
812 // GotPtr = def T | |
813 // pseudo-instruction that we placed for defining the got ptr. That | |
814 // instruction is not just a place-holder for defining the GotPtr (thus | |
815 // keeping liveness consistent), but it is also located at a point where it is | |
816 // safe to materialize the got addr -- i.e., before loading parameters to | |
817 // registers, but after moving register parameters from their home location. | |
818 InstFakeDef *DefGotPtr = nullptr; | |
819 for (auto &Inst : Node->getInsts()) { | |
820 auto *FakeDef = llvm::dyn_cast<InstFakeDef>(&Inst); | |
821 if (FakeDef != nullptr && FakeDef->getDest() == GotPtr) { | |
822 DefGotPtr = FakeDef; | |
823 break; | |
824 } | |
825 } | |
826 | |
827 if (DefGotPtr == nullptr || DefGotPtr->isDeleted()) { | |
828 return; | |
829 } | |
830 | |
831 // The got addr needs to be materialized at the same point where DefGotPtr | |
832 // lives. | |
833 Context.setInsertPoint(DefGotPtr); | |
834 assert(DefGotPtr->getSrcSize() == 1); | |
835 auto *T = llvm::cast<Variable>(DefGotPtr->getSrc(0)); | |
836 loadNamedConstantRelocatablePIC(GlobalOffsetTable, T, | |
837 [this, T](Variable *PC) { _add(T, PC, T); }); | |
838 _mov(GotPtr, T); | |
839 DefGotPtr->setDeleted(); | |
840 } | |
841 | |
// Loads the pc-relative address of symbol Name into Register (nonsfi only).
// Emits a movw/movt pair whose relocation expressions have the form
// (Name - Label - Number), then fake-defs PC, and calls Finish(PC) so the
// caller can complete the computation (e.g., add Register, PC, Register).
// The label/relocation emission order below is load-bearing; do not reorder.
void TargetARM32::loadNamedConstantRelocatablePIC(
    const IceString &Name, Variable *Register,
    std::function<void(Variable *PC)> Finish, bool SuppressMangling) {
  assert(SandboxingType == ST_Nonsfi);
  // We makeReg() here instead of getPhysicalRegister() because the latter ends
  // up creating multi-blocks temporaries that liveness fails to validate.
  auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);

  // AddPcReloc is subtracted in the relocation expressions below; AddPcLabel
  // marks the instruction where PC is consumed by Finish().
  auto *AddPcReloc = RelocOffset::create(Ctx);
  AddPcReloc->setSubtract(true);
  auto *AddPcLabel = InstARM32Label::create(Func, this);
  AddPcLabel->setRelocOffset(AddPcReloc);

  const IceString EmitText = Name;
  // We need a -8 in the relocation expression to account for the pc's value
  // read by the first instruction emitted in Finish(PC).
  auto *Imm8 = RelocOffset::create(Ctx, -8);

  auto *MovwReloc = RelocOffset::create(Ctx);
  auto *MovwLabel = InstARM32Label::create(Func, this);
  MovwLabel->setRelocOffset(MovwReloc);

  auto *MovtReloc = RelocOffset::create(Ctx);
  auto *MovtLabel = InstARM32Label::create(Func, this);
  MovtLabel->setRelocOffset(MovtReloc);

  // The EmitStrings for these constant relocatables have hardcoded offsets
  // attached to them. This could be dangerous if, e.g., we ever implemented
  // instruction scheduling, but llvm-mc currently does not support
  //
  // movw reg, #:lower16:(Symbol - Label - Number)
  // movt reg, #:upper16:(Symbol - Label - Number)
  //
  // relocations.
  auto *CRLower = Ctx->getConstantSym({MovwReloc, AddPcReloc, Imm8}, Name,
                                      EmitText + " -16", SuppressMangling);
  auto *CRUpper = Ctx->getConstantSym({MovtReloc, AddPcReloc, Imm8}, Name,
                                      EmitText + " -12", SuppressMangling);

  Context.insert(MovwLabel);
  _movw(Register, CRLower);
  Context.insert(MovtLabel);
  _movt(Register, CRUpper);
  // PC = fake-def to keep liveness consistent.
  Context.insert<InstFakeDef>(PC);
  Context.insert(AddPcLabel);
  Finish(PC);
}
890 | |
745 void TargetARM32::translateO2() { | 891 void TargetARM32::translateO2() { |
746 TimerMarker T(TimerStack::TT_O2, Func); | 892 TimerMarker T(TimerStack::TT_O2, Func); |
747 | 893 |
748 // TODO(stichnot): share passes with X86? | 894 // TODO(stichnot): share passes with other targets? |
749 // https://code.google.com/p/nativeclient/issues/detail?id=4094 | 895 // https://code.google.com/p/nativeclient/issues/detail?id=4094 |
896 if (SandboxingType == ST_Nonsfi) { | |
897 createGotPtr(); | |
898 } | |
750 genTargetHelperCalls(); | 899 genTargetHelperCalls(); |
751 findMaxStackOutArgsSize(); | 900 findMaxStackOutArgsSize(); |
752 | 901 |
753 // Do not merge Alloca instructions, and lay out the stack. | 902 // Do not merge Alloca instructions, and lay out the stack. |
754 static constexpr bool SortAndCombineAllocas = true; | 903 static constexpr bool SortAndCombineAllocas = true; |
755 Func->processAllocas(SortAndCombineAllocas); | 904 Func->processAllocas(SortAndCombineAllocas); |
756 Func->dump("After Alloca processing"); | 905 Func->dump("After Alloca processing"); |
757 | 906 |
758 if (!Ctx->getFlags().getPhiEdgeSplit()) { | 907 if (!Ctx->getFlags().getPhiEdgeSplit()) { |
759 // Lower Phi instructions. | 908 // Lower Phi instructions. |
(...skipping 26 matching lines...) Expand all Loading... | |
786 return; | 935 return; |
787 | 936 |
788 // TODO: It should be sufficient to use the fastest liveness calculation, | 937 // TODO: It should be sufficient to use the fastest liveness calculation, |
789 // i.e. livenessLightweight(). However, for some reason that slows down the | 938 // i.e. livenessLightweight(). However, for some reason that slows down the |
790 // rest of the translation. Investigate. | 939 // rest of the translation. Investigate. |
791 Func->liveness(Liveness_Basic); | 940 Func->liveness(Liveness_Basic); |
792 if (Func->hasError()) | 941 if (Func->hasError()) |
793 return; | 942 return; |
794 Func->dump("After ARM32 address mode opt"); | 943 Func->dump("After ARM32 address mode opt"); |
795 | 944 |
945 if (SandboxingType == ST_Nonsfi) { | |
946 insertGotPtrInitPlaceholder(); | |
947 } | |
796 Func->genCode(); | 948 Func->genCode(); |
797 if (Func->hasError()) | 949 if (Func->hasError()) |
798 return; | 950 return; |
799 Func->dump("After ARM32 codegen"); | 951 Func->dump("After ARM32 codegen"); |
800 | 952 |
801 // Register allocation. This requires instruction renumbering and full | 953 // Register allocation. This requires instruction renumbering and full |
802 // liveness analysis. | 954 // liveness analysis. |
803 Func->renumberInstructions(); | 955 Func->renumberInstructions(); |
804 if (Func->hasError()) | 956 if (Func->hasError()) |
805 return; | 957 return; |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
850 | 1002 |
851 // Nop insertion | 1003 // Nop insertion |
852 if (Ctx->getFlags().shouldDoNopInsertion()) { | 1004 if (Ctx->getFlags().shouldDoNopInsertion()) { |
853 Func->doNopInsertion(); | 1005 Func->doNopInsertion(); |
854 } | 1006 } |
855 } | 1007 } |
856 | 1008 |
857 void TargetARM32::translateOm1() { | 1009 void TargetARM32::translateOm1() { |
858 TimerMarker T(TimerStack::TT_Om1, Func); | 1010 TimerMarker T(TimerStack::TT_Om1, Func); |
859 | 1011 |
860 // TODO: share passes with X86? | 1012 // TODO(stichnot): share passes with other targets? |
1013 if (SandboxingType == ST_Nonsfi) { | |
1014 createGotPtr(); | |
1015 } | |
1016 | |
861 genTargetHelperCalls(); | 1017 genTargetHelperCalls(); |
862 findMaxStackOutArgsSize(); | 1018 findMaxStackOutArgsSize(); |
863 | 1019 |
864 // Do not merge Alloca instructions, and lay out the stack. | 1020 // Do not merge Alloca instructions, and lay out the stack. |
865 static constexpr bool DontSortAndCombineAllocas = false; | 1021 static constexpr bool DontSortAndCombineAllocas = false; |
866 Func->processAllocas(DontSortAndCombineAllocas); | 1022 Func->processAllocas(DontSortAndCombineAllocas); |
867 Func->dump("After Alloca processing"); | 1023 Func->dump("After Alloca processing"); |
868 | 1024 |
869 Func->placePhiLoads(); | 1025 Func->placePhiLoads(); |
870 if (Func->hasError()) | 1026 if (Func->hasError()) |
871 return; | 1027 return; |
872 Func->placePhiStores(); | 1028 Func->placePhiStores(); |
873 if (Func->hasError()) | 1029 if (Func->hasError()) |
874 return; | 1030 return; |
875 Func->deletePhis(); | 1031 Func->deletePhis(); |
876 if (Func->hasError()) | 1032 if (Func->hasError()) |
877 return; | 1033 return; |
878 Func->dump("After Phi lowering"); | 1034 Func->dump("After Phi lowering"); |
879 | 1035 |
880 Func->doArgLowering(); | 1036 Func->doArgLowering(); |
881 | 1037 |
1038 if (SandboxingType == ST_Nonsfi) { | |
1039 insertGotPtrInitPlaceholder(); | |
1040 } | |
882 Func->genCode(); | 1041 Func->genCode(); |
883 if (Func->hasError()) | 1042 if (Func->hasError()) |
884 return; | 1043 return; |
885 Func->dump("After initial ARM32 codegen"); | 1044 Func->dump("After initial ARM32 codegen"); |
886 | 1045 |
887 regAlloc(RAK_InfOnly); | 1046 regAlloc(RAK_InfOnly); |
888 if (Func->hasError()) | 1047 if (Func->hasError()) |
889 return; | 1048 return; |
890 | 1049 |
891 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); | 1050 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); |
(...skipping 477 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1369 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 1528 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
1370 | 1529 |
1371 // Fill in stack offsets for stack args, and copy args into registers for | 1530 // Fill in stack offsets for stack args, and copy args into registers for |
1372 // those that were register-allocated. Args are pushed right to left, so | 1531 // those that were register-allocated. Args are pushed right to left, so |
1373 // Arg[0] is closest to the stack/frame pointer. | 1532 // Arg[0] is closest to the stack/frame pointer. |
1374 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 1533 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
1375 size_t BasicFrameOffset = PreservedRegsSizeBytes; | 1534 size_t BasicFrameOffset = PreservedRegsSizeBytes; |
1376 if (!UsesFramePointer) | 1535 if (!UsesFramePointer) |
1377 BasicFrameOffset += SpillAreaSizeBytes; | 1536 BasicFrameOffset += SpillAreaSizeBytes; |
1378 | 1537 |
1538 materializeGotAddr(Node); | |
1539 | |
1379 const VarList &Args = Func->getArgs(); | 1540 const VarList &Args = Func->getArgs(); |
1380 size_t InArgsSizeBytes = 0; | 1541 size_t InArgsSizeBytes = 0; |
1381 TargetARM32::CallingConv CC; | 1542 TargetARM32::CallingConv CC; |
1382 for (Variable *Arg : Args) { | 1543 for (Variable *Arg : Args) { |
1383 int32_t DummyReg; | 1544 int32_t DummyReg; |
1384 const Type Ty = Arg->getType(); | 1545 const Type Ty = Arg->getType(); |
1385 | 1546 |
1386 // Skip arguments passed in registers. | 1547 // Skip arguments passed in registers. |
1387 if (isScalarIntegerType(Ty)) { | 1548 if (isScalarIntegerType(Ty)) { |
1388 if (CC.argInGPR(Ty, &DummyReg)) { | 1549 if (CC.argInGPR(Ty, &DummyReg)) { |
(...skipping 2104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3493 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); | 3654 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); |
3494 break; | 3655 break; |
3495 } | 3656 } |
3496 } | 3657 } |
3497 | 3658 |
3498 // Note: To allow far calls, even for constant relocatables, we force | 3659 // Note: To allow far calls, even for constant relocatables, we force |
3499 // the call target into a register, and make an indirect call. | 3660 // the call target into a register, and make an indirect call. |
3500 CallTarget = legalizeToReg(CallTarget); | 3661 CallTarget = legalizeToReg(CallTarget); |
3501 | 3662 |
3502 // Copy arguments to be passed in registers to the appropriate registers. | 3663 // Copy arguments to be passed in registers to the appropriate registers. |
3664 CfgVector<Variable *> RegArgs; | |
3503 for (auto &FPArg : FPArgs) { | 3665 for (auto &FPArg : FPArgs) { |
3504 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second); | 3666 RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second)); |
3505 Context.insert<InstFakeUse>(Reg); | |
3506 } | 3667 } |
3507 for (auto &GPRArg : GPRArgs) { | 3668 for (auto &GPRArg : GPRArgs) { |
3508 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); | 3669 RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second)); |
3509 // Generate a FakeUse of register arguments so that they do not get dead | 3670 } |
3510 // code eliminated as a result of the FakeKill of scratch registers after | 3671 |
3511 // the call. | 3672 // Generate a FakeUse of register arguments so that they do not get dead code |
3512 Context.insert<InstFakeUse>(Reg); | 3673 // eliminated as a result of the FakeKill of scratch registers after the call. |
3674 // These fake-uses need to be placed here to avoid argument registers from | |
3675 // being used during the legalizeToReg() calls above. | |
3676 for (auto *RegArg : RegArgs) { | |
3677 Context.insert<InstFakeUse>(RegArg); | |
3513 } | 3678 } |
3514 | 3679 |
3515 InstARM32Call *NewCall = | 3680 InstARM32Call *NewCall = |
3516 Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget); | 3681 Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget); |
3517 | 3682 |
3518 if (ReturnRegHi) | 3683 if (ReturnRegHi) |
3519 Context.insert<InstFakeDef>(ReturnRegHi); | 3684 Context.insert<InstFakeDef>(ReturnRegHi); |
3520 | 3685 |
3521 // Insert a register-kill pseudo instruction. | 3686 // Insert a register-kill pseudo instruction. |
3522 Context.insert<InstFakeKill>(NewCall); | 3687 Context.insert<InstFakeKill>(NewCall); |
(...skipping 384 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
3907 CondARM32::Cond CC0; | 4072 CondARM32::Cond CC0; |
3908 CondARM32::Cond CC1; | 4073 CondARM32::Cond CC1; |
3909 } TableFcmp[] = { | 4074 } TableFcmp[] = { |
3910 #define X(val, CC0, CC1) \ | 4075 #define X(val, CC0, CC1) \ |
3911 { CondARM32::CC0, CondARM32::CC1 } \ | 4076 { CondARM32::CC0, CondARM32::CC1 } \ |
3912 , | 4077 , |
3913 FCMPARM32_TABLE | 4078 FCMPARM32_TABLE |
3914 #undef X | 4079 #undef X |
3915 }; | 4080 }; |
3916 | 4081 |
3917 bool isFloatingPointZero(Operand *Src) { | 4082 bool isFloatingPointZero(const Operand *Src) { |
3918 if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) { | 4083 if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) { |
3919 return Utils::isPositiveZero(F32->getValue()); | 4084 return Utils::isPositiveZero(F32->getValue()); |
3920 } | 4085 } |
3921 | 4086 |
3922 if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) { | 4087 if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) { |
3923 return Utils::isPositiveZero(F64->getValue()); | 4088 return Utils::isPositiveZero(F64->getValue()); |
3924 } | 4089 } |
3925 | 4090 |
3926 return false; | 4091 return false; |
3927 } | 4092 } |
3928 } // end of anonymous namespace | 4093 } // end of anonymous namespace |
3929 | 4094 |
3930 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { | 4095 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { |
3931 InstFcmp::FCond Condition = Instr->getCondition(); | 4096 InstFcmp::FCond Condition = Instr->getCondition(); |
3932 switch (Condition) { | 4097 switch (Condition) { |
(...skipping 914 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
4847 case Intrinsics::Memcpy: { | 5012 case Intrinsics::Memcpy: { |
4848 llvm::report_fatal_error("memcpy should have been prelowered."); | 5013 llvm::report_fatal_error("memcpy should have been prelowered."); |
4849 } | 5014 } |
4850 case Intrinsics::Memmove: { | 5015 case Intrinsics::Memmove: { |
4851 llvm::report_fatal_error("memmove should have been prelowered."); | 5016 llvm::report_fatal_error("memmove should have been prelowered."); |
4852 } | 5017 } |
4853 case Intrinsics::Memset: { | 5018 case Intrinsics::Memset: { |
4854 llvm::report_fatal_error("memmove should have been prelowered."); | 5019 llvm::report_fatal_error("memmove should have been prelowered."); |
4855 } | 5020 } |
4856 case Intrinsics::NaClReadTP: { | 5021 case Intrinsics::NaClReadTP: { |
4857 if (!NeedSandboxing) { | 5022 if (SandboxingType != ST_NaCl) { |
4858 llvm::report_fatal_error("nacl-read-tp should have been prelowered."); | 5023 llvm::report_fatal_error("nacl-read-tp should have been prelowered."); |
4859 } | 5024 } |
4860 Variable *TP = legalizeToReg(OperandARM32Mem::create( | 5025 Variable *TP = legalizeToReg(OperandARM32Mem::create( |
4861 Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9), | 5026 Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9), |
4862 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)))); | 5027 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)))); |
4863 _mov(Dest, TP); | 5028 _mov(Dest, TP); |
4864 return; | 5029 return; |
4865 } | 5030 } |
4866 case Intrinsics::Setjmp: { | 5031 case Intrinsics::Setjmp: { |
4867 llvm::report_fatal_error("setjmp should have been prelowered."); | 5032 llvm::report_fatal_error("setjmp should have been prelowered."); |
(...skipping 639 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5507 _cmp(Src0Var, Value); | 5672 _cmp(Src0Var, Value); |
5508 _br(Instr->getLabel(I), CondARM32::EQ); | 5673 _br(Instr->getLabel(I), CondARM32::EQ); |
5509 } | 5674 } |
5510 _br(Instr->getLabelDefault()); | 5675 _br(Instr->getLabelDefault()); |
5511 } | 5676 } |
5512 | 5677 |
// Lowers an unreachable instruction by emitting the target's trap
// instruction.
void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) {
  _trap();
}
5516 | 5681 |
5682 namespace { | |
5683 // Returns whether Opnd needs the GOT address. Currently, ConstantRelocatables, | |
5684 // and fp constants will need access to the GOT address. | |
5685 bool operandNeedsGot(const Operand *Opnd) { | |
5686 if (llvm::isa<ConstantRelocatable>(Opnd)) { | |
5687 return true; | |
5688 } | |
5689 | |
5690 const auto *F64 = llvm::dyn_cast<ConstantDouble>(Opnd); | |
5691 if (F64 != nullptr || llvm::isa<ConstantFloat>(Opnd)) { | |
5692 uint32_t _; | |
5693 return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_); | |
5694 } | |
5695 | |
5696 if (F64 != nullptr) { | |
Jim Stichnoth
2016/02/10 17:00:21
This doesn't look right. If F64!=nullptr, it shou
John
2016/02/10 23:38:21
Done.
| |
5697 return !isFloatingPointZero(F64); | |
5698 } | |
5699 | |
5700 return false; | |
5701 } | |
5702 | |
5703 // Returns whether Phi needs the GOT address (which it does if any of its | |
5704 // operands needs the GOT address.) | |
5705 bool phiNeedsGot(const InstPhi *Phi) { | |
5706 if (Phi->isDeleted()) { | |
5707 return false; | |
5708 } | |
5709 | |
5710 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) { | |
5711 if (operandNeedsGot(Phi->getSrc(I))) { | |
5712 return true; | |
5713 } | |
5714 } | |
5715 | |
5716 return false; | |
5717 } | |
5718 | |
5719 // Returns whether **any** phi in Node needs the GOT address. | |
5720 bool anyPhiInNodeNeedsGot(CfgNode *Node) { | |
5721 for (auto &Inst : Node->getPhis()) { | |
5722 if (phiNeedsGot(llvm::cast<InstPhi>(&Inst))) { | |
5723 return true; | |
5724 } | |
5725 } | |
5726 return false; | |
5727 } | |
5728 | |
5729 } // end of anonymous namespace | |
5730 | |
// Pre-lowers the phi instructions in the current node, first pinning the
// GotPtr alive if any phi operand will need the GOT address.
void TargetARM32::prelowerPhis() {
  CfgNode *Node = Context.getNode();

  if (SandboxingType == ST_Nonsfi) {
    assert(GotPtr != nullptr);
    if (anyPhiInNodeNeedsGot(Node)) {
      // If any phi instruction needs the GOT address, we place a fake-use of
      // GotPtr in Node to prevent the GotPtr's initialization from being
      // dead-code eliminated.
      Node->getInsts().push_front(InstFakeUse::create(Func, GotPtr));
    }
  }

  PhiLowering::prelowerPhis32Bit(this, Node, Func);
}
5520 | 5747 |
5521 Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) { | 5748 Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
5522 Variable *Reg = makeReg(Ty, RegNum); | 5749 Variable *Reg = makeReg(Ty, RegNum); |
5523 Context.insert<InstFakeDef>(Reg); | 5750 Context.insert<InstFakeDef>(Reg); |
5524 assert(isVectorType(Ty)); | 5751 assert(isVectorType(Ty)); |
5525 _veor(Reg, Reg, Reg); | 5752 _veor(Reg, Reg, Reg); |
5526 return Reg; | 5753 return Reg; |
5527 } | 5754 } |
5528 | 5755 |
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5671 uint32_t UpperBits = (Value >> 16) & 0xFFFF; | 5898 uint32_t UpperBits = (Value >> 16) & 0xFFFF; |
5672 _movw(Reg, | 5899 _movw(Reg, |
5673 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); | 5900 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); |
5674 if (UpperBits != 0) { | 5901 if (UpperBits != 0) { |
5675 _movt(Reg, Ctx->getConstantInt32(UpperBits)); | 5902 _movt(Reg, Ctx->getConstantInt32(UpperBits)); |
5676 } | 5903 } |
5677 return Reg; | 5904 return Reg; |
5678 } | 5905 } |
5679 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { | 5906 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { |
5680 Variable *Reg = makeReg(Ty, RegNum); | 5907 Variable *Reg = makeReg(Ty, RegNum); |
5681 _movw(Reg, C); | 5908 if (SandboxingType != ST_Nonsfi) { |
5682 _movt(Reg, C); | 5909 _movw(Reg, C); |
5910 _movt(Reg, C); | |
5911 } else { | |
5912 auto *GotAddr = legalizeToReg(GotPtr); | |
5913 const IceString CGotoffName = createGotoffRelocation(C); | |
5914 loadNamedConstantRelocatablePIC( | |
5915 CGotoffName, Reg, [this, Reg](Variable *PC) { | |
5916 _ldr(Reg, OperandARM32Mem::create(Func, IceType_i32, PC, Reg)); | |
5917 }); | |
5918 _add(Reg, GotAddr, Reg); | |
5919 } | |
5683 return Reg; | 5920 return Reg; |
5684 } else { | 5921 } else { |
5685 assert(isScalarFloatingType(Ty)); | 5922 assert(isScalarFloatingType(Ty)); |
5686 uint32_t ModifiedImm; | 5923 uint32_t ModifiedImm; |
5687 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { | 5924 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { |
5688 Variable *T = makeReg(Ty, RegNum); | 5925 Variable *T = makeReg(Ty, RegNum); |
5689 _mov(T, | 5926 _mov(T, |
5690 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); | 5927 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); |
5691 return T; | 5928 return T; |
5692 } | 5929 } |
5693 | 5930 |
5694 if (Ty == IceType_f64 && isFloatingPointZero(From)) { | 5931 if (Ty == IceType_f64 && isFloatingPointZero(From)) { |
5695 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32 | 5932 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32 |
5696 // because ARM does not have a veor instruction with S registers. | 5933 // because ARM does not have a veor instruction with S registers. |
5697 Variable *T = makeReg(IceType_f64, RegNum); | 5934 Variable *T = makeReg(IceType_f64, RegNum); |
5698 Context.insert<InstFakeDef>(T); | 5935 Context.insert<InstFakeDef>(T); |
5699 _veor(T, T, T); | 5936 _veor(T, T, T); |
5700 return T; | 5937 return T; |
5701 } | 5938 } |
5702 | 5939 |
5703 // Load floats/doubles from literal pool. | 5940 // Load floats/doubles from literal pool. |
5704 std::string Buffer; | 5941 std::string Buffer; |
5705 llvm::raw_string_ostream StrBuf(Buffer); | 5942 llvm::raw_string_ostream StrBuf(Buffer); |
5706 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); | 5943 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); |
5707 llvm::cast<Constant>(From)->setShouldBePooled(true); | 5944 llvm::cast<Constant>(From)->setShouldBePooled(true); |
5708 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); | 5945 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); |
5709 Variable *BaseReg = makeReg(getPointerType()); | 5946 Variable *BaseReg = nullptr; |
5710 _movw(BaseReg, Offset); | 5947 if (SandboxingType == ST_Nonsfi) { |
5711 _movt(BaseReg, Offset); | 5948 // vldr does not support the [base, index] addressing mode, so we need |
5949 // to legalize Offset to a register. Otherwise, we could simply | |
5950 // vldr dest, [got, reg(Offset)] | |
5951 BaseReg = legalizeToReg(Offset); | |
5952 } else { | |
5953 BaseReg = makeReg(getPointerType()); | |
5954 _movw(BaseReg, Offset); | |
5955 _movt(BaseReg, Offset); | |
5956 } | |
5712 From = formMemoryOperand(BaseReg, Ty); | 5957 From = formMemoryOperand(BaseReg, Ty); |
5713 return copyToReg(From, RegNum); | 5958 return copyToReg(From, RegNum); |
5714 } | 5959 } |
5715 } | 5960 } |
5716 | 5961 |
5717 if (auto *Var = llvm::dyn_cast<Variable>(From)) { | 5962 if (auto *Var = llvm::dyn_cast<Variable>(From)) { |
5718 if (Var->isRematerializable()) { | 5963 if (Var->isRematerializable()) { |
5719 if (Allowed & Legal_Rematerializable) { | 5964 if (Allowed & Legal_Rematerializable) { |
5720 return From; | 5965 return From; |
5721 } | 5966 } |
(...skipping 863 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
6585 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 6830 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
6586 } | 6831 } |
6587 | 6832 |
6588 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; | 6833 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; |
6589 llvm::SmallBitVector | 6834 llvm::SmallBitVector |
6590 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; | 6835 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; |
6591 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; | 6836 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; |
6592 | 6837 |
6593 } // end of namespace ARM32 | 6838 } // end of namespace ARM32 |
6594 } // end of namespace Ice | 6839 } // end of namespace Ice |
OLD | NEW |