Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 45 return ::Ice::ARM32::TargetDataARM32::create(Ctx); | 45 return ::Ice::ARM32::TargetDataARM32::create(Ctx); |
| 46 } | 46 } |
| 47 | 47 |
// Creates the ARM32 TargetHeaderLowering instance for the given context.
std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::ARM32::TargetHeaderARM32::create(Ctx);
}
| 52 | 52 |
| 53 void staticInit(::Ice::GlobalContext *Ctx) { | 53 void staticInit(::Ice::GlobalContext *Ctx) { |
| 54 ::Ice::ARM32::TargetARM32::staticInit(Ctx); | 54 ::Ice::ARM32::TargetARM32::staticInit(Ctx); |
| 55 if (Ctx->getFlags().getUseNonsfi()) { | |
| 56 // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing | |
| 57 // globals. The GOT is an external symbol (i.e., it is not defined in the | |
| 58 // pexe) so we need to register it as such so that ELF emission won't barf | |
| 59 // on an "unknown" symbol. The GOT is added to the External symbols list | |
| 60 // here because staticInit() is invoked in a single-thread context. | |
| 61 Ctx->getConstantExternSym(::Ice::GlobalOffsetTable); | |
| 62 } | |
| 55 } | 63 } |
| 56 | 64 |
| 57 } // end of namespace ARM32 | 65 } // end of namespace ARM32 |
| 58 | 66 |
| 59 namespace Ice { | 67 namespace Ice { |
| 60 namespace ARM32 { | 68 namespace ARM32 { |
| 61 | 69 |
| 62 namespace { | 70 namespace { |
| 63 | 71 |
| 64 /// SizeOf is used to obtain the size of an initializer list as a constexpr | 72 /// SizeOf is used to obtain the size of an initializer list as a constexpr |
| (...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 271 ; | 279 ; |
| 272 std::array<uint32_t, NumVec128Args> Vec128ArgInitializer; | 280 std::array<uint32_t, NumVec128Args> Vec128ArgInitializer; |
| 273 | 281 |
| 274 IceString getRegClassName(RegClass C) { | 282 IceString getRegClassName(RegClass C) { |
| 275 auto ClassNum = static_cast<RegARM32::RegClassARM32>(C); | 283 auto ClassNum = static_cast<RegARM32::RegClassARM32>(C); |
| 276 assert(ClassNum < RegARM32::RCARM32_NUM); | 284 assert(ClassNum < RegARM32::RCARM32_NUM); |
| 277 switch (ClassNum) { | 285 switch (ClassNum) { |
| 278 default: | 286 default: |
| 279 assert(C < RC_Target); | 287 assert(C < RC_Target); |
| 280 return regClassString(C); | 288 return regClassString(C); |
| 281 // Add handling of new register classes below. | 289 // Add handling of new register classes below. |
| 290 case RegARM32::RCARM32_QtoS: | |
| 291 return "QtoS"; | |
| 282 } | 292 } |
| 283 } | 293 } |
| 284 | 294 |
| 285 } // end of anonymous namespace | 295 } // end of anonymous namespace |
| 286 | 296 |
// Constructor. Sandboxing instructions are only needed when targeting NaCl;
// CPUFeatures is initialized from the global flags.
TargetARM32::TargetARM32(Cfg *Func)
    : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl),
      CPUFeatures(Func->getContext()->getFlags()) {}
| 290 | 300 |
| 291 void TargetARM32::staticInit(GlobalContext *Ctx) { | 301 void TargetARM32::staticInit(GlobalContext *Ctx) { |
| (...skipping 400 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 692 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_memset); | 702 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_memset); |
| 693 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper, | 703 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper, |
| 694 NoTailCall, IsTargetHelperCall); | 704 NoTailCall, IsTargetHelperCall); |
| 695 Call->addArg(IntrinsicCall->getArg(0)); | 705 Call->addArg(IntrinsicCall->getArg(0)); |
| 696 Call->addArg(ValExt); | 706 Call->addArg(ValExt); |
| 697 Call->addArg(IntrinsicCall->getArg(2)); | 707 Call->addArg(IntrinsicCall->getArg(2)); |
| 698 Instr->setDeleted(); | 708 Instr->setDeleted(); |
| 699 return; | 709 return; |
| 700 } | 710 } |
| 701 case Intrinsics::NaClReadTP: { | 711 case Intrinsics::NaClReadTP: { |
| 702 if (NeedSandboxing) { | 712 if (SandboxingType == ST_NaCl) { |
| 703 return; | 713 return; |
| 704 } | 714 } |
| 705 static constexpr SizeT MaxArgs = 0; | 715 static constexpr SizeT MaxArgs = 0; |
| 706 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_read_tp); | 716 Operand *TargetHelper = Ctx->getConstantExternSym(H_call_read_tp); |
| 707 Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall, | 717 Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall, |
| 708 IsTargetHelperCall); | 718 IsTargetHelperCall); |
| 709 Instr->setDeleted(); | 719 Instr->setDeleted(); |
| 710 return; | 720 return; |
| 711 } | 721 } |
| 712 case Intrinsics::Setjmp: { | 722 case Intrinsics::Setjmp: { |
| (...skipping 22 matching lines...) Expand all Loading... | |
| 735 PostIncrLoweringContext PostIncrement(Context); | 745 PostIncrLoweringContext PostIncrement(Context); |
| 736 Inst *CurInstr = Context.getCur(); | 746 Inst *CurInstr = Context.getCur(); |
| 737 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { | 747 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { |
| 738 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); | 748 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); |
| 739 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); | 749 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); |
| 740 } | 750 } |
| 741 } | 751 } |
| 742 } | 752 } |
| 743 } | 753 } |
| 744 | 754 |
| 755 void TargetARM32::createGotPtr() { | |
| 756 if (SandboxingType != ST_Nonsfi) { | |
| 757 return; | |
| 758 } | |
| 759 GotPtr = Func->makeVariable(IceType_i32); | |
| 760 } | |
| 761 | |
| 762 void TargetARM32::insertGotPtrInitPlaceholder() { | |
| 763 if (SandboxingType != ST_Nonsfi) { | |
| 764 return; | |
| 765 } | |
| 766 assert(GotPtr != nullptr); | |
| 767 // We add the two placeholder instructions here. The first fakedefs T, an | |
| 768 // infinite-weight temporary, while the second fakedefs the GotPtr "using" T. | |
| 769 // This is needed because the GotPtr initialization, if needed, will require | |
| 770 // a register: | |
| 771 // | |
| 772 // movw reg, _GLOBAL_OFFSET_TABLE_ - 16 - . | |
| 773 // movt reg, _GLOBAL_OFFSET_TABLE_ - 12 - . | |
| 774 // add reg, pc, reg | |
| 775 // mov GotPtr, reg | |
| 776 // | |
| 777 // If GotPtr is not used, then both these pseudo-instructions are dce'd. | |
| 778 Variable *T = makeReg(IceType_i32); | |
| 779 Context.insert<InstFakeDef>(T); | |
| 780 Context.insert<InstFakeDef>(GotPtr, T); | |
| 781 } | |
| 782 | |
| 783 IceString TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) { | |
| 784 const IceString &CRName = CR->getName(); | |
| 785 const IceString CRGotoffName = | |
| 786 "GOTOFF$" + Func->getFunctionName() + "$" + CRName; | |
| 787 if (KnownGotoffs.count(CRGotoffName) == 0) { | |
| 788 auto *Global = VariableDeclaration::create(Ctx); | |
| 789 Global->setIsConstant(true); | |
| 790 Global->setName(CRName); | |
| 791 Global->setSuppressMangling(); | |
| 792 | |
| 793 auto *Gotoff = VariableDeclaration::create(Ctx); | |
| 794 constexpr auto GotFixup = R_ARM_GOTOFF32; | |
| 795 Gotoff->setIsConstant(true); | |
| 796 Gotoff->setName(CRGotoffName); | |
| 797 Gotoff->setSuppressMangling(); | |
| 798 Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create( | |
| 799 Global, {RelocOffset::create(Ctx, 0)}, GotFixup)); | |
| 800 Func->addGlobal(Gotoff); | |
| 801 KnownGotoffs.emplace(CRGotoffName); | |
| 802 } | |
| 803 return CRGotoffName; | |
| 804 } | |
| 805 | |
| 806 void TargetARM32::materializeGotAddr(CfgNode *Node) { | |
| 807 if (SandboxingType != ST_Nonsfi) { | |
| 808 return; | |
| 809 } | |
| 810 | |
| 811 // At first, we try to find the | |
| 812 // GotPtr = def T | |
| 813 // pseudo-instruction that we placed for defining the got ptr. That | |
| 814 // instruction is not just a place-holder for defining the GotPtr (thus | |
| 815 // keeping liveness consistent), but it is also located at a point where it is | |
| 816 // safe to materialize the got addr -- i.e., before loading parameters to | |
| 817 // registers, but after moving register parameters from their home location. | |
| 818 InstFakeDef *DefGotPtr = nullptr; | |
| 819 for (auto &Inst : Node->getInsts()) { | |
| 820 auto *FakeDef = llvm::dyn_cast<InstFakeDef>(&Inst); | |
| 821 if (FakeDef != nullptr && FakeDef->getDest() == GotPtr) { | |
| 822 DefGotPtr = FakeDef; | |
| 823 break; | |
| 824 } | |
| 825 } | |
| 826 | |
| 827 if (DefGotPtr == nullptr || DefGotPtr->isDeleted()) { | |
| 828 return; | |
| 829 } | |
| 830 | |
| 831 // The got addr needs to be materialized at the same point where DefGotPtr | |
| 832 // lives. | |
| 833 Context.setInsertPoint(DefGotPtr); | |
| 834 assert(DefGotPtr->getSrcSize() == 1); | |
| 835 auto *T = llvm::cast<Variable>(DefGotPtr->getSrc(0)); | |
| 836 loadNamedConstantRelocatablePIC(GlobalOffsetTable, T, | |
| 837 [this, T](Variable *PC) { _add(T, PC, T); }); | |
| 838 _mov(GotPtr, T); | |
| 839 DefGotPtr->setDeleted(); | |
| 840 } | |
| 841 | |
// Emits a PIC sequence that loads the pc-relative address of the symbol Name
// into Register:
//
//   movw Register, #:lower16:(Name - AddPcLabel - 8)
//   movt Register, #:upper16:(Name - AddPcLabel - 8)
//   <Finish(PC)>   // typically: add Register, pc, Register
//
// Finish receives the pc pseudo-register and is responsible for the final
// pc-relative fixup. The label/reloc objects created here must be inserted
// in exactly this order so the emitted relocation expressions resolve to the
// correct pc-relative offsets.
void TargetARM32::loadNamedConstantRelocatablePIC(
    const IceString &Name, Variable *Register,
    std::function<void(Variable *PC)> Finish, bool SuppressMangling) {
  assert(SandboxingType == ST_Nonsfi);
  // We makeReg() here instead of getPhysicalRegister() because the latter ends
  // up creating multi-blocks temporaries that liveness fails to validate.
  auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);

  // AddPcReloc is subtracted in the relocation expression: the offsets are
  // relative to the label where the pc-relative add happens.
  auto *AddPcReloc = RelocOffset::create(Ctx);
  AddPcReloc->setSubtract(true);
  auto *AddPcLabel = InstARM32Label::create(Func, this);
  AddPcLabel->setRelocOffset(AddPcReloc);

  const IceString EmitText = Name;
  // We need a -8 in the relocation expression to account for the pc's value
  // read by the first instruction emitted in Finish(PC).
  auto *Imm8 = RelocOffset::create(Ctx, -8);

  // Label/reloc pair marking the movw instruction.
  auto *MovwReloc = RelocOffset::create(Ctx);
  auto *MovwLabel = InstARM32Label::create(Func, this);
  MovwLabel->setRelocOffset(MovwReloc);

  // Label/reloc pair marking the movt instruction.
  auto *MovtReloc = RelocOffset::create(Ctx);
  auto *MovtLabel = InstARM32Label::create(Func, this);
  MovtLabel->setRelocOffset(MovtReloc);

  // The EmitString for these constant relocatables have hardcoded offsets
  // attached to them. This could be dangerous if, e.g., we ever implemented
  // instruction scheduling but llvm-mc currently does not support
  //
  // movw reg, #:lower16:(Symbol - Label - Number)
  // movt reg, #:upper16:(Symbol - Label - Number)
  //
  // relocations.
  auto *CRLower = Ctx->getConstantSym({MovwReloc, AddPcReloc, Imm8}, Name,
                                      EmitText + " -16", SuppressMangling);
  auto *CRUpper = Ctx->getConstantSym({MovtReloc, AddPcReloc, Imm8}, Name,
                                      EmitText + " -12", SuppressMangling);

  Context.insert(MovwLabel);
  _movw(Register, CRLower);
  Context.insert(MovtLabel);
  _movt(Register, CRUpper);
  // PC = fake-def to keep liveness consistent.
  Context.insert<InstFakeDef>(PC);
  Context.insert(AddPcLabel);
  Finish(PC);
}
| 890 | |
| 745 void TargetARM32::translateO2() { | 891 void TargetARM32::translateO2() { |
| 746 TimerMarker T(TimerStack::TT_O2, Func); | 892 TimerMarker T(TimerStack::TT_O2, Func); |
| 747 | 893 |
| 748 // TODO(stichnot): share passes with X86? | 894 // TODO(stichnot): share passes with other targets? |
| 749 // https://code.google.com/p/nativeclient/issues/detail?id=4094 | 895 // https://code.google.com/p/nativeclient/issues/detail?id=4094 |
| 896 if (SandboxingType == ST_Nonsfi) { | |
| 897 createGotPtr(); | |
| 898 } | |
| 750 genTargetHelperCalls(); | 899 genTargetHelperCalls(); |
| 751 findMaxStackOutArgsSize(); | 900 findMaxStackOutArgsSize(); |
| 752 | 901 |
| 753 // Do not merge Alloca instructions, and lay out the stack. | 902 // Do not merge Alloca instructions, and lay out the stack. |
| 754 static constexpr bool SortAndCombineAllocas = true; | 903 static constexpr bool SortAndCombineAllocas = true; |
| 755 Func->processAllocas(SortAndCombineAllocas); | 904 Func->processAllocas(SortAndCombineAllocas); |
| 756 Func->dump("After Alloca processing"); | 905 Func->dump("After Alloca processing"); |
| 757 | 906 |
| 758 if (!Ctx->getFlags().getPhiEdgeSplit()) { | 907 if (!Ctx->getFlags().getPhiEdgeSplit()) { |
| 759 // Lower Phi instructions. | 908 // Lower Phi instructions. |
| (...skipping 26 matching lines...) Expand all Loading... | |
| 786 return; | 935 return; |
| 787 | 936 |
| 788 // TODO: It should be sufficient to use the fastest liveness calculation, | 937 // TODO: It should be sufficient to use the fastest liveness calculation, |
| 789 // i.e. livenessLightweight(). However, for some reason that slows down the | 938 // i.e. livenessLightweight(). However, for some reason that slows down the |
| 790 // rest of the translation. Investigate. | 939 // rest of the translation. Investigate. |
| 791 Func->liveness(Liveness_Basic); | 940 Func->liveness(Liveness_Basic); |
| 792 if (Func->hasError()) | 941 if (Func->hasError()) |
| 793 return; | 942 return; |
| 794 Func->dump("After ARM32 address mode opt"); | 943 Func->dump("After ARM32 address mode opt"); |
| 795 | 944 |
| 945 if (SandboxingType == ST_Nonsfi) { | |
| 946 insertGotPtrInitPlaceholder(); | |
| 947 } | |
| 796 Func->genCode(); | 948 Func->genCode(); |
| 797 if (Func->hasError()) | 949 if (Func->hasError()) |
| 798 return; | 950 return; |
| 799 Func->dump("After ARM32 codegen"); | 951 Func->dump("After ARM32 codegen"); |
| 800 | 952 |
| 801 // Register allocation. This requires instruction renumbering and full | 953 // Register allocation. This requires instruction renumbering and full |
| 802 // liveness analysis. | 954 // liveness analysis. |
| 803 Func->renumberInstructions(); | 955 Func->renumberInstructions(); |
| 804 if (Func->hasError()) | 956 if (Func->hasError()) |
| 805 return; | 957 return; |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 850 | 1002 |
| 851 // Nop insertion | 1003 // Nop insertion |
| 852 if (Ctx->getFlags().shouldDoNopInsertion()) { | 1004 if (Ctx->getFlags().shouldDoNopInsertion()) { |
| 853 Func->doNopInsertion(); | 1005 Func->doNopInsertion(); |
| 854 } | 1006 } |
| 855 } | 1007 } |
| 856 | 1008 |
| 857 void TargetARM32::translateOm1() { | 1009 void TargetARM32::translateOm1() { |
| 858 TimerMarker T(TimerStack::TT_Om1, Func); | 1010 TimerMarker T(TimerStack::TT_Om1, Func); |
| 859 | 1011 |
| 860 // TODO: share passes with X86? | 1012 // TODO(stichnot): share passes with other targets? |
| 1013 if (SandboxingType == ST_Nonsfi) { | |
| 1014 createGotPtr(); | |
| 1015 } | |
| 1016 | |
| 861 genTargetHelperCalls(); | 1017 genTargetHelperCalls(); |
| 862 findMaxStackOutArgsSize(); | 1018 findMaxStackOutArgsSize(); |
| 863 | 1019 |
| 864 // Do not merge Alloca instructions, and lay out the stack. | 1020 // Do not merge Alloca instructions, and lay out the stack. |
| 865 static constexpr bool DontSortAndCombineAllocas = false; | 1021 static constexpr bool DontSortAndCombineAllocas = false; |
| 866 Func->processAllocas(DontSortAndCombineAllocas); | 1022 Func->processAllocas(DontSortAndCombineAllocas); |
| 867 Func->dump("After Alloca processing"); | 1023 Func->dump("After Alloca processing"); |
| 868 | 1024 |
| 869 Func->placePhiLoads(); | 1025 Func->placePhiLoads(); |
| 870 if (Func->hasError()) | 1026 if (Func->hasError()) |
| 871 return; | 1027 return; |
| 872 Func->placePhiStores(); | 1028 Func->placePhiStores(); |
| 873 if (Func->hasError()) | 1029 if (Func->hasError()) |
| 874 return; | 1030 return; |
| 875 Func->deletePhis(); | 1031 Func->deletePhis(); |
| 876 if (Func->hasError()) | 1032 if (Func->hasError()) |
| 877 return; | 1033 return; |
| 878 Func->dump("After Phi lowering"); | 1034 Func->dump("After Phi lowering"); |
| 879 | 1035 |
| 880 Func->doArgLowering(); | 1036 Func->doArgLowering(); |
| 881 | 1037 |
| 1038 if (SandboxingType == ST_Nonsfi) { | |
| 1039 insertGotPtrInitPlaceholder(); | |
| 1040 } | |
| 882 Func->genCode(); | 1041 Func->genCode(); |
| 883 if (Func->hasError()) | 1042 if (Func->hasError()) |
| 884 return; | 1043 return; |
| 885 Func->dump("After initial ARM32 codegen"); | 1044 Func->dump("After initial ARM32 codegen"); |
| 886 | 1045 |
| 887 regAlloc(RAK_InfOnly); | 1046 regAlloc(RAK_InfOnly); |
| 888 if (Func->hasError()) | 1047 if (Func->hasError()) |
| 889 return; | 1048 return; |
| 890 | 1049 |
| 891 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); | 1050 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); |
| (...skipping 477 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1369 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 1528 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
| 1370 | 1529 |
| 1371 // Fill in stack offsets for stack args, and copy args into registers for | 1530 // Fill in stack offsets for stack args, and copy args into registers for |
| 1372 // those that were register-allocated. Args are pushed right to left, so | 1531 // those that were register-allocated. Args are pushed right to left, so |
| 1373 // Arg[0] is closest to the stack/frame pointer. | 1532 // Arg[0] is closest to the stack/frame pointer. |
| 1374 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 1533 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
| 1375 size_t BasicFrameOffset = PreservedRegsSizeBytes; | 1534 size_t BasicFrameOffset = PreservedRegsSizeBytes; |
| 1376 if (!UsesFramePointer) | 1535 if (!UsesFramePointer) |
| 1377 BasicFrameOffset += SpillAreaSizeBytes; | 1536 BasicFrameOffset += SpillAreaSizeBytes; |
| 1378 | 1537 |
| 1538 materializeGotAddr(Node); | |
| 1539 | |
| 1379 const VarList &Args = Func->getArgs(); | 1540 const VarList &Args = Func->getArgs(); |
| 1380 size_t InArgsSizeBytes = 0; | 1541 size_t InArgsSizeBytes = 0; |
| 1381 TargetARM32::CallingConv CC; | 1542 TargetARM32::CallingConv CC; |
| 1382 for (Variable *Arg : Args) { | 1543 for (Variable *Arg : Args) { |
| 1383 int32_t DummyReg; | 1544 int32_t DummyReg; |
| 1384 const Type Ty = Arg->getType(); | 1545 const Type Ty = Arg->getType(); |
| 1385 | 1546 |
| 1386 // Skip arguments passed in registers. | 1547 // Skip arguments passed in registers. |
| 1387 if (isScalarIntegerType(Ty)) { | 1548 if (isScalarIntegerType(Ty)) { |
| 1388 if (CC.argInGPR(Ty, &DummyReg)) { | 1549 if (CC.argInGPR(Ty, &DummyReg)) { |
| (...skipping 2104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3493 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); | 3654 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); |
| 3494 break; | 3655 break; |
| 3495 } | 3656 } |
| 3496 } | 3657 } |
| 3497 | 3658 |
| 3498 // Note: To allow far calls, even for constant relocatables, we force | 3659 // Note: To allow far calls, even for constant relocatables, we force |
| 3499 // the call target into a register, and make an indirect call. | 3660 // the call target into a register, and make an indirect call. |
| 3500 CallTarget = legalizeToReg(CallTarget); | 3661 CallTarget = legalizeToReg(CallTarget); |
| 3501 | 3662 |
| 3502 // Copy arguments to be passed in registers to the appropriate registers. | 3663 // Copy arguments to be passed in registers to the appropriate registers. |
| 3664 CfgVector<Variable *> RegArgs; | |
| 3503 for (auto &FPArg : FPArgs) { | 3665 for (auto &FPArg : FPArgs) { |
| 3504 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second); | 3666 RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second)); |
| 3505 Context.insert<InstFakeUse>(Reg); | |
| 3506 } | 3667 } |
| 3507 for (auto &GPRArg : GPRArgs) { | 3668 for (auto &GPRArg : GPRArgs) { |
| 3508 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); | 3669 RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second)); |
| 3509 // Generate a FakeUse of register arguments so that they do not get dead | 3670 } |
| 3510 // code eliminated as a result of the FakeKill of scratch registers after | 3671 |
| 3511 // the call. | 3672 // Generate a FakeUse of register arguments so that they do not get dead code |
| 3512 Context.insert<InstFakeUse>(Reg); | 3673 // eliminated as a result of the FakeKill of scratch registers after the call. |
| 3674 // These fake-uses need to be placed here to avoid argument registers from | |
| 3675 // being used during the legalizeToReg() calls above. | |
| 3676 for (auto *RegArg : RegArgs) { | |
| 3677 Context.insert<InstFakeUse>(RegArg); | |
| 3513 } | 3678 } |
| 3514 | 3679 |
| 3515 InstARM32Call *NewCall = | 3680 InstARM32Call *NewCall = |
| 3516 Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget); | 3681 Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget); |
| 3517 | 3682 |
| 3518 if (ReturnRegHi) | 3683 if (ReturnRegHi) |
| 3519 Context.insert<InstFakeDef>(ReturnRegHi); | 3684 Context.insert<InstFakeDef>(ReturnRegHi); |
| 3520 | 3685 |
| 3521 // Insert a register-kill pseudo instruction. | 3686 // Insert a register-kill pseudo instruction. |
| 3522 Context.insert<InstFakeKill>(NewCall); | 3687 Context.insert<InstFakeKill>(NewCall); |
| (...skipping 384 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3907 CondARM32::Cond CC0; | 4072 CondARM32::Cond CC0; |
| 3908 CondARM32::Cond CC1; | 4073 CondARM32::Cond CC1; |
| 3909 } TableFcmp[] = { | 4074 } TableFcmp[] = { |
| 3910 #define X(val, CC0, CC1) \ | 4075 #define X(val, CC0, CC1) \ |
| 3911 { CondARM32::CC0, CondARM32::CC1 } \ | 4076 { CondARM32::CC0, CondARM32::CC1 } \ |
| 3912 , | 4077 , |
| 3913 FCMPARM32_TABLE | 4078 FCMPARM32_TABLE |
| 3914 #undef X | 4079 #undef X |
| 3915 }; | 4080 }; |
| 3916 | 4081 |
| 3917 bool isFloatingPointZero(Operand *Src) { | 4082 bool isFloatingPointZero(const Operand *Src) { |
| 3918 if (const auto *F32 = llvm::dyn_cast<ConstantFloat>(Src)) { | 4083 if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) { |
| 3919 return Utils::isPositiveZero(F32->getValue()); | 4084 return Utils::isPositiveZero(F32->getValue()); |
| 3920 } | 4085 } |
| 3921 | 4086 |
| 3922 if (const auto *F64 = llvm::dyn_cast<ConstantDouble>(Src)) { | 4087 if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) { |
| 3923 return Utils::isPositiveZero(F64->getValue()); | 4088 return Utils::isPositiveZero(F64->getValue()); |
| 3924 } | 4089 } |
| 3925 | 4090 |
| 3926 return false; | 4091 return false; |
| 3927 } | 4092 } |
| 3928 } // end of anonymous namespace | 4093 } // end of anonymous namespace |
| 3929 | 4094 |
| 3930 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { | 4095 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) { |
| 3931 InstFcmp::FCond Condition = Instr->getCondition(); | 4096 InstFcmp::FCond Condition = Instr->getCondition(); |
| 3932 switch (Condition) { | 4097 switch (Condition) { |
| (...skipping 914 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4847 case Intrinsics::Memcpy: { | 5012 case Intrinsics::Memcpy: { |
| 4848 llvm::report_fatal_error("memcpy should have been prelowered."); | 5013 llvm::report_fatal_error("memcpy should have been prelowered."); |
| 4849 } | 5014 } |
| 4850 case Intrinsics::Memmove: { | 5015 case Intrinsics::Memmove: { |
| 4851 llvm::report_fatal_error("memmove should have been prelowered."); | 5016 llvm::report_fatal_error("memmove should have been prelowered."); |
| 4852 } | 5017 } |
| 4853 case Intrinsics::Memset: { | 5018 case Intrinsics::Memset: { |
| 4854 llvm::report_fatal_error("memmove should have been prelowered."); | 5019 llvm::report_fatal_error("memmove should have been prelowered."); |
| 4855 } | 5020 } |
| 4856 case Intrinsics::NaClReadTP: { | 5021 case Intrinsics::NaClReadTP: { |
| 4857 if (!NeedSandboxing) { | 5022 if (SandboxingType != ST_NaCl) { |
| 4858 llvm::report_fatal_error("nacl-read-tp should have been prelowered."); | 5023 llvm::report_fatal_error("nacl-read-tp should have been prelowered."); |
| 4859 } | 5024 } |
| 4860 Variable *TP = legalizeToReg(OperandARM32Mem::create( | 5025 Variable *TP = legalizeToReg(OperandARM32Mem::create( |
| 4861 Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9), | 5026 Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9), |
| 4862 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)))); | 5027 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)))); |
| 4863 _mov(Dest, TP); | 5028 _mov(Dest, TP); |
| 4864 return; | 5029 return; |
| 4865 } | 5030 } |
| 4866 case Intrinsics::Setjmp: { | 5031 case Intrinsics::Setjmp: { |
| 4867 llvm::report_fatal_error("setjmp should have been prelowered."); | 5032 llvm::report_fatal_error("setjmp should have been prelowered."); |
| (...skipping 639 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5507 _cmp(Src0Var, Value); | 5672 _cmp(Src0Var, Value); |
| 5508 _br(Instr->getLabel(I), CondARM32::EQ); | 5673 _br(Instr->getLabel(I), CondARM32::EQ); |
| 5509 } | 5674 } |
| 5510 _br(Instr->getLabelDefault()); | 5675 _br(Instr->getLabelDefault()); |
| 5511 } | 5676 } |
| 5512 | 5677 |
// Lowers an unreachable instruction by emitting a trap.
void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) {
  _trap();
}
| 5516 | 5681 |
| 5682 namespace { | |
| 5683 // Returns whether Opnd needs the GOT address. Currently, ConstantRelocatables, | |
| 5684 // and fp constants will need access to the GOT address. | |
| 5685 bool operandNeedsGot(const Operand *Opnd) { | |
| 5686 if (llvm::isa<ConstantRelocatable>(Opnd)) { | |
| 5687 return true; | |
| 5688 } | |
| 5689 | |
| 5690 const auto *F64 = llvm::dyn_cast<ConstantDouble>(Opnd); | |
| 5691 if (F64 != nullptr || llvm::isa<ConstantFloat>(Opnd)) { | |
| 5692 uint32_t _; | |
| 5693 return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_); | |
| 5694 } | |
| 5695 | |
| 5696 if (F64 != nullptr) { | |
|
Jim Stichnoth
2016/02/10 17:00:21
This doesn't look right. If F64!=nullptr, it shou
John
2016/02/10 23:38:21
Done.
| |
| 5697 return !isFloatingPointZero(F64); | |
| 5698 } | |
| 5699 | |
| 5700 return false; | |
| 5701 } | |
| 5702 | |
| 5703 // Returns whether Phi needs the GOT address (which it does if any of its | |
| 5704 // operands needs the GOT address.) | |
| 5705 bool phiNeedsGot(const InstPhi *Phi) { | |
| 5706 if (Phi->isDeleted()) { | |
| 5707 return false; | |
| 5708 } | |
| 5709 | |
| 5710 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) { | |
| 5711 if (operandNeedsGot(Phi->getSrc(I))) { | |
| 5712 return true; | |
| 5713 } | |
| 5714 } | |
| 5715 | |
| 5716 return false; | |
| 5717 } | |
| 5718 | |
| 5719 // Returns whether **any** phi in Node needs the GOT address. | |
| 5720 bool anyPhiInNodeNeedsGot(CfgNode *Node) { | |
| 5721 for (auto &Inst : Node->getPhis()) { | |
| 5722 if (phiNeedsGot(llvm::cast<InstPhi>(&Inst))) { | |
| 5723 return true; | |
| 5724 } | |
| 5725 } | |
| 5726 return false; | |
| 5727 } | |
| 5728 | |
| 5729 } // end of anonymous namespace | |
| 5730 | |
| 5517 void TargetARM32::prelowerPhis() { | 5731 void TargetARM32::prelowerPhis() { |
| 5518 PhiLowering::prelowerPhis32Bit<TargetARM32>(this, Context.getNode(), Func); | 5732 CfgNode *Node = Context.getNode(); |
| 5733 | |
| 5734 if (SandboxingType == ST_Nonsfi) { | |
| 5735 assert(GotPtr != nullptr); | |
| 5736 if (anyPhiInNodeNeedsGot(Node)) { | |
| 5737 // If any phi instruction needs the GOT address, we place a | |
| 5738 // fake-use GotPtr | |
| 5739 // in Node to prevent the GotPtr's initialization from being dead code | |
| 5740 // eliminated. | |
| 5741 Node->getInsts().push_front(InstFakeUse::create(Func, GotPtr)); | |
| 5742 } | |
| 5743 } | |
| 5744 | |
| 5745 PhiLowering::prelowerPhis32Bit(this, Node, Func); | |
| 5519 } | 5746 } |
| 5520 | 5747 |
| 5521 Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) { | 5748 Variable *TargetARM32::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
| 5522 Variable *Reg = makeReg(Ty, RegNum); | 5749 Variable *Reg = makeReg(Ty, RegNum); |
| 5523 Context.insert<InstFakeDef>(Reg); | 5750 Context.insert<InstFakeDef>(Reg); |
| 5524 assert(isVectorType(Ty)); | 5751 assert(isVectorType(Ty)); |
| 5525 _veor(Reg, Reg, Reg); | 5752 _veor(Reg, Reg, Reg); |
| 5526 return Reg; | 5753 return Reg; |
| 5527 } | 5754 } |
| 5528 | 5755 |
| (...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 5671 uint32_t UpperBits = (Value >> 16) & 0xFFFF; | 5898 uint32_t UpperBits = (Value >> 16) & 0xFFFF; |
| 5672 _movw(Reg, | 5899 _movw(Reg, |
| 5673 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); | 5900 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); |
| 5674 if (UpperBits != 0) { | 5901 if (UpperBits != 0) { |
| 5675 _movt(Reg, Ctx->getConstantInt32(UpperBits)); | 5902 _movt(Reg, Ctx->getConstantInt32(UpperBits)); |
| 5676 } | 5903 } |
| 5677 return Reg; | 5904 return Reg; |
| 5678 } | 5905 } |
| 5679 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { | 5906 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { |
| 5680 Variable *Reg = makeReg(Ty, RegNum); | 5907 Variable *Reg = makeReg(Ty, RegNum); |
| 5681 _movw(Reg, C); | 5908 if (SandboxingType != ST_Nonsfi) { |
| 5682 _movt(Reg, C); | 5909 _movw(Reg, C); |
| 5910 _movt(Reg, C); | |
| 5911 } else { | |
| 5912 auto *GotAddr = legalizeToReg(GotPtr); | |
| 5913 const IceString CGotoffName = createGotoffRelocation(C); | |
| 5914 loadNamedConstantRelocatablePIC( | |
| 5915 CGotoffName, Reg, [this, Reg](Variable *PC) { | |
| 5916 _ldr(Reg, OperandARM32Mem::create(Func, IceType_i32, PC, Reg)); | |
| 5917 }); | |
| 5918 _add(Reg, GotAddr, Reg); | |
| 5919 } | |
| 5683 return Reg; | 5920 return Reg; |
| 5684 } else { | 5921 } else { |
| 5685 assert(isScalarFloatingType(Ty)); | 5922 assert(isScalarFloatingType(Ty)); |
| 5686 uint32_t ModifiedImm; | 5923 uint32_t ModifiedImm; |
| 5687 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { | 5924 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) { |
| 5688 Variable *T = makeReg(Ty, RegNum); | 5925 Variable *T = makeReg(Ty, RegNum); |
| 5689 _mov(T, | 5926 _mov(T, |
| 5690 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); | 5927 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm)); |
| 5691 return T; | 5928 return T; |
| 5692 } | 5929 } |
| 5693 | 5930 |
| 5694 if (Ty == IceType_f64 && isFloatingPointZero(From)) { | 5931 if (Ty == IceType_f64 && isFloatingPointZero(From)) { |
| 5695 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32 | 5932 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32 |
| 5696 // because ARM does not have a veor instruction with S registers. | 5933 // because ARM does not have a veor instruction with S registers. |
| 5697 Variable *T = makeReg(IceType_f64, RegNum); | 5934 Variable *T = makeReg(IceType_f64, RegNum); |
| 5698 Context.insert<InstFakeDef>(T); | 5935 Context.insert<InstFakeDef>(T); |
| 5699 _veor(T, T, T); | 5936 _veor(T, T, T); |
| 5700 return T; | 5937 return T; |
| 5701 } | 5938 } |
| 5702 | 5939 |
| 5703 // Load floats/doubles from literal pool. | 5940 // Load floats/doubles from literal pool. |
| 5704 std::string Buffer; | 5941 std::string Buffer; |
| 5705 llvm::raw_string_ostream StrBuf(Buffer); | 5942 llvm::raw_string_ostream StrBuf(Buffer); |
| 5706 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); | 5943 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf, Ctx); |
| 5707 llvm::cast<Constant>(From)->setShouldBePooled(true); | 5944 llvm::cast<Constant>(From)->setShouldBePooled(true); |
| 5708 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); | 5945 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); |
| 5709 Variable *BaseReg = makeReg(getPointerType()); | 5946 Variable *BaseReg = nullptr; |
| 5710 _movw(BaseReg, Offset); | 5947 if (SandboxingType == ST_Nonsfi) { |
| 5711 _movt(BaseReg, Offset); | 5948 // vldr does not support the [base, index] addressing mode, so we need |
| 5949 // to legalize Offset to a register. Otherwise, we could simply | |
| 5950 // vldr dest, [got, reg(Offset)] | |
| 5951 BaseReg = legalizeToReg(Offset); | |
| 5952 } else { | |
| 5953 BaseReg = makeReg(getPointerType()); | |
| 5954 _movw(BaseReg, Offset); | |
| 5955 _movt(BaseReg, Offset); | |
| 5956 } | |
| 5712 From = formMemoryOperand(BaseReg, Ty); | 5957 From = formMemoryOperand(BaseReg, Ty); |
| 5713 return copyToReg(From, RegNum); | 5958 return copyToReg(From, RegNum); |
| 5714 } | 5959 } |
| 5715 } | 5960 } |
| 5716 | 5961 |
| 5717 if (auto *Var = llvm::dyn_cast<Variable>(From)) { | 5962 if (auto *Var = llvm::dyn_cast<Variable>(From)) { |
| 5718 if (Var->isRematerializable()) { | 5963 if (Var->isRematerializable()) { |
| 5719 if (Allowed & Legal_Rematerializable) { | 5964 if (Allowed & Legal_Rematerializable) { |
| 5720 return From; | 5965 return From; |
| 5721 } | 5966 } |
| (...skipping 863 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 6585 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 6830 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
| 6586 } | 6831 } |
| 6587 | 6832 |
| 6588 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; | 6833 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM]; |
| 6589 llvm::SmallBitVector | 6834 llvm::SmallBitVector |
| 6590 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; | 6835 TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; |
| 6591 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; | 6836 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; |
| 6592 | 6837 |
| 6593 } // end of namespace ARM32 | 6838 } // end of namespace ARM32 |
| 6594 } // end of namespace Ice | 6839 } // end of namespace Ice |
| OLD | NEW |