OLD | NEW |
1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===// | 1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
11 /// \brief Declares the TargetLoweringARM32 class, which implements the | 11 /// \brief Declares the TargetLoweringARM32 class, which implements the |
12 /// TargetLowering interface for the ARM 32-bit architecture. | 12 /// TargetLowering interface for the ARM 32-bit architecture. |
13 /// | 13 /// |
14 //===----------------------------------------------------------------------===// | 14 //===----------------------------------------------------------------------===// |
15 | 15 |
16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H | 16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H |
17 #define SUBZERO_SRC_ICETARGETLOWERINGARM32_H | 17 #define SUBZERO_SRC_ICETARGETLOWERINGARM32_H |
18 | 18 |
19 #include "IceAssemblerARM32.h" | 19 #include "IceAssemblerARM32.h" |
20 #include "IceDefs.h" | 20 #include "IceDefs.h" |
21 #include "IceInstARM32.h" | 21 #include "IceInstARM32.h" |
22 #include "IceRegistersARM32.h" | 22 #include "IceRegistersARM32.h" |
23 #include "IceTargetLowering.h" | 23 #include "IceTargetLowering.h" |
24 | 24 |
25 #include "llvm/ADT/SmallBitVector.h" | |
26 | |
27 #include <unordered_set> | |
28 | |
29 namespace Ice { | 25 namespace Ice { |
30 namespace ARM32 { | 26 namespace ARM32 { |
31 | 27 |
32 // Class encapsulating ARM CPU features / instruction set. | 28 // Class encapsulating ARM CPU features / instruction set. |
33 class TargetARM32Features { | 29 class TargetARM32Features { |
34 TargetARM32Features() = delete; | 30 TargetARM32Features() = delete; |
35 TargetARM32Features(const TargetARM32Features &) = delete; | 31 TargetARM32Features(const TargetARM32Features &) = delete; |
36 TargetARM32Features &operator=(const TargetARM32Features &) = delete; | 32 TargetARM32Features &operator=(const TargetARM32Features &) = delete; |
37 | 33 |
38 public: | 34 public: |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
77 } | 73 } |
78 | 74 |
79 void translateOm1() override; | 75 void translateOm1() override; |
80 void translateO2() override; | 76 void translateO2() override; |
81 bool doBranchOpt(Inst *I, const CfgNode *NextNode) override; | 77 bool doBranchOpt(Inst *I, const CfgNode *NextNode) override; |
82 | 78 |
83 SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; } | 79 SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; } |
84 Variable *getPhysicalRegister(RegNumT RegNum, | 80 Variable *getPhysicalRegister(RegNumT RegNum, |
85 Type Ty = IceType_void) override; | 81 Type Ty = IceType_void) override; |
86 IceString getRegName(RegNumT RegNum, Type Ty) const override; | 82 IceString getRegName(RegNumT RegNum, Type Ty) const override; |
87 llvm::SmallBitVector getRegisterSet(RegSetMask Include, | 83 SmallBitVector getRegisterSet(RegSetMask Include, |
88 RegSetMask Exclude) const override; | 84 RegSetMask Exclude) const override; |
89 const llvm::SmallBitVector & | 85 const SmallBitVector & |
90 getRegistersForVariable(const Variable *Var) const override { | 86 getRegistersForVariable(const Variable *Var) const override { |
91 RegClass RC = Var->getRegClass(); | 87 RegClass RC = Var->getRegClass(); |
92 switch (RC) { | 88 switch (RC) { |
93 default: | 89 default: |
94 assert(RC < RC_Target); | 90 assert(RC < RC_Target); |
95 return TypeToRegisterSet[RC]; | 91 return TypeToRegisterSet[RC]; |
96 case RegARM32::RCARM32_QtoS: | 92 case RegARM32::RCARM32_QtoS: |
97 return TypeToRegisterSet[RC]; | 93 return TypeToRegisterSet[RC]; |
98 } | 94 } |
99 } | 95 } |
100 const llvm::SmallBitVector & | 96 const SmallBitVector & |
101 getAllRegistersForVariable(const Variable *Var) const override { | 97 getAllRegistersForVariable(const Variable *Var) const override { |
102 RegClass RC = Var->getRegClass(); | 98 RegClass RC = Var->getRegClass(); |
103 assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM); | 99 assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM); |
104 return TypeToRegisterSetUnfiltered[RC]; | 100 return TypeToRegisterSetUnfiltered[RC]; |
105 } | 101 } |
106 const llvm::SmallBitVector & | 102 const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override { |
107 getAliasesForRegister(RegNumT Reg) const override { | |
108 return RegisterAliases[Reg]; | 103 return RegisterAliases[Reg]; |
109 } | 104 } |
110 bool hasFramePointer() const override { return UsesFramePointer; } | 105 bool hasFramePointer() const override { return UsesFramePointer; } |
111 void setHasFramePointer() override { UsesFramePointer = true; } | 106 void setHasFramePointer() override { UsesFramePointer = true; } |
112 RegNumT getStackReg() const override { return RegARM32::Reg_sp; } | 107 RegNumT getStackReg() const override { return RegARM32::Reg_sp; } |
113 RegNumT getFrameReg() const override { return RegARM32::Reg_fp; } | 108 RegNumT getFrameReg() const override { return RegARM32::Reg_fp; } |
114 RegNumT getFrameOrStackReg() const override { | 109 RegNumT getFrameOrStackReg() const override { |
115 return UsesFramePointer ? getFrameReg() : getStackReg(); | 110 return UsesFramePointer ? getFrameReg() : getStackReg(); |
116 } | 111 } |
117 RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; } | 112 RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; } |
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
295 static Type stackSlotType(); | 290 static Type stackSlotType(); |
296 Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT()); | 291 Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT()); |
297 void alignRegisterPow2(Variable *Reg, uint32_t Align, | 292 void alignRegisterPow2(Variable *Reg, uint32_t Align, |
298 RegNumT TmpRegNum = RegNumT()); | 293 RegNumT TmpRegNum = RegNumT()); |
299 | 294 |
300 /// Returns a vector in a register with the given constant entries. | 295 /// Returns a vector in a register with the given constant entries. |
301 Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT()); | 296 Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT()); |
302 | 297 |
303 void | 298 void |
304 makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation, | 299 makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation, |
305 const llvm::SmallBitVector &ExcludeRegisters, | 300 const SmallBitVector &ExcludeRegisters, |
306 uint64_t Salt) const override; | 301 uint64_t Salt) const override; |
307 | 302 |
308 // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap; | 303 // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap; |
309 // .LSKIP: <continuation>. If no check is needed nothing is inserted. | 304 // .LSKIP: <continuation>. If no check is needed nothing is inserted. |
310 void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi); | 305 void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi); |
311 using ExtInstr = void (TargetARM32::*)(Variable *, Variable *, | 306 using ExtInstr = void (TargetARM32::*)(Variable *, Variable *, |
312 CondARM32::Cond); | 307 CondARM32::Cond); |
313 using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *, | 308 using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *, |
314 CondARM32::Cond); | 309 CondARM32::Cond); |
315 void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1, | 310 void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1, |
(...skipping 575 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
891 // TODO(jpp): use CfgLocalAllocator. | 886 // TODO(jpp): use CfgLocalAllocator. |
892 /// @} | 887 /// @} |
893 | 888 |
894 /// Manages the Gotoff relocations created during the function lowering. A | 889 /// Manages the Gotoff relocations created during the function lowering. A |
895 /// single Gotoff relocation is created for each global variable used by the | 890 /// single Gotoff relocation is created for each global variable used by the |
896 /// function being lowered. | 891 /// function being lowered. |
897 /// @{ | 892 /// @{ |
898 // TODO(jpp): if the same global G is used in different functions, then this | 893 // TODO(jpp): if the same global G is used in different functions, then this |
899 // method will emit one G(gotoff) relocation per function. | 894 // method will emit one G(gotoff) relocation per function. |
900 IceString createGotoffRelocation(const ConstantRelocatable *CR); | 895 IceString createGotoffRelocation(const ConstantRelocatable *CR); |
901 std::unordered_set<IceString> KnownGotoffs; | 896 CfgUnorderedSet<IceString> KnownGotoffs; |
902 /// @} | 897 /// @} |
903 | 898 |
904 /// Loads the constant relocatable Name to Register. Then invokes Finish to | 899 /// Loads the constant relocatable Name to Register. Then invokes Finish to |
905 /// finish the relocatable lowering. Finish **must** use PC in its first | 900 /// finish the relocatable lowering. Finish **must** use PC in its first |
906 /// emitted instruction, or the relocatable in Register will contain the wrong | 901 /// emitted instruction, or the relocatable in Register will contain the wrong |
907 /// value. | 902 /// value. |
908 // | 903 // |
909 // Lowered sequence: | 904 // Lowered sequence: |
910 // | 905 // |
911 // Movw: | 906 // Movw: |
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1109 TargetARM32Features CPUFeatures; | 1104 TargetARM32Features CPUFeatures; |
1110 bool UsesFramePointer = false; | 1105 bool UsesFramePointer = false; |
1111 bool NeedsStackAlignment = false; | 1106 bool NeedsStackAlignment = false; |
1112 bool MaybeLeafFunc = true; | 1107 bool MaybeLeafFunc = true; |
1113 size_t SpillAreaSizeBytes = 0; | 1108 size_t SpillAreaSizeBytes = 0; |
1114 size_t FixedAllocaSizeBytes = 0; | 1109 size_t FixedAllocaSizeBytes = 0; |
1115 size_t FixedAllocaAlignBytes = 0; | 1110 size_t FixedAllocaAlignBytes = 0; |
1116 bool PrologEmitsFixedAllocas = false; | 1111 bool PrologEmitsFixedAllocas = false; |
1117 uint32_t MaxOutArgsSizeBytes = 0; | 1112 uint32_t MaxOutArgsSizeBytes = 0; |
1118 // TODO(jpp): std::array instead of array. | 1113 // TODO(jpp): std::array instead of array. |
1119 static llvm::SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM]; | 1114 static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM]; |
1120 static llvm::SmallBitVector | 1115 static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; |
1121 TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; | 1116 static SmallBitVector RegisterAliases[RegARM32::Reg_NUM]; |
1122 static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM]; | 1117 SmallBitVector RegsUsed; |
1123 llvm::SmallBitVector RegsUsed; | |
1124 VarList PhysicalRegisters[IceType_NUM]; | 1118 VarList PhysicalRegisters[IceType_NUM]; |
1125 VarList PreservedGPRs; | 1119 VarList PreservedGPRs; |
1126 VarList PreservedSRegs; | 1120 VarList PreservedSRegs; |
1127 | 1121 |
1128 /// Helper class that understands the Calling Convention and register | 1122 /// Helper class that understands the Calling Convention and register |
1129 /// assignments. The first few integer type parameters can use r0-r3, | 1123 /// assignments. The first few integer type parameters can use r0-r3, |
1130 /// regardless of their position relative to the floating-point/vector | 1124 /// regardless of their position relative to the floating-point/vector |
1131 /// arguments in the argument list. Floating-point and vector arguments | 1125 /// arguments in the argument list. Floating-point and vector arguments |
1132 /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic, | 1126 /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic, |
1133 /// see the Procedure Call Standard for the ARM Architecture (AAPCS). | 1127 /// see the Procedure Call Standard for the ARM Architecture (AAPCS). |
(...skipping 17 matching lines...) Expand all Loading... |
1151 /// appropriate register number. Note that, when Ty == IceType_i64, Reg will | 1145 /// appropriate register number. Note that, when Ty == IceType_i64, Reg will |
1152 /// be an I64 register pair. | 1146 /// be an I64 register pair. |
1153 bool argInGPR(Type Ty, RegNumT *Reg); | 1147 bool argInGPR(Type Ty, RegNumT *Reg); |
1154 | 1148 |
1155 /// argInVFP is to floating-point/vector types what argInGPR is to integer | 1149 /// argInVFP is to floating-point/vector types what argInGPR is to integer |
1156 /// types. | 1150 /// types. |
1157 bool argInVFP(Type Ty, RegNumT *Reg); | 1151 bool argInVFP(Type Ty, RegNumT *Reg); |
1158 | 1152 |
1159 private: | 1153 private: |
1160 void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs); | 1154 void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs); |
1161 llvm::SmallBitVector GPRegsUsed; | 1155 SmallBitVector GPRegsUsed; |
1162 CfgVector<RegNumT> GPRArgs; | 1156 CfgVector<RegNumT> GPRArgs; |
1163 CfgVector<RegNumT> I64Args; | 1157 CfgVector<RegNumT> I64Args; |
1164 | 1158 |
1165 void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs); | 1159 void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs); |
1166 llvm::SmallBitVector VFPRegsUsed; | 1160 SmallBitVector VFPRegsUsed; |
1167 CfgVector<RegNumT> FP32Args; | 1161 CfgVector<RegNumT> FP32Args; |
1168 CfgVector<RegNumT> FP64Args; | 1162 CfgVector<RegNumT> FP64Args; |
1169 CfgVector<RegNumT> Vec128Args; | 1163 CfgVector<RegNumT> Vec128Args; |
1170 }; | 1164 }; |
1171 | 1165 |
1172 private: | 1166 private: |
1173 ENABLE_MAKE_UNIQUE; | 1167 ENABLE_MAKE_UNIQUE; |
1174 | 1168 |
1175 OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt, | 1169 OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt, |
1176 Operand *Base); | 1170 Operand *Base); |
1177 | 1171 |
1178 void postambleCtpop64(const InstCall *Instr); | 1172 void postambleCtpop64(const InstCall *Instr); |
1179 void preambleDivRem(const InstCall *Instr); | 1173 void preambleDivRem(const InstCall *Instr); |
1180 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Instr)> | 1174 CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)> |
1181 ARM32HelpersPreamble; | 1175 ARM32HelpersPreamble; |
1182 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Instr)> | 1176 CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)> |
1183 ARM32HelpersPostamble; | 1177 ARM32HelpersPostamble; |
1184 | 1178 |
1185 class ComputationTracker { | 1179 class ComputationTracker { |
1186 public: | 1180 public: |
1187 ComputationTracker() = default; | 1181 ComputationTracker() = default; |
1188 ~ComputationTracker() = default; | 1182 ~ComputationTracker() = default; |
1189 | 1183 |
1190 void forgetProducers() { KnownComputations.clear(); } | 1184 void forgetProducers() { KnownComputations.clear(); } |
1191 void recordProducers(CfgNode *Node); | 1185 void recordProducers(CfgNode *Node); |
1192 | 1186 |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1229 // by this instruction. If liveness analysis is not performed (e.g., in | 1223 // by this instruction. If liveness analysis is not performed (e.g., in |
1230 // Om1 mode) IsLiveOut will never be set to false, and folding will be | 1224 // Om1 mode) IsLiveOut will never be set to false, and folding will be |
1231 // disabled. | 1225 // disabled. |
1232 bool IsLiveOut = true; | 1226 bool IsLiveOut = true; |
1233 int32_t NumUses = 0; | 1227 int32_t NumUses = 0; |
1234 Type ComputationType; | 1228 Type ComputationType; |
1235 }; | 1229 }; |
1236 | 1230 |
1237 // ComputationMap maps a Variable number to a payload identifying which | 1231 // ComputationMap maps a Variable number to a payload identifying which |
1238 // instruction defined it. | 1232 // instruction defined it. |
1239 using ComputationMap = std::unordered_map<SizeT, ComputationEntry>; | 1233 using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>; |
1240 ComputationMap KnownComputations; | 1234 ComputationMap KnownComputations; |
1241 }; | 1235 }; |
1242 | 1236 |
1243 ComputationTracker Computations; | 1237 ComputationTracker Computations; |
1244 | 1238 |
1245 // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked | 1239 // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked |
1246 // without specifying a physical register. This is needed for creating unbound | 1240 // without specifying a physical register. This is needed for creating unbound |
1247 // temporaries during Ice -> ARM lowering, but before register allocation. | 1241 // temporaries during Ice -> ARM lowering, but before register allocation. |
1248 // This is a safeguard ensuring that no unbound temporaries are created during the | 1242 // This is a safeguard ensuring that no unbound temporaries are created during the |
1249 // legalization post-passes. | 1243 // legalization post-passes. |
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1307 private: | 1301 private: |
1308 ~TargetHeaderARM32() = default; | 1302 ~TargetHeaderARM32() = default; |
1309 | 1303 |
1310 TargetARM32Features CPUFeatures; | 1304 TargetARM32Features CPUFeatures; |
1311 }; | 1305 }; |
1312 | 1306 |
1313 } // end of namespace ARM32 | 1307 } // end of namespace ARM32 |
1314 } // end of namespace Ice | 1308 } // end of namespace Ice |
1315 | 1309 |
1316 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H | 1310 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H |
OLD | NEW |