Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(73)

Side by Side Diff: src/IceTargetLoweringARM32.h

Issue 1738443002: Subzero. Performance tweaks. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments -- all of them Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLowering.cpp ('k') | src/IceTargetLoweringARM32.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===// 1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
11 /// \brief Declares the TargetLoweringARM32 class, which implements the 11 /// \brief Declares the TargetLoweringARM32 class, which implements the
12 /// TargetLowering interface for the ARM 32-bit architecture. 12 /// TargetLowering interface for the ARM 32-bit architecture.
13 /// 13 ///
14 //===----------------------------------------------------------------------===// 14 //===----------------------------------------------------------------------===//
15 15
16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H 16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
17 #define SUBZERO_SRC_ICETARGETLOWERINGARM32_H 17 #define SUBZERO_SRC_ICETARGETLOWERINGARM32_H
18 18
19 #include "IceAssemblerARM32.h" 19 #include "IceAssemblerARM32.h"
20 #include "IceDefs.h" 20 #include "IceDefs.h"
21 #include "IceInstARM32.h" 21 #include "IceInstARM32.h"
22 #include "IceRegistersARM32.h" 22 #include "IceRegistersARM32.h"
23 #include "IceTargetLowering.h" 23 #include "IceTargetLowering.h"
24 24
25 #include "llvm/ADT/SmallBitVector.h"
26
27 #include <unordered_set>
28
29 namespace Ice { 25 namespace Ice {
30 namespace ARM32 { 26 namespace ARM32 {
31 27
32 // Class encapsulating ARM cpu features / instruction set. 28 // Class encapsulating ARM cpu features / instruction set.
33 class TargetARM32Features { 29 class TargetARM32Features {
34 TargetARM32Features() = delete; 30 TargetARM32Features() = delete;
35 TargetARM32Features(const TargetARM32Features &) = delete; 31 TargetARM32Features(const TargetARM32Features &) = delete;
36 TargetARM32Features &operator=(const TargetARM32Features &) = delete; 32 TargetARM32Features &operator=(const TargetARM32Features &) = delete;
37 33
38 public: 34 public:
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 } 73 }
78 74
79 void translateOm1() override; 75 void translateOm1() override;
80 void translateO2() override; 76 void translateO2() override;
81 bool doBranchOpt(Inst *I, const CfgNode *NextNode) override; 77 bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;
82 78
83 SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; } 79 SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
84 Variable *getPhysicalRegister(RegNumT RegNum, 80 Variable *getPhysicalRegister(RegNumT RegNum,
85 Type Ty = IceType_void) override; 81 Type Ty = IceType_void) override;
86 IceString getRegName(RegNumT RegNum, Type Ty) const override; 82 IceString getRegName(RegNumT RegNum, Type Ty) const override;
87 llvm::SmallBitVector getRegisterSet(RegSetMask Include, 83 SmallBitVector getRegisterSet(RegSetMask Include,
88 RegSetMask Exclude) const override; 84 RegSetMask Exclude) const override;
89 const llvm::SmallBitVector & 85 const SmallBitVector &
90 getRegistersForVariable(const Variable *Var) const override { 86 getRegistersForVariable(const Variable *Var) const override {
91 RegClass RC = Var->getRegClass(); 87 RegClass RC = Var->getRegClass();
92 switch (RC) { 88 switch (RC) {
93 default: 89 default:
94 assert(RC < RC_Target); 90 assert(RC < RC_Target);
95 return TypeToRegisterSet[RC]; 91 return TypeToRegisterSet[RC];
96 case RegARM32::RCARM32_QtoS: 92 case RegARM32::RCARM32_QtoS:
97 return TypeToRegisterSet[RC]; 93 return TypeToRegisterSet[RC];
98 } 94 }
99 } 95 }
100 const llvm::SmallBitVector & 96 const SmallBitVector &
101 getAllRegistersForVariable(const Variable *Var) const override { 97 getAllRegistersForVariable(const Variable *Var) const override {
102 RegClass RC = Var->getRegClass(); 98 RegClass RC = Var->getRegClass();
103 assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM); 99 assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);
104 return TypeToRegisterSetUnfiltered[RC]; 100 return TypeToRegisterSetUnfiltered[RC];
105 } 101 }
106 const llvm::SmallBitVector & 102 const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
107 getAliasesForRegister(RegNumT Reg) const override {
108 return RegisterAliases[Reg]; 103 return RegisterAliases[Reg];
109 } 104 }
110 bool hasFramePointer() const override { return UsesFramePointer; } 105 bool hasFramePointer() const override { return UsesFramePointer; }
111 void setHasFramePointer() override { UsesFramePointer = true; } 106 void setHasFramePointer() override { UsesFramePointer = true; }
112 RegNumT getStackReg() const override { return RegARM32::Reg_sp; } 107 RegNumT getStackReg() const override { return RegARM32::Reg_sp; }
113 RegNumT getFrameReg() const override { return RegARM32::Reg_fp; } 108 RegNumT getFrameReg() const override { return RegARM32::Reg_fp; }
114 RegNumT getFrameOrStackReg() const override { 109 RegNumT getFrameOrStackReg() const override {
115 return UsesFramePointer ? getFrameReg() : getStackReg(); 110 return UsesFramePointer ? getFrameReg() : getStackReg();
116 } 111 }
117 RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; } 112 RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; }
(...skipping 177 matching lines...) Expand 10 before | Expand all | Expand 10 after
295 static Type stackSlotType(); 290 static Type stackSlotType();
296 Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT()); 291 Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
297 void alignRegisterPow2(Variable *Reg, uint32_t Align, 292 void alignRegisterPow2(Variable *Reg, uint32_t Align,
298 RegNumT TmpRegNum = RegNumT()); 293 RegNumT TmpRegNum = RegNumT());
299 294
300 /// Returns a vector in a register with the given constant entries. 295 /// Returns a vector in a register with the given constant entries.
301 Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT()); 296 Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());
302 297
303 void 298 void
304 makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation, 299 makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
305 const llvm::SmallBitVector &ExcludeRegisters, 300 const SmallBitVector &ExcludeRegisters,
306 uint64_t Salt) const override; 301 uint64_t Salt) const override;
307 302
308 // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap; 303 // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap;
309 // .LSKIP: <continuation>. If no check is needed nothing is inserted. 304 // .LSKIP: <continuation>. If no check is needed nothing is inserted.
310 void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi); 305 void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
311 using ExtInstr = void (TargetARM32::*)(Variable *, Variable *, 306 using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
312 CondARM32::Cond); 307 CondARM32::Cond);
313 using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *, 308 using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,
314 CondARM32::Cond); 309 CondARM32::Cond);
315 void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1, 310 void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
(...skipping 575 matching lines...) Expand 10 before | Expand all | Expand 10 after
891 // TODO(jpp): use CfgLocalAllocator. 886 // TODO(jpp): use CfgLocalAllocator.
892 /// @} 887 /// @}
893 888
894 /// Manages the Gotoff relocations created during the function lowering. A 889 /// Manages the Gotoff relocations created during the function lowering. A
895 /// single Gotoff relocation is created for each global variable used by the 890 /// single Gotoff relocation is created for each global variable used by the
896 /// function being lowered. 891 /// function being lowered.
897 /// @{ 892 /// @{
898 // TODO(jpp): if the same global G is used in different functions, then this 893 // TODO(jpp): if the same global G is used in different functions, then this
899 // method will emit one G(gotoff) relocation per function. 894 // method will emit one G(gotoff) relocation per function.
900 IceString createGotoffRelocation(const ConstantRelocatable *CR); 895 IceString createGotoffRelocation(const ConstantRelocatable *CR);
901 std::unordered_set<IceString> KnownGotoffs; 896 CfgUnorderedSet<IceString> KnownGotoffs;
902 /// @} 897 /// @}
903 898
904 /// Loads the constant relocatable Name to Register. Then invoke Finish to 899 /// Loads the constant relocatable Name to Register. Then invoke Finish to
905 /// finish the relocatable lowering. Finish **must** use PC in its first 900 /// finish the relocatable lowering. Finish **must** use PC in its first
906 /// emitted instruction, or the relocatable in Register will contain the wrong 901 /// emitted instruction, or the relocatable in Register will contain the wrong
907 /// value. 902 /// value.
908 // 903 //
909 // Lowered sequence: 904 // Lowered sequence:
910 // 905 //
911 // Movw: 906 // Movw:
(...skipping 197 matching lines...) Expand 10 before | Expand all | Expand 10 after
1109 TargetARM32Features CPUFeatures; 1104 TargetARM32Features CPUFeatures;
1110 bool UsesFramePointer = false; 1105 bool UsesFramePointer = false;
1111 bool NeedsStackAlignment = false; 1106 bool NeedsStackAlignment = false;
1112 bool MaybeLeafFunc = true; 1107 bool MaybeLeafFunc = true;
1113 size_t SpillAreaSizeBytes = 0; 1108 size_t SpillAreaSizeBytes = 0;
1114 size_t FixedAllocaSizeBytes = 0; 1109 size_t FixedAllocaSizeBytes = 0;
1115 size_t FixedAllocaAlignBytes = 0; 1110 size_t FixedAllocaAlignBytes = 0;
1116 bool PrologEmitsFixedAllocas = false; 1111 bool PrologEmitsFixedAllocas = false;
1117 uint32_t MaxOutArgsSizeBytes = 0; 1112 uint32_t MaxOutArgsSizeBytes = 0;
1118 // TODO(jpp): std::array instead of array. 1113 // TODO(jpp): std::array instead of array.
1119 static llvm::SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM]; 1114 static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
1120 static llvm::SmallBitVector 1115 static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
1121 TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM]; 1116 static SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
1122 static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM]; 1117 SmallBitVector RegsUsed;
1123 llvm::SmallBitVector RegsUsed;
1124 VarList PhysicalRegisters[IceType_NUM]; 1118 VarList PhysicalRegisters[IceType_NUM];
1125 VarList PreservedGPRs; 1119 VarList PreservedGPRs;
1126 VarList PreservedSRegs; 1120 VarList PreservedSRegs;
1127 1121
1128 /// Helper class that understands the Calling Convention and register 1122 /// Helper class that understands the Calling Convention and register
1129 /// assignments. The first few integer type parameters can use r0-r3, 1123 /// assignments. The first few integer type parameters can use r0-r3,
1130 /// regardless of their position relative to the floating-point/vector 1124 /// regardless of their position relative to the floating-point/vector
1131 /// arguments in the argument list. Floating-point and vector arguments 1125 /// arguments in the argument list. Floating-point and vector arguments
1132 /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic, 1126 /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
1133 /// see the ARM Architecture Procedure Calling Standards (AAPCS). 1127 /// see the ARM Architecture Procedure Calling Standards (AAPCS).
(...skipping 17 matching lines...) Expand all
1151 /// appropriate register number. Note that, when Ty == IceType_i64, Reg will 1145 /// appropriate register number. Note that, when Ty == IceType_i64, Reg will
1152 /// be an I64 register pair. 1146 /// be an I64 register pair.
1153 bool argInGPR(Type Ty, RegNumT *Reg); 1147 bool argInGPR(Type Ty, RegNumT *Reg);
1154 1148
1155 /// argInVFP is to floating-point/vector types what argInGPR is for integer 1149 /// argInVFP is to floating-point/vector types what argInGPR is for integer
1156 /// types. 1150 /// types.
1157 bool argInVFP(Type Ty, RegNumT *Reg); 1151 bool argInVFP(Type Ty, RegNumT *Reg);
1158 1152
1159 private: 1153 private:
1160 void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs); 1154 void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);
1161 llvm::SmallBitVector GPRegsUsed; 1155 SmallBitVector GPRegsUsed;
1162 CfgVector<RegNumT> GPRArgs; 1156 CfgVector<RegNumT> GPRArgs;
1163 CfgVector<RegNumT> I64Args; 1157 CfgVector<RegNumT> I64Args;
1164 1158
1165 void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs); 1159 void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);
1166 llvm::SmallBitVector VFPRegsUsed; 1160 SmallBitVector VFPRegsUsed;
1167 CfgVector<RegNumT> FP32Args; 1161 CfgVector<RegNumT> FP32Args;
1168 CfgVector<RegNumT> FP64Args; 1162 CfgVector<RegNumT> FP64Args;
1169 CfgVector<RegNumT> Vec128Args; 1163 CfgVector<RegNumT> Vec128Args;
1170 }; 1164 };
1171 1165
1172 private: 1166 private:
1173 ENABLE_MAKE_UNIQUE; 1167 ENABLE_MAKE_UNIQUE;
1174 1168
1175 OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt, 1169 OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
1176 Operand *Base); 1170 Operand *Base);
1177 1171
1178 void postambleCtpop64(const InstCall *Instr); 1172 void postambleCtpop64(const InstCall *Instr);
1179 void preambleDivRem(const InstCall *Instr); 1173 void preambleDivRem(const InstCall *Instr);
1180 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Instr)> 1174 CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
1181 ARM32HelpersPreamble; 1175 ARM32HelpersPreamble;
1182 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Instr)> 1176 CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
1183 ARM32HelpersPostamble; 1177 ARM32HelpersPostamble;
1184 1178
1185 class ComputationTracker { 1179 class ComputationTracker {
1186 public: 1180 public:
1187 ComputationTracker() = default; 1181 ComputationTracker() = default;
1188 ~ComputationTracker() = default; 1182 ~ComputationTracker() = default;
1189 1183
1190 void forgetProducers() { KnownComputations.clear(); } 1184 void forgetProducers() { KnownComputations.clear(); }
1191 void recordProducers(CfgNode *Node); 1185 void recordProducers(CfgNode *Node);
1192 1186
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
1229 // by this instruction. If liveness analysis is not performed (e.g., in 1223 // by this instruction. If liveness analysis is not performed (e.g., in
1230 // Om1 mode) IsLiveOut will never be set to false, and folding will be 1224 // Om1 mode) IsLiveOut will never be set to false, and folding will be
1231 // disabled. 1225 // disabled.
1232 bool IsLiveOut = true; 1226 bool IsLiveOut = true;
1233 int32_t NumUses = 0; 1227 int32_t NumUses = 0;
1234 Type ComputationType; 1228 Type ComputationType;
1235 }; 1229 };
1236 1230
1237 // ComputationMap maps a Variable number to a payload identifying which 1231 // ComputationMap maps a Variable number to a payload identifying which
1238 // instruction defined it. 1232 // instruction defined it.
1239 using ComputationMap = std::unordered_map<SizeT, ComputationEntry>; 1233 using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>;
1240 ComputationMap KnownComputations; 1234 ComputationMap KnownComputations;
1241 }; 1235 };
1242 1236
1243 ComputationTracker Computations; 1237 ComputationTracker Computations;
1244 1238
1245 // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked 1239 // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
1246 // without specifying a physical register. This is needed for creating unbound 1240 // without specifying a physical register. This is needed for creating unbound
1247 // temporaries during Ice -> ARM lowering, but before register allocation. 1241 // temporaries during Ice -> ARM lowering, but before register allocation.
1248 // This is a safeguard that no unbound temporaries are created during the 1242 // This is a safeguard that no unbound temporaries are created during the
1249 // legalization post-passes. 1243 // legalization post-passes.
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
1307 private: 1301 private:
1308 ~TargetHeaderARM32() = default; 1302 ~TargetHeaderARM32() = default;
1309 1303
1310 TargetARM32Features CPUFeatures; 1304 TargetARM32Features CPUFeatures;
1311 }; 1305 };
1312 1306
1313 } // end of namespace ARM32 1307 } // end of namespace ARM32
1314 } // end of namespace Ice 1308 } // end of namespace Ice
1315 1309
1316 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H 1310 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H
OLDNEW
« no previous file with comments | « src/IceTargetLowering.cpp ('k') | src/IceTargetLoweringARM32.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698