src/IceTargetLoweringARM32.h - Issue 1738443002: Subzero. Performance tweaks.

Side by Side Diff: src/IceTargetLoweringARM32.h

Issue 1738443002: Subzero. Performance tweaks. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Addresses comments -- all of them Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----- C++ --===//	1 //===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----- C++ --===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 ///	9 ///

10 /// \file	10 /// \file

11 /// \brief Declares the TargetLoweringARM32 class, which implements the	11 /// \brief Declares the TargetLoweringARM32 class, which implements the

12 /// TargetLowering interface for the ARM 32-bit architecture.	12 /// TargetLowering interface for the ARM 32-bit architecture.

13 ///	13 ///

14 //===----------------------------------------------------------------------===//	14 //===----------------------------------------------------------------------===//

15	15

16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H	16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H

17 #define SUBZERO_SRC_ICETARGETLOWERINGARM32_H	17 #define SUBZERO_SRC_ICETARGETLOWERINGARM32_H

18	18

19 #include "IceAssemblerARM32.h"	19 #include "IceAssemblerARM32.h"

20 #include "IceDefs.h"	20 #include "IceDefs.h"

21 #include "IceInstARM32.h"	21 #include "IceInstARM32.h"

22 #include "IceRegistersARM32.h"	22 #include "IceRegistersARM32.h"

23 #include "IceTargetLowering.h"	23 #include "IceTargetLowering.h"

24	24

25 #include "llvm/ADT/SmallBitVector.h"

26

27 #include <unordered_set>

28

29 namespace Ice {	25 namespace Ice {

30 namespace ARM32 {	26 namespace ARM32 {

31	27

32 // Class encapsulating ARM cpu features / instruction set.	28 // Class encapsulating ARM cpu features / instruction set.

33 class TargetARM32Features {	29 class TargetARM32Features {

34 TargetARM32Features() = delete;	30 TargetARM32Features() = delete;

35 TargetARM32Features(const TargetARM32Features &) = delete;	31 TargetARM32Features(const TargetARM32Features &) = delete;

36 TargetARM32Features &operator=(const TargetARM32Features &) = delete;	32 TargetARM32Features &operator=(const TargetARM32Features &) = delete;

37	33

38 public:	34 public:

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
77 }	73 }

78	74

79 void translateOm1() override;	75 void translateOm1() override;

80 void translateO2() override;	76 void translateO2() override;

81 bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;	77 bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;

82	78

83 SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }	79 SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }

84 Variable *getPhysicalRegister(RegNumT RegNum,	80 Variable *getPhysicalRegister(RegNumT RegNum,

85 Type Ty = IceType_void) override;	81 Type Ty = IceType_void) override;

86 IceString getRegName(RegNumT RegNum, Type Ty) const override;	82 IceString getRegName(RegNumT RegNum, Type Ty) const override;

87 llvm::SmallBitVector getRegisterSet(RegSetMask Include,	83 SmallBitVector getRegisterSet(RegSetMask Include,

88 RegSetMask Exclude) const override;	84 RegSetMask Exclude) const override;

89 const llvm::SmallBitVector &	85 const SmallBitVector &

90 getRegistersForVariable(const Variable *Var) const override {	86 getRegistersForVariable(const Variable *Var) const override {

91 RegClass RC = Var->getRegClass();	87 RegClass RC = Var->getRegClass();

92 switch (RC) {	88 switch (RC) {

93 default:	89 default:

94 assert(RC < RC_Target);	90 assert(RC < RC_Target);

95 return TypeToRegisterSet[RC];	91 return TypeToRegisterSet[RC];

96 case RegARM32::RCARM32_QtoS:	92 case RegARM32::RCARM32_QtoS:

97 return TypeToRegisterSet[RC];	93 return TypeToRegisterSet[RC];

98 }	94 }

99 }	95 }

100 const llvm::SmallBitVector &	96 const SmallBitVector &

101 getAllRegistersForVariable(const Variable *Var) const override {	97 getAllRegistersForVariable(const Variable *Var) const override {

102 RegClass RC = Var->getRegClass();	98 RegClass RC = Var->getRegClass();

103 assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);	99 assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);

104 return TypeToRegisterSetUnfiltered[RC];	100 return TypeToRegisterSetUnfiltered[RC];

105 }	101 }

106 const llvm::SmallBitVector &	102 const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {

107 getAliasesForRegister(RegNumT Reg) const override {

108 return RegisterAliases[Reg];	103 return RegisterAliases[Reg];

109 }	104 }

110 bool hasFramePointer() const override { return UsesFramePointer; }	105 bool hasFramePointer() const override { return UsesFramePointer; }

111 void setHasFramePointer() override { UsesFramePointer = true; }	106 void setHasFramePointer() override { UsesFramePointer = true; }

112 RegNumT getStackReg() const override { return RegARM32::Reg_sp; }	107 RegNumT getStackReg() const override { return RegARM32::Reg_sp; }

113 RegNumT getFrameReg() const override { return RegARM32::Reg_fp; }	108 RegNumT getFrameReg() const override { return RegARM32::Reg_fp; }

114 RegNumT getFrameOrStackReg() const override {	109 RegNumT getFrameOrStackReg() const override {

115 return UsesFramePointer ? getFrameReg() : getStackReg();	110 return UsesFramePointer ? getFrameReg() : getStackReg();

116 }	111 }

117 RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; }	112 RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; }

(...skipping 177 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
295 static Type stackSlotType();	290 static Type stackSlotType();

296 Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());	291 Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());

297 void alignRegisterPow2(Variable *Reg, uint32_t Align,	292 void alignRegisterPow2(Variable *Reg, uint32_t Align,

298 RegNumT TmpRegNum = RegNumT());	293 RegNumT TmpRegNum = RegNumT());

299	294

300 /// Returns a vector in a register with the given constant entries.	295 /// Returns a vector in a register with the given constant entries.

301 Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());	296 Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());

302	297

303 void	298 void

304 makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,	299 makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,

305 const llvm::SmallBitVector &ExcludeRegisters,	300 const SmallBitVector &ExcludeRegisters,

306 uint64_t Salt) const override;	301 uint64_t Salt) const override;

307	302

308 // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap;	303 // If a divide-by-zero check is needed, inserts a: test; branch .LSKIP; trap;

309 // .LSKIP: <continuation>. If no check is needed nothing is inserted.	304 // .LSKIP: <continuation>. If no check is needed nothing is inserted.

310 void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);	305 void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);

311 using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,	306 using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,

312 CondARM32::Cond);	307 CondARM32::Cond);

313 using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,	308 using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,

314 CondARM32::Cond);	309 CondARM32::Cond);

315 void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,	310 void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,

(...skipping 575 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
891 // TODO(jpp): use CfgLocalAllocator.	886 // TODO(jpp): use CfgLocalAllocator.

892 /// @}	887 /// @}

893	888

894 /// Manages the Gotoff relocations created during the function lowering. A	889 /// Manages the Gotoff relocations created during the function lowering. A

895 /// single Gotoff relocation is created for each global variable used by the	890 /// single Gotoff relocation is created for each global variable used by the

896 /// function being lowered.	891 /// function being lowered.

897 /// @{	892 /// @{

898 // TODO(jpp): if the same global G is used in different functions, then this	893 // TODO(jpp): if the same global G is used in different functions, then this

899 // method will emit one G(gotoff) relocation per function.	894 // method will emit one G(gotoff) relocation per function.

900 IceString createGotoffRelocation(const ConstantRelocatable *CR);	895 IceString createGotoffRelocation(const ConstantRelocatable *CR);

901 std::unordered_set<IceString> KnownGotoffs;	896 CfgUnorderedSet<IceString> KnownGotoffs;

902 /// @}	897 /// @}

903	898

904 /// Loads the constant relocatable Name to Register. Then invoke Finish to	899 /// Loads the constant relocatable Name to Register. Then invoke Finish to

905 /// finish the relocatable lowering. Finish must use PC in its first	900 /// finish the relocatable lowering. Finish must use PC in its first

906 /// emitted instruction, or the relocatable in Register will contain the wrong	901 /// emitted instruction, or the relocatable in Register will contain the wrong

907 /// value.	902 /// value.

908 //	903 //

909 // Lowered sequence:	904 // Lowered sequence:

910 //	905 //

911 // Movw:	906 // Movw:

(...skipping 197 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1109 TargetARM32Features CPUFeatures;	1104 TargetARM32Features CPUFeatures;

1110 bool UsesFramePointer = false;	1105 bool UsesFramePointer = false;

1111 bool NeedsStackAlignment = false;	1106 bool NeedsStackAlignment = false;

1112 bool MaybeLeafFunc = true;	1107 bool MaybeLeafFunc = true;

1113 size_t SpillAreaSizeBytes = 0;	1108 size_t SpillAreaSizeBytes = 0;

1114 size_t FixedAllocaSizeBytes = 0;	1109 size_t FixedAllocaSizeBytes = 0;

1115 size_t FixedAllocaAlignBytes = 0;	1110 size_t FixedAllocaAlignBytes = 0;

1116 bool PrologEmitsFixedAllocas = false;	1111 bool PrologEmitsFixedAllocas = false;

1117 uint32_t MaxOutArgsSizeBytes = 0;	1112 uint32_t MaxOutArgsSizeBytes = 0;

1118 // TODO(jpp): std::array instead of array.	1113 // TODO(jpp): std::array instead of array.

1119 static llvm::SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];	1114 static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];

1120 static llvm::SmallBitVector	1115 static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];

1121 TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];	1116 static SmallBitVector RegisterAliases[RegARM32::Reg_NUM];

1122 static llvm::SmallBitVector RegisterAliases[RegARM32::Reg_NUM];	1117 SmallBitVector RegsUsed;

1123 llvm::SmallBitVector RegsUsed;

1124 VarList PhysicalRegisters[IceType_NUM];	1118 VarList PhysicalRegisters[IceType_NUM];

1125 VarList PreservedGPRs;	1119 VarList PreservedGPRs;

1126 VarList PreservedSRegs;	1120 VarList PreservedSRegs;

1127	1121

1128 /// Helper class that understands the Calling Convention and register	1122 /// Helper class that understands the Calling Convention and register

1129 /// assignments. The first few integer type parameters can use r0-r3,	1123 /// assignments. The first few integer type parameters can use r0-r3,

1130 /// regardless of their position relative to the floating-point/vector	1124 /// regardless of their position relative to the floating-point/vector

1131 /// arguments in the argument list. Floating-point and vector arguments	1125 /// arguments in the argument list. Floating-point and vector arguments

1132 /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,	1126 /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,

1133 /// see the ARM Architecture Procedure Calling Standards (AAPCS).	1127 /// see the ARM Architecture Procedure Calling Standards (AAPCS).

(...skipping 17 matching lines...) Expand all Loading...
1151 /// appropriate register number. Note that, when Ty == IceType_i64, Reg will	1145 /// appropriate register number. Note that, when Ty == IceType_i64, Reg will

1152 /// be an I64 register pair.	1146 /// be an I64 register pair.

1153 bool argInGPR(Type Ty, RegNumT *Reg);	1147 bool argInGPR(Type Ty, RegNumT *Reg);

1154	1148

1155 /// argInVFP is to floating-point/vector types what argInGPR is for integer	1149 /// argInVFP is to floating-point/vector types what argInGPR is for integer

1156 /// types.	1150 /// types.

1157 bool argInVFP(Type Ty, RegNumT *Reg);	1151 bool argInVFP(Type Ty, RegNumT *Reg);

1158	1152

1159 private:	1153 private:

1160 void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);	1154 void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);

1161 llvm::SmallBitVector GPRegsUsed;	1155 SmallBitVector GPRegsUsed;

1162 CfgVector<RegNumT> GPRArgs;	1156 CfgVector<RegNumT> GPRArgs;

1163 CfgVector<RegNumT> I64Args;	1157 CfgVector<RegNumT> I64Args;

1164	1158

1165 void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);	1159 void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);

1166 llvm::SmallBitVector VFPRegsUsed;	1160 SmallBitVector VFPRegsUsed;

1167 CfgVector<RegNumT> FP32Args;	1161 CfgVector<RegNumT> FP32Args;

1168 CfgVector<RegNumT> FP64Args;	1162 CfgVector<RegNumT> FP64Args;

1169 CfgVector<RegNumT> Vec128Args;	1163 CfgVector<RegNumT> Vec128Args;

1170 };	1164 };

1171	1165

1172 private:	1166 private:

1173 ENABLE_MAKE_UNIQUE;	1167 ENABLE_MAKE_UNIQUE;

1174	1168

1175 OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,	1169 OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,

1176 Operand *Base);	1170 Operand *Base);

1177	1171

1178 void postambleCtpop64(const InstCall *Instr);	1172 void postambleCtpop64(const InstCall *Instr);

1179 void preambleDivRem(const InstCall *Instr);	1173 void preambleDivRem(const InstCall *Instr);

1180 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Instr)>	1174 CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>

1181 ARM32HelpersPreamble;	1175 ARM32HelpersPreamble;

1182 std::unordered_map<Operand *, void (TargetARM32::*)(const InstCall *Instr)>	1176 CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>

1183 ARM32HelpersPostamble;	1177 ARM32HelpersPostamble;

1184	1178

1185 class ComputationTracker {	1179 class ComputationTracker {

1186 public:	1180 public:

1187 ComputationTracker() = default;	1181 ComputationTracker() = default;

1188 ~ComputationTracker() = default;	1182 ~ComputationTracker() = default;

1189	1183

1190 void forgetProducers() { KnownComputations.clear(); }	1184 void forgetProducers() { KnownComputations.clear(); }

1191 void recordProducers(CfgNode *Node);	1185 void recordProducers(CfgNode *Node);

1192	1186

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1229 // by this instruction. If liveness analysis is not performed (e.g., in	1223 // by this instruction. If liveness analysis is not performed (e.g., in

1230 // Om1 mode) IsLiveOut will never be set to false, and folding will be	1224 // Om1 mode) IsLiveOut will never be set to false, and folding will be

1231 // disabled.	1225 // disabled.

1232 bool IsLiveOut = true;	1226 bool IsLiveOut = true;

1233 int32_t NumUses = 0;	1227 int32_t NumUses = 0;

1234 Type ComputationType;	1228 Type ComputationType;

1235 };	1229 };

1236	1230

1237 // ComputationMap maps a Variable number to a payload identifying which	1231 // ComputationMap maps a Variable number to a payload identifying which

1238 // instruction defined it.	1232 // instruction defined it.

1239 using ComputationMap = std::unordered_map<SizeT, ComputationEntry>;	1233 using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>;

1240 ComputationMap KnownComputations;	1234 ComputationMap KnownComputations;

1241 };	1235 };

1242	1236

1243 ComputationTracker Computations;	1237 ComputationTracker Computations;

1244	1238

1245 // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked	1239 // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked

1246 // without specifying a physical register. This is needed for creating unbound	1240 // without specifying a physical register. This is needed for creating unbound

1247 // temporaries during Ice -> ARM lowering, but before register allocation.	1241 // temporaries during Ice -> ARM lowering, but before register allocation.

1248 // This is a safe-guard that no unbound temporaries are created during the	1242 // This is a safe-guard that no unbound temporaries are created during the

1249 // legalization post-passes.	1243 // legalization post-passes.

(...skipping 57 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1307 private:	1301 private:

1308 ~TargetHeaderARM32() = default;	1302 ~TargetHeaderARM32() = default;

1309	1303

1310 TargetARM32Features CPUFeatures;	1304 TargetARM32Features CPUFeatures;

1311 };	1305 };

1312	1306

1313 } // end of namespace ARM32	1307 } // end of namespace ARM32

1314 } // end of namespace Ice	1308 } // end of namespace Ice

1315	1309

1316 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H	1310 #endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H

OLD	NEW

« no previous file with comments | « src/IceTargetLowering.cpp ('k') | src/IceTargetLoweringARM32.cpp » ('j') | no next file with comments »