OLD | NEW |
---|---|
1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// | 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file declares aspects of the compilation that persist across | 10 // This file declares aspects of the compilation that persist across |
11 // multiple functions. | 11 // multiple functions. |
12 // | 12 // |
13 //===----------------------------------------------------------------------===// | 13 //===----------------------------------------------------------------------===// |
14 | 14 |
15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H | 15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H |
16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H | 16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H |
17 | 17 |
18 #include <memory> | 18 #include <memory> |
19 #include <mutex> | |
19 | 20 |
20 #include "IceDefs.h" | 21 #include "IceDefs.h" |
21 #include "IceClFlags.h" | 22 #include "IceClFlags.h" |
22 #include "IceIntrinsics.h" | 23 #include "IceIntrinsics.h" |
23 #include "IceRNG.h" | 24 #include "IceRNG.h" |
24 #include "IceTimerTree.h" | 25 #include "IceTimerTree.h" |
25 #include "IceTypes.h" | 26 #include "IceTypes.h" |
26 | 27 |
27 namespace Ice { | 28 namespace Ice { |
28 | 29 |
29 class ClFlags; | 30 class ClFlags; |
30 class FuncSigType; | 31 class FuncSigType; |
31 | 32 |
32 // This class collects rudimentary statistics during translation. | |
33 class CodeStats { | |
34 CodeStats(const CodeStats &) = delete; | |
35 CodeStats &operator=(const CodeStats &) = default; | |
36 | |
37 public: | |
38 CodeStats() | |
39 : InstructionsEmitted(0), RegistersSaved(0), FrameBytes(0), Spills(0), | |
40 Fills(0) {} | |
41 void reset() { *this = CodeStats(); } | |
42 void updateEmitted(uint32_t InstCount) { InstructionsEmitted += InstCount; } | |
43 void updateRegistersSaved(uint32_t Num) { RegistersSaved += Num; } | |
44 void updateFrameBytes(uint32_t Bytes) { FrameBytes += Bytes; } | |
45 void updateSpills() { ++Spills; } | |
46 void updateFills() { ++Fills; } | |
47 void dump(const IceString &Name, Ostream &Str); | |
48 | |
49 private: | |
50 uint32_t InstructionsEmitted; | |
51 uint32_t RegistersSaved; | |
52 uint32_t FrameBytes; | |
53 uint32_t Spills; | |
54 uint32_t Fills; | |
55 }; | |
56 | |
57 // TODO: Accesses to all non-const fields of GlobalContext need to | |
58 // be synchronized, especially the constant pool, the allocator, and | |
59 // the output streams. | |
60 class GlobalContext { | 33 class GlobalContext { |
61 GlobalContext(const GlobalContext &) = delete; | 34 GlobalContext(const GlobalContext &) = delete; |
62 GlobalContext &operator=(const GlobalContext &) = delete; | 35 GlobalContext &operator=(const GlobalContext &) = delete; |
63 | 36 |
37 // CodeStats collects rudimentary statistics during translation. | |
38 class CodeStats { | |
39 CodeStats(const CodeStats &) = delete; | |
40 CodeStats &operator=(const CodeStats &) = default; | |
41 | |
42 public: | |
43 CodeStats() | |
44 : InstructionsEmitted(0), RegistersSaved(0), FrameBytes(0), Spills(0), | |
45 Fills(0) {} | |
46 void reset() { *this = CodeStats(); } | |
47 void updateEmitted(uint32_t InstCount) { InstructionsEmitted += InstCount; } | |
48 void updateRegistersSaved(uint32_t Num) { RegistersSaved += Num; } | |
49 void updateFrameBytes(uint32_t Bytes) { FrameBytes += Bytes; } | |
50 void updateSpills() { ++Spills; } | |
51 void updateFills() { ++Fills; } | |
52 void dump(const IceString &Name, Ostream &Str); | |
53 | |
54 private: | |
55 uint32_t InstructionsEmitted; | |
56 uint32_t RegistersSaved; | |
57 uint32_t FrameBytes; | |
58 uint32_t Spills; | |
59 uint32_t Fills; | |
60 }; | |
61 | |
62 // ThreadContext contains thread-local data. This data can be | |
63 // combined/reduced as needed after all threads complete. | |
64 class ThreadContext { | |
65 ThreadContext(const ThreadContext &) = delete; | |
66 ThreadContext &operator=(const ThreadContext &) = delete; | |
67 public: | |
68 ThreadContext() {} | |
69 CodeStats StatsFunction; | |
70 std::vector<TimerStack> Timers; | |
71 }; | |
72 | |
73 typedef std::recursive_mutex GlobalLockType; | |
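[Review comment] JF, 2015/01/15 16:28:39: Could you instead have separate allocator lock and [... comment truncated in this view]
[Reply] Jim Stichnoth, 2015/01/17 18:44:17: Done. I went ahead and made several finer-grain l[... comment truncated in this view]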
| |
74 | |
64 public: | 75 public: |
65 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, | 76 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, |
66 VerboseMask Mask, TargetArch Arch, OptLevel Opt, | 77 VerboseMask Mask, TargetArch Arch, OptLevel Opt, |
67 IceString TestPrefix, const ClFlags &Flags); | 78 IceString TestPrefix, const ClFlags &Flags); |
68 ~GlobalContext(); | 79 ~GlobalContext(); |
69 | 80 |
70 // Returns true if any of the specified options in the verbose mask | 81 // Returns true if any of the specified options in the verbose mask |
71 // are set. If the argument is omitted, it checks if any verbose | 82 // are set. If the argument is omitted, it checks if any verbose |
72 // options at all are set. | 83 // options at all are set. |
73 VerboseMask getVerbose() const { return VMask; } | 84 VerboseMask getVerbose() const { return VMask; } |
74 bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; } | 85 bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; } |
75 void setVerbose(VerboseMask Mask) { VMask = Mask; } | 86 void setVerbose(VerboseMask Mask) { VMask = Mask; } |
76 void addVerbose(VerboseMask Mask) { VMask |= Mask; } | 87 void addVerbose(VerboseMask Mask) { VMask |= Mask; } |
77 void subVerbose(VerboseMask Mask) { VMask &= ~Mask; } | 88 void subVerbose(VerboseMask Mask) { VMask &= ~Mask; } |
78 | 89 |
79 Ostream &getStrDump() { return *StrDump; } | 90 // The dump and emit streams need to be used by only one thread at a |
80 Ostream &getStrEmit() { return *StrEmit; } | 91 // time. This is done by exclusively reserving the streams via |
92 // lockStr() and unlockStr(). The OstreamLocker class can be used | |
93 // to conveniently manage this. | |
94 // | |
95 // The model is that a thread grabs the stream lock, then does an | |
96 // arbitrary amount of work during which far-away callees may grab | |
97 // the stream and do something with it, and finally the thread | |
98 // releases the stream lock. This allows large chunks of output to | |
99 // be dumped or emitted without risking interleaving from multiple | |
100 // threads. When a worker locks the streams via lockStr(), we use | |
101 // IsStrLocked to verify that it wasn't already locked (i.e. no | |
102 // recursive lockStr() calls). When a worker grabs one of the | |
103 // streams via getStrDump() or getStrEmit(), we lock StrLock | |
104 // (recursively, if lockStr() was correctly used, hence the need for | |
105 // recursive_mutex) and check that IsStrLocked is set. | |
106 void lockStr() { | |
107 StrLock.lock(); | |
108 assert(!isStrLocked()); | |
109 IsStrLocked = true; | |
110 } | |
111 void unlockStr() { | |
112 assert(isStrLocked()); | |
113 IsStrLocked = false; | |
114 StrLock.unlock(); | |
115 } | |
116 // Test whether the calling thread already holds StrLock via lockStr(), | |
117 // by doing a lock() and, once it (eventually) succeeds, reading | |
118 // IsStrLocked to see whether that lock() was recursive. | |
119 bool isStrLocked() { | |
120 StrLock.lock(); | |
121 bool WasLocked = IsStrLocked; | |
122 StrLock.unlock(); | |
123 return WasLocked; | |
124 } | |
125 Ostream &getStrDump() { | |
126 assert(isStrLocked()); | |
127 return *StrDump; | |
128 } | |
129 Ostream &getStrEmit() { | |
130 assert(isStrLocked()); | |
131 return *StrEmit; | |
132 } | |
81 | 133 |
82 TargetArch getTargetArch() const { return Arch; } | 134 TargetArch getTargetArch() const { return Arch; } |
83 OptLevel getOptLevel() const { return Opt; } | 135 OptLevel getOptLevel() const { return Opt; } |
84 | 136 |
85 // When emitting assembly, we allow a string to be prepended to | 137 // When emitting assembly, we allow a string to be prepended to |
86 // names of translated functions. This makes it easier to create an | 138 // names of translated functions. This makes it easier to create an |
87 // execution test against a reference translator like llc, with both | 139 // execution test against a reference translator like llc, with both |
88 // translators using the same bitcode as input. | 140 // translators using the same bitcode as input. |
89 IceString getTestPrefix() const { return TestPrefix; } | 141 IceString getTestPrefix() const { return TestPrefix; } |
90 IceString mangleName(const IceString &Name) const; | 142 IceString mangleName(const IceString &Name) const; |
(...skipping 11 matching lines...) Expand all Loading... | |
102 Constant *getConstantDouble(double Value); | 154 Constant *getConstantDouble(double Value); |
103 // Returns a symbolic constant. | 155 // Returns a symbolic constant. |
104 Constant *getConstantSym(RelocOffsetT Offset, const IceString &Name, | 156 Constant *getConstantSym(RelocOffsetT Offset, const IceString &Name, |
105 bool SuppressMangling); | 157 bool SuppressMangling); |
106 // Returns an undef. | 158 // Returns an undef. |
107 Constant *getConstantUndef(Type Ty); | 159 Constant *getConstantUndef(Type Ty); |
108 // Returns a zero value. | 160 // Returns a zero value. |
109 Constant *getConstantZero(Type Ty); | 161 Constant *getConstantZero(Type Ty); |
110 // getConstantPool() returns a copy of the constant pool for | 162 // getConstantPool() returns a copy of the constant pool for |
111 // constants of a given type. | 163 // constants of a given type. |
112 ConstantList getConstantPool(Type Ty) const; | 164 ConstantList getConstantPool(Type Ty); |
113 // Returns a new function declaration, allocated in an internal | 165 // Returns a new function declaration, allocated in an internal |
114 // memory pool. Ownership of the function is maintained by this | 166 // memory pool. Ownership of the function is maintained by this |
115 // class instance. | 167 // class instance. |
116 FunctionDeclaration *newFunctionDeclaration(const FuncSigType *Signature, | 168 FunctionDeclaration *newFunctionDeclaration(const FuncSigType *Signature, |
117 unsigned CallingConv, | 169 unsigned CallingConv, |
118 unsigned Linkage, bool IsProto); | 170 unsigned Linkage, bool IsProto); |
119 | 171 |
120 // Returns a new global variable declaration, allocated in an | 172 // Returns a new global variable declaration, allocated in an |
121 // internal memory pool. Ownership of the variable is maintained by | 173 // internal memory pool. Ownership of the variable is maintained by |
122 // this class instance. | 174 // this class instance. |
123 VariableDeclaration *newVariableDeclaration(); | 175 VariableDeclaration *newVariableDeclaration(); |
124 | 176 |
125 const ClFlags &getFlags() const { return Flags; } | 177 const ClFlags &getFlags() const { return Flags; } |
126 | 178 |
127 bool isIRGenerationDisabled() const { | 179 bool isIRGenerationDisabled() const { |
128 return ALLOW_DISABLE_IR_GEN ? getFlags().DisableIRGeneration : false; | 180 return ALLOW_DISABLE_IR_GEN ? getFlags().DisableIRGeneration : false; |
129 } | 181 } |
130 | 182 |
131 // Allocate data of type T using the global allocator. | 183 // Allocate data of type T using the global allocator. |
132 template <typename T> T *allocate() { return Allocator.Allocate<T>(); } | 184 template <typename T> T *allocate() { |
185 std::lock_guard<GlobalLockType> L(GlobalLock); | |
186 return Allocator.Allocate<T>(); | |
187 } | |
133 | 188 |
134 const Intrinsics &getIntrinsicsInfo() const { return IntrinsicsInfo; } | 189 const Intrinsics &getIntrinsicsInfo() const { return IntrinsicsInfo; } |
135 | 190 |
136 // TODO(wala,stichnot): Make the RNG play nicely with multithreaded | 191 // TODO(wala,stichnot): Make the RNG play nicely with multithreaded |
137 // translation. | 192 // translation. |
138 RandomNumberGenerator &getRNG() { return RNG; } | 193 RandomNumberGenerator &getRNG() { return RNG; } |
139 | 194 |
140 ELFObjectWriter *getObjectWriter() const { return ObjectWriter.get(); } | 195 ELFObjectWriter *getObjectWriter() const { return ObjectWriter.get(); } |
141 | 196 |
142 // Reset stats at the beginning of a function. | 197 // Reset stats at the beginning of a function. |
143 void resetStats() { | 198 void resetStats() { |
144 if (ALLOW_DUMP) | 199 if (ALLOW_DUMP) |
145 StatsFunction.reset(); | 200 TLS->StatsFunction.reset(); |
146 } | 201 } |
147 void dumpStats(const IceString &Name, bool Final = false); | 202 void dumpStats(const IceString &Name, bool Final = false); |
148 void statsUpdateEmitted(uint32_t InstCount) { | 203 void statsUpdateEmitted(uint32_t InstCount) { |
149 if (!ALLOW_DUMP) | 204 if (!ALLOW_DUMP || !getFlags().DumpStats) |
150 return; | 205 return; |
151 StatsFunction.updateEmitted(InstCount); | 206 TLS->StatsFunction.updateEmitted(InstCount); |
207 std::lock_guard<GlobalLockType> L(GlobalLock); | |
152 StatsCumulative.updateEmitted(InstCount); | 208 StatsCumulative.updateEmitted(InstCount); |
153 } | 209 } |
154 void statsUpdateRegistersSaved(uint32_t Num) { | 210 void statsUpdateRegistersSaved(uint32_t Num) { |
155 if (!ALLOW_DUMP) | 211 if (!ALLOW_DUMP || !getFlags().DumpStats) |
156 return; | 212 return; |
157 StatsFunction.updateRegistersSaved(Num); | 213 TLS->StatsFunction.updateRegistersSaved(Num); |
214 std::lock_guard<GlobalLockType> L(GlobalLock); | |
158 StatsCumulative.updateRegistersSaved(Num); | 215 StatsCumulative.updateRegistersSaved(Num); |
159 } | 216 } |
160 void statsUpdateFrameBytes(uint32_t Bytes) { | 217 void statsUpdateFrameBytes(uint32_t Bytes) { |
161 if (!ALLOW_DUMP) | 218 if (!ALLOW_DUMP || !getFlags().DumpStats) |
162 return; | 219 return; |
163 StatsFunction.updateFrameBytes(Bytes); | 220 TLS->StatsFunction.updateFrameBytes(Bytes); |
221 std::lock_guard<GlobalLockType> L(GlobalLock); | |
164 StatsCumulative.updateFrameBytes(Bytes); | 222 StatsCumulative.updateFrameBytes(Bytes); |
165 } | 223 } |
166 void statsUpdateSpills() { | 224 void statsUpdateSpills() { |
167 if (!ALLOW_DUMP) | 225 if (!ALLOW_DUMP || !getFlags().DumpStats) |
168 return; | 226 return; |
169 StatsFunction.updateSpills(); | 227 TLS->StatsFunction.updateSpills(); |
228 std::lock_guard<GlobalLockType> L(GlobalLock); | |
170 StatsCumulative.updateSpills(); | 229 StatsCumulative.updateSpills(); |
171 } | 230 } |
172 void statsUpdateFills() { | 231 void statsUpdateFills() { |
173 if (!ALLOW_DUMP) | 232 if (!ALLOW_DUMP || !getFlags().DumpStats) |
174 return; | 233 return; |
175 StatsFunction.updateFills(); | 234 TLS->StatsFunction.updateFills(); |
235 std::lock_guard<GlobalLockType> L(GlobalLock); | |
176 StatsCumulative.updateFills(); | 236 StatsCumulative.updateFills(); |
177 } | 237 } |
178 | 238 |
179 // These are predefined TimerStackIdT values. | 239 // These are predefined TimerStackIdT values. |
180 enum TimerStackKind { | 240 enum TimerStackKind { |
181 TSK_Default = 0, | 241 TSK_Default = 0, |
182 TSK_Funcs, | 242 TSK_Funcs, |
183 TSK_Num | 243 TSK_Num |
184 }; | 244 }; |
185 | 245 |
246 TimerStackIdT newTimerStackID(const IceString &Name); | |
186 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); | 247 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); |
187 TimerStackIdT newTimerStackID(const IceString &Name); | |
188 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 248 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); |
189 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 249 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); |
190 void resetTimer(TimerStackIdT StackID); | 250 void resetTimer(TimerStackIdT StackID); |
191 void setTimerName(TimerStackIdT StackID, const IceString &NewName); | 251 void setTimerName(TimerStackIdT StackID, const IceString &NewName); |
192 void dumpTimers(TimerStackIdT StackID = TSK_Default, | 252 void dumpTimers(TimerStackIdT StackID = TSK_Default, |
193 bool DumpCumulative = true); | 253 bool DumpCumulative = true); |
194 | 254 |
195 private: | 255 private: |
256 // GlobalLock is the default coarse-grain lock for accessing members | |
257 // of GlobalContext. As contention becomes an issue, more | |
258 // fine-grain locks can be added. | |
259 GlobalLockType GlobalLock; | |
260 | |
261 // StrLock is a global lock on the dump and emit output streams. | |
262 // IsStrLocked is used to validate the locking protocol, and can | |
263 // only be meaningfully inspected when StrLock is held. Note that | |
264 // in a production build, the dump and emit streams are not used in | |
265 // any meaningful way, so this locking is more for | |
266 // development/debugging purposes. | |
267 typedef std::recursive_mutex StrLockType; | |
268 StrLockType StrLock; | |
269 bool IsStrLocked; | |
270 | |
196 Ostream *StrDump; // Stream for dumping / diagnostics | 271 Ostream *StrDump; // Stream for dumping / diagnostics |
197 Ostream *StrEmit; // Stream for code emission | 272 Ostream *StrEmit; // Stream for code emission |
198 | 273 |
199 ArenaAllocator<> Allocator; | 274 ArenaAllocator<> Allocator; |
200 VerboseMask VMask; | 275 VerboseMask VMask; |
201 std::unique_ptr<class ConstantPool> ConstPool; | 276 std::unique_ptr<class ConstantPool> ConstPool; |
202 Intrinsics IntrinsicsInfo; | 277 Intrinsics IntrinsicsInfo; |
203 const TargetArch Arch; | 278 const TargetArch Arch; |
204 const OptLevel Opt; | 279 const OptLevel Opt; |
205 const IceString TestPrefix; | 280 const IceString TestPrefix; |
206 const ClFlags &Flags; | 281 const ClFlags &Flags; |
207 RandomNumberGenerator RNG; | 282 RandomNumberGenerator RNG; |
208 std::unique_ptr<ELFObjectWriter> ObjectWriter; | 283 std::unique_ptr<ELFObjectWriter> ObjectWriter; |
209 CodeStats StatsFunction; | |
210 CodeStats StatsCumulative; | 284 CodeStats StatsCumulative; |
211 std::vector<TimerStack> Timers; | 285 std::vector<TimerStack> Timers; |
212 std::vector<GlobalDeclaration *> GlobalDeclarations; | 286 std::vector<GlobalDeclaration *> GlobalDeclarations; |
213 | 287 |
288 std::vector<ThreadContext *> AllThreadContexts; | |
289 // Each thread has its own TLS pointer which is also held in | |
290 // AllThreadContexts. | |
291 thread_local static ThreadContext *TLS; | |
292 | |
214 // Private helpers for mangleName() | 293 // Private helpers for mangleName() |
215 typedef llvm::SmallVector<char, 32> ManglerVector; | 294 typedef llvm::SmallVector<char, 32> ManglerVector; |
216 void incrementSubstitutions(ManglerVector &OldName) const; | 295 void incrementSubstitutions(ManglerVector &OldName) const; |
217 }; | 296 }; |
218 | 297 |
219 // Helper class to push and pop a timer marker. The constructor | 298 // Helper class to push and pop a timer marker. The constructor |
220 // pushes a marker, and the destructor pops it. This is for | 299 // pushes a marker, and the destructor pops it. This is for |
221 // convenient timing of regions of code. | 300 // convenient timing of regions of code. |
222 class TimerMarker { | 301 class TimerMarker { |
223 TimerMarker(const TimerMarker &) = delete; | 302 TimerMarker(const TimerMarker &) = delete; |
(...skipping 14 matching lines...) Expand all Loading... | |
238 if (ALLOW_DUMP && Active) | 317 if (ALLOW_DUMP && Active) |
239 Ctx->popTimer(ID); | 318 Ctx->popTimer(ID); |
240 } | 319 } |
241 | 320 |
242 private: | 321 private: |
243 TimerIdT ID; | 322 TimerIdT ID; |
244 GlobalContext *const Ctx; | 323 GlobalContext *const Ctx; |
245 bool Active; | 324 bool Active; |
246 }; | 325 }; |
247 | 326 |
327 // Helper class for locking the streams and then automatically | |
328 // unlocking them. | |
329 class OstreamLocker { | |
330 private: | |
331 OstreamLocker() = delete; | |
332 OstreamLocker(const OstreamLocker &) = delete; | |
333 OstreamLocker &operator=(const OstreamLocker &) = delete; | |
334 | |
335 public: | |
336 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } | |
337 ~OstreamLocker() { Ctx->unlockStr(); } | |
338 | |
339 private: | |
340 GlobalContext *const Ctx; | |
341 }; | |
342 | |
248 } // end of namespace Ice | 343 } // end of namespace Ice |
249 | 344 |
250 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H | 345 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H |
OLD | NEW |