OLD | NEW |
---|---|
1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// | 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file declares aspects of the compilation that persist across | 10 // This file declares aspects of the compilation that persist across |
11 // multiple functions. | 11 // multiple functions. |
12 // | 12 // |
13 //===----------------------------------------------------------------------===// | 13 //===----------------------------------------------------------------------===// |
14 | 14 |
15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H | 15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H |
16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H | 16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H |
17 | 17 |
18 #include <memory> | |
19 #include <mutex> | 18 #include <mutex> |
19 #include <queue> | |
20 #include <thread> | |
20 | 21 |
21 #include "IceDefs.h" | 22 #include "IceDefs.h" |
22 #include "IceClFlags.h" | 23 #include "IceClFlags.h" |
23 #include "IceIntrinsics.h" | 24 #include "IceIntrinsics.h" |
24 #include "IceRNG.h" | 25 #include "IceRNG.h" |
25 #include "IceTimerTree.h" | 26 #include "IceTimerTree.h" |
26 #include "IceTypes.h" | 27 #include "IceTypes.h" |
28 #include "IceUtils.h" | |
27 | 29 |
28 namespace Ice { | 30 namespace Ice { |
29 | 31 |
30 class ClFlags; | 32 class ClFlags; |
31 class ConstantPool; | 33 class ConstantPool; |
32 class FuncSigType; | 34 class FuncSigType; |
33 | 35 |
34 typedef std::mutex GlobalLockType; | |
35 | |
36 // LockedPtr is a way to provide automatically locked access to some object. | 36 // LockedPtr is a way to provide automatically locked access to some object. |
37 template <typename T> class LockedPtr { | 37 template <typename T> class LockedPtr { |
38 LockedPtr() = delete; | 38 LockedPtr() = delete; |
39 LockedPtr(const LockedPtr &) = delete; | 39 LockedPtr(const LockedPtr &) = delete; |
40 LockedPtr &operator=(const LockedPtr &) = delete; | 40 LockedPtr &operator=(const LockedPtr &) = delete; |
41 | 41 |
42 public: | 42 public: |
43 LockedPtr(T *Value, GlobalLockType *Lock) : Value(Value), Lock(Lock) { | 43 LockedPtr(T *Value, GlobalLockType *Lock) : Value(Value), Lock(Lock) { |
44 Lock->lock(); | 44 Lock->lock(); |
45 } | 45 } |
(...skipping 49 matching lines...) | |
95 CodeStats StatsFunction; | 95 CodeStats StatsFunction; |
96 std::vector<TimerStack> Timers; | 96 std::vector<TimerStack> Timers; |
97 }; | 97 }; |
98 | 98 |
99 public: | 99 public: |
100 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, | 100 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, |
101 VerboseMask Mask, TargetArch Arch, OptLevel Opt, | 101 VerboseMask Mask, TargetArch Arch, OptLevel Opt, |
102 IceString TestPrefix, const ClFlags &Flags); | 102 IceString TestPrefix, const ClFlags &Flags); |
103 ~GlobalContext(); | 103 ~GlobalContext(); |
104 | 104 |
105 // Returns true if any of the specified options in the verbose mask | |
106 // are set. If the argument is omitted, it checks if any verbose | |
107 // options at all are set. | |
108 VerboseMask getVerbose() const { return VMask; } | 105 VerboseMask getVerbose() const { return VMask; } |
109 bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; } | |
110 void setVerbose(VerboseMask Mask) { VMask = Mask; } | |
111 void addVerbose(VerboseMask Mask) { VMask |= Mask; } | |
112 void subVerbose(VerboseMask Mask) { VMask &= ~Mask; } | |
113 | 106 |
114 // The dump and emit streams need to be used by only one thread at a | 107 // The dump and emit streams need to be used by only one thread at a |
115 // time. This is done by exclusively reserving the streams via | 108 // time. This is done by exclusively reserving the streams via |
116 // lockStr() and unlockStr(). The OstreamLocker class can be used | 109 // lockStr() and unlockStr(). The OstreamLocker class can be used |
117 // to conveniently manage this. | 110 // to conveniently manage this. |
118 // | 111 // |
119 // The model is that a thread grabs the stream lock, then does an | 112 // The model is that a thread grabs the stream lock, then does an |
120 // arbitrary amount of work during which far-away callees may grab | 113 // arbitrary amount of work during which far-away callees may grab |
121 // the stream and do something with it, and finally the thread | 114 // the stream and do something with it, and finally the thread |
122 // releases the stream lock. This allows large chunks of output to | 115 // releases the stream lock. This allows large chunks of output to |
123 // be dumped or emitted without risking interleaving from multiple | 116 // be dumped or emitted without risking interleaving from multiple |
124 // threads. | 117 // threads. |
125 void lockStr() { StrLock.lock(); } | 118 void lockStr() { StrLock.lock(); } |
126 void unlockStr() { StrLock.unlock(); } | 119 void unlockStr() { StrLock.unlock(); } |
127 Ostream &getStrDump() { return *StrDump; } | 120 Ostream &getStrDump() { return *StrDump; } |
128 Ostream &getStrEmit() { return *StrEmit; } | 121 Ostream &getStrEmit() { return *StrEmit; } |
129 | 122 |
130 TargetArch getTargetArch() const { return Arch; } | 123 TargetArch getTargetArch() const { return Arch; } |
131 OptLevel getOptLevel() const { return Opt; } | 124 OptLevel getOptLevel() const { return Opt; } |
125 LockedPtr<std::error_code> getErrorStatus() { | |
126 return LockedPtr<std::error_code>(&ErrorStatus, &ErrorStatusLock); | |
127 } | |
132 | 128 |
133 // When emitting assembly, we allow a string to be prepended to | 129 // When emitting assembly, we allow a string to be prepended to |
134 // names of translated functions. This makes it easier to create an | 130 // names of translated functions. This makes it easier to create an |
135 // execution test against a reference translator like llc, with both | 131 // execution test against a reference translator like llc, with both |
136 // translators using the same bitcode as input. | 132 // translators using the same bitcode as input. |
137 IceString getTestPrefix() const { return TestPrefix; } | 133 IceString getTestPrefix() const { return TestPrefix; } |
138 IceString mangleName(const IceString &Name) const; | 134 IceString mangleName(const IceString &Name) const; |
139 | 135 |
140 // Manage Constants. | 136 // Manage Constants. |
141 // getConstant*() functions are not const because they might add | 137 // getConstant*() functions are not const because they might add |
(...skipping 80 matching lines...) | |
222 | 218 |
223 TimerStackIdT newTimerStackID(const IceString &Name); | 219 TimerStackIdT newTimerStackID(const IceString &Name); |
224 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); | 220 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); |
225 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 221 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); |
226 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 222 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); |
227 void resetTimer(TimerStackIdT StackID); | 223 void resetTimer(TimerStackIdT StackID); |
228 void setTimerName(TimerStackIdT StackID, const IceString &NewName); | 224 void setTimerName(TimerStackIdT StackID, const IceString &NewName); |
229 void dumpTimers(TimerStackIdT StackID = TSK_Default, | 225 void dumpTimers(TimerStackIdT StackID = TSK_Default, |
230 bool DumpCumulative = true); | 226 bool DumpCumulative = true); |
231 | 227 |
228 // Adds a newly parsed and constructed function to the Cfg work | |
229 // queue. Notifies any idle workers that a new function is | |
230 // available for translating. May block if the work queue is too | |
231 // large, in order to control memory footprint. | |
232 void cfgQueueAdd(Cfg *Func) { CfgQ.blockingPush(Func); } | |
233 // Takes a Cfg from the work queue for translating. May block if | |
234 // the work queue is currently empty. Returns nullptr if there is | |
235 // no more work - the queue is empty and either end() has been | |
236 // called or the Sequential flag was set. | |
237 Cfg *cfgQueueGet() { return CfgQ.blockingPop(); } | |
JF
2015/01/26 17:54:50
I'd change these names too (add/get to blockingPush/blockingPop)... [comment truncated]
Jim Stichnoth
2015/01/27 00:56:18
Done.
| |
238 // Notifies that no more work will be added to the work queue. | |
239 void cfgQueueEnd() { CfgQ.end(); } | |
JF
2015/01/26 17:54:51
Now that I think of it, notifyEnded would probably... [comment truncated]
Jim Stichnoth
2015/01/27 00:56:18
Done.
| |
240 | |
241 void startWorkerThreads() { | |
242 size_t NumWorkers = getFlags().NumTranslationThreads; | |
243 for (size_t i = 0; i < NumWorkers; ++i) { | |
244 ThreadContext *WorkerTLS = new ThreadContext(); | |
245 AllThreadContexts.push_back(WorkerTLS); | |
246 TranslationThreads.push_back(std::thread( | |
247 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); | |
248 } | |
249 if (NumWorkers) { | |
250 // TODO(stichnot): start a new thread for the emitter queue worker. | |
251 } | |
252 } | |
253 | |
254 void waitForWorkerThreads() { | |
255 cfgQueueEnd(); | |
256 // TODO(stichnot): call end() on the emitter work queue. | |
257 for (std::thread &Worker : TranslationThreads) { | |
258 Worker.join(); | |
259 } | |
260 TranslationThreads.clear(); | |
261 // TODO(stichnot): join the emitter thread. | |
262 } | |
263 | |
264 // Translation thread startup routine. | |
265 void translateFunctionsWrapper(ThreadContext *MyTLS) { | |
266 TLS = MyTLS; | |
267 translateFunctions(); | |
268 } | |
269 // Translate functions from the Cfg queue until the queue is empty. | |
270 void translateFunctions(); | |
271 | |
272 // Utility function to match a symbol name against a match string. | |
273 // This is used in a few cases where we want to take some action on | |
274 // a particular function or symbol based on a command-line argument, | |
275 // such as changing the verbose level for a particular function. An | |
276 // empty Match argument means match everything. Returns true if | |
277 // there is a match. | |
278 static bool matchSymbolName(const IceString &SymbolName, | |
279 const IceString &Match) { | |
280 return Match.empty() || Match == SymbolName; | |
281 } | |
282 | |
232 private: | 283 private: |
233 // Try to make sure the mutexes are allocated on separate cache | 284 // Try to ensure mutexes are allocated on separate cache lines. |
234 // lines, assuming the maximum cache line size is 64. | 285 |
235 const static size_t MaxCacheLineSize = 64; | 286 // Managed by getAllocator() |
236 alignas(MaxCacheLineSize) GlobalLockType AllocLock; | 287 alignas(MaxCacheLineSize) GlobalLockType AllocLock; |
288 ArenaAllocator<> Allocator; | |
289 | |
290 // Managed by getConstPool() | |
237 alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock; | 291 alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock; |
292 std::unique_ptr<ConstantPool> ConstPool; | |
293 | |
294 // Managed by getErrorStatus() | |
295 alignas(MaxCacheLineSize) GlobalLockType ErrorStatusLock; | |
296 std::error_code ErrorStatus; | |
297 | |
298 // Managed by getStatsCumulative() | |
238 alignas(MaxCacheLineSize) GlobalLockType StatsLock; | 299 alignas(MaxCacheLineSize) GlobalLockType StatsLock; |
300 CodeStats StatsCumulative; | |
301 | |
302 // Managed by getTimers() | |
239 alignas(MaxCacheLineSize) GlobalLockType TimerLock; | 303 alignas(MaxCacheLineSize) GlobalLockType TimerLock; |
304 std::vector<TimerStack> Timers; | |
240 | 305 |
241 // StrLock is a global lock on the dump and emit output streams. | 306 // StrLock is a global lock on the dump and emit output streams. |
242 typedef std::mutex StrLockType; | 307 typedef std::mutex StrLockType; |
243 StrLockType StrLock; | 308 alignas(MaxCacheLineSize) StrLockType StrLock; |
244 | |
245 Ostream *StrDump; // Stream for dumping / diagnostics | 309 Ostream *StrDump; // Stream for dumping / diagnostics |
246 Ostream *StrEmit; // Stream for code emission | 310 Ostream *StrEmit; // Stream for code emission |
247 | 311 |
248 ArenaAllocator<> Allocator; | 312 alignas(MaxCacheLineSize) const VerboseMask VMask; |
249 VerboseMask VMask; | |
250 std::unique_ptr<ConstantPool> ConstPool; | |
251 Intrinsics IntrinsicsInfo; | 313 Intrinsics IntrinsicsInfo; |
252 const TargetArch Arch; | 314 const TargetArch Arch; |
253 const OptLevel Opt; | 315 const OptLevel Opt; |
254 const IceString TestPrefix; | 316 const IceString TestPrefix; |
255 const ClFlags &Flags; | 317 const ClFlags &Flags; |
256 RandomNumberGenerator RNG; | 318 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. |
257 std::unique_ptr<ELFObjectWriter> ObjectWriter; | 319 std::unique_ptr<ELFObjectWriter> ObjectWriter; |
258 CodeStats StatsCumulative; | 320 BoundedProducerConsumerQueue<Cfg> CfgQ; |
259 std::vector<TimerStack> Timers; | |
260 | 321 |
261 LockedPtr<ArenaAllocator<>> getAllocator() { | 322 LockedPtr<ArenaAllocator<>> getAllocator() { |
262 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); | 323 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); |
263 } | 324 } |
264 LockedPtr<ConstantPool> getConstPool() { | 325 LockedPtr<ConstantPool> getConstPool() { |
265 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); | 326 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); |
266 } | 327 } |
267 LockedPtr<CodeStats> getStatsCumulative() { | 328 LockedPtr<CodeStats> getStatsCumulative() { |
268 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); | 329 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); |
269 } | 330 } |
270 LockedPtr<std::vector<TimerStack>> getTimers() { | 331 LockedPtr<std::vector<TimerStack>> getTimers() { |
271 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock); | 332 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock); |
272 } | 333 } |
273 | 334 |
274 std::vector<ThreadContext *> AllThreadContexts; | 335 std::vector<ThreadContext *> AllThreadContexts; |
336 std::vector<std::thread> TranslationThreads; | |
275 // Each thread has its own TLS pointer which is also held in | 337 // Each thread has its own TLS pointer which is also held in |
276 // AllThreadContexts. | 338 // AllThreadContexts. |
277 ICE_ATTRIBUTE_TLS static ThreadContext *TLS; | 339 ICE_ATTRIBUTE_TLS static ThreadContext *TLS; |
278 | 340 |
279 // Private helpers for mangleName() | 341 // Private helpers for mangleName() |
280 typedef llvm::SmallVector<char, 32> ManglerVector; | 342 typedef llvm::SmallVector<char, 32> ManglerVector; |
281 void incrementSubstitutions(ManglerVector &OldName) const; | 343 void incrementSubstitutions(ManglerVector &OldName) const; |
282 }; | 344 }; |
283 | 345 |
284 // Helper class to push and pop a timer marker. The constructor | 346 // Helper class to push and pop a timer marker. The constructor |
(...skipping 37 matching lines...) | |
322 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } | 384 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } |
323 ~OstreamLocker() { Ctx->unlockStr(); } | 385 ~OstreamLocker() { Ctx->unlockStr(); } |
324 | 386 |
325 private: | 387 private: |
326 GlobalContext *const Ctx; | 388 GlobalContext *const Ctx; |
327 }; | 389 }; |
328 | 390 |
329 } // end of namespace Ice | 391 } // end of namespace Ice |
330 | 392 |
331 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H | 393 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H |
OLD | NEW |