OLD | NEW |
---|---|
1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// | 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file declares aspects of the compilation that persist across | 10 // This file declares aspects of the compilation that persist across |
11 // multiple functions. | 11 // multiple functions. |
12 // | 12 // |
13 //===----------------------------------------------------------------------===// | 13 //===----------------------------------------------------------------------===// |
14 | 14 |
15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H | 15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H |
16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H | 16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H |
17 | 17 |
18 #include <memory> | |
19 #include <mutex> | 18 #include <mutex> |
19 #include <queue> | |
20 #include <thread> | |
20 | 21 |
21 #include "IceDefs.h" | 22 #include "IceDefs.h" |
22 #include "IceClFlags.h" | 23 #include "IceClFlags.h" |
23 #include "IceIntrinsics.h" | 24 #include "IceIntrinsics.h" |
24 #include "IceRNG.h" | 25 #include "IceRNG.h" |
25 #include "IceTimerTree.h" | 26 #include "IceTimerTree.h" |
26 #include "IceTypes.h" | 27 #include "IceTypes.h" |
27 | 28 |
28 namespace Ice { | 29 namespace Ice { |
29 | 30 |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
89 class ThreadContext { | 90 class ThreadContext { |
90 ThreadContext(const ThreadContext &) = delete; | 91 ThreadContext(const ThreadContext &) = delete; |
91 ThreadContext &operator=(const ThreadContext &) = delete; | 92 ThreadContext &operator=(const ThreadContext &) = delete; |
92 | 93 |
93 public: | 94 public: |
94 ThreadContext() {} | 95 ThreadContext() {} |
95 CodeStats StatsFunction; | 96 CodeStats StatsFunction; |
96 std::vector<TimerStack> Timers; | 97 std::vector<TimerStack> Timers; |
97 }; | 98 }; |
98 | 99 |
100 // CfgQueue is the translation work queue. It allows multiple | |
101 // producers and multiple consumers (though currently only a single | |
102 // producer is used). The producer adds entries using add(), and | |
103 // may block if the queue is "full" to control Cfg memory footprint. | |
104 // The producer uses end() to indicate that no more entries will be | |
105 // added. The consumer removes an item using get(), which will | |
106 // return nullptr if end() has been called and the queue is empty. | |
107 // | |
108 // The MaxSize ctor arg controls the maximum size the queue can grow | |
109 // to. The Sequential arg indicates purely sequential execution in | |
110 // which the single thread should never wait(). | |
111 // | |
112 // Two condition variables are used in the implementation. | |
113 // GrewOrEnded signals waiting workers that the producer has changed | |
114 // the state of the queue. Shrunk signals a blocked producer that a | |
115 // consumer has changed the state of the queue. | |
116 // | |
117 // The methods begin with Sequential-specific code to be most clear. | |
118 // The lock and condition variables are not used in the Sequential | |
119 // case. | |
120 class CfgQueue { | |
JF
2015/01/25 22:57:00
It's probably better to put this class in its own
Jim Stichnoth
2015/01/26 04:59:43
Done.
| |
121 public: | |
122 CfgQueue(size_t MaxSize, bool Sequential) | |
123 : MaxSize(MaxSize), Sequential(Sequential), IsEnded(false) {} | |
JF
2015/01/25 22:57:00
WorkQueue.reserve(MaxSize)
Jim Stichnoth
2015/01/26 04:59:43
There's no std::queue::reserve() or std::deque::re
JF
2015/01/26 17:54:50
:(
I'm still not a fan of using queue, the defaul
Jim Stichnoth
2015/01/27 00:56:18
Done.
| |
124 void add(Cfg *Func) { | |
JF
2015/01/25 22:57:00
I'd rename to blocking_push or something similar,
Jim Stichnoth
2015/01/26 04:59:43
Done. add() --> blockingPush(), and get() --> blo
| |
125 if (Sequential) { | |
126 WorkQueue.push(Func); | |
127 return; | |
128 } | |
129 std::unique_lock<GlobalLockType> L(Lock); | |
130 // If the work queue is already "full", wait for a consumer to | |
131 // grab an element and shrink the queue. | |
132 while (WorkQueue.size() >= MaxSize) { | |
133 Shrunk.wait(L); | |
134 } | |
JF
2015/01/25 22:57:00
This code:
while (WorkQueue.size() >= MaxSize) {
Jim Stichnoth
2015/01/26 04:59:43
Wow. Done.
(my first ever C++ lambda)
| |
135 WorkQueue.push(Func); | |
136 L.unlock(); | |
137 GrewOrEnded.notify_one(); | |
138 } | |
139 Cfg *get() { | |
JF
2015/01/25 22:57:00
I'd rename to wait_and_pop or something similar.
Jim Stichnoth
2015/01/26 04:59:43
Done.
| |
140 if (Sequential) { | |
141 Cfg *Func = nullptr; | |
142 if (!WorkQueue.empty()) { | |
143 Func = WorkQueue.front(); | |
144 WorkQueue.pop(); | |
145 } | |
146 return Func; | |
147 } | |
148 std::unique_lock<GlobalLockType> L(Lock); | |
149 while (!IsEnded || !WorkQueue.empty()) { | |
JF
2015/01/25 22:57:00
Similarly here, I'd go with:
Cfg *wait_and_pop()
Jim Stichnoth
2015/01/26 04:59:43
Yeah, your rewrite could have the workers shut dow
JF
2015/01/26 17:54:50
I'm in for killing Sequential, it'll make the non-
Jim Stichnoth
2015/01/27 00:56:18
Done.
| |
150 if (!WorkQueue.empty()) { | |
151 Cfg *Func = WorkQueue.front(); | |
152 WorkQueue.pop(); | |
153 L.unlock(); | |
154 Shrunk.notify_one(); | |
155 return Func; | |
156 } | |
157 GrewOrEnded.wait(L); | |
158 } | |
159 return nullptr; | |
160 } | |
161 void end() { | |
162 if (Sequential) | |
163 return; | |
164 std::unique_lock<GlobalLockType> L(Lock); | |
165 IsEnded = true; | |
166 L.unlock(); | |
JF
2015/01/25 22:57:00
Could you just use a lock_guard and scoping here?
Jim Stichnoth
2015/01/26 04:59:43
I thought about this for all 3 methods, but since
JF
2015/01/26 18:10:40
As discussed offline, lock_guard is simpler than u
Jim Stichnoth
2015/01/27 00:56:18
Done.
| |
167 GrewOrEnded.notify_all(); | |
168 } | |
169 | |
170 private: | |
JF
2015/01/25 22:57:00
CfgQueue() = delete;
CfgQueue(const CfgQueue &) =
Jim Stichnoth
2015/01/26 04:59:43
<shamecube> Done.
| |
171 // WorkQueue and Lock are read/written by all. | |
172 // TODO(stichnot): Since WorkQueue has an enforced maximum size, | |
173 // implement it on top of something like std::array to minimize | |
174 // contention. | |
175 alignas(MaxCacheLineSize) std::queue<Cfg *> WorkQueue; | |
176 // Lock guards access to WorkQueue and IsEnded. | |
177 alignas(MaxCacheLineSize) GlobalLockType Lock; | |
178 | |
179 // GrewOrEnded is written by the producer and read by the | |
180 // consumers. It is notified (by the producer) when something is | |
181 // added to the queue or when end() is called, in case consumers | |
182 // are waiting for a non-empty queue or for the end of work. | |
183 alignas(MaxCacheLineSize) std::condition_variable GrewOrEnded; | |
184 | |
185 // Shrunk is notified (by the consumer) when something is removed | |
186 // from the queue, in case the producer is waiting for the queue | |
187 // to drop below maximum capacity. It is written by the consumers | |
188 // and read by the producer. | |
189 alignas(MaxCacheLineSize) std::condition_variable Shrunk; | |
190 | |
191 // MaxSize and Sequential are read by all and written by none. | |
192 alignas(MaxCacheLineSize) const size_t MaxSize; | |
193 const bool Sequential; | |
194 // IsEnded is read by the consumers, and only written once by the | |
195 // producer. | |
196 bool IsEnded; | |
197 }; | |
198 | |
99 public: | 199 public: |
100 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, | 200 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, |
101 VerboseMask Mask, TargetArch Arch, OptLevel Opt, | 201 VerboseMask Mask, TargetArch Arch, OptLevel Opt, |
102 IceString TestPrefix, const ClFlags &Flags); | 202 IceString TestPrefix, const ClFlags &Flags); |
103 ~GlobalContext(); | 203 ~GlobalContext(); |
104 | 204 |
105 // Returns true if any of the specified options in the verbose mask | |
106 // are set. If the argument is omitted, it checks if any verbose | |
107 // options at all are set. | |
108 VerboseMask getVerbose() const { return VMask; } | 205 VerboseMask getVerbose() const { return VMask; } |
109 bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; } | |
110 void setVerbose(VerboseMask Mask) { VMask = Mask; } | |
111 void addVerbose(VerboseMask Mask) { VMask |= Mask; } | |
112 void subVerbose(VerboseMask Mask) { VMask &= ~Mask; } | |
113 | 206 |
114 // The dump and emit streams need to be used by only one thread at a | 207 // The dump and emit streams need to be used by only one thread at a |
115 // time. This is done by exclusively reserving the streams via | 208 // time. This is done by exclusively reserving the streams via |
116 // lockStr() and unlockStr(). The OstreamLocker class can be used | 209 // lockStr() and unlockStr(). The OstreamLocker class can be used |
117 // to conveniently manage this. | 210 // to conveniently manage this. |
118 // | 211 // |
119 // The model is that a thread grabs the stream lock, then does an | 212 // The model is that a thread grabs the stream lock, then does an |
120 // arbitrary amount of work during which far-away callees may grab | 213 // arbitrary amount of work during which far-away callees may grab |
121 // the stream and do something with it, and finally the thread | 214 // the stream and do something with it, and finally the thread |
122 // releases the stream lock. This allows large chunks of output to | 215 // releases the stream lock. This allows large chunks of output to |
123 // be dumped or emitted without risking interleaving from multiple | 216 // be dumped or emitted without risking interleaving from multiple |
124 // threads. | 217 // threads. |
125 void lockStr() { StrLock.lock(); } | 218 void lockStr() { StrLock.lock(); } |
126 void unlockStr() { StrLock.unlock(); } | 219 void unlockStr() { StrLock.unlock(); } |
127 Ostream &getStrDump() { return *StrDump; } | 220 Ostream &getStrDump() { return *StrDump; } |
128 Ostream &getStrEmit() { return *StrEmit; } | 221 Ostream &getStrEmit() { return *StrEmit; } |
129 | 222 |
130 TargetArch getTargetArch() const { return Arch; } | 223 TargetArch getTargetArch() const { return Arch; } |
131 OptLevel getOptLevel() const { return Opt; } | 224 OptLevel getOptLevel() const { return Opt; } |
225 LockedPtr<std::error_code> getErrorStatus() { | |
226 return LockedPtr<std::error_code>(&ErrorStatus, &ErrorStatusLock); | |
227 } | |
132 | 228 |
133 // When emitting assembly, we allow a string to be prepended to | 229 // When emitting assembly, we allow a string to be prepended to |
134 // names of translated functions. This makes it easier to create an | 230 // names of translated functions. This makes it easier to create an |
135 // execution test against a reference translator like llc, with both | 231 // execution test against a reference translator like llc, with both |
136 // translators using the same bitcode as input. | 232 // translators using the same bitcode as input. |
137 IceString getTestPrefix() const { return TestPrefix; } | 233 IceString getTestPrefix() const { return TestPrefix; } |
138 IceString mangleName(const IceString &Name) const; | 234 IceString mangleName(const IceString &Name) const; |
139 | 235 |
140 // Manage Constants. | 236 // Manage Constants. |
141 // getConstant*() functions are not const because they might add | 237 // getConstant*() functions are not const because they might add |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
222 | 318 |
223 TimerStackIdT newTimerStackID(const IceString &Name); | 319 TimerStackIdT newTimerStackID(const IceString &Name); |
224 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); | 320 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); |
225 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 321 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); |
226 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 322 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); |
227 void resetTimer(TimerStackIdT StackID); | 323 void resetTimer(TimerStackIdT StackID); |
228 void setTimerName(TimerStackIdT StackID, const IceString &NewName); | 324 void setTimerName(TimerStackIdT StackID, const IceString &NewName); |
229 void dumpTimers(TimerStackIdT StackID = TSK_Default, | 325 void dumpTimers(TimerStackIdT StackID = TSK_Default, |
230 bool DumpCumulative = true); | 326 bool DumpCumulative = true); |
231 | 327 |
328 // Adds a newly parsed and constructed function to the Cfg work | |
329 // queue. Notifies any idle workers that a new function is | |
330 // available for translating. May block if the work queue is too | |
331 // large, in order to control memory footprint. | |
332 void cfgQueueAdd(Cfg *Func) { CfgQ.add(Func); } | |
333 // Takes a Cfg from the work queue for translating. May block if | |
334 // the work queue is currently empty. Returns nullptr if there is | |
335 // no more work - the queue is empty and either end() has been | |
336 // called or the Sequential flag was set. | |
337 Cfg *cfgQueueGet() { return CfgQ.get(); } | |
338 // Notifies that no more work will be added to the work queue. | |
339 void cfgQueueEnd() { CfgQ.end(); } | |
340 | |
341 void startWorkerThreads() { | |
342 size_t NumWorkers = getFlags().NumTranslationThreads; | |
343 for (size_t i = 0; i < NumWorkers; ++i) { | |
344 ThreadContext *WorkerTLS = new ThreadContext(); | |
345 AllThreadContexts.push_back(WorkerTLS); | |
346 TranslationThreads.push_back(std::thread( | |
347 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); | |
348 } | |
349 if (NumWorkers) { | |
350 // TODO(stichnot): start a new thread for the emitter queue worker. | |
351 } | |
352 } | |
353 | |
354 void waitForWorkerThreads() { | |
355 cfgQueueEnd(); | |
356 // TODO(stichnot): call end() on the emitter work queue. | |
357 for (std::thread &Worker : TranslationThreads) { | |
358 Worker.join(); | |
359 } | |
360 TranslationThreads.clear(); | |
361 // TODO(stichnot): join the emitter thread. | |
362 } | |
363 | |
364 // Translation thread startup routine. | |
365 void translateFunctionsWrapper(ThreadContext *MyTLS) { | |
366 TLS = MyTLS; | |
367 translateFunctions(); | |
368 } | |
369 // Translate functions from the Cfg queue until the queue is empty. | |
370 void translateFunctions(); | |
371 | |
372 // Utility function to match a symbol name against a match string. | |
373 // This is used in a few cases where we want to take some action on | |
374 // a particular function or symbol based on a command-line argument, | |
375 // such as changing the verbose level for a particular function. An | |
376 // empty Match argument means match everything. Returns true if | |
377 // there is a match. | |
378 static bool matchSymbolName(const IceString &SymbolName, | |
379 const IceString &Match) { | |
380 return Match.empty() || Match == SymbolName; | |
381 } | |
382 | |
232 private: | 383 private: |
233 // Try to make sure the mutexes are allocated on separate cache | 384 // Try to ensure mutexes are allocated on separate cache lines. |
234 // lines, assuming the maximum cache line size is 64. | 385 |
235 const static size_t MaxCacheLineSize = 64; | 386 // Managed by getAllocator() |
236 alignas(MaxCacheLineSize) GlobalLockType AllocLock; | 387 alignas(MaxCacheLineSize) GlobalLockType AllocLock; |
388 ArenaAllocator<> Allocator; | |
389 | |
390 // Managed by getConstantPool() | |
237 alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock; | 391 alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock; |
392 std::unique_ptr<ConstantPool> ConstPool; | |
393 | |
394 // Managed by getErrorStatus() | |
395 alignas(MaxCacheLineSize) GlobalLockType ErrorStatusLock; | |
396 std::error_code ErrorStatus; | |
397 | |
398 // Managed by getStatsCumulative() | |
238 alignas(MaxCacheLineSize) GlobalLockType StatsLock; | 399 alignas(MaxCacheLineSize) GlobalLockType StatsLock; |
400 CodeStats StatsCumulative; | |
401 | |
402 // Managed by getTimers() | |
239 alignas(MaxCacheLineSize) GlobalLockType TimerLock; | 403 alignas(MaxCacheLineSize) GlobalLockType TimerLock; |
404 std::vector<TimerStack> Timers; | |
240 | 405 |
241 // StrLock is a global lock on the dump and emit output streams. | 406 // StrLock is a global lock on the dump and emit output streams. |
242 typedef std::mutex StrLockType; | 407 typedef std::mutex StrLockType; |
243 StrLockType StrLock; | 408 alignas(MaxCacheLineSize) StrLockType StrLock; |
244 | |
245 Ostream *StrDump; // Stream for dumping / diagnostics | 409 Ostream *StrDump; // Stream for dumping / diagnostics |
246 Ostream *StrEmit; // Stream for code emission | 410 Ostream *StrEmit; // Stream for code emission |
247 | 411 |
248 ArenaAllocator<> Allocator; | 412 const VerboseMask VMask; |
JF
2015/01/25 22:57:00
alignas after the streams.
Jim Stichnoth
2015/01/26 04:59:44
Done. (wish it was easier to add these boundaries
JF
2015/01/26 17:54:50
Your wish is my command:
#define ICE_CACHELINE_BO
JF
2015/01/26 19:11:48
A standard-compliant solution pointed out by Richa
Jim Stichnoth
2015/01/27 00:56:18
Done.
Jim Stichnoth
2015/01/27 00:56:18
Cool, thanks!
| |
249 VerboseMask VMask; | |
250 std::unique_ptr<ConstantPool> ConstPool; | |
251 Intrinsics IntrinsicsInfo; | 413 Intrinsics IntrinsicsInfo; |
252 const TargetArch Arch; | 414 const TargetArch Arch; |
253 const OptLevel Opt; | 415 const OptLevel Opt; |
254 const IceString TestPrefix; | 416 const IceString TestPrefix; |
255 const ClFlags &Flags; | 417 const ClFlags &Flags; |
256 RandomNumberGenerator RNG; | 418 RandomNumberGenerator RNG; |
JF
2015/01/25 22:57:00
Add a TODO to move this out of this class.
Jim Stichnoth
2015/01/26 04:59:43
Done.
| |
257 std::unique_ptr<ELFObjectWriter> ObjectWriter; | 419 std::unique_ptr<ELFObjectWriter> ObjectWriter; |
258 CodeStats StatsCumulative; | 420 CfgQueue CfgQ; |
259 std::vector<TimerStack> Timers; | |
260 | 421 |
261 LockedPtr<ArenaAllocator<>> getAllocator() { | 422 LockedPtr<ArenaAllocator<>> getAllocator() { |
262 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); | 423 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); |
263 } | 424 } |
264 LockedPtr<ConstantPool> getConstPool() { | 425 LockedPtr<ConstantPool> getConstPool() { |
265 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); | 426 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); |
266 } | 427 } |
267 LockedPtr<CodeStats> getStatsCumulative() { | 428 LockedPtr<CodeStats> getStatsCumulative() { |
268 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); | 429 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); |
269 } | 430 } |
270 LockedPtr<std::vector<TimerStack>> getTimers() { | 431 LockedPtr<std::vector<TimerStack>> getTimers() { |
271 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock); | 432 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock); |
272 } | 433 } |
273 | 434 |
274 std::vector<ThreadContext *> AllThreadContexts; | 435 std::vector<ThreadContext *> AllThreadContexts; |
436 std::vector<std::thread> TranslationThreads; | |
275 // Each thread has its own TLS pointer which is also held in | 437 // Each thread has its own TLS pointer which is also held in |
276 // AllThreadContexts. | 438 // AllThreadContexts. |
277 ICE_ATTRIBUTE_TLS static ThreadContext *TLS; | 439 ICE_ATTRIBUTE_TLS static ThreadContext *TLS; |
278 | 440 |
279 // Private helpers for mangleName() | 441 // Private helpers for mangleName() |
280 typedef llvm::SmallVector<char, 32> ManglerVector; | 442 typedef llvm::SmallVector<char, 32> ManglerVector; |
281 void incrementSubstitutions(ManglerVector &OldName) const; | 443 void incrementSubstitutions(ManglerVector &OldName) const; |
282 }; | 444 }; |
283 | 445 |
284 // Helper class to push and pop a timer marker. The constructor | 446 // Helper class to push and pop a timer marker. The constructor |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
322 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } | 484 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } |
323 ~OstreamLocker() { Ctx->unlockStr(); } | 485 ~OstreamLocker() { Ctx->unlockStr(); } |
324 | 486 |
325 private: | 487 private: |
326 GlobalContext *const Ctx; | 488 GlobalContext *const Ctx; |
327 }; | 489 }; |
328 | 490 |
329 } // end of namespace Ice | 491 } // end of namespace Ice |
330 | 492 |
331 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H | 493 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H |
OLD | NEW |