Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(290)

Side by Side Diff: src/IceGlobalContext.h

Issue 870653002: Subzero: Initial implementation of multithreaded translation. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Make CfgQueue::Sequential logic more clear. Move IsEnded field. Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file declares aspects of the compilation that persist across 10 // This file declares aspects of the compilation that persist across
11 // multiple functions. 11 // multiple functions.
12 // 12 //
13 //===----------------------------------------------------------------------===// 13 //===----------------------------------------------------------------------===//
14 14
15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H 15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H
16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H 16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H
17 17
18 #include <memory>
19 #include <mutex> 18 #include <mutex>
19 #include <queue>
20 #include <thread>
20 21
21 #include "IceDefs.h" 22 #include "IceDefs.h"
22 #include "IceClFlags.h" 23 #include "IceClFlags.h"
23 #include "IceIntrinsics.h" 24 #include "IceIntrinsics.h"
24 #include "IceRNG.h" 25 #include "IceRNG.h"
25 #include "IceTimerTree.h" 26 #include "IceTimerTree.h"
26 #include "IceTypes.h" 27 #include "IceTypes.h"
27 28
28 namespace Ice { 29 namespace Ice {
29 30
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
89 class ThreadContext { 90 class ThreadContext {
90 ThreadContext(const ThreadContext &) = delete; 91 ThreadContext(const ThreadContext &) = delete;
91 ThreadContext &operator=(const ThreadContext &) = delete; 92 ThreadContext &operator=(const ThreadContext &) = delete;
92 93
93 public: 94 public:
94 ThreadContext() {} 95 ThreadContext() {}
95 CodeStats StatsFunction; 96 CodeStats StatsFunction;
96 std::vector<TimerStack> Timers; 97 std::vector<TimerStack> Timers;
97 }; 98 };
98 99
100 // CfgQueue is the translation work queue. It allows multiple
101 // producers and multiple consumers (though currently only a single
102 // producer is used). The producer adds entries using add(), and
103 // may block if the queue is "full" to control Cfg memory footprint.
104 // The producer uses end() to indicate that no more entries will be
105 // added. The consumer removes an item using get(), which will
106 // return nullptr if end() has been called and the queue is empty.
107 //
108 // The MaxSize ctor arg controls the maximum size the queue can grow
109 // to. The Sequential arg indicates purely sequential execution in
110 // which the single thread should never wait().
111 //
112 // Two condition variables are used in the implementation.
113 // GrewOrEnded signals waiting workers that the producer has changed
114 // the state of the queue. Shrunk signals a blocked producer that a
115 // consumer has changed the state of the queue.
116 //
117 // The methods begin with Sequential-specific code to be most clear.
118 // The lock and condition variables are not used in the Sequential
119 // case.
120 class CfgQueue {
JF 2015/01/25 22:57:00 It's probably better to put this class in its own
Jim Stichnoth 2015/01/26 04:59:43 Done.
121 public:
122 CfgQueue(size_t MaxSize, bool Sequential)
123 : MaxSize(MaxSize), Sequential(Sequential), IsEnded(false) {}
JF 2015/01/25 22:57:00 WorkQueue.reserve(MaxSize)
Jim Stichnoth 2015/01/26 04:59:43 There's no std::queue::reserve() or std::deque::re
JF 2015/01/26 17:54:50 :( I'm still not a fan of using queue, the defaul
Jim Stichnoth 2015/01/27 00:56:18 Done.
124 void add(Cfg *Func) {
JF 2015/01/25 22:57:00 I'd rename to blocking_push or something similar,
Jim Stichnoth 2015/01/26 04:59:43 Done. add() --> blockingPush(), and get() --> blo
125 if (Sequential) {
126 WorkQueue.push(Func);
127 return;
128 }
129 std::unique_lock<GlobalLockType> L(Lock);
130 // If the work queue is already "full", wait for a consumer to
131 // grab an element and shrink the queue.
132 while (WorkQueue.size() >= MaxSize) {
133 Shrunk.wait(L);
134 }
JF 2015/01/25 22:57:00 This code: while (WorkQueue.size() >= MaxSize) {
Jim Stichnoth 2015/01/26 04:59:43 Wow. Done. (my first ever C++ lambda)
135 WorkQueue.push(Func);
136 L.unlock();
137 GrewOrEnded.notify_one();
138 }
139 Cfg *get() {
JF 2015/01/25 22:57:00 I'd rename to wait_and_pop or something similar.
Jim Stichnoth 2015/01/26 04:59:43 Done.
140 if (Sequential) {
141 Cfg *Func = nullptr;
142 if (!WorkQueue.empty()) {
143 Func = WorkQueue.front();
144 WorkQueue.pop();
145 }
146 return Func;
147 }
148 std::unique_lock<GlobalLockType> L(Lock);
149 while (!IsEnded || !WorkQueue.empty()) {
JF 2015/01/25 22:57:00 Similarly here, I'd go with: Cfg *wait_and_pop()
Jim Stichnoth 2015/01/26 04:59:43 Yeah, your rewrite could have the workers shut dow
JF 2015/01/26 17:54:50 I'm in for killing Sequential, it'll make the non-
Jim Stichnoth 2015/01/27 00:56:18 Done.
150 if (!WorkQueue.empty()) {
151 Cfg *Func = WorkQueue.front();
152 WorkQueue.pop();
153 L.unlock();
154 Shrunk.notify_one();
155 return Func;
156 }
157 GrewOrEnded.wait(L);
158 }
159 return nullptr;
160 }
161 void end() {
162 if (Sequential)
163 return;
164 std::unique_lock<GlobalLockType> L(Lock);
165 IsEnded = true;
166 L.unlock();
JF 2015/01/25 22:57:00 Could you just use a lock_guard and scoping here?
Jim Stichnoth 2015/01/26 04:59:43 I thought about this for all 3 methods, but since
JF 2015/01/26 18:10:40 As discussed offline, lock_guard is simpler than u
Jim Stichnoth 2015/01/27 00:56:18 Done.
167 GrewOrEnded.notify_all();
168 }
169
170 private:
JF 2015/01/25 22:57:00 CfgQueue() = delete; CfgQueue(const CfgQueue &) =
Jim Stichnoth 2015/01/26 04:59:43 <shamecube> Done.
171 // WorkQueue and Lock are read/written by all.
172 // TODO(stichnot): Since WorkQueue has an enforced maximum size,
173 // implement it on top of something like std::array to minimize
174 // contention.
175 alignas(MaxCacheLineSize) std::queue<Cfg *> WorkQueue;
176 // Lock guards access to WorkQueue and IsEnded.
177 alignas(MaxCacheLineSize) GlobalLockType Lock;
178
179 // GrewOrEnded is written by the producer and read by the
180 // consumers. It is notified (by the producer) when something is
181 // added to the queue or when end() is called, in case consumers
182 // are waiting for a non-empty queue or for the end of the work.
183 alignas(MaxCacheLineSize) std::condition_variable GrewOrEnded;
184
185 // Shrunk is notified (by the consumer) when something is removed
186 // from the queue, in case the producer is waiting for the queue
187 // to drop below maximum capacity. It is written by the consumers
188 // and read by the producer.
189 alignas(MaxCacheLineSize) std::condition_variable Shrunk;
190
191 // MaxSize and Sequential are read by all and written by none.
192 alignas(MaxCacheLineSize) const size_t MaxSize;
193 const bool Sequential;
194 // IsEnded is read by the consumers, and only written once by the
195 // producer.
196 bool IsEnded;
197 };
198
99 public: 199 public:
100 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, 200 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer,
101 VerboseMask Mask, TargetArch Arch, OptLevel Opt, 201 VerboseMask Mask, TargetArch Arch, OptLevel Opt,
102 IceString TestPrefix, const ClFlags &Flags); 202 IceString TestPrefix, const ClFlags &Flags);
103 ~GlobalContext(); 203 ~GlobalContext();
104 204
105 // Returns true if any of the specified options in the verbose mask
106 // are set. If the argument is omitted, it checks if any verbose
107 // options at all are set.
108 VerboseMask getVerbose() const { return VMask; } 205 VerboseMask getVerbose() const { return VMask; }
109 bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; }
110 void setVerbose(VerboseMask Mask) { VMask = Mask; }
111 void addVerbose(VerboseMask Mask) { VMask |= Mask; }
112 void subVerbose(VerboseMask Mask) { VMask &= ~Mask; }
113 206
114 // The dump and emit streams need to be used by only one thread at a 207 // The dump and emit streams need to be used by only one thread at a
115 // time. This is done by exclusively reserving the streams via 208 // time. This is done by exclusively reserving the streams via
116 // lockStr() and unlockStr(). The OstreamLocker class can be used 209 // lockStr() and unlockStr(). The OstreamLocker class can be used
117 // to conveniently manage this. 210 // to conveniently manage this.
118 // 211 //
119 // The model is that a thread grabs the stream lock, then does an 212 // The model is that a thread grabs the stream lock, then does an
120 // arbitrary amount of work during which far-away callees may grab 213 // arbitrary amount of work during which far-away callees may grab
121 // the stream and do something with it, and finally the thread 214 // the stream and do something with it, and finally the thread
122 // releases the stream lock. This allows large chunks of output to 215 // releases the stream lock. This allows large chunks of output to
123 // be dumped or emitted without risking interleaving from multiple 216 // be dumped or emitted without risking interleaving from multiple
124 // threads. 217 // threads.
125 void lockStr() { StrLock.lock(); } 218 void lockStr() { StrLock.lock(); }
126 void unlockStr() { StrLock.unlock(); } 219 void unlockStr() { StrLock.unlock(); }
127 Ostream &getStrDump() { return *StrDump; } 220 Ostream &getStrDump() { return *StrDump; }
128 Ostream &getStrEmit() { return *StrEmit; } 221 Ostream &getStrEmit() { return *StrEmit; }
129 222
130 TargetArch getTargetArch() const { return Arch; } 223 TargetArch getTargetArch() const { return Arch; }
131 OptLevel getOptLevel() const { return Opt; } 224 OptLevel getOptLevel() const { return Opt; }
225 LockedPtr<std::error_code> getErrorStatus() {
226 return LockedPtr<std::error_code>(&ErrorStatus, &ErrorStatusLock);
227 }
132 228
133 // When emitting assembly, we allow a string to be prepended to 229 // When emitting assembly, we allow a string to be prepended to
134 // names of translated functions. This makes it easier to create an 230 // names of translated functions. This makes it easier to create an
135 // execution test against a reference translator like llc, with both 231 // execution test against a reference translator like llc, with both
136 // translators using the same bitcode as input. 232 // translators using the same bitcode as input.
137 IceString getTestPrefix() const { return TestPrefix; } 233 IceString getTestPrefix() const { return TestPrefix; }
138 IceString mangleName(const IceString &Name) const; 234 IceString mangleName(const IceString &Name) const;
139 235
140 // Manage Constants. 236 // Manage Constants.
141 // getConstant*() functions are not const because they might add 237 // getConstant*() functions are not const because they might add
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
222 318
223 TimerStackIdT newTimerStackID(const IceString &Name); 319 TimerStackIdT newTimerStackID(const IceString &Name);
224 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); 320 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
225 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); 321 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
226 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); 322 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
227 void resetTimer(TimerStackIdT StackID); 323 void resetTimer(TimerStackIdT StackID);
228 void setTimerName(TimerStackIdT StackID, const IceString &NewName); 324 void setTimerName(TimerStackIdT StackID, const IceString &NewName);
229 void dumpTimers(TimerStackIdT StackID = TSK_Default, 325 void dumpTimers(TimerStackIdT StackID = TSK_Default,
230 bool DumpCumulative = true); 326 bool DumpCumulative = true);
231 327
328 // Adds a newly parsed and constructed function to the Cfg work
329 // queue. Notifies any idle workers that a new function is
330 // available for translating. May block if the work queue is too
331 // large, in order to control memory footprint.
332 void cfgQueueAdd(Cfg *Func) { CfgQ.add(Func); }
333 // Takes a Cfg from the work queue for translating. May block if
334 // the work queue is currently empty. Returns nullptr if there is
335 // no more work - the queue is empty and either end() has been
336 // called or the Sequential flag was set.
337 Cfg *cfgQueueGet() { return CfgQ.get(); }
338 // Notifies that no more work will be added to the work queue.
339 void cfgQueueEnd() { CfgQ.end(); }
340
341 void startWorkerThreads() {
342 size_t NumWorkers = getFlags().NumTranslationThreads;
343 for (size_t i = 0; i < NumWorkers; ++i) {
344 ThreadContext *WorkerTLS = new ThreadContext();
345 AllThreadContexts.push_back(WorkerTLS);
346 TranslationThreads.push_back(std::thread(
347 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS));
348 }
349 if (NumWorkers) {
350 // TODO(stichnot): start a new thread for the emitter queue worker.
351 }
352 }
353
354 void waitForWorkerThreads() {
355 cfgQueueEnd();
356 // TODO(stichnot): call end() on the emitter work queue.
357 for (std::thread &Worker : TranslationThreads) {
358 Worker.join();
359 }
360 TranslationThreads.clear();
361 // TODO(stichnot): join the emitter thread.
362 }
363
364 // Translation thread startup routine.
365 void translateFunctionsWrapper(ThreadContext *MyTLS) {
366 TLS = MyTLS;
367 translateFunctions();
368 }
369 // Translate functions from the Cfg queue until the queue is empty.
370 void translateFunctions();
371
372 // Utility function to match a symbol name against a match string.
373 // This is used in a few cases where we want to take some action on
374 // a particular function or symbol based on a command-line argument,
375 // such as changing the verbose level for a particular function. An
376 // empty Match argument means match everything. Returns true if
377 // there is a match.
378 static bool matchSymbolName(const IceString &SymbolName,
379 const IceString &Match) {
380 return Match.empty() || Match == SymbolName;
381 }
382
232 private: 383 private:
233 // Try to make sure the mutexes are allocated on separate cache 384 // Try to ensure mutexes are allocated on separate cache lines.
234 // lines, assuming the maximum cache line size is 64. 385
235 const static size_t MaxCacheLineSize = 64; 386 // Managed by getAllocator()
236 alignas(MaxCacheLineSize) GlobalLockType AllocLock; 387 alignas(MaxCacheLineSize) GlobalLockType AllocLock;
388 ArenaAllocator<> Allocator;
389
390 // Managed by getConstantPool()
237 alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock; 391 alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock;
392 std::unique_ptr<ConstantPool> ConstPool;
393
394 // Managed by getErrorStatus()
395 alignas(MaxCacheLineSize) GlobalLockType ErrorStatusLock;
396 std::error_code ErrorStatus;
397
398 // Managed by getStatsCumulative()
238 alignas(MaxCacheLineSize) GlobalLockType StatsLock; 399 alignas(MaxCacheLineSize) GlobalLockType StatsLock;
400 CodeStats StatsCumulative;
401
402 // Managed by getTimers()
239 alignas(MaxCacheLineSize) GlobalLockType TimerLock; 403 alignas(MaxCacheLineSize) GlobalLockType TimerLock;
404 std::vector<TimerStack> Timers;
240 405
241 // StrLock is a global lock on the dump and emit output streams. 406 // StrLock is a global lock on the dump and emit output streams.
242 typedef std::mutex StrLockType; 407 typedef std::mutex StrLockType;
243 StrLockType StrLock; 408 alignas(MaxCacheLineSize) StrLockType StrLock;
244
245 Ostream *StrDump; // Stream for dumping / diagnostics 409 Ostream *StrDump; // Stream for dumping / diagnostics
246 Ostream *StrEmit; // Stream for code emission 410 Ostream *StrEmit; // Stream for code emission
247 411
248 ArenaAllocator<> Allocator; 412 const VerboseMask VMask;
JF 2015/01/25 22:57:00 alignas after the streams.
Jim Stichnoth 2015/01/26 04:59:44 Done. (wish it was easier to add these boundaries
JF 2015/01/26 17:54:50 Your wish is my command: #define ICE_CACHELINE_BO
JF 2015/01/26 19:11:48 A standard-compliant solution pointed out by Richa
Jim Stichnoth 2015/01/27 00:56:18 Done.
Jim Stichnoth 2015/01/27 00:56:18 Cool, thanks!
249 VerboseMask VMask;
250 std::unique_ptr<ConstantPool> ConstPool;
251 Intrinsics IntrinsicsInfo; 413 Intrinsics IntrinsicsInfo;
252 const TargetArch Arch; 414 const TargetArch Arch;
253 const OptLevel Opt; 415 const OptLevel Opt;
254 const IceString TestPrefix; 416 const IceString TestPrefix;
255 const ClFlags &Flags; 417 const ClFlags &Flags;
256 RandomNumberGenerator RNG; 418 RandomNumberGenerator RNG;
JF 2015/01/25 22:57:00 Add a TODO to move this out of this class.
Jim Stichnoth 2015/01/26 04:59:43 Done.
257 std::unique_ptr<ELFObjectWriter> ObjectWriter; 419 std::unique_ptr<ELFObjectWriter> ObjectWriter;
258 CodeStats StatsCumulative; 420 CfgQueue CfgQ;
259 std::vector<TimerStack> Timers;
260 421
261 LockedPtr<ArenaAllocator<>> getAllocator() { 422 LockedPtr<ArenaAllocator<>> getAllocator() {
262 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); 423 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock);
263 } 424 }
264 LockedPtr<ConstantPool> getConstPool() { 425 LockedPtr<ConstantPool> getConstPool() {
265 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); 426 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock);
266 } 427 }
267 LockedPtr<CodeStats> getStatsCumulative() { 428 LockedPtr<CodeStats> getStatsCumulative() {
268 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); 429 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock);
269 } 430 }
270 LockedPtr<std::vector<TimerStack>> getTimers() { 431 LockedPtr<std::vector<TimerStack>> getTimers() {
271 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock); 432 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock);
272 } 433 }
273 434
274 std::vector<ThreadContext *> AllThreadContexts; 435 std::vector<ThreadContext *> AllThreadContexts;
436 std::vector<std::thread> TranslationThreads;
275 // Each thread has its own TLS pointer which is also held in 437 // Each thread has its own TLS pointer which is also held in
276 // AllThreadContexts. 438 // AllThreadContexts.
277 ICE_ATTRIBUTE_TLS static ThreadContext *TLS; 439 ICE_ATTRIBUTE_TLS static ThreadContext *TLS;
278 440
279 // Private helpers for mangleName() 441 // Private helpers for mangleName()
280 typedef llvm::SmallVector<char, 32> ManglerVector; 442 typedef llvm::SmallVector<char, 32> ManglerVector;
281 void incrementSubstitutions(ManglerVector &OldName) const; 443 void incrementSubstitutions(ManglerVector &OldName) const;
282 }; 444 };
283 445
284 // Helper class to push and pop a timer marker. The constructor 446 // Helper class to push and pop a timer marker. The constructor
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
322 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } 484 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); }
323 ~OstreamLocker() { Ctx->unlockStr(); } 485 ~OstreamLocker() { Ctx->unlockStr(); }
324 486
325 private: 487 private:
326 GlobalContext *const Ctx; 488 GlobalContext *const Ctx;
327 }; 489 };
328 490
329 } // end of namespace Ice 491 } // end of namespace Ice
330 492
331 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H 493 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H
OLDNEW
« no previous file with comments | « src/IceDefs.h ('k') | src/IceGlobalContext.cpp » ('j') | src/IceGlobalContext.cpp » ('J')

Powered by Google App Engine
This is Rietveld 408576698