Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(240)

Side by Side Diff: src/IceGlobalContext.h

Issue 870653002: Subzero: Initial implementation of multithreaded translation. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Cleanup Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file declares aspects of the compilation that persist across 10 // This file declares aspects of the compilation that persist across
11 // multiple functions. 11 // multiple functions.
12 // 12 //
13 //===----------------------------------------------------------------------===// 13 //===----------------------------------------------------------------------===//
14 14
15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H 15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H
16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H 16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H
17 17
18 #include <memory>
19 #include <mutex> 18 #include <mutex>
19 #include <queue>
20 #include <thread>
20 21
21 #include "IceDefs.h" 22 #include "IceDefs.h"
22 #include "IceClFlags.h" 23 #include "IceClFlags.h"
23 #include "IceIntrinsics.h" 24 #include "IceIntrinsics.h"
24 #include "IceRNG.h" 25 #include "IceRNG.h"
25 #include "IceTimerTree.h" 26 #include "IceTimerTree.h"
26 #include "IceTypes.h" 27 #include "IceTypes.h"
27 28
28 namespace Ice { 29 namespace Ice {
29 30
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
89 class ThreadContext { 90 class ThreadContext {
90 ThreadContext(const ThreadContext &) = delete; 91 ThreadContext(const ThreadContext &) = delete;
91 ThreadContext &operator=(const ThreadContext &) = delete; 92 ThreadContext &operator=(const ThreadContext &) = delete;
92 93
93 public: 94 public:
94 ThreadContext() {} 95 ThreadContext() {}
95 CodeStats StatsFunction; 96 CodeStats StatsFunction;
96 std::vector<TimerStack> Timers; 97 std::vector<TimerStack> Timers;
97 }; 98 };
98 99
// CfgQueue is a bounded producer/consumer work queue of Cfg pointers,
// guarded by one mutex (Lock) and two condition variables.
//  - add() blocks on Shrunk while the queue holds more than NumWorkers
//    entries, so at most NumWorkers + 1 entries are ever queued. It then
//    pushes Func, releases the lock, and wakes one waiter on GrewOrEnded.
//  - get() pops and returns the front entry, releasing the lock and
//    waking one waiter on Shrunk. On an empty queue it returns nullptr
//    immediately when NumWorkers is 0; otherwise it waits on GrewOrEnded
//    and returns nullptr only once end() has been called and the queue
//    has been drained.
//  - end() sets IsEnded under the lock and wakes every waiter on
//    GrewOrEnded.
// NOTE(review): std::condition_variable lives in <condition_variable>,
// which is not among the visible #includes, and it can only wait on a
// std::unique_lock<std::mutex>. GlobalLockType is defined outside this
// view -- confirm that it is std::mutex.
100 class CfgQueue {
101 public:
102 explicit CfgQueue(uint32_t NumWorkers)
103 : NumWorkers(NumWorkers), IsEnded(false) {}
104 void add(Cfg *Func) {
105 std::unique_lock<GlobalLockType> L(Lock);
JF 2015/01/22 20:50:56 Can you add a comment on CfgQueue explaining the l
jvoung (off chromium) 2015/01/22 23:06:06 Yeah, for pnacl-llc, it dequeues N functions at a
Jim Stichnoth 2015/01/23 07:55:54 Done.
Jim Stichnoth 2015/01/23 07:55:55 Even for small functions, my sense is that it take
jvoung (off chromium) 2015/01/23 17:49:09 Sorry I don't have the numbers anymore, but it can
JF 2015/01/23 17:51:02 Code review was: https://codereview.chromium.org
106 // If the work queue is already "full", wait for a consumer to
107 // grab an element and shrink the queue.
108 while (WorkQueue.size() > NumWorkers) {
109 Shrunk.wait(L);
110 }
111 WorkQueue.push(Func);
112 L.unlock();
113 GrewOrEnded.notify_one();
114 }
115 Cfg *get() {
116 std::unique_lock<GlobalLockType> L(Lock);
117 while (!IsEnded || !WorkQueue.empty()) {
118 if (!WorkQueue.empty()) {
119 Cfg *Func = WorkQueue.front();
120 WorkQueue.pop();
121 L.unlock();
122 Shrunk.notify_one();
123 return Func;
124 }
125 // If the work queue is empty, and this is pure sequential
126 // execution, then return nullptr.
127 if (NumWorkers == 0)
128 return nullptr;
129 GrewOrEnded.wait(L);
130 }
131 return nullptr;
132 }
133 void end() {
134 std::unique_lock<GlobalLockType> L(Lock);
135 IsEnded = true;
136 L.unlock();
137 GrewOrEnded.notify_all();
138 }
139
140 private:
141 std::queue<Cfg *> WorkQueue;
JF 2015/01/22 20:50:56 This should probably be an std::array if the size
Jim Stichnoth 2015/01/23 07:55:55 There is just one add() and one get() per function
142 // Lock guards access to WorkQueue and IsEnded.
143 GlobalLockType Lock;
144 // GrewOrEnded is notified (by the producer) when something is
145 // added to the queue, in case consumers are waiting for a
146 // non-empty queue.
147 std::condition_variable GrewOrEnded;
148 // Shrunk is notified (by the consumer) when something is removed
149 // from the queue, in case the producer is waiting for the queue
150 // to drop below maximum capacity.
151 std::condition_variable Shrunk;
152 const uint32_t NumWorkers;
JF 2015/01/22 20:50:56 I'd make this a size_t.
Jim Stichnoth 2015/01/23 07:55:55 Done, here and in IceClFlags.h and llvm2ice.cpp.
153 bool IsEnded;
154 };
155
99 public: 156 public:
100 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, 157 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer,
101 VerboseMask Mask, TargetArch Arch, OptLevel Opt, 158 VerboseMask Mask, TargetArch Arch, OptLevel Opt,
102 IceString TestPrefix, const ClFlags &Flags); 159 IceString TestPrefix, const ClFlags &Flags);
103 ~GlobalContext(); 160 ~GlobalContext();
104 161
105 // Returns true if any of the specified options in the verbose mask
106 // are set. If the argument is omitted, it checks if any verbose
107 // options at all are set.
108 VerboseMask getVerbose() const { return VMask; } 162 VerboseMask getVerbose() const { return VMask; }
109 bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; }
110 void setVerbose(VerboseMask Mask) { VMask = Mask; }
111 void addVerbose(VerboseMask Mask) { VMask |= Mask; }
112 void subVerbose(VerboseMask Mask) { VMask &= ~Mask; }
113 163
114 // The dump and emit streams need to be used by only one thread at a 164 // The dump and emit streams need to be used by only one thread at a
115 // time. This is done by exclusively reserving the streams via 165 // time. This is done by exclusively reserving the streams via
116 // lockStr() and unlockStr(). The OstreamLocker class can be used 166 // lockStr() and unlockStr(). The OstreamLocker class can be used
117 // to conveniently manage this. 167 // to conveniently manage this.
118 // 168 //
119 // The model is that a thread grabs the stream lock, then does an 169 // The model is that a thread grabs the stream lock, then does an
120 // arbitrary amount of work during which far-away callees may grab 170 // arbitrary amount of work during which far-away callees may grab
121 // the stream and do something with it, and finally the thread 171 // the stream and do something with it, and finally the thread
122 // releases the stream lock. This allows large chunks of output to 172 // releases the stream lock. This allows large chunks of output to
123 // be dumped or emitted without risking interleaving from multiple 173 // be dumped or emitted without risking interleaving from multiple
124 // threads. 174 // threads.
125 void lockStr() { StrLock.lock(); } 175 void lockStr() { StrLock.lock(); }
126 void unlockStr() { StrLock.unlock(); } 176 void unlockStr() { StrLock.unlock(); }
127 Ostream &getStrDump() { return *StrDump; } 177 Ostream &getStrDump() { return *StrDump; }
128 Ostream &getStrEmit() { return *StrEmit; } 178 Ostream &getStrEmit() { return *StrEmit; }
129 179
130 TargetArch getTargetArch() const { return Arch; } 180 TargetArch getTargetArch() const { return Arch; }
131 OptLevel getOptLevel() const { return Opt; } 181 OptLevel getOptLevel() const { return Opt; }
// Returns the current value of the ErrorStatus member.
// NOTE(review): no code that sets ErrorStatus is visible in this chunk;
// confirm where it is initialized and updated.
182 bool getErrorStatus() const { return ErrorStatus; }
132 183
133 // When emitting assembly, we allow a string to be prepended to 184 // When emitting assembly, we allow a string to be prepended to
134 // names of translated functions. This makes it easier to create an 185 // names of translated functions. This makes it easier to create an
135 // execution test against a reference translator like llc, with both 186 // execution test against a reference translator like llc, with both
136 // translators using the same bitcode as input. 187 // translators using the same bitcode as input.
137 IceString getTestPrefix() const { return TestPrefix; } 188 IceString getTestPrefix() const { return TestPrefix; }
138 IceString mangleName(const IceString &Name) const; 189 IceString mangleName(const IceString &Name) const;
139 190
140 // Manage Constants. 191 // Manage Constants.
141 // getConstant*() functions are not const because they might add 192 // getConstant*() functions are not const because they might add
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
222 273
223 TimerStackIdT newTimerStackID(const IceString &Name); 274 TimerStackIdT newTimerStackID(const IceString &Name);
224 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); 275 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
225 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); 276 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
226 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); 277 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
227 void resetTimer(TimerStackIdT StackID); 278 void resetTimer(TimerStackIdT StackID);
228 void setTimerName(TimerStackIdT StackID, const IceString &NewName); 279 void setTimerName(TimerStackIdT StackID, const IceString &NewName);
229 void dumpTimers(TimerStackIdT StackID = TSK_Default, 280 void dumpTimers(TimerStackIdT StackID = TSK_Default,
230 bool DumpCumulative = true); 281 bool DumpCumulative = true);
231 282
283 // Adds a newly parsed and constructed function to the Cfg work
284 // queue. Notifies any idle workers that a new function is
285 // available for translating. May block if the work queue is too
286 // large, in order to control memory footprint.
287 void cfgQueueAdd(Cfg *Func) { CfgQ.add(Func); }
288 // Takes a Cfg from the work queue for translating. May block if
289 // the work queue is currently empty. Returns nullptr if there is
290 // no more work: the queue is empty and either cfgQueueEnd() has
291 // been called or the queue was built with zero workers.
JF 2015/01/22 20:50:56 "probably"? That seems mostly accurate.
Jim Stichnoth 2015/01/23 07:55:54 Done.
292 Cfg *cfgQueueGet() { return CfgQ.get(); }
293 // Notifies that no more work will be added to the work queue.
294 void cfgQueueEnd() { CfgQ.end(); }
295
// Starts getFlags().NumTranslationThreads translation threads. For
// each one, a new ThreadContext is allocated and recorded in
// AllThreadContexts, and a std::thread running
// translateFunctionsWrapper() with that context is appended to
// TranslationThreads. Nothing is started when the count is zero.
// NOTE(review): each ThreadContext is allocated with a raw new; the
// code that releases the entries of AllThreadContexts is not visible
// in this chunk -- confirm they are freed.
296 void startWorkerThreads() {
297 uint32_t NumWorkers = getFlags().NumTranslationThreads;
298 for (uint32_t i = 0; i < NumWorkers; ++i) {
299 ThreadContext *WorkerTLS = new ThreadContext();
300 AllThreadContexts.push_back(WorkerTLS);
301 TranslationThreads.push_back(std::thread(
302 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS));
303 }
304 if (NumWorkers) {
305 // TODO(stichnot): start emitter thread
JF 2015/01/22 20:50:56 ?
Jim Stichnoth 2015/01/23 07:55:54 Done.
306 }
307 }
308
// Marks the Cfg work queue as ended via cfgQueueEnd(), which wakes
// workers waiting for new work, then joins every thread in
// TranslationThreads and clears the vector.
309 void waitForWorkerThreads() {
310 cfgQueueEnd();
311 // TODO(stichnot): end the emitter queue
312 for (std::thread &Worker : TranslationThreads) {
313 Worker.join();
314 }
315 TranslationThreads.clear();
316 // TODO(stichnot): join the emitter queue
317 }
318
319 // Translation thread startup routine. Stores MyTLS in the
// thread_local TLS pointer of the calling thread, then runs
// translateFunctions() on that thread.
320 void translateFunctionsWrapper(ThreadContext *MyTLS) {
321 TLS = MyTLS;
322 translateFunctions();
323 }
324 // Translate functions from the Cfg queue until the queue is empty.
325 void translateFunctions();
326
327 // Utility function to match a symbol name against a match string.
328 // An empty match string matches everything; otherwise Match must
329 // equal SymbolName exactly. Returns true if there is a match.
330 static bool matchSymbolName(const IceString &SymbolName,
331 const IceString &Match) {
332 return Match.empty() || Match == SymbolName;
333 }
JF 2015/01/22 20:50:56 I don't understand what this is for.
Jim Stichnoth 2015/01/23 07:55:54 Hopefully documented better.
334
232 private: 335 private:
233 // Try to make sure the mutexes are allocated on separate cache 336 // Try to make sure the mutexes are allocated on separate cache
234 // lines, assuming the maximum cache line size is 64. 337 // lines, assuming the maximum cache line size is 64.
235 const static size_t MaxCacheLineSize = 64; 338 const static size_t MaxCacheLineSize = 64;
236 alignas(MaxCacheLineSize) GlobalLockType AllocLock; 339 alignas(MaxCacheLineSize) GlobalLockType AllocLock;
237 alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock; 340 alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock;
238 alignas(MaxCacheLineSize) GlobalLockType StatsLock; 341 alignas(MaxCacheLineSize) GlobalLockType StatsLock;
239 alignas(MaxCacheLineSize) GlobalLockType TimerLock; 342 alignas(MaxCacheLineSize) GlobalLockType TimerLock;
240 343
241 // StrLock is a global lock on the dump and emit output streams. 344 // StrLock is a global lock on the dump and emit output streams.
242 typedef std::mutex StrLockType; 345 typedef std::mutex StrLockType;
243 StrLockType StrLock; 346 StrLockType StrLock;
244 347
245 Ostream *StrDump; // Stream for dumping / diagnostics 348 Ostream *StrDump; // Stream for dumping / diagnostics
246 Ostream *StrEmit; // Stream for code emission 349 Ostream *StrEmit; // Stream for code emission
247 350
248 ArenaAllocator<> Allocator; 351 ArenaAllocator<> Allocator;
249 VerboseMask VMask; 352 VerboseMask VMask;
250 std::unique_ptr<ConstantPool> ConstPool; 353 std::unique_ptr<ConstantPool> ConstPool;
251 Intrinsics IntrinsicsInfo; 354 Intrinsics IntrinsicsInfo;
252 const TargetArch Arch; 355 const TargetArch Arch;
253 const OptLevel Opt; 356 const OptLevel Opt;
254 const IceString TestPrefix; 357 const IceString TestPrefix;
255 const ClFlags &Flags; 358 const ClFlags &Flags;
256 RandomNumberGenerator RNG; 359 RandomNumberGenerator RNG;
257 std::unique_ptr<ELFObjectWriter> ObjectWriter; 360 std::unique_ptr<ELFObjectWriter> ObjectWriter;
258 CodeStats StatsCumulative; 361 CodeStats StatsCumulative;
259 std::vector<TimerStack> Timers; 362 std::vector<TimerStack> Timers;
363 CfgQueue CfgQ;
364 bool ErrorStatus;
JF 2015/01/22 20:50:56 Use std::error_code?
Jim Stichnoth 2015/01/23 07:55:54 Done.
260 365
261 LockedPtr<ArenaAllocator<>> getAllocator() { 366 LockedPtr<ArenaAllocator<>> getAllocator() {
262 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); 367 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock);
263 } 368 }
264 LockedPtr<ConstantPool> getConstPool() { 369 LockedPtr<ConstantPool> getConstPool() {
265 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); 370 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock);
266 } 371 }
267 LockedPtr<CodeStats> getStatsCumulative() { 372 LockedPtr<CodeStats> getStatsCumulative() {
268 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); 373 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock);
269 } 374 }
270 LockedPtr<std::vector<TimerStack>> getTimers() { 375 LockedPtr<std::vector<TimerStack>> getTimers() {
271 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock); 376 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock);
272 } 377 }
273 378
274 std::vector<ThreadContext *> AllThreadContexts; 379 std::vector<ThreadContext *> AllThreadContexts;
380 std::vector<std::thread> TranslationThreads;
275 // Each thread has its own TLS pointer which is also held in 381 // Each thread has its own TLS pointer which is also held in
276 // AllThreadContexts. 382 // AllThreadContexts.
277 thread_local static ThreadContext *TLS; 383 thread_local static ThreadContext *TLS;
278 384
279 // Private helpers for mangleName() 385 // Private helpers for mangleName()
280 typedef llvm::SmallVector<char, 32> ManglerVector; 386 typedef llvm::SmallVector<char, 32> ManglerVector;
281 void incrementSubstitutions(ManglerVector &OldName) const; 387 void incrementSubstitutions(ManglerVector &OldName) const;
282 }; 388 };
283 389
284 // Helper class to push and pop a timer marker. The constructor 390 // Helper class to push and pop a timer marker. The constructor
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
322 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } 428 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); }
323 ~OstreamLocker() { Ctx->unlockStr(); } 429 ~OstreamLocker() { Ctx->unlockStr(); }
324 430
325 private: 431 private:
326 GlobalContext *const Ctx; 432 GlobalContext *const Ctx;
327 }; 433 };
328 434
329 } // end of namespace Ice 435 } // end of namespace Ice
330 436
331 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H 437 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698