OLD | NEW |
1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// | 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file declares aspects of the compilation that persist across | 10 // This file declares aspects of the compilation that persist across |
11 // multiple functions. | 11 // multiple functions. |
12 // | 12 // |
13 //===----------------------------------------------------------------------===// | 13 //===----------------------------------------------------------------------===// |
14 | 14 |
15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H | 15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H |
16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H | 16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H |
17 | 17 |
18 #include <array> | 18 #include <array> |
19 #include <mutex> | 19 #include <mutex> |
20 #include <thread> | 20 #include <thread> |
21 | 21 |
22 #include "IceDefs.h" | 22 #include "IceDefs.h" |
23 #include "IceClFlags.h" | 23 #include "IceClFlags.h" |
24 #include "IceIntrinsics.h" | 24 #include "IceIntrinsics.h" |
25 #include "IceRNG.h" | 25 #include "IceRNG.h" |
| 26 #include "IceThreading.h" |
26 #include "IceTimerTree.h" | 27 #include "IceTimerTree.h" |
27 #include "IceTypes.h" | 28 #include "IceTypes.h" |
28 #include "IceUtils.h" | 29 #include "IceUtils.h" |
29 | 30 |
30 namespace Ice { | 31 namespace Ice { |
31 | 32 |
32 class ClFlags; | 33 class ClFlags; |
33 class ConstantPool; | 34 class ConstantPool; |
| 35 class EmitterWorkItem; |
34 class FuncSigType; | 36 class FuncSigType; |
35 | 37 |
36 // LockedPtr is a way to provide automatically locked access to some object. | 38 // LockedPtr is a way to provide automatically locked access to some object. |
37 template <typename T> class LockedPtr { | 39 template <typename T> class LockedPtr { |
38 LockedPtr() = delete; | 40 LockedPtr() = delete; |
39 LockedPtr(const LockedPtr &) = delete; | 41 LockedPtr(const LockedPtr &) = delete; |
40 LockedPtr &operator=(const LockedPtr &) = delete; | 42 LockedPtr &operator=(const LockedPtr &) = delete; |
41 | 43 |
42 public: | 44 public: |
43 LockedPtr(T *Value, GlobalLockType *Lock) : Value(Value), Lock(Lock) { | 45 LockedPtr(T *Value, GlobalLockType *Lock) : Value(Value), Lock(Lock) { |
(...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
269 void dumpTimers(TimerStackIdT StackID = TSK_Default, | 271 void dumpTimers(TimerStackIdT StackID = TSK_Default, |
270 bool DumpCumulative = true); | 272 bool DumpCumulative = true); |
271 // The following methods affect only the calling thread's TLS timer | 273 // The following methods affect only the calling thread's TLS timer |
272 // data. | 274 // data. |
273 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); | 275 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); |
274 void pushTimer(TimerIdT ID, TimerStackIdT StackID); | 276 void pushTimer(TimerIdT ID, TimerStackIdT StackID); |
275 void popTimer(TimerIdT ID, TimerStackIdT StackID); | 277 void popTimer(TimerIdT ID, TimerStackIdT StackID); |
276 void resetTimer(TimerStackIdT StackID); | 278 void resetTimer(TimerStackIdT StackID); |
277 void setTimerName(TimerStackIdT StackID, const IceString &NewName); | 279 void setTimerName(TimerStackIdT StackID, const IceString &NewName); |
278 | 280 |
// Returns the first work item sequence number that the parser
// produces, which is correspondingly the first sequence number that
// the emitter thread will wait for.  Numbering starts at 1 to leave
// room for a sentinel, in case e.g. we wish to inject items with a
// special sequence number that may be executed out of order.
// Declared constexpr so that the value can also be used in constant
// expressions (static_assert, array bounds, case labels).
static constexpr uint32_t getFirstSequenceNumber() { return 1; }
279 // Adds a newly parsed and constructed function to the Cfg work | 287 // Adds a newly parsed and constructed function to the Cfg work |
280 // queue. Notifies any idle workers that a new function is | 288 // queue. Notifies any idle workers that a new function is |
281 // available for translating. May block if the work queue is too | 289 // available for translating. May block if the work queue is too |
282 // large, in order to control memory footprint. | 290 // large, in order to control memory footprint. |
283 void cfgQueueBlockingPush(std::unique_ptr<Cfg> Func); | 291 void optQueueBlockingPush(std::unique_ptr<Cfg> Func); |
284 // Takes a Cfg from the work queue for translating. May block if | 292 // Takes a Cfg from the work queue for translating. May block if |
285 // the work queue is currently empty. Returns nullptr if there is | 293 // the work queue is currently empty. Returns nullptr if there is |
286 // no more work - the queue is empty and either end() has been | 294 // no more work - the queue is empty and either end() has been |
287 // called or the Sequential flag was set. | 295 // called or the Sequential flag was set. |
288 std::unique_ptr<Cfg> cfgQueueBlockingPop(); | 296 std::unique_ptr<Cfg> optQueueBlockingPop(); |
289 // Notifies that no more work will be added to the work queue. | 297 // Notifies that no more work will be added to the work queue. |
290 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } | 298 void optQueueNotifyEnd() { OptQ.notifyEnd(); } |
| 299 |
| 300 void emitQueueBlockingPush(EmitterWorkItem *Item); |
| 301 EmitterWorkItem *emitQueueBlockingPop(); |
| 302 void emitQueueNotifyEnd() { EmitQ.notifyEnd(); } |
291 | 303 |
292 void startWorkerThreads() { | 304 void startWorkerThreads() { |
293 size_t NumWorkers = getFlags().getNumTranslationThreads(); | 305 size_t NumWorkers = getFlags().getNumTranslationThreads(); |
294 auto Timers = getTimers(); | 306 auto Timers = getTimers(); |
295 for (size_t i = 0; i < NumWorkers; ++i) { | 307 for (size_t i = 0; i < NumWorkers; ++i) { |
296 ThreadContext *WorkerTLS = new ThreadContext(); | 308 ThreadContext *WorkerTLS = new ThreadContext(); |
297 Timers->initInto(WorkerTLS->Timers); | 309 Timers->initInto(WorkerTLS->Timers); |
298 AllThreadContexts.push_back(WorkerTLS); | 310 AllThreadContexts.push_back(WorkerTLS); |
299 TranslationThreads.push_back(std::thread( | 311 TranslationThreads.push_back(std::thread( |
300 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); | 312 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); |
301 } | 313 } |
302 if (NumWorkers) { | 314 if (NumWorkers) { |
303 // TODO(stichnot): start a new thread for the emitter queue worker. | 315 ThreadContext *WorkerTLS = new ThreadContext(); |
| 316 Timers->initInto(WorkerTLS->Timers); |
| 317 AllThreadContexts.push_back(WorkerTLS); |
| 318 EmitterThreads.push_back( |
| 319 std::thread(&GlobalContext::emitterWrapper, this, WorkerTLS)); |
304 } | 320 } |
305 } | 321 } |
306 | 322 |
307 void waitForWorkerThreads() { | 323 void waitForWorkerThreads() { |
308 cfgQueueNotifyEnd(); | 324 optQueueNotifyEnd(); |
309 // TODO(stichnot): call end() on the emitter work queue. | |
310 for (std::thread &Worker : TranslationThreads) { | 325 for (std::thread &Worker : TranslationThreads) { |
311 Worker.join(); | 326 Worker.join(); |
312 } | 327 } |
313 TranslationThreads.clear(); | 328 TranslationThreads.clear(); |
314 // TODO(stichnot): join the emitter thread. | 329 |
| 330 // Only notify the emit queue to end after all the translation |
| 331 // threads have ended. |
| 332 emitQueueNotifyEnd(); |
| 333 for (std::thread &Worker : EmitterThreads) { |
| 334 Worker.join(); |
| 335 } |
| 336 EmitterThreads.clear(); |
| 337 |
315 if (ALLOW_DUMP) { | 338 if (ALLOW_DUMP) { |
316 auto Timers = getTimers(); | 339 auto Timers = getTimers(); |
317 for (ThreadContext *TLS : AllThreadContexts) | 340 for (ThreadContext *TLS : AllThreadContexts) |
318 Timers->mergeFrom(TLS->Timers); | 341 Timers->mergeFrom(TLS->Timers); |
319 } | 342 } |
320 if (ALLOW_DUMP) { | 343 if (ALLOW_DUMP) { |
321 // Do a separate loop over AllThreadContexts to avoid holding | 344 // Do a separate loop over AllThreadContexts to avoid holding |
322 // two locks at once. | 345 // two locks at once. |
323 auto Stats = getStatsCumulative(); | 346 auto Stats = getStatsCumulative(); |
324 for (ThreadContext *TLS : AllThreadContexts) | 347 for (ThreadContext *TLS : AllThreadContexts) |
325 Stats->add(TLS->StatsCumulative); | 348 Stats->add(TLS->StatsCumulative); |
326 } | 349 } |
327 } | 350 } |
328 | 351 |
329 // Translation thread startup routine. | 352 // Translation thread startup routine. |
330 void translateFunctionsWrapper(ThreadContext *MyTLS) { | 353 void translateFunctionsWrapper(ThreadContext *MyTLS) { |
331 ICE_TLS_SET_FIELD(TLS, MyTLS); | 354 ICE_TLS_SET_FIELD(TLS, MyTLS); |
332 translateFunctions(); | 355 translateFunctions(); |
333 } | 356 } |
// Keeps translating functions taken from the Cfg queue until the
// queue is empty.
void translateFunctions();
336 | 359 |
| 360 // Emitter thread startup routine. |
| 361 void emitterWrapper(ThreadContext *MyTLS) { |
| 362 ICE_TLS_SET_FIELD(TLS, MyTLS); |
| 363 emitItems(); |
| 364 } |
// Keeps emitting functions and global initializers taken from the
// emitter queue until the queue is empty.
void emitItems();
| 368 |
337 // Utility function to match a symbol name against a match string. | 369 // Utility function to match a symbol name against a match string. |
338 // This is used in a few cases where we want to take some action on | 370 // This is used in a few cases where we want to take some action on |
339 // a particular function or symbol based on a command-line argument, | 371 // a particular function or symbol based on a command-line argument, |
340 // such as changing the verbose level for a particular function. An | 372 // such as changing the verbose level for a particular function. An |
341 // empty Match argument means match everything. Returns true if | 373 // empty Match argument means match everything. Returns true if |
342 // there is a match. | 374 // there is a match. |
343 static bool matchSymbolName(const IceString &SymbolName, | 375 static bool matchSymbolName(const IceString &SymbolName, |
344 const IceString &Match) { | 376 const IceString &Match) { |
345 return Match.empty() || Match == SymbolName; | 377 return Match.empty() || Match == SymbolName; |
346 } | 378 } |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
383 ICE_CACHELINE_BOUNDARY; | 415 ICE_CACHELINE_BOUNDARY; |
384 | 416 |
385 const VerboseMask VMask; | 417 const VerboseMask VMask; |
386 Intrinsics IntrinsicsInfo; | 418 Intrinsics IntrinsicsInfo; |
387 const TargetArch Arch; | 419 const TargetArch Arch; |
388 const OptLevel Opt; | 420 const OptLevel Opt; |
389 const IceString TestPrefix; | 421 const IceString TestPrefix; |
390 const ClFlags &Flags; | 422 const ClFlags &Flags; |
391 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. | 423 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. |
392 std::unique_ptr<ELFObjectWriter> ObjectWriter; | 424 std::unique_ptr<ELFObjectWriter> ObjectWriter; |
393 BoundedProducerConsumerQueue<Cfg> CfgQ; | 425 BoundedProducerConsumerQueue<Cfg> OptQ; |
| 426 BoundedProducerConsumerQueue<EmitterWorkItem> EmitQ; |
394 | 427 |
395 LockedPtr<ArenaAllocator<>> getAllocator() { | 428 LockedPtr<ArenaAllocator<>> getAllocator() { |
396 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); | 429 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); |
397 } | 430 } |
398 LockedPtr<ConstantPool> getConstPool() { | 431 LockedPtr<ConstantPool> getConstPool() { |
399 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); | 432 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); |
400 } | 433 } |
401 LockedPtr<CodeStats> getStatsCumulative() { | 434 LockedPtr<CodeStats> getStatsCumulative() { |
402 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); | 435 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); |
403 } | 436 } |
404 LockedPtr<TimerList> getTimers() { | 437 LockedPtr<TimerList> getTimers() { |
405 return LockedPtr<TimerList>(&Timers, &TimerLock); | 438 return LockedPtr<TimerList>(&Timers, &TimerLock); |
406 } | 439 } |
407 | 440 |
408 std::vector<ThreadContext *> AllThreadContexts; | 441 llvm::SmallVector<ThreadContext *, 128> AllThreadContexts; |
409 std::vector<std::thread> TranslationThreads; | 442 llvm::SmallVector<std::thread, 128> TranslationThreads; |
| 443 llvm::SmallVector<std::thread, 128> EmitterThreads; |
410 // Each thread has its own TLS pointer which is also held in | 444 // Each thread has its own TLS pointer which is also held in |
411 // AllThreadContexts. | 445 // AllThreadContexts. |
412 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); | 446 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); |
413 | 447 |
414 // Private helpers for mangleName() | 448 // Private helpers for mangleName() |
415 typedef llvm::SmallVector<char, 32> ManglerVector; | 449 typedef llvm::SmallVector<char, 32> ManglerVector; |
416 void incrementSubstitutions(ManglerVector &OldName) const; | 450 void incrementSubstitutions(ManglerVector &OldName) const; |
417 | 451 |
418 public: | 452 public: |
419 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } | 453 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
467 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } | 501 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } |
468 ~OstreamLocker() { Ctx->unlockStr(); } | 502 ~OstreamLocker() { Ctx->unlockStr(); } |
469 | 503 |
470 private: | 504 private: |
471 GlobalContext *const Ctx; | 505 GlobalContext *const Ctx; |
472 }; | 506 }; |
473 | 507 |
474 } // end of namespace Ice | 508 } // end of namespace Ice |
475 | 509 |
476 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H | 510 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H |
OLD | NEW |