OLD | NEW |
---|---|
1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// | 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file declares aspects of the compilation that persist across | 10 // This file declares aspects of the compilation that persist across |
(...skipping 13 matching lines...) Expand all Loading... | |
24 #include "IceIntrinsics.h" | 24 #include "IceIntrinsics.h" |
25 #include "IceRNG.h" | 25 #include "IceRNG.h" |
26 #include "IceTimerTree.h" | 26 #include "IceTimerTree.h" |
27 #include "IceTypes.h" | 27 #include "IceTypes.h" |
28 #include "IceUtils.h" | 28 #include "IceUtils.h" |
29 | 29 |
30 namespace Ice { | 30 namespace Ice { |
31 | 31 |
32 class ClFlags; | 32 class ClFlags; |
33 class ConstantPool; | 33 class ConstantPool; |
34 class EmitterWorkItem; | |
34 class FuncSigType; | 35 class FuncSigType; |
35 | 36 |
36 // LockedPtr is a way to provide automatically locked access to some object. | 37 // LockedPtr is a way to provide automatically locked access to some object. |
37 template <typename T> class LockedPtr { | 38 template <typename T> class LockedPtr { |
38 LockedPtr() = delete; | 39 LockedPtr() = delete; |
39 LockedPtr(const LockedPtr &) = delete; | 40 LockedPtr(const LockedPtr &) = delete; |
40 LockedPtr &operator=(const LockedPtr &) = delete; | 41 LockedPtr &operator=(const LockedPtr &) = delete; |
41 | 42 |
42 public: | 43 public: |
43 LockedPtr(T *Value, GlobalLockType *Lock) : Value(Value), Lock(Lock) { | 44 LockedPtr(T *Value, GlobalLockType *Lock) : Value(Value), Lock(Lock) { |
(...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
269 void dumpTimers(TimerStackIdT StackID = TSK_Default, | 270 void dumpTimers(TimerStackIdT StackID = TSK_Default, |
270 bool DumpCumulative = true); | 271 bool DumpCumulative = true); |
271 // The following methods affect only the calling thread's TLS timer | 272 // The following methods affect only the calling thread's TLS timer |
272 // data. | 273 // data. |
273 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); | 274 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); |
274 void pushTimer(TimerIdT ID, TimerStackIdT StackID); | 275 void pushTimer(TimerIdT ID, TimerStackIdT StackID); |
275 void popTimer(TimerIdT ID, TimerStackIdT StackID); | 276 void popTimer(TimerIdT ID, TimerStackIdT StackID); |
276 void resetTimer(TimerStackIdT StackID); | 277 void resetTimer(TimerStackIdT StackID); |
277 void setTimerName(TimerStackIdT StackID, const IceString &NewName); | 278 void setTimerName(TimerStackIdT StackID, const IceString &NewName); |
278 | 279 |
280 // This is the first work item sequence number that the parser | |
281 // produces, and correspondingly the first sequence number that the | |
282 // emitter thread will wait for. Start numbering at 1 to leave room | |
283 // for a sentinel, in case e.g. we wish to inject items with a | |
284 // special sequence number that may be executed out of order. | |
285 static uint32_t getFirstSequenceNumber() { return 1; } | |
279 // Adds a newly parsed and constructed function to the Cfg work | 286 // Adds a newly parsed and constructed function to the Cfg work |
280 // queue. Notifies any idle workers that a new function is | 287 // queue. Notifies any idle workers that a new function is |
281 // available for translating. May block if the work queue is too | 288 // available for translating. May block if the work queue is too |
282 // large, in order to control memory footprint. | 289 // large, in order to control memory footprint. |
283 void cfgQueueBlockingPush(std::unique_ptr<Cfg> Func); | 290 void cfgQueueBlockingPush(std::unique_ptr<Cfg> Func); |
284 // Takes a Cfg from the work queue for translating. May block if | 291 // Takes a Cfg from the work queue for translating. May block if |
285 // the work queue is currently empty. Returns nullptr if there is | 292 // the work queue is currently empty. Returns nullptr if there is |
286 // no more work - the queue is empty and either end() has been | 293 // no more work - the queue is empty and either end() has been |
287 // called or the Sequential flag was set. | 294 // called or the Sequential flag was set. |
288 std::unique_ptr<Cfg> cfgQueueBlockingPop(); | 295 std::unique_ptr<Cfg> cfgQueueBlockingPop(); |
289 // Notifies that no more work will be added to the work queue. | 296 // Notifies that no more work will be added to the work queue. |
290 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } | 297 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } |
291 | 298 |
299 void emitQueueBlockingPush(EmitterWorkItem *Item); | |
300 EmitterWorkItem *emitQueueBlockingPop(); | |
301 void emitQueueNotifyEnd() { EmitQ.notifyEnd(); } | |
302 | |
292 void startWorkerThreads() { | 303 void startWorkerThreads() { |
293 size_t NumWorkers = getFlags().NumTranslationThreads; | 304 size_t NumWorkers = getFlags().NumTranslationThreads; |
294 auto Timers = getTimers(); | 305 auto Timers = getTimers(); |
295 for (size_t i = 0; i < NumWorkers; ++i) { | 306 for (size_t i = 0; i < NumWorkers; ++i) { |
296 ThreadContext *WorkerTLS = new ThreadContext(); | 307 ThreadContext *WorkerTLS = new ThreadContext(); |
297 Timers->initInto(WorkerTLS->Timers); | 308 Timers->initInto(WorkerTLS->Timers); |
298 AllThreadContexts.push_back(WorkerTLS); | 309 AllThreadContexts.push_back(WorkerTLS); |
299 TranslationThreads.push_back(std::thread( | 310 TranslationThreads.push_back(std::thread( |
300 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); | 311 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); |
301 } | 312 } |
302 if (NumWorkers) { | 313 if (NumWorkers) { |
303 // TODO(stichnot): start a new thread for the emitter queue worker. | 314 ThreadContext *WorkerTLS = new ThreadContext(); |
315 Timers->initInto(WorkerTLS->Timers); | |
316 AllThreadContexts.push_back(WorkerTLS); | |
317 EmitterThreads.push_back( | |
318 std::thread(&GlobalContext::emitterWrapper, this, WorkerTLS)); | |
304 } | 319 } |
305 } | 320 } |
306 | 321 |
307 void waitForWorkerThreads() { | 322 void waitForWorkerThreads() { |
308 cfgQueueNotifyEnd(); | 323 cfgQueueNotifyEnd(); |
309 // TODO(stichnot): call end() on the emitter work queue. | |
310 for (std::thread &Worker : TranslationThreads) { | 324 for (std::thread &Worker : TranslationThreads) { |
311 Worker.join(); | 325 Worker.join(); |
312 } | 326 } |
313 TranslationThreads.clear(); | 327 TranslationThreads.clear(); |
314 // TODO(stichnot): join the emitter thread. | 328 |
329 // Only notify the emit queue to end after all the translation | |
330 // threads have ended. | |
331 emitQueueNotifyEnd(); | |
332 for (std::thread &Worker : EmitterThreads) { | |
333 Worker.join(); | |
334 } | |
335 EmitterThreads.clear(); | |
336 | |
315 if (ALLOW_DUMP) { | 337 if (ALLOW_DUMP) { |
316 auto Timers = getTimers(); | 338 auto Timers = getTimers(); |
317 for (ThreadContext *TLS : AllThreadContexts) | 339 for (ThreadContext *TLS : AllThreadContexts) |
318 Timers->mergeFrom(TLS->Timers); | 340 Timers->mergeFrom(TLS->Timers); |
319 } | 341 } |
320 if (ALLOW_DUMP) { | 342 if (ALLOW_DUMP) { |
321 // Do a separate loop over AllThreadContexts to avoid holding | 343 // Do a separate loop over AllThreadContexts to avoid holding |
322 // two locks at once. | 344 // two locks at once. |
323 auto Stats = getStatsCumulative(); | 345 auto Stats = getStatsCumulative(); |
324 for (ThreadContext *TLS : AllThreadContexts) | 346 for (ThreadContext *TLS : AllThreadContexts) |
325 Stats->add(TLS->StatsCumulative); | 347 Stats->add(TLS->StatsCumulative); |
326 } | 348 } |
327 } | 349 } |
328 | 350 |
329 // Translation thread startup routine. | 351 // Translation thread startup routine. |
330 void translateFunctionsWrapper(ThreadContext *MyTLS) { | 352 void translateFunctionsWrapper(ThreadContext *MyTLS) { |
331 ICE_TLS_SET_FIELD(TLS, MyTLS); | 353 ICE_TLS_SET_FIELD(TLS, MyTLS); |
332 translateFunctions(); | 354 translateFunctions(); |
333 } | 355 } |
334 // Translate functions from the Cfg queue until the queue is empty. | 356 // Translate functions from the Cfg queue until the queue is empty. |
335 void translateFunctions(); | 357 void translateFunctions(); |
336 | 358 |
359 // Emitter thread startup routine. | |
360 void emitterWrapper(ThreadContext *MyTLS) { | |
361 ICE_TLS_SET_FIELD(TLS, MyTLS); | |
362 emitItems(); | |
363 } | |
364 // Emit functions and global initializers from the emitter queue | |
365 // until the queue is empty. | |
366 void emitItems(); | |
367 | |
337 // Utility function to match a symbol name against a match string. | 368 // Utility function to match a symbol name against a match string. |
338 // This is used in a few cases where we want to take some action on | 369 // This is used in a few cases where we want to take some action on |
339 // a particular function or symbol based on a command-line argument, | 370 // a particular function or symbol based on a command-line argument, |
340 // such as changing the verbose level for a particular function. An | 371 // such as changing the verbose level for a particular function. An |
341 // empty Match argument means match everything. Returns true if | 372 // empty Match argument means match everything. Returns true if |
342 // there is a match. | 373 // there is a match. |
343 static bool matchSymbolName(const IceString &SymbolName, | 374 static bool matchSymbolName(const IceString &SymbolName, |
344 const IceString &Match) { | 375 const IceString &Match) { |
345 return Match.empty() || Match == SymbolName; | 376 return Match.empty() || Match == SymbolName; |
346 } | 377 } |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
383 ICE_CACHELINE_BOUNDARY; | 414 ICE_CACHELINE_BOUNDARY; |
384 | 415 |
385 const VerboseMask VMask; | 416 const VerboseMask VMask; |
386 Intrinsics IntrinsicsInfo; | 417 Intrinsics IntrinsicsInfo; |
387 const TargetArch Arch; | 418 const TargetArch Arch; |
388 const OptLevel Opt; | 419 const OptLevel Opt; |
389 const IceString TestPrefix; | 420 const IceString TestPrefix; |
390 const ClFlags &Flags; | 421 const ClFlags &Flags; |
391 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. | 422 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. |
392 std::unique_ptr<ELFObjectWriter> ObjectWriter; | 423 std::unique_ptr<ELFObjectWriter> ObjectWriter; |
393 BoundedProducerConsumerQueue<Cfg> CfgQ; | 424 BoundedProducerConsumerQueue<Cfg> CfgQ; |
JF
2015/02/08 00:29:47
Maybe this should now be the OptimizationQ?
Jim Stichnoth
2015/02/08 17:11:23
Done.
| |
425 BoundedProducerConsumerQueue<EmitterWorkItem> EmitQ; | |
394 | 426 |
395 LockedPtr<ArenaAllocator<>> getAllocator() { | 427 LockedPtr<ArenaAllocator<>> getAllocator() { |
396 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); | 428 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); |
397 } | 429 } |
398 LockedPtr<ConstantPool> getConstPool() { | 430 LockedPtr<ConstantPool> getConstPool() { |
399 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); | 431 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); |
400 } | 432 } |
401 LockedPtr<CodeStats> getStatsCumulative() { | 433 LockedPtr<CodeStats> getStatsCumulative() { |
402 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); | 434 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); |
403 } | 435 } |
404 LockedPtr<TimerList> getTimers() { | 436 LockedPtr<TimerList> getTimers() { |
405 return LockedPtr<TimerList>(&Timers, &TimerLock); | 437 return LockedPtr<TimerList>(&Timers, &TimerLock); |
406 } | 438 } |
407 | 439 |
408 std::vector<ThreadContext *> AllThreadContexts; | 440 std::vector<ThreadContext *> AllThreadContexts; |
409 std::vector<std::thread> TranslationThreads; | 441 std::vector<std::thread> TranslationThreads; |
442 std::vector<std::thread> EmitterThreads; | |
JF
2015/02/08 00:29:47
These could just be SmallVector or even std::array
Jim Stichnoth
2015/02/08 17:11:23
16 threads? I've forgotten how to count that low.
| |
410 // Each thread has its own TLS pointer which is also held in | 443 // Each thread has its own TLS pointer which is also held in |
411 // AllThreadContexts. | 444 // AllThreadContexts. |
412 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); | 445 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); |
413 | 446 |
414 // Private helpers for mangleName() | 447 // Private helpers for mangleName() |
415 typedef llvm::SmallVector<char, 32> ManglerVector; | 448 typedef llvm::SmallVector<char, 32> ManglerVector; |
416 void incrementSubstitutions(ManglerVector &OldName) const; | 449 void incrementSubstitutions(ManglerVector &OldName) const; |
417 | 450 |
418 public: | 451 public: |
419 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } | 452 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
464 OstreamLocker &operator=(const OstreamLocker &) = delete; | 497 OstreamLocker &operator=(const OstreamLocker &) = delete; |
465 | 498 |
466 public: | 499 public: |
467 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } | 500 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } |
468 ~OstreamLocker() { Ctx->unlockStr(); } | 501 ~OstreamLocker() { Ctx->unlockStr(); } |
469 | 502 |
470 private: | 503 private: |
471 GlobalContext *const Ctx; | 504 GlobalContext *const Ctx; |
472 }; | 505 }; |
473 | 506 |
507 class EmitterWorkItem { | |
JF
2015/02/08 00:29:47
This is getting to be a pretty big file, it's prob
Jim Stichnoth
2015/02/08 17:11:23
Good idea, moved this into the new IceThreading.h,
| |
508 EmitterWorkItem(const EmitterWorkItem &) = delete; | |
509 EmitterWorkItem &operator=(const EmitterWorkItem &) = delete; | |
JF
2015/02/08 00:29:47
EmitterWorkItem() = delete;
Jim Stichnoth
2015/02/08 17:11:23
Done.
| |
510 | |
511 public: | |
512 enum ItemKind { | |
513 WI_Nop, // Placeholder to maintain sequence numbers in case there | |
514 // is a translation error. | |
515 WI_GlobalInits, // A list of global initializers. | |
516 WI_Asm, // An already-assembled function that needs to be emitted, | |
517 // either as low-level asm text or as an ELF binary. | |
518 WI_Cfg // A Cfg that needs to be emitted as "readable" assembly. | |
JF
2015/02/08 00:29:47
I'm not sure I get the difference between asm and c
Jim Stichnoth
2015/02/08 17:11:23
Added more comments that hopefully clarify.
JF
2015/02/08 21:15:04
Yeah, though I'm wary of having a debugging featur
Jim Stichnoth
2015/02/10 07:51:46
I added a report_fatal_error() call to GlobalConte
| |
519 }; | |
520 // Constructor for a Nop work item. | |
521 explicit EmitterWorkItem(uint32_t Seq) | |
522 : Sequence(Seq), Kind(WI_Nop), GlobalInits(nullptr), Function(nullptr), | |
523 RawFunc(nullptr) {} | |
524 // Constructor for a GlobalInits work item. | |
525 EmitterWorkItem(uint32_t Seq, VariableDeclarationList *D) | |
526 : Sequence(Seq), Kind(WI_GlobalInits), GlobalInits(D), Function(nullptr), | |
527 RawFunc(nullptr) {} | |
528 // Constructor for an Asm work item. | |
529 EmitterWorkItem(uint32_t Seq, Assembler *A) | |
530 : Sequence(Seq), Kind(WI_Asm), GlobalInits(nullptr), Function(A), | |
531 RawFunc(nullptr) {} | |
532 // Constructor for a Cfg work item. | |
533 EmitterWorkItem(uint32_t Seq, Cfg *F) | |
534 : Sequence(Seq), Kind(WI_Cfg), GlobalInits(nullptr), Function(nullptr), | |
535 RawFunc(F) {} | |
536 uint32_t getSequenceNumber() const { return Sequence; } | |
537 ItemKind getKind() const { return Kind; } | |
538 VariableDeclarationList *getGlobalInits() const { | |
539 assert(getKind() == WI_GlobalInits); | |
540 return GlobalInits; | |
541 } | |
542 Assembler *getAsm() const { | |
543 assert(getKind() == WI_Asm); | |
544 return Function; | |
545 } | |
546 Cfg *getCfg() const { | |
547 assert(getKind() == WI_Cfg); | |
548 return RawFunc; | |
549 } | |
550 ~EmitterWorkItem(); | |
JF
2015/02/08 00:29:47
Define inline, since this shouldn't do anything.
Jim Stichnoth
2015/02/08 17:11:23
Tried that originally, but it's getting into icky
JF
2015/02/08 21:15:04
Oh yeah, include order would do that, and unique_p
| |
551 | |
552 private: | |
553 const uint32_t Sequence; | |
554 const ItemKind Kind; | |
555 VariableDeclarationList *const GlobalInits; | |
556 Assembler *const Function; | |
557 Cfg *const RawFunc; | |
JF
2015/02/08 00:29:47
3 x unique_ptr?
Jim Stichnoth
2015/02/10 07:51:46
Yeah, I think so, after Karl's CL lands...
| |
558 }; | |
559 | |
474 } // end of namespace Ice | 560 } // end of namespace Ice |
475 | 561 |
476 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H | 562 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H |
OLD | NEW |