Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// | 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file declares aspects of the compilation that persist across | 10 // This file declares aspects of the compilation that persist across |
| (...skipping 13 matching lines...) | |
| 24 #include "IceIntrinsics.h" | 24 #include "IceIntrinsics.h" |
| 25 #include "IceRNG.h" | 25 #include "IceRNG.h" |
| 26 #include "IceTimerTree.h" | 26 #include "IceTimerTree.h" |
| 27 #include "IceTypes.h" | 27 #include "IceTypes.h" |
| 28 #include "IceUtils.h" | 28 #include "IceUtils.h" |
| 29 | 29 |
| 30 namespace Ice { | 30 namespace Ice { |
| 31 | 31 |
| 32 class ClFlags; | 32 class ClFlags; |
| 33 class ConstantPool; | 33 class ConstantPool; |
| 34 class EmitterWorkItem; | |
| 34 class FuncSigType; | 35 class FuncSigType; |
| 35 | 36 |
| 36 // LockedPtr is a way to provide automatically locked access to some object. | 37 // LockedPtr is a way to provide automatically locked access to some object. |
| 37 template <typename T> class LockedPtr { | 38 template <typename T> class LockedPtr { |
| 38 LockedPtr() = delete; | 39 LockedPtr() = delete; |
| 39 LockedPtr(const LockedPtr &) = delete; | 40 LockedPtr(const LockedPtr &) = delete; |
| 40 LockedPtr &operator=(const LockedPtr &) = delete; | 41 LockedPtr &operator=(const LockedPtr &) = delete; |
| 41 | 42 |
| 42 public: | 43 public: |
| 43 LockedPtr(T *Value, GlobalLockType *Lock) : Value(Value), Lock(Lock) { | 44 LockedPtr(T *Value, GlobalLockType *Lock) : Value(Value), Lock(Lock) { |
| (...skipping 225 matching lines...) | |
| 269 void dumpTimers(TimerStackIdT StackID = TSK_Default, | 270 void dumpTimers(TimerStackIdT StackID = TSK_Default, |
| 270 bool DumpCumulative = true); | 271 bool DumpCumulative = true); |
| 271 // The following methods affect only the calling thread's TLS timer | 272 // The following methods affect only the calling thread's TLS timer |
| 272 // data. | 273 // data. |
| 273 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); | 274 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); |
| 274 void pushTimer(TimerIdT ID, TimerStackIdT StackID); | 275 void pushTimer(TimerIdT ID, TimerStackIdT StackID); |
| 275 void popTimer(TimerIdT ID, TimerStackIdT StackID); | 276 void popTimer(TimerIdT ID, TimerStackIdT StackID); |
| 276 void resetTimer(TimerStackIdT StackID); | 277 void resetTimer(TimerStackIdT StackID); |
| 277 void setTimerName(TimerStackIdT StackID, const IceString &NewName); | 278 void setTimerName(TimerStackIdT StackID, const IceString &NewName); |
| 278 | 279 |
| 280 // This is the first work item sequence number that the parser | |
| 281 // produces, and correspondingly the first sequence number that the | |
| 282 // emitter thread will wait for. Start numbering at 1 to leave room | |
| 283 // for a sentinel, in case e.g. we wish to inject items with a | |
| 284 // special sequence number that may be executed out of order. | |
| 285 static uint32_t getFirstSequenceNumber() { return 1; } | |
| 279 // Adds a newly parsed and constructed function to the Cfg work | 286 // Adds a newly parsed and constructed function to the Cfg work |
| 280 // queue. Notifies any idle workers that a new function is | 287 // queue. Notifies any idle workers that a new function is |
| 281 // available for translating. May block if the work queue is too | 288 // available for translating. May block if the work queue is too |
| 282 // large, in order to control memory footprint. | 289 // large, in order to control memory footprint. |
| 283 void cfgQueueBlockingPush(std::unique_ptr<Cfg> Func); | 290 void cfgQueueBlockingPush(std::unique_ptr<Cfg> Func); |
| 284 // Takes a Cfg from the work queue for translating. May block if | 291 // Takes a Cfg from the work queue for translating. May block if |
| 285 // the work queue is currently empty. Returns nullptr if there is | 292 // the work queue is currently empty. Returns nullptr if there is |
| 286 // no more work - the queue is empty and either end() has been | 293 // no more work - the queue is empty and either end() has been |
| 287 // called or the Sequential flag was set. | 294 // called or the Sequential flag was set. |
| 288 std::unique_ptr<Cfg> cfgQueueBlockingPop(); | 295 std::unique_ptr<Cfg> cfgQueueBlockingPop(); |
| 289 // Notifies that no more work will be added to the work queue. | 296 // Notifies that no more work will be added to the work queue. |
| 290 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } | 297 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } |
| 291 | 298 |
| 299 void emitQueueBlockingPush(EmitterWorkItem *Item); | |
| 300 EmitterWorkItem *emitQueueBlockingPop(); | |
| 301 void emitQueueNotifyEnd() { EmitQ.notifyEnd(); } | |
| 302 | |
| 292 void startWorkerThreads() { | 303 void startWorkerThreads() { |
| 293 size_t NumWorkers = getFlags().NumTranslationThreads; | 304 size_t NumWorkers = getFlags().NumTranslationThreads; |
| 294 auto Timers = getTimers(); | 305 auto Timers = getTimers(); |
| 295 for (size_t i = 0; i < NumWorkers; ++i) { | 306 for (size_t i = 0; i < NumWorkers; ++i) { |
| 296 ThreadContext *WorkerTLS = new ThreadContext(); | 307 ThreadContext *WorkerTLS = new ThreadContext(); |
| 297 Timers->initInto(WorkerTLS->Timers); | 308 Timers->initInto(WorkerTLS->Timers); |
| 298 AllThreadContexts.push_back(WorkerTLS); | 309 AllThreadContexts.push_back(WorkerTLS); |
| 299 TranslationThreads.push_back(std::thread( | 310 TranslationThreads.push_back(std::thread( |
| 300 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); | 311 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); |
| 301 } | 312 } |
| 302 if (NumWorkers) { | 313 if (NumWorkers) { |
| 303 // TODO(stichnot): start a new thread for the emitter queue worker. | 314 ThreadContext *WorkerTLS = new ThreadContext(); |
| 315 Timers->initInto(WorkerTLS->Timers); | |
| 316 AllThreadContexts.push_back(WorkerTLS); | |
| 317 EmitterThreads.push_back( | |
| 318 std::thread(&GlobalContext::emitterWrapper, this, WorkerTLS)); | |
| 304 } | 319 } |
| 305 } | 320 } |
| 306 | 321 |
| 307 void waitForWorkerThreads() { | 322 void waitForWorkerThreads() { |
| 308 cfgQueueNotifyEnd(); | 323 cfgQueueNotifyEnd(); |
| 309 // TODO(stichnot): call end() on the emitter work queue. | |
| 310 for (std::thread &Worker : TranslationThreads) { | 324 for (std::thread &Worker : TranslationThreads) { |
| 311 Worker.join(); | 325 Worker.join(); |
| 312 } | 326 } |
| 313 TranslationThreads.clear(); | 327 TranslationThreads.clear(); |
| 314 // TODO(stichnot): join the emitter thread. | 328 |
| 329 // Only notify the emit queue to end after all the translation | |
| 330 // threads have ended. | |
| 331 emitQueueNotifyEnd(); | |
| 332 for (std::thread &Worker : EmitterThreads) { | |
| 333 Worker.join(); | |
| 334 } | |
| 335 EmitterThreads.clear(); | |
| 336 | |
| 315 if (ALLOW_DUMP) { | 337 if (ALLOW_DUMP) { |
| 316 auto Timers = getTimers(); | 338 auto Timers = getTimers(); |
| 317 for (ThreadContext *TLS : AllThreadContexts) | 339 for (ThreadContext *TLS : AllThreadContexts) |
| 318 Timers->mergeFrom(TLS->Timers); | 340 Timers->mergeFrom(TLS->Timers); |
| 319 } | 341 } |
| 320 if (ALLOW_DUMP) { | 342 if (ALLOW_DUMP) { |
| 321 // Do a separate loop over AllThreadContexts to avoid holding | 343 // Do a separate loop over AllThreadContexts to avoid holding |
| 322 // two locks at once. | 344 // two locks at once. |
| 323 auto Stats = getStatsCumulative(); | 345 auto Stats = getStatsCumulative(); |
| 324 for (ThreadContext *TLS : AllThreadContexts) | 346 for (ThreadContext *TLS : AllThreadContexts) |
| 325 Stats->add(TLS->StatsCumulative); | 347 Stats->add(TLS->StatsCumulative); |
| 326 } | 348 } |
| 327 } | 349 } |
| 328 | 350 |
| 329 // Translation thread startup routine. | 351 // Translation thread startup routine. |
| 330 void translateFunctionsWrapper(ThreadContext *MyTLS) { | 352 void translateFunctionsWrapper(ThreadContext *MyTLS) { |
| 331 ICE_TLS_SET_FIELD(TLS, MyTLS); | 353 ICE_TLS_SET_FIELD(TLS, MyTLS); |
| 332 translateFunctions(); | 354 translateFunctions(); |
| 333 } | 355 } |
| 334 // Translate functions from the Cfg queue until the queue is empty. | 356 // Translate functions from the Cfg queue until the queue is empty. |
| 335 void translateFunctions(); | 357 void translateFunctions(); |
| 336 | 358 |
| 359 // Emitter thread startup routine. | |
| 360 void emitterWrapper(ThreadContext *MyTLS) { | |
| 361 ICE_TLS_SET_FIELD(TLS, MyTLS); | |
| 362 emitItems(); | |
| 363 } | |
| 364 // Emit functions and global initializers from the emitter queue | |
| 365 // until the queue is empty. | |
| 366 void emitItems(); | |
| 367 | |
| 337 // Utility function to match a symbol name against a match string. | 368 // Utility function to match a symbol name against a match string. |
| 338 // This is used in a few cases where we want to take some action on | 369 // This is used in a few cases where we want to take some action on |
| 339 // a particular function or symbol based on a command-line argument, | 370 // a particular function or symbol based on a command-line argument, |
| 340 // such as changing the verbose level for a particular function. An | 371 // such as changing the verbose level for a particular function. An |
| 341 // empty Match argument means match everything. Returns true if | 372 // empty Match argument means match everything. Returns true if |
| 342 // there is a match. | 373 // there is a match. |
| 343 static bool matchSymbolName(const IceString &SymbolName, | 374 static bool matchSymbolName(const IceString &SymbolName, |
| 344 const IceString &Match) { | 375 const IceString &Match) { |
| 345 return Match.empty() || Match == SymbolName; | 376 return Match.empty() || Match == SymbolName; |
| 346 } | 377 } |
| (...skipping 36 matching lines...) | |
| 383 ICE_CACHELINE_BOUNDARY; | 414 ICE_CACHELINE_BOUNDARY; |
| 384 | 415 |
| 385 const VerboseMask VMask; | 416 const VerboseMask VMask; |
| 386 Intrinsics IntrinsicsInfo; | 417 Intrinsics IntrinsicsInfo; |
| 387 const TargetArch Arch; | 418 const TargetArch Arch; |
| 388 const OptLevel Opt; | 419 const OptLevel Opt; |
| 389 const IceString TestPrefix; | 420 const IceString TestPrefix; |
| 390 const ClFlags &Flags; | 421 const ClFlags &Flags; |
| 391 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. | 422 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. |
| 392 std::unique_ptr<ELFObjectWriter> ObjectWriter; | 423 std::unique_ptr<ELFObjectWriter> ObjectWriter; |
| 393 BoundedProducerConsumerQueue<Cfg> CfgQ; | 424 BoundedProducerConsumerQueue<Cfg> CfgQ; |
|
JF
2015/02/08 00:29:47
Maybe this should now be the OptimizationQ?
Jim Stichnoth
2015/02/08 17:11:23
Done.
| |
| 425 BoundedProducerConsumerQueue<EmitterWorkItem> EmitQ; | |
| 394 | 426 |
| 395 LockedPtr<ArenaAllocator<>> getAllocator() { | 427 LockedPtr<ArenaAllocator<>> getAllocator() { |
| 396 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); | 428 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); |
| 397 } | 429 } |
| 398 LockedPtr<ConstantPool> getConstPool() { | 430 LockedPtr<ConstantPool> getConstPool() { |
| 399 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); | 431 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); |
| 400 } | 432 } |
| 401 LockedPtr<CodeStats> getStatsCumulative() { | 433 LockedPtr<CodeStats> getStatsCumulative() { |
| 402 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); | 434 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); |
| 403 } | 435 } |
| 404 LockedPtr<TimerList> getTimers() { | 436 LockedPtr<TimerList> getTimers() { |
| 405 return LockedPtr<TimerList>(&Timers, &TimerLock); | 437 return LockedPtr<TimerList>(&Timers, &TimerLock); |
| 406 } | 438 } |
| 407 | 439 |
| 408 std::vector<ThreadContext *> AllThreadContexts; | 440 std::vector<ThreadContext *> AllThreadContexts; |
| 409 std::vector<std::thread> TranslationThreads; | 441 std::vector<std::thread> TranslationThreads; |
| 442 std::vector<std::thread> EmitterThreads; | |
|
JF
2015/02/08 00:29:47
These could just be SmallVector or even std::array...
Jim Stichnoth
2015/02/08 17:11:23
16 threads? I've forgotten how to count that low.
| |
| 410 // Each thread has its own TLS pointer which is also held in | 443 // Each thread has its own TLS pointer which is also held in |
| 411 // AllThreadContexts. | 444 // AllThreadContexts. |
| 412 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); | 445 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); |
| 413 | 446 |
| 414 // Private helpers for mangleName() | 447 // Private helpers for mangleName() |
| 415 typedef llvm::SmallVector<char, 32> ManglerVector; | 448 typedef llvm::SmallVector<char, 32> ManglerVector; |
| 416 void incrementSubstitutions(ManglerVector &OldName) const; | 449 void incrementSubstitutions(ManglerVector &OldName) const; |
| 417 | 450 |
| 418 public: | 451 public: |
| 419 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } | 452 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } |
| (...skipping 44 matching lines...) | |
| 464 OstreamLocker &operator=(const OstreamLocker &) = delete; | 497 OstreamLocker &operator=(const OstreamLocker &) = delete; |
| 465 | 498 |
| 466 public: | 499 public: |
| 467 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } | 500 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } |
| 468 ~OstreamLocker() { Ctx->unlockStr(); } | 501 ~OstreamLocker() { Ctx->unlockStr(); } |
| 469 | 502 |
| 470 private: | 503 private: |
| 471 GlobalContext *const Ctx; | 504 GlobalContext *const Ctx; |
| 472 }; | 505 }; |
| 473 | 506 |
| 507 class EmitterWorkItem { | |
|
JF
2015/02/08 00:29:47
This is getting to be a pretty big file, it's prob...
Jim Stichnoth
2015/02/08 17:11:23
Good idea, moved this into the new IceThreading.h, ...
| |
| 508 EmitterWorkItem(const EmitterWorkItem &) = delete; | |
| 509 EmitterWorkItem &operator=(const EmitterWorkItem &) = delete; | |
|
JF
2015/02/08 00:29:47
EmitterWorkItem() = delete;
Jim Stichnoth
2015/02/08 17:11:23
Done.
| |
| 510 | |
| 511 public: | |
| 512 enum ItemKind { | |
| 513 WI_Nop, // Placeholder to maintain sequence numbers in case there | |
| 514 // is a translation error. | |
| 515 WI_GlobalInits, // A list of global initializers. | |
| 516 WI_Asm, // An already-assembled function that needs to be emitted, | |
| 517 // either as low-level asm text or as an ELF binary. | |
| 518 WI_Cfg // A Cfg that needs to be emitted as "readable" assembly. | |
|
JF
2015/02/08 00:29:47
I'm not sure I get the difference between asm and c...
Jim Stichnoth
2015/02/08 17:11:23
Added more comments that hopefully clarify.
JF
2015/02/08 21:15:04
Yeah, though I'm wary of having a debugging featur...
Jim Stichnoth
2015/02/10 07:51:46
I added a report_fatal_error() call to GlobalConte...
| |
| 519 }; | |
| 520 // Constructor for a Nop work item. | |
| 521 explicit EmitterWorkItem(uint32_t Seq) | |
| 522 : Sequence(Seq), Kind(WI_Nop), GlobalInits(nullptr), Function(nullptr), | |
| 523 RawFunc(nullptr) {} | |
| 524 // Constructor for a GlobalInits work item. | |
| 525 EmitterWorkItem(uint32_t Seq, VariableDeclarationList *D) | |
| 526 : Sequence(Seq), Kind(WI_GlobalInits), GlobalInits(D), Function(nullptr), | |
| 527 RawFunc(nullptr) {} | |
| 528 // Constructor for an Asm work item. | |
| 529 EmitterWorkItem(uint32_t Seq, Assembler *A) | |
| 530 : Sequence(Seq), Kind(WI_Asm), GlobalInits(nullptr), Function(A), | |
| 531 RawFunc(nullptr) {} | |
| 532 // Constructor for a Cfg work item. | |
| 533 EmitterWorkItem(uint32_t Seq, Cfg *F) | |
| 534 : Sequence(Seq), Kind(WI_Cfg), GlobalInits(nullptr), Function(nullptr), | |
| 535 RawFunc(F) {} | |
| 536 uint32_t getSequenceNumber() const { return Sequence; } | |
| 537 ItemKind getKind() const { return Kind; } | |
| 538 VariableDeclarationList *getGlobalInits() const { | |
| 539 assert(getKind() == WI_GlobalInits); | |
| 540 return GlobalInits; | |
| 541 } | |
| 542 Assembler *getAsm() const { | |
| 543 assert(getKind() == WI_Asm); | |
| 544 return Function; | |
| 545 } | |
| 546 Cfg *getCfg() const { | |
| 547 assert(getKind() == WI_Cfg); | |
| 548 return RawFunc; | |
| 549 } | |
| 550 ~EmitterWorkItem(); | |
|
JF
2015/02/08 00:29:47
Define inline, since this shouldn't do anything.
Jim Stichnoth
2015/02/08 17:11:23
Tried that originally, but it's getting into icky ...
JF
2015/02/08 21:15:04
Oh yeah, include order would do that, and unique_p...
| |
| 551 | |
| 552 private: | |
| 553 const uint32_t Sequence; | |
| 554 const ItemKind Kind; | |
| 555 VariableDeclarationList *const GlobalInits; | |
| 556 Assembler *const Function; | |
| 557 Cfg *const RawFunc; | |
|
JF
2015/02/08 00:29:47
3 x unique_ptr?
Jim Stichnoth
2015/02/10 07:51:46
Yeah, I think so, after Karl's CL lands...
| |
| 558 }; | |
| 559 | |
| 474 } // end of namespace Ice | 560 } // end of namespace Ice |
| 475 | 561 |
| 476 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H | 562 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H |
| OLD | NEW |