Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(267)

Side by Side Diff: src/IceGlobalContext.h

Issue 876083007: Subzero: Emit functions and global initializers in a separate thread. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Add comments Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD NEW
1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file declares aspects of the compilation that persist across 10 // This file declares aspects of the compilation that persist across
(...skipping 13 matching lines...) Expand all
24 #include "IceIntrinsics.h" 24 #include "IceIntrinsics.h"
25 #include "IceRNG.h" 25 #include "IceRNG.h"
26 #include "IceTimerTree.h" 26 #include "IceTimerTree.h"
27 #include "IceTypes.h" 27 #include "IceTypes.h"
28 #include "IceUtils.h" 28 #include "IceUtils.h"
29 29
30 namespace Ice { 30 namespace Ice {
31 31
32 class ClFlags; 32 class ClFlags;
33 class ConstantPool; 33 class ConstantPool;
34 class EmitterWorkItem;
34 class FuncSigType; 35 class FuncSigType;
35 36
36 // LockedPtr is a way to provide automatically locked access to some object. 37 // LockedPtr is a way to provide automatically locked access to some object.
37 template <typename T> class LockedPtr { 38 template <typename T> class LockedPtr {
38 LockedPtr() = delete; 39 LockedPtr() = delete;
39 LockedPtr(const LockedPtr &) = delete; 40 LockedPtr(const LockedPtr &) = delete;
40 LockedPtr &operator=(const LockedPtr &) = delete; 41 LockedPtr &operator=(const LockedPtr &) = delete;
41 42
42 public: 43 public:
43 LockedPtr(T *Value, GlobalLockType *Lock) : Value(Value), Lock(Lock) { 44 LockedPtr(T *Value, GlobalLockType *Lock) : Value(Value), Lock(Lock) {
(...skipping 225 matching lines...) Expand 10 before | Expand all | Expand 10 after
269 void dumpTimers(TimerStackIdT StackID = TSK_Default, 270 void dumpTimers(TimerStackIdT StackID = TSK_Default,
270 bool DumpCumulative = true); 271 bool DumpCumulative = true);
271 // The following methods affect only the calling thread's TLS timer 272 // The following methods affect only the calling thread's TLS timer
272 // data. 273 // data.
273 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); 274 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
274 void pushTimer(TimerIdT ID, TimerStackIdT StackID); 275 void pushTimer(TimerIdT ID, TimerStackIdT StackID);
275 void popTimer(TimerIdT ID, TimerStackIdT StackID); 276 void popTimer(TimerIdT ID, TimerStackIdT StackID);
276 void resetTimer(TimerStackIdT StackID); 277 void resetTimer(TimerStackIdT StackID);
277 void setTimerName(TimerStackIdT StackID, const IceString &NewName); 278 void setTimerName(TimerStackIdT StackID, const IceString &NewName);
278 279
280 // This is the first work item sequence number that the parser
281 // produces, and correspondingly the first sequence number that the
282 // emitter thread will wait for. Start numbering at 1 to leave room
283 // for a sentinel, in case e.g. we wish to inject items with a
284 // special sequence number that may be executed out of order.
285 static uint32_t getFirstSequenceNumber() { return 1; }
279 // Adds a newly parsed and constructed function to the Cfg work 286 // Adds a newly parsed and constructed function to the Cfg work
280 // queue. Notifies any idle workers that a new function is 287 // queue. Notifies any idle workers that a new function is
281 // available for translating. May block if the work queue is too 288 // available for translating. May block if the work queue is too
282 // large, in order to control memory footprint. 289 // large, in order to control memory footprint.
283 void cfgQueueBlockingPush(std::unique_ptr<Cfg> Func); 290 void cfgQueueBlockingPush(std::unique_ptr<Cfg> Func);
284 // Takes a Cfg from the work queue for translating. May block if 291 // Takes a Cfg from the work queue for translating. May block if
285 // the work queue is currently empty. Returns nullptr if there is 292 // the work queue is currently empty. Returns nullptr if there is
286 // no more work - the queue is empty and either end() has been 293 // no more work - the queue is empty and either end() has been
287 // called or the Sequential flag was set. 294 // called or the Sequential flag was set.
288 std::unique_ptr<Cfg> cfgQueueBlockingPop(); 295 std::unique_ptr<Cfg> cfgQueueBlockingPop();
289 // Notifies that no more work will be added to the work queue. 296 // Notifies that no more work will be added to the work queue.
290 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } 297 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); }
291 298
299 void emitQueueBlockingPush(EmitterWorkItem *Item);
300 EmitterWorkItem *emitQueueBlockingPop();
301 void emitQueueNotifyEnd() { EmitQ.notifyEnd(); }
302
292 void startWorkerThreads() { 303 void startWorkerThreads() {
293 size_t NumWorkers = getFlags().NumTranslationThreads; 304 size_t NumWorkers = getFlags().NumTranslationThreads;
294 auto Timers = getTimers(); 305 auto Timers = getTimers();
295 for (size_t i = 0; i < NumWorkers; ++i) { 306 for (size_t i = 0; i < NumWorkers; ++i) {
296 ThreadContext *WorkerTLS = new ThreadContext(); 307 ThreadContext *WorkerTLS = new ThreadContext();
297 Timers->initInto(WorkerTLS->Timers); 308 Timers->initInto(WorkerTLS->Timers);
298 AllThreadContexts.push_back(WorkerTLS); 309 AllThreadContexts.push_back(WorkerTLS);
299 TranslationThreads.push_back(std::thread( 310 TranslationThreads.push_back(std::thread(
300 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); 311 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS));
301 } 312 }
302 if (NumWorkers) { 313 if (NumWorkers) {
303 // TODO(stichnot): start a new thread for the emitter queue worker. 314 ThreadContext *WorkerTLS = new ThreadContext();
315 Timers->initInto(WorkerTLS->Timers);
316 AllThreadContexts.push_back(WorkerTLS);
317 EmitterThreads.push_back(
318 std::thread(&GlobalContext::emitterWrapper, this, WorkerTLS));
304 } 319 }
305 } 320 }
306 321
307 void waitForWorkerThreads() { 322 void waitForWorkerThreads() {
308 cfgQueueNotifyEnd(); 323 cfgQueueNotifyEnd();
309 // TODO(stichnot): call end() on the emitter work queue.
310 for (std::thread &Worker : TranslationThreads) { 324 for (std::thread &Worker : TranslationThreads) {
311 Worker.join(); 325 Worker.join();
312 } 326 }
313 TranslationThreads.clear(); 327 TranslationThreads.clear();
314 // TODO(stichnot): join the emitter thread. 328
329 // Only notify the emit queue to end after all the translation
330 // threads have ended.
331 emitQueueNotifyEnd();
332 for (std::thread &Worker : EmitterThreads) {
333 Worker.join();
334 }
335 EmitterThreads.clear();
336
315 if (ALLOW_DUMP) { 337 if (ALLOW_DUMP) {
316 auto Timers = getTimers(); 338 auto Timers = getTimers();
317 for (ThreadContext *TLS : AllThreadContexts) 339 for (ThreadContext *TLS : AllThreadContexts)
318 Timers->mergeFrom(TLS->Timers); 340 Timers->mergeFrom(TLS->Timers);
319 } 341 }
320 if (ALLOW_DUMP) { 342 if (ALLOW_DUMP) {
321 // Do a separate loop over AllThreadContexts to avoid holding 343 // Do a separate loop over AllThreadContexts to avoid holding
322 // two locks at once. 344 // two locks at once.
323 auto Stats = getStatsCumulative(); 345 auto Stats = getStatsCumulative();
324 for (ThreadContext *TLS : AllThreadContexts) 346 for (ThreadContext *TLS : AllThreadContexts)
325 Stats->add(TLS->StatsCumulative); 347 Stats->add(TLS->StatsCumulative);
326 } 348 }
327 } 349 }
328 350
329 // Translation thread startup routine. 351 // Translation thread startup routine.
330 void translateFunctionsWrapper(ThreadContext *MyTLS) { 352 void translateFunctionsWrapper(ThreadContext *MyTLS) {
331 ICE_TLS_SET_FIELD(TLS, MyTLS); 353 ICE_TLS_SET_FIELD(TLS, MyTLS);
332 translateFunctions(); 354 translateFunctions();
333 } 355 }
334 // Translate functions from the Cfg queue until the queue is empty. 356 // Translate functions from the Cfg queue until the queue is empty.
335 void translateFunctions(); 357 void translateFunctions();
336 358
359 // Emitter thread startup routine.
360 void emitterWrapper(ThreadContext *MyTLS) {
361 ICE_TLS_SET_FIELD(TLS, MyTLS);
362 emitItems();
363 }
364 // Emit functions and global initializers from the emitter queue
365 // until the queue is empty.
366 void emitItems();
367
337 // Utility function to match a symbol name against a match string. 368 // Utility function to match a symbol name against a match string.
338 // This is used in a few cases where we want to take some action on 369 // This is used in a few cases where we want to take some action on
339 // a particular function or symbol based on a command-line argument, 370 // a particular function or symbol based on a command-line argument,
340 // such as changing the verbose level for a particular function. An 371 // such as changing the verbose level for a particular function. An
341 // empty Match argument means match everything. Returns true if 372 // empty Match argument means match everything. Returns true if
342 // there is a match. 373 // there is a match.
343 static bool matchSymbolName(const IceString &SymbolName, 374 static bool matchSymbolName(const IceString &SymbolName,
344 const IceString &Match) { 375 const IceString &Match) {
345 return Match.empty() || Match == SymbolName; 376 return Match.empty() || Match == SymbolName;
346 } 377 }
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
383 ICE_CACHELINE_BOUNDARY; 414 ICE_CACHELINE_BOUNDARY;
384 415
385 const VerboseMask VMask; 416 const VerboseMask VMask;
386 Intrinsics IntrinsicsInfo; 417 Intrinsics IntrinsicsInfo;
387 const TargetArch Arch; 418 const TargetArch Arch;
388 const OptLevel Opt; 419 const OptLevel Opt;
389 const IceString TestPrefix; 420 const IceString TestPrefix;
390 const ClFlags &Flags; 421 const ClFlags &Flags;
391 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. 422 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg.
392 std::unique_ptr<ELFObjectWriter> ObjectWriter; 423 std::unique_ptr<ELFObjectWriter> ObjectWriter;
393 BoundedProducerConsumerQueue<Cfg> CfgQ; 424 BoundedProducerConsumerQueue<Cfg> CfgQ;
JF 2015/02/08 00:29:47 Maybe this should now be the OptimizationQ?
Jim Stichnoth 2015/02/08 17:11:23 Done.
425 BoundedProducerConsumerQueue<EmitterWorkItem> EmitQ;
394 426
395 LockedPtr<ArenaAllocator<>> getAllocator() { 427 LockedPtr<ArenaAllocator<>> getAllocator() {
396 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); 428 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock);
397 } 429 }
398 LockedPtr<ConstantPool> getConstPool() { 430 LockedPtr<ConstantPool> getConstPool() {
399 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); 431 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock);
400 } 432 }
401 LockedPtr<CodeStats> getStatsCumulative() { 433 LockedPtr<CodeStats> getStatsCumulative() {
402 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); 434 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock);
403 } 435 }
404 LockedPtr<TimerList> getTimers() { 436 LockedPtr<TimerList> getTimers() {
405 return LockedPtr<TimerList>(&Timers, &TimerLock); 437 return LockedPtr<TimerList>(&Timers, &TimerLock);
406 } 438 }
407 439
408 std::vector<ThreadContext *> AllThreadContexts; 440 std::vector<ThreadContext *> AllThreadContexts;
409 std::vector<std::thread> TranslationThreads; 441 std::vector<std::thread> TranslationThreads;
442 std::vector<std::thread> EmitterThreads;
JF 2015/02/08 00:29:47 These could just be SmallVector or even std::array
Jim Stichnoth 2015/02/08 17:11:23 16 threads? I've forgotten how to count that low.
410 // Each thread has its own TLS pointer which is also held in 443 // Each thread has its own TLS pointer which is also held in
411 // AllThreadContexts. 444 // AllThreadContexts.
412 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); 445 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS);
413 446
414 // Private helpers for mangleName() 447 // Private helpers for mangleName()
415 typedef llvm::SmallVector<char, 32> ManglerVector; 448 typedef llvm::SmallVector<char, 32> ManglerVector;
416 void incrementSubstitutions(ManglerVector &OldName) const; 449 void incrementSubstitutions(ManglerVector &OldName) const;
417 450
418 public: 451 public:
419 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } 452 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); }
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
464 OstreamLocker &operator=(const OstreamLocker &) = delete; 497 OstreamLocker &operator=(const OstreamLocker &) = delete;
465 498
466 public: 499 public:
467 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } 500 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); }
468 ~OstreamLocker() { Ctx->unlockStr(); } 501 ~OstreamLocker() { Ctx->unlockStr(); }
469 502
470 private: 503 private:
471 GlobalContext *const Ctx; 504 GlobalContext *const Ctx;
472 }; 505 };
473 506
507 class EmitterWorkItem {
JF 2015/02/08 00:29:47 This is getting to be a pretty big file, it's prob
Jim Stichnoth 2015/02/08 17:11:23 Good idea, moved this into the new IceThreading.h,
508 EmitterWorkItem(const EmitterWorkItem &) = delete;
509 EmitterWorkItem &operator=(const EmitterWorkItem &) = delete;
JF 2015/02/08 00:29:47 EmitterWorkItem() = delete;
Jim Stichnoth 2015/02/08 17:11:23 Done.
510
511 public:
512 enum ItemKind {
513 WI_Nop, // Placeholder to maintain sequence numbers in case there
514 // is a translation error.
515 WI_GlobalInits, // A list of global initializers.
516 WI_Asm, // An already-assembled function that needs to be emitted,
517 // either as low-level asm text or as an ELF binary.
518 WI_Cfg // A Cfg that needs to be emitted as "readable" assembly.
JF 2015/02/08 00:29:47 I'm not sure I get the difference between asm and c
Jim Stichnoth 2015/02/08 17:11:23 Added more comments that hopefully clarify.
JF 2015/02/08 21:15:04 Yeah, though I'm wary of having a debugging featur
Jim Stichnoth 2015/02/10 07:51:46 I added a report_fatal_error() call to GlobalConte
519 };
520 // Constructor for a Nop work item.
521 explicit EmitterWorkItem(uint32_t Seq)
522 : Sequence(Seq), Kind(WI_Nop), GlobalInits(nullptr), Function(nullptr),
523 RawFunc(nullptr) {}
524 // Constructor for a GlobalInits work item.
525 EmitterWorkItem(uint32_t Seq, VariableDeclarationList *D)
526 : Sequence(Seq), Kind(WI_GlobalInits), GlobalInits(D), Function(nullptr),
527 RawFunc(nullptr) {}
528 // Constructor for an Asm work item.
529 EmitterWorkItem(uint32_t Seq, Assembler *A)
530 : Sequence(Seq), Kind(WI_Asm), GlobalInits(nullptr), Function(A),
531 RawFunc(nullptr) {}
532 // Constructor for a Cfg work item.
533 EmitterWorkItem(uint32_t Seq, Cfg *F)
534 : Sequence(Seq), Kind(WI_Cfg), GlobalInits(nullptr), Function(nullptr),
535 RawFunc(F) {}
536 uint32_t getSequenceNumber() const { return Sequence; }
537 ItemKind getKind() const { return Kind; }
538 VariableDeclarationList *getGlobalInits() const {
539 assert(getKind() == WI_GlobalInits);
540 return GlobalInits;
541 }
542 Assembler *getAsm() const {
543 assert(getKind() == WI_Asm);
544 return Function;
545 }
546 Cfg *getCfg() const {
547 assert(getKind() == WI_Cfg);
548 return RawFunc;
549 }
550 ~EmitterWorkItem();
JF 2015/02/08 00:29:47 Define inline, since this shouldn't do anything.
Jim Stichnoth 2015/02/08 17:11:23 Tried that originally, but it's getting into icky
JF 2015/02/08 21:15:04 Oh yeah, include order would do that, and unique_p
551
552 private:
553 const uint32_t Sequence;
554 const ItemKind Kind;
555 VariableDeclarationList *const GlobalInits;
556 Assembler *const Function;
557 Cfg *const RawFunc;
JF 2015/02/08 00:29:47 3 x unique_ptr?
Jim Stichnoth 2015/02/10 07:51:46 Yeah, I think so, after Karl's CL lands...
558 };
559
474 } // end of namespace Ice 560 } // end of namespace Ice
475 561
476 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H 562 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H
OLD NEW

Powered by Google App Engine
This is Rietveld 408576698