Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(51)

Side by Side Diff: src/IceGlobalContext.h

Issue 878383004: Subzero: Fix timers for multithreaded translation. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Cleanup Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file declares aspects of the compilation that persist across 10 // This file declares aspects of the compilation that persist across
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
76 void dump(const IceString &Name, Ostream &Str); 76 void dump(const IceString &Name, Ostream &Str);
77 77
78 private: 78 private:
79 uint32_t InstructionsEmitted; 79 uint32_t InstructionsEmitted;
80 uint32_t RegistersSaved; 80 uint32_t RegistersSaved;
81 uint32_t FrameBytes; 81 uint32_t FrameBytes;
82 uint32_t Spills; 82 uint32_t Spills;
83 uint32_t Fills; 83 uint32_t Fills;
84 }; 84 };
85 85
86 // TimerList is a vector of TimerStack objects, with extra methods
87 // to initialize and merge these vectors.
88 class TimerList : public std::vector<TimerStack> {
89 public:
90 // initInto() initializes a target list of timers based on the
91 // current list. In particular, it creates the same number of
92 // timers, in the same order, with the same names, but initially
93 // empty of timing data.
94 void initInto(TimerList &Dest) const {
95 if (!ALLOW_DUMP)
96 return;
97 Dest.clear();
98 for (const TimerStack &Stack : *this) {
99 Dest.push_back(TimerStack(Stack.getName()));
100 }
101 }
102 void mergeFrom(TimerList &Src) {
103 if (!ALLOW_DUMP)
104 return;
105 assert(size() == Src.size());
106 size_type i = 0;
107 for (TimerStack &Stack : *this) {
108 assert(Stack.getName() == Src[i].getName());
109 Stack.mergeFrom(Src[i]);
110 ++i;
111 }
112 }
113 };
114
86 // ThreadContext contains thread-local data. This data can be 115 // ThreadContext contains thread-local data. This data can be
87 // combined/reduced as needed after all threads complete. 116 // combined/reduced as needed after all threads complete.
88 class ThreadContext { 117 class ThreadContext {
89 ThreadContext(const ThreadContext &) = delete; 118 ThreadContext(const ThreadContext &) = delete;
90 ThreadContext &operator=(const ThreadContext &) = delete; 119 ThreadContext &operator=(const ThreadContext &) = delete;
91 120
92 public: 121 public:
93 ThreadContext() {} 122 ThreadContext() {}
94 CodeStats StatsFunction; 123 CodeStats StatsFunction;
95 std::vector<TimerStack> Timers; 124 TimerList Timers;
96 }; 125 };
97 126
98 public: 127 public:
99 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, 128 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer,
100 VerboseMask Mask, TargetArch Arch, OptLevel Opt, 129 VerboseMask Mask, TargetArch Arch, OptLevel Opt,
101 IceString TestPrefix, const ClFlags &Flags); 130 IceString TestPrefix, const ClFlags &Flags);
102 ~GlobalContext(); 131 ~GlobalContext();
103 132
104 VerboseMask getVerbose() const { return VMask; } 133 VerboseMask getVerbose() const { return VMask; }
105 134
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after
204 void statsUpdateFills() { 233 void statsUpdateFills() {
205 if (!ALLOW_DUMP || !getFlags().DumpStats) 234 if (!ALLOW_DUMP || !getFlags().DumpStats)
206 return; 235 return;
207 ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateFills(); 236 ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateFills();
208 getStatsCumulative()->updateFills(); 237 getStatsCumulative()->updateFills();
209 } 238 }
210 239
211 // These are predefined TimerStackIdT values. 240 // These are predefined TimerStackIdT values.
212 enum TimerStackKind { TSK_Default = 0, TSK_Funcs, TSK_Num }; 241 enum TimerStackKind { TSK_Default = 0, TSK_Funcs, TSK_Num };
213 242
243 // newTimerStackID() creates a new TimerStack in the global space.
244 // It does not affect any TimerStack objects in TLS.
214 TimerStackIdT newTimerStackID(const IceString &Name); 245 TimerStackIdT newTimerStackID(const IceString &Name);
246 // dumpTimers() dumps the global timer data. As such, one probably
247 // wants to call mergeTimerStacks() as a prerequisite.
248 void dumpTimers(TimerStackIdT StackID = TSK_Default,
249 bool DumpCumulative = true);
250 // The following methods affect only the calling thread's TLS timer
251 // data.
215 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); 252 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);
216 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); 253 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
217 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); 254 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);
218 void resetTimer(TimerStackIdT StackID); 255 void resetTimer(TimerStackIdT StackID);
219 void setTimerName(TimerStackIdT StackID, const IceString &NewName); 256 void setTimerName(TimerStackIdT StackID, const IceString &NewName);
220 void dumpTimers(TimerStackIdT StackID = TSK_Default,
221 bool DumpCumulative = true);
222 257
223 // Adds a newly parsed and constructed function to the Cfg work 258 // Adds a newly parsed and constructed function to the Cfg work
224 // queue. Notifies any idle workers that a new function is 259 // queue. Notifies any idle workers that a new function is
225 // available for translating. May block if the work queue is too 260 // available for translating. May block if the work queue is too
226 // large, in order to control memory footprint. 261 // large, in order to control memory footprint.
227 void cfgQueueBlockingPush(Cfg *Func) { CfgQ.blockingPush(Func); } 262 void cfgQueueBlockingPush(Cfg *Func) { CfgQ.blockingPush(Func); }
228 // Takes a Cfg from the work queue for translating. May block if 263 // Takes a Cfg from the work queue for translating. May block if
229 // the work queue is currently empty. Returns nullptr if there is 264 // the work queue is currently empty. Returns nullptr if there is
230 // no more work - the queue is empty and either end() has been 265 // no more work - the queue is empty and either end() has been
231 // called or the Sequential flag was set. 266 // called or the Sequential flag was set.
232 Cfg *cfgQueueBlockingPop() { return CfgQ.blockingPop(); } 267 Cfg *cfgQueueBlockingPop() { return CfgQ.blockingPop(); }
233 // Notifies that no more work will be added to the work queue. 268 // Notifies that no more work will be added to the work queue.
234 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } 269 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); }
235 270
236 void startWorkerThreads() { 271 void startWorkerThreads() {
237 size_t NumWorkers = getFlags().NumTranslationThreads; 272 size_t NumWorkers = getFlags().NumTranslationThreads;
273 auto Timers = getTimers();
238 for (size_t i = 0; i < NumWorkers; ++i) { 274 for (size_t i = 0; i < NumWorkers; ++i) {
239 ThreadContext *WorkerTLS = new ThreadContext(); 275 ThreadContext *WorkerTLS = new ThreadContext();
276 Timers->initInto(WorkerTLS->Timers);
240 AllThreadContexts.push_back(WorkerTLS); 277 AllThreadContexts.push_back(WorkerTLS);
241 TranslationThreads.push_back(std::thread( 278 TranslationThreads.push_back(std::thread(
242 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); 279 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS));
243 } 280 }
244 if (NumWorkers) { 281 if (NumWorkers) {
245 // TODO(stichnot): start a new thread for the emitter queue worker. 282 // TODO(stichnot): start a new thread for the emitter queue worker.
246 } 283 }
247 } 284 }
248 285
249 void waitForWorkerThreads() { 286 void waitForWorkerThreads() {
250 cfgQueueNotifyEnd(); 287 cfgQueueNotifyEnd();
251 // TODO(stichnot): call end() on the emitter work queue. 288 // TODO(stichnot): call end() on the emitter work queue.
252 for (std::thread &Worker : TranslationThreads) { 289 for (std::thread &Worker : TranslationThreads) {
253 Worker.join(); 290 Worker.join();
254 } 291 }
255 TranslationThreads.clear(); 292 TranslationThreads.clear();
256 // TODO(stichnot): join the emitter thread. 293 // TODO(stichnot): join the emitter thread.
294 if (ALLOW_DUMP) {
295 auto Timers = getTimers();
296 for (ThreadContext *TLS : AllThreadContexts)
297 Timers->mergeFrom(TLS->Timers);
298 }
257 } 299 }
258 300
259 // Translation thread startup routine. 301 // Translation thread startup routine.
260 void translateFunctionsWrapper(ThreadContext *MyTLS) { 302 void translateFunctionsWrapper(ThreadContext *MyTLS) {
261 ICE_TLS_SET_FIELD(TLS, MyTLS); 303 ICE_TLS_SET_FIELD(TLS, MyTLS);
262 translateFunctions(); 304 translateFunctions();
263 } 305 }
264 // Translate functions from the Cfg queue until the queue is empty. 306 // Translate functions from the Cfg queue until the queue is empty.
265 void translateFunctions(); 307 void translateFunctions();
266 308
(...skipping 27 matching lines...) Expand all
294 ErrorCode ErrorStatus; 336 ErrorCode ErrorStatus;
295 337
296 ICE_CACHELINE_BOUNDARY; 338 ICE_CACHELINE_BOUNDARY;
297 // Managed by getStatsCumulative() 339 // Managed by getStatsCumulative()
298 GlobalLockType StatsLock; 340 GlobalLockType StatsLock;
299 CodeStats StatsCumulative; 341 CodeStats StatsCumulative;
300 342
301 ICE_CACHELINE_BOUNDARY; 343 ICE_CACHELINE_BOUNDARY;
302 // Managed by getTimers() 344 // Managed by getTimers()
303 GlobalLockType TimerLock; 345 GlobalLockType TimerLock;
304 std::vector<TimerStack> Timers; 346 TimerList Timers;
305 347
306 ICE_CACHELINE_BOUNDARY; 348 ICE_CACHELINE_BOUNDARY;
307 // StrLock is a global lock on the dump and emit output streams. 349 // StrLock is a global lock on the dump and emit output streams.
308 typedef std::mutex StrLockType; 350 typedef std::mutex StrLockType;
309 StrLockType StrLock; 351 StrLockType StrLock;
310 Ostream *StrDump; // Stream for dumping / diagnostics 352 Ostream *StrDump; // Stream for dumping / diagnostics
311 Ostream *StrEmit; // Stream for code emission 353 Ostream *StrEmit; // Stream for code emission
312 354
313 ICE_CACHELINE_BOUNDARY; 355 ICE_CACHELINE_BOUNDARY;
314 356
315 const VerboseMask VMask; 357 const VerboseMask VMask;
316 Intrinsics IntrinsicsInfo; 358 Intrinsics IntrinsicsInfo;
317 const TargetArch Arch; 359 const TargetArch Arch;
318 const OptLevel Opt; 360 const OptLevel Opt;
319 const IceString TestPrefix; 361 const IceString TestPrefix;
320 const ClFlags &Flags; 362 const ClFlags &Flags;
321 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. 363 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg.
322 std::unique_ptr<ELFObjectWriter> ObjectWriter; 364 std::unique_ptr<ELFObjectWriter> ObjectWriter;
323 BoundedProducerConsumerQueue<Cfg> CfgQ; 365 BoundedProducerConsumerQueue<Cfg> CfgQ;
324 366
325 LockedPtr<ArenaAllocator<>> getAllocator() { 367 LockedPtr<ArenaAllocator<>> getAllocator() {
326 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); 368 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock);
327 } 369 }
328 LockedPtr<ConstantPool> getConstPool() { 370 LockedPtr<ConstantPool> getConstPool() {
329 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); 371 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock);
330 } 372 }
331 LockedPtr<CodeStats> getStatsCumulative() { 373 LockedPtr<CodeStats> getStatsCumulative() {
332 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); 374 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock);
333 } 375 }
334 LockedPtr<std::vector<TimerStack>> getTimers() { 376 LockedPtr<TimerList> getTimers() {
335 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock); 377 return LockedPtr<TimerList>(&Timers, &TimerLock);
336 } 378 }
337 379
338 std::vector<ThreadContext *> AllThreadContexts; 380 std::vector<ThreadContext *> AllThreadContexts;
339 std::vector<std::thread> TranslationThreads; 381 std::vector<std::thread> TranslationThreads;
340 // Each thread has its own TLS pointer which is also held in 382 // Each thread has its own TLS pointer which is also held in
341 // AllThreadContexts. 383 // AllThreadContexts.
342 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); 384 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS);
343 385
344 // Private helpers for mangleName() 386 // Private helpers for mangleName()
345 typedef llvm::SmallVector<char, 32> ManglerVector; 387 typedef llvm::SmallVector<char, 32> ManglerVector;
346 void incrementSubstitutions(ManglerVector &OldName) const; 388 void incrementSubstitutions(ManglerVector &OldName) const;
347 389
348 public: 390 public:
349 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } 391 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); }
350 }; 392 };
351 393
352 // Helper class to push and pop a timer marker. The constructor 394 // Helper class to push and pop a timer marker. The constructor
353 // pushes a marker, and the destructor pops it. This is for 395 // pushes a marker, and the destructor pops it. This is for
354 // convenient timing of regions of code. 396 // convenient timing of regions of code.
355 class TimerMarker { 397 class TimerMarker {
356 TimerMarker(const TimerMarker &) = delete; 398 TimerMarker(const TimerMarker &) = delete;
357 TimerMarker &operator=(const TimerMarker &) = delete; 399 TimerMarker &operator=(const TimerMarker &) = delete;
358 400
359 public: 401 public:
360 TimerMarker(TimerIdT ID, GlobalContext *Ctx) 402 TimerMarker(TimerIdT ID, GlobalContext *Ctx,
361 : ID(ID), Ctx(Ctx), Active(false) { 403 TimerStackIdT StackID = GlobalContext::TSK_Default)
404 : ID(ID), Ctx(Ctx), StackID(StackID), Active(false) {
jvoung (off chromium) 2015/01/30 18:42:56 I wonder if this ctor should be defined out of line [...]
Jim Stichnoth 2015/01/30 20:22:25 I originally wanted both ctors (and the dtor) to be [...]
jvoung (off chromium) 2015/01/30 21:04:44 I see, okay.
362 if (ALLOW_DUMP) { 405 if (ALLOW_DUMP) {
363 Active = Ctx->getFlags().SubzeroTimingEnabled; 406 switch (StackID) {
407 case GlobalContext::TSK_Default:
408 Active = Ctx->getFlags().SubzeroTimingEnabled;
409 break;
410 case GlobalContext::TSK_Funcs:
411 Active = Ctx->getFlags().TimeEachFunction;
412 default:
413 break;
414 }
364 if (Active) 415 if (Active)
365 Ctx->pushTimer(ID); 416 Ctx->pushTimer(ID, StackID);
366 } 417 }
367 } 418 }
368 TimerMarker(TimerIdT ID, const Cfg *Func); 419 TimerMarker(TimerIdT ID, const Cfg *Func,
420 TimerStackIdT StackID = GlobalContext::TSK_Default);
369 421
370 ~TimerMarker() { 422 ~TimerMarker() {
371 if (ALLOW_DUMP && Active) 423 if (ALLOW_DUMP && Active)
372 Ctx->popTimer(ID); 424 Ctx->popTimer(ID, StackID);
373 } 425 }
374 426
375 private: 427 private:
376 TimerIdT ID; 428 const TimerIdT ID;
377 GlobalContext *const Ctx; 429 GlobalContext *const Ctx;
430 const TimerStackIdT StackID;
378 bool Active; 431 bool Active;
379 }; 432 };
380 433
381 // Helper class for locking the streams and then automatically 434 // Helper class for locking the streams and then automatically
382 // unlocking them. 435 // unlocking them.
383 class OstreamLocker { 436 class OstreamLocker {
384 private: 437 private:
385 OstreamLocker() = delete; 438 OstreamLocker() = delete;
386 OstreamLocker(const OstreamLocker &) = delete; 439 OstreamLocker(const OstreamLocker &) = delete;
387 OstreamLocker &operator=(const OstreamLocker &) = delete; 440 OstreamLocker &operator=(const OstreamLocker &) = delete;
388 441
389 public: 442 public:
390 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } 443 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); }
391 ~OstreamLocker() { Ctx->unlockStr(); } 444 ~OstreamLocker() { Ctx->unlockStr(); }
392 445
393 private: 446 private:
394 GlobalContext *const Ctx; 447 GlobalContext *const Ctx;
395 }; 448 };
396 449
397 } // end of namespace Ice 450 } // end of namespace Ice
398 451
399 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H 452 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H
OLDNEW
« no previous file with comments | « src/IceCfg.cpp ('k') | src/IceGlobalContext.cpp » ('j') | src/IceGlobalContext.cpp » ('J')

Powered by Google App Engine
This is Rietveld 408576698