OLD | NEW |
---|---|
1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// | 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file declares aspects of the compilation that persist across | 10 // This file declares aspects of the compilation that persist across |
(...skipping 65 matching lines...) | |
76 void dump(const IceString &Name, Ostream &Str); | 76 void dump(const IceString &Name, Ostream &Str); |
77 | 77 |
78 private: | 78 private: |
79 uint32_t InstructionsEmitted; | 79 uint32_t InstructionsEmitted; |
80 uint32_t RegistersSaved; | 80 uint32_t RegistersSaved; |
81 uint32_t FrameBytes; | 81 uint32_t FrameBytes; |
82 uint32_t Spills; | 82 uint32_t Spills; |
83 uint32_t Fills; | 83 uint32_t Fills; |
84 }; | 84 }; |
85 | 85 |
86 // TimerList is a vector of TimerStack objects, with extra methods | |
87 // to initialize and merge these vectors. | |
88 class TimerList : public std::vector<TimerStack> { | |
89 public: | |
90 // initInto() initializes a target list of timers based on the | |
91 // current list. In particular, it creates the same number of | |
92 // timers, in the same order, with the same names, but initially | |
93 // empty of timing data. | |
94 void initInto(TimerList &Dest) const { | |
95 if (!ALLOW_DUMP) | |
96 return; | |
97 Dest.clear(); | |
98 for (const TimerStack &Stack : *this) { | |
99 Dest.push_back(TimerStack(Stack.getName())); | |
100 } | |
101 } | |
102 void mergeFrom(TimerList &Src) { | |
103 if (!ALLOW_DUMP) | |
104 return; | |
105 assert(size() == Src.size()); | |
106 size_type i = 0; | |
107 for (TimerStack &Stack : *this) { | |
108 assert(Stack.getName() == Src[i].getName()); | |
109 Stack.mergeFrom(Src[i]); | |
110 ++i; | |
111 } | |
112 } | |
113 }; | |
114 | |
86 // ThreadContext contains thread-local data. This data can be | 115 // ThreadContext contains thread-local data. This data can be |
87 // combined/reduced as needed after all threads complete. | 116 // combined/reduced as needed after all threads complete. |
88 class ThreadContext { | 117 class ThreadContext { |
89 ThreadContext(const ThreadContext &) = delete; | 118 ThreadContext(const ThreadContext &) = delete; |
90 ThreadContext &operator=(const ThreadContext &) = delete; | 119 ThreadContext &operator=(const ThreadContext &) = delete; |
91 | 120 |
92 public: | 121 public: |
93 ThreadContext() {} | 122 ThreadContext() {} |
94 CodeStats StatsFunction; | 123 CodeStats StatsFunction; |
95 std::vector<TimerStack> Timers; | 124 TimerList Timers; |
96 }; | 125 }; |
97 | 126 |
98 public: | 127 public: |
99 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, | 128 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, |
100 VerboseMask Mask, TargetArch Arch, OptLevel Opt, | 129 VerboseMask Mask, TargetArch Arch, OptLevel Opt, |
101 IceString TestPrefix, const ClFlags &Flags); | 130 IceString TestPrefix, const ClFlags &Flags); |
102 ~GlobalContext(); | 131 ~GlobalContext(); |
103 | 132 |
104 VerboseMask getVerbose() const { return VMask; } | 133 VerboseMask getVerbose() const { return VMask; } |
105 | 134 |
(...skipping 98 matching lines...) | |
204 void statsUpdateFills() { | 233 void statsUpdateFills() { |
205 if (!ALLOW_DUMP || !getFlags().DumpStats) | 234 if (!ALLOW_DUMP || !getFlags().DumpStats) |
206 return; | 235 return; |
207 ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateFills(); | 236 ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateFills(); |
208 getStatsCumulative()->updateFills(); | 237 getStatsCumulative()->updateFills(); |
209 } | 238 } |
210 | 239 |
211 // These are predefined TimerStackIdT values. | 240 // These are predefined TimerStackIdT values. |
212 enum TimerStackKind { TSK_Default = 0, TSK_Funcs, TSK_Num }; | 241 enum TimerStackKind { TSK_Default = 0, TSK_Funcs, TSK_Num }; |
213 | 242 |
243 // newTimerStackID() creates a new TimerStack in the global space. | |
244 // It does not affect any TimerStack objects in TLS. | |
214 TimerStackIdT newTimerStackID(const IceString &Name); | 245 TimerStackIdT newTimerStackID(const IceString &Name); |
246 // dumpTimers() dumps the global timer data. As such, one probably | |
247 // wants to call mergeTimerStacks() as a prerequisite. | |
248 void dumpTimers(TimerStackIdT StackID = TSK_Default, | |
249 bool DumpCumulative = true); | |
250 // The following methods affect only the calling thread's TLS timer | |
251 // data. | |
215 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); | 252 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); |
216 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 253 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); |
217 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 254 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); |
218 void resetTimer(TimerStackIdT StackID); | 255 void resetTimer(TimerStackIdT StackID); |
219 void setTimerName(TimerStackIdT StackID, const IceString &NewName); | 256 void setTimerName(TimerStackIdT StackID, const IceString &NewName); |
220 void dumpTimers(TimerStackIdT StackID = TSK_Default, | |
221 bool DumpCumulative = true); | |
222 | 257 |
223 // Adds a newly parsed and constructed function to the Cfg work | 258 // Adds a newly parsed and constructed function to the Cfg work |
224 // queue. Notifies any idle workers that a new function is | 259 // queue. Notifies any idle workers that a new function is |
225 // available for translating. May block if the work queue is too | 260 // available for translating. May block if the work queue is too |
226 // large, in order to control memory footprint. | 261 // large, in order to control memory footprint. |
227 void cfgQueueBlockingPush(Cfg *Func) { CfgQ.blockingPush(Func); } | 262 void cfgQueueBlockingPush(Cfg *Func) { CfgQ.blockingPush(Func); } |
228 // Takes a Cfg from the work queue for translating. May block if | 263 // Takes a Cfg from the work queue for translating. May block if |
229 // the work queue is currently empty. Returns nullptr if there is | 264 // the work queue is currently empty. Returns nullptr if there is |
230 // no more work - the queue is empty and either end() has been | 265 // no more work - the queue is empty and either end() has been |
231 // called or the Sequential flag was set. | 266 // called or the Sequential flag was set. |
232 Cfg *cfgQueueBlockingPop() { return CfgQ.blockingPop(); } | 267 Cfg *cfgQueueBlockingPop() { return CfgQ.blockingPop(); } |
233 // Notifies that no more work will be added to the work queue. | 268 // Notifies that no more work will be added to the work queue. |
234 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } | 269 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } |
235 | 270 |
236 void startWorkerThreads() { | 271 void startWorkerThreads() { |
237 size_t NumWorkers = getFlags().NumTranslationThreads; | 272 size_t NumWorkers = getFlags().NumTranslationThreads; |
273 auto Timers = getTimers(); | |
238 for (size_t i = 0; i < NumWorkers; ++i) { | 274 for (size_t i = 0; i < NumWorkers; ++i) { |
239 ThreadContext *WorkerTLS = new ThreadContext(); | 275 ThreadContext *WorkerTLS = new ThreadContext(); |
276 Timers->initInto(WorkerTLS->Timers); | |
240 AllThreadContexts.push_back(WorkerTLS); | 277 AllThreadContexts.push_back(WorkerTLS); |
241 TranslationThreads.push_back(std::thread( | 278 TranslationThreads.push_back(std::thread( |
242 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); | 279 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); |
243 } | 280 } |
244 if (NumWorkers) { | 281 if (NumWorkers) { |
245 // TODO(stichnot): start a new thread for the emitter queue worker. | 282 // TODO(stichnot): start a new thread for the emitter queue worker. |
246 } | 283 } |
247 } | 284 } |
248 | 285 |
249 void waitForWorkerThreads() { | 286 void waitForWorkerThreads() { |
250 cfgQueueNotifyEnd(); | 287 cfgQueueNotifyEnd(); |
251 // TODO(stichnot): call end() on the emitter work queue. | 288 // TODO(stichnot): call end() on the emitter work queue. |
252 for (std::thread &Worker : TranslationThreads) { | 289 for (std::thread &Worker : TranslationThreads) { |
253 Worker.join(); | 290 Worker.join(); |
254 } | 291 } |
255 TranslationThreads.clear(); | 292 TranslationThreads.clear(); |
256 // TODO(stichnot): join the emitter thread. | 293 // TODO(stichnot): join the emitter thread. |
294 if (ALLOW_DUMP) { | |
295 auto Timers = getTimers(); | |
296 for (ThreadContext *TLS : AllThreadContexts) | |
297 Timers->mergeFrom(TLS->Timers); | |
298 } | |
257 } | 299 } |
258 | 300 |
259 // Translation thread startup routine. | 301 // Translation thread startup routine. |
260 void translateFunctionsWrapper(ThreadContext *MyTLS) { | 302 void translateFunctionsWrapper(ThreadContext *MyTLS) { |
261 ICE_TLS_SET_FIELD(TLS, MyTLS); | 303 ICE_TLS_SET_FIELD(TLS, MyTLS); |
262 translateFunctions(); | 304 translateFunctions(); |
263 } | 305 } |
264 // Translate functions from the Cfg queue until the queue is empty. | 306 // Translate functions from the Cfg queue until the queue is empty. |
265 void translateFunctions(); | 307 void translateFunctions(); |
266 | 308 |
(...skipping 27 matching lines...) | |
294 ErrorCode ErrorStatus; | 336 ErrorCode ErrorStatus; |
295 | 337 |
296 ICE_CACHELINE_BOUNDARY; | 338 ICE_CACHELINE_BOUNDARY; |
297 // Managed by getStatsCumulative() | 339 // Managed by getStatsCumulative() |
298 GlobalLockType StatsLock; | 340 GlobalLockType StatsLock; |
299 CodeStats StatsCumulative; | 341 CodeStats StatsCumulative; |
300 | 342 |
301 ICE_CACHELINE_BOUNDARY; | 343 ICE_CACHELINE_BOUNDARY; |
302 // Managed by getTimers() | 344 // Managed by getTimers() |
303 GlobalLockType TimerLock; | 345 GlobalLockType TimerLock; |
304 std::vector<TimerStack> Timers; | 346 TimerList Timers; |
305 | 347 |
306 ICE_CACHELINE_BOUNDARY; | 348 ICE_CACHELINE_BOUNDARY; |
307 // StrLock is a global lock on the dump and emit output streams. | 349 // StrLock is a global lock on the dump and emit output streams. |
308 typedef std::mutex StrLockType; | 350 typedef std::mutex StrLockType; |
309 StrLockType StrLock; | 351 StrLockType StrLock; |
310 Ostream *StrDump; // Stream for dumping / diagnostics | 352 Ostream *StrDump; // Stream for dumping / diagnostics |
311 Ostream *StrEmit; // Stream for code emission | 353 Ostream *StrEmit; // Stream for code emission |
312 | 354 |
313 ICE_CACHELINE_BOUNDARY; | 355 ICE_CACHELINE_BOUNDARY; |
314 | 356 |
315 const VerboseMask VMask; | 357 const VerboseMask VMask; |
316 Intrinsics IntrinsicsInfo; | 358 Intrinsics IntrinsicsInfo; |
317 const TargetArch Arch; | 359 const TargetArch Arch; |
318 const OptLevel Opt; | 360 const OptLevel Opt; |
319 const IceString TestPrefix; | 361 const IceString TestPrefix; |
320 const ClFlags &Flags; | 362 const ClFlags &Flags; |
321 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. | 363 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. |
322 std::unique_ptr<ELFObjectWriter> ObjectWriter; | 364 std::unique_ptr<ELFObjectWriter> ObjectWriter; |
323 BoundedProducerConsumerQueue<Cfg> CfgQ; | 365 BoundedProducerConsumerQueue<Cfg> CfgQ; |
324 | 366 |
325 LockedPtr<ArenaAllocator<>> getAllocator() { | 367 LockedPtr<ArenaAllocator<>> getAllocator() { |
326 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); | 368 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); |
327 } | 369 } |
328 LockedPtr<ConstantPool> getConstPool() { | 370 LockedPtr<ConstantPool> getConstPool() { |
329 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); | 371 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); |
330 } | 372 } |
331 LockedPtr<CodeStats> getStatsCumulative() { | 373 LockedPtr<CodeStats> getStatsCumulative() { |
332 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); | 374 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); |
333 } | 375 } |
334 LockedPtr<std::vector<TimerStack>> getTimers() { | 376 LockedPtr<TimerList> getTimers() { |
335 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock); | 377 return LockedPtr<TimerList>(&Timers, &TimerLock); |
336 } | 378 } |
337 | 379 |
338 std::vector<ThreadContext *> AllThreadContexts; | 380 std::vector<ThreadContext *> AllThreadContexts; |
339 std::vector<std::thread> TranslationThreads; | 381 std::vector<std::thread> TranslationThreads; |
340 // Each thread has its own TLS pointer which is also held in | 382 // Each thread has its own TLS pointer which is also held in |
341 // AllThreadContexts. | 383 // AllThreadContexts. |
342 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); | 384 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); |
343 | 385 |
344 // Private helpers for mangleName() | 386 // Private helpers for mangleName() |
345 typedef llvm::SmallVector<char, 32> ManglerVector; | 387 typedef llvm::SmallVector<char, 32> ManglerVector; |
346 void incrementSubstitutions(ManglerVector &OldName) const; | 388 void incrementSubstitutions(ManglerVector &OldName) const; |
347 | 389 |
348 public: | 390 public: |
349 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } | 391 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } |
350 }; | 392 }; |
351 | 393 |
352 // Helper class to push and pop a timer marker. The constructor | 394 // Helper class to push and pop a timer marker. The constructor |
353 // pushes a marker, and the destructor pops it. This is for | 395 // pushes a marker, and the destructor pops it. This is for |
354 // convenient timing of regions of code. | 396 // convenient timing of regions of code. |
355 class TimerMarker { | 397 class TimerMarker { |
356 TimerMarker(const TimerMarker &) = delete; | 398 TimerMarker(const TimerMarker &) = delete; |
357 TimerMarker &operator=(const TimerMarker &) = delete; | 399 TimerMarker &operator=(const TimerMarker &) = delete; |
358 | 400 |
359 public: | 401 public: |
360 TimerMarker(TimerIdT ID, GlobalContext *Ctx) | 402 TimerMarker(TimerIdT ID, GlobalContext *Ctx, |
361 : ID(ID), Ctx(Ctx), Active(false) { | 403 TimerStackIdT StackID = GlobalContext::TSK_Default) |
404 : ID(ID), Ctx(Ctx), StackID(StackID), Active(false) { | |
jvoung (off chromium)
2015/01/30 18:42:56
I wonder if this ctor should be defined out of lin[e] … (comment truncated)
Jim Stichnoth
2015/01/30 20:22:25
I originally wanted both ctors (and the dtor) to b[e] … (comment truncated)
jvoung (off chromium)
2015/01/30 21:04:44
I see, okay.
| |
362 if (ALLOW_DUMP) { | 405 if (ALLOW_DUMP) { |
363 Active = Ctx->getFlags().SubzeroTimingEnabled; | 406 switch (StackID) { |
407 case GlobalContext::TSK_Default: | |
408 Active = Ctx->getFlags().SubzeroTimingEnabled; | |
409 break; | |
410 case GlobalContext::TSK_Funcs: | |
411 Active = Ctx->getFlags().TimeEachFunction; | |
412 default: | |
413 break; | |
414 } | |
364 if (Active) | 415 if (Active) |
365 Ctx->pushTimer(ID); | 416 Ctx->pushTimer(ID, StackID); |
366 } | 417 } |
367 } | 418 } |
368 TimerMarker(TimerIdT ID, const Cfg *Func); | 419 TimerMarker(TimerIdT ID, const Cfg *Func, |
420 TimerStackIdT StackID = GlobalContext::TSK_Default); | |
369 | 421 |
370 ~TimerMarker() { | 422 ~TimerMarker() { |
371 if (ALLOW_DUMP && Active) | 423 if (ALLOW_DUMP && Active) |
372 Ctx->popTimer(ID); | 424 Ctx->popTimer(ID, StackID); |
373 } | 425 } |
374 | 426 |
375 private: | 427 private: |
376 TimerIdT ID; | 428 const TimerIdT ID; |
377 GlobalContext *const Ctx; | 429 GlobalContext *const Ctx; |
430 const TimerStackIdT StackID; | |
378 bool Active; | 431 bool Active; |
379 }; | 432 }; |
380 | 433 |
381 // Helper class for locking the streams and then automatically | 434 // Helper class for locking the streams and then automatically |
382 // unlocking them. | 435 // unlocking them. |
383 class OstreamLocker { | 436 class OstreamLocker { |
384 private: | 437 private: |
385 OstreamLocker() = delete; | 438 OstreamLocker() = delete; |
386 OstreamLocker(const OstreamLocker &) = delete; | 439 OstreamLocker(const OstreamLocker &) = delete; |
387 OstreamLocker &operator=(const OstreamLocker &) = delete; | 440 OstreamLocker &operator=(const OstreamLocker &) = delete; |
388 | 441 |
389 public: | 442 public: |
390 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } | 443 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } |
391 ~OstreamLocker() { Ctx->unlockStr(); } | 444 ~OstreamLocker() { Ctx->unlockStr(); } |
392 | 445 |
393 private: | 446 private: |
394 GlobalContext *const Ctx; | 447 GlobalContext *const Ctx; |
395 }; | 448 }; |
396 | 449 |
397 } // end of namespace Ice | 450 } // end of namespace Ice |
398 | 451 |
399 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H | 452 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H |
OLD | NEW |