OLD | NEW |
1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// | 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file declares aspects of the compilation that persist across | 10 // This file declares aspects of the compilation that persist across |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
76 void dump(const IceString &Name, Ostream &Str); | 76 void dump(const IceString &Name, Ostream &Str); |
77 | 77 |
78 private: | 78 private: |
79 uint32_t InstructionsEmitted; | 79 uint32_t InstructionsEmitted; |
80 uint32_t RegistersSaved; | 80 uint32_t RegistersSaved; |
81 uint32_t FrameBytes; | 81 uint32_t FrameBytes; |
82 uint32_t Spills; | 82 uint32_t Spills; |
83 uint32_t Fills; | 83 uint32_t Fills; |
84 }; | 84 }; |
85 | 85 |
| 86 // TimerList is a vector of TimerStack objects, with extra methods |
| 87 // to initialize and merge these vectors. |
| 88 class TimerList : public std::vector<TimerStack> { |
| 89 public: |
| 90 // initInto() initializes a target list of timers based on the |
| 91 // current list. In particular, it creates the same number of |
| 92 // timers, in the same order, with the same names, but initially |
| 93 // empty of timing data. |
| 94 void initInto(TimerList &Dest) const { |
| 95 if (!ALLOW_DUMP) |
| 96 return; |
| 97 Dest.clear(); |
| 98 for (const TimerStack &Stack : *this) { |
| 99 Dest.push_back(TimerStack(Stack.getName())); |
| 100 } |
| 101 } |
| 102 void mergeFrom(TimerList &Src) { |
| 103 if (!ALLOW_DUMP) |
| 104 return; |
| 105 assert(size() == Src.size()); |
| 106 size_type i = 0; |
| 107 for (TimerStack &Stack : *this) { |
| 108 assert(Stack.getName() == Src[i].getName()); |
| 109 Stack.mergeFrom(Src[i]); |
| 110 ++i; |
| 111 } |
| 112 } |
| 113 }; |
| 114 |
86 // ThreadContext contains thread-local data. This data can be | 115 // ThreadContext contains thread-local data. This data can be |
87 // combined/reduced as needed after all threads complete. | 116 // combined/reduced as needed after all threads complete. |
88 class ThreadContext { | 117 class ThreadContext { |
89 ThreadContext(const ThreadContext &) = delete; | 118 ThreadContext(const ThreadContext &) = delete; |
90 ThreadContext &operator=(const ThreadContext &) = delete; | 119 ThreadContext &operator=(const ThreadContext &) = delete; |
91 | 120 |
92 public: | 121 public: |
93 ThreadContext() {} | 122 ThreadContext() {} |
94 CodeStats StatsFunction; | 123 CodeStats StatsFunction; |
95 std::vector<TimerStack> Timers; | 124 TimerList Timers; |
96 }; | 125 }; |
97 | 126 |
98 public: | 127 public: |
99 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, | 128 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, |
100 VerboseMask Mask, TargetArch Arch, OptLevel Opt, | 129 VerboseMask Mask, TargetArch Arch, OptLevel Opt, |
101 IceString TestPrefix, const ClFlags &Flags); | 130 IceString TestPrefix, const ClFlags &Flags); |
102 ~GlobalContext(); | 131 ~GlobalContext(); |
103 | 132 |
104 VerboseMask getVerbose() const { return VMask; } | 133 VerboseMask getVerbose() const { return VMask; } |
105 | 134 |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
204 void statsUpdateFills() { | 233 void statsUpdateFills() { |
205 if (!ALLOW_DUMP || !getFlags().DumpStats) | 234 if (!ALLOW_DUMP || !getFlags().DumpStats) |
206 return; | 235 return; |
207 ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateFills(); | 236 ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateFills(); |
208 getStatsCumulative()->updateFills(); | 237 getStatsCumulative()->updateFills(); |
209 } | 238 } |
210 | 239 |
211 // These are predefined TimerStackIdT values. | 240 // These are predefined TimerStackIdT values. |
212 enum TimerStackKind { TSK_Default = 0, TSK_Funcs, TSK_Num }; | 241 enum TimerStackKind { TSK_Default = 0, TSK_Funcs, TSK_Num }; |
213 | 242 |
| 243 // newTimerStackID() creates a new TimerStack in the global space. |
| 244 // It does not affect any TimerStack objects in TLS. |
214 TimerStackIdT newTimerStackID(const IceString &Name); | 245 TimerStackIdT newTimerStackID(const IceString &Name); |
| 246 // dumpTimers() dumps the global timer data. As such, one probably |
| 247 // wants to call mergeTimerStacks() as a prerequisite. |
| 248 void dumpTimers(TimerStackIdT StackID = TSK_Default, |
| 249 bool DumpCumulative = true); |
| 250 // The following methods affect only the calling thread's TLS timer |
| 251 // data. |
215 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); | 252 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); |
216 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 253 void pushTimer(TimerIdT ID, TimerStackIdT StackID); |
217 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 254 void popTimer(TimerIdT ID, TimerStackIdT StackID); |
218 void resetTimer(TimerStackIdT StackID); | 255 void resetTimer(TimerStackIdT StackID); |
219 void setTimerName(TimerStackIdT StackID, const IceString &NewName); | 256 void setTimerName(TimerStackIdT StackID, const IceString &NewName); |
220 void dumpTimers(TimerStackIdT StackID = TSK_Default, | |
221 bool DumpCumulative = true); | |
222 | 257 |
223 // Adds a newly parsed and constructed function to the Cfg work | 258 // Adds a newly parsed and constructed function to the Cfg work |
224 // queue. Notifies any idle workers that a new function is | 259 // queue. Notifies any idle workers that a new function is |
225 // available for translating. May block if the work queue is too | 260 // available for translating. May block if the work queue is too |
226 // large, in order to control memory footprint. | 261 // large, in order to control memory footprint. |
227 void cfgQueueBlockingPush(Cfg *Func) { CfgQ.blockingPush(Func); } | 262 void cfgQueueBlockingPush(Cfg *Func) { CfgQ.blockingPush(Func); } |
228 // Takes a Cfg from the work queue for translating. May block if | 263 // Takes a Cfg from the work queue for translating. May block if |
229 // the work queue is currently empty. Returns nullptr if there is | 264 // the work queue is currently empty. Returns nullptr if there is |
230 // no more work - the queue is empty and either end() has been | 265 // no more work - the queue is empty and either end() has been |
231 // called or the Sequential flag was set. | 266 // called or the Sequential flag was set. |
232 Cfg *cfgQueueBlockingPop() { return CfgQ.blockingPop(); } | 267 Cfg *cfgQueueBlockingPop() { return CfgQ.blockingPop(); } |
233 // Notifies that no more work will be added to the work queue. | 268 // Notifies that no more work will be added to the work queue. |
234 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } | 269 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } |
235 | 270 |
236 void startWorkerThreads() { | 271 void startWorkerThreads() { |
237 size_t NumWorkers = getFlags().NumTranslationThreads; | 272 size_t NumWorkers = getFlags().NumTranslationThreads; |
| 273 auto Timers = getTimers(); |
238 for (size_t i = 0; i < NumWorkers; ++i) { | 274 for (size_t i = 0; i < NumWorkers; ++i) { |
239 ThreadContext *WorkerTLS = new ThreadContext(); | 275 ThreadContext *WorkerTLS = new ThreadContext(); |
| 276 Timers->initInto(WorkerTLS->Timers); |
240 AllThreadContexts.push_back(WorkerTLS); | 277 AllThreadContexts.push_back(WorkerTLS); |
241 TranslationThreads.push_back(std::thread( | 278 TranslationThreads.push_back(std::thread( |
242 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); | 279 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); |
243 } | 280 } |
244 if (NumWorkers) { | 281 if (NumWorkers) { |
245 // TODO(stichnot): start a new thread for the emitter queue worker. | 282 // TODO(stichnot): start a new thread for the emitter queue worker. |
246 } | 283 } |
247 } | 284 } |
248 | 285 |
249 void waitForWorkerThreads() { | 286 void waitForWorkerThreads() { |
250 cfgQueueNotifyEnd(); | 287 cfgQueueNotifyEnd(); |
251 // TODO(stichnot): call end() on the emitter work queue. | 288 // TODO(stichnot): call end() on the emitter work queue. |
252 for (std::thread &Worker : TranslationThreads) { | 289 for (std::thread &Worker : TranslationThreads) { |
253 Worker.join(); | 290 Worker.join(); |
254 } | 291 } |
255 TranslationThreads.clear(); | 292 TranslationThreads.clear(); |
256 // TODO(stichnot): join the emitter thread. | 293 // TODO(stichnot): join the emitter thread. |
| 294 if (ALLOW_DUMP) { |
| 295 auto Timers = getTimers(); |
| 296 for (ThreadContext *TLS : AllThreadContexts) |
| 297 Timers->mergeFrom(TLS->Timers); |
| 298 } |
257 } | 299 } |
258 | 300 |
259 // Translation thread startup routine. | 301 // Translation thread startup routine. |
260 void translateFunctionsWrapper(ThreadContext *MyTLS) { | 302 void translateFunctionsWrapper(ThreadContext *MyTLS) { |
261 ICE_TLS_SET_FIELD(TLS, MyTLS); | 303 ICE_TLS_SET_FIELD(TLS, MyTLS); |
262 translateFunctions(); | 304 translateFunctions(); |
263 } | 305 } |
264 // Translate functions from the Cfg queue until the queue is empty. | 306 // Translate functions from the Cfg queue until the queue is empty. |
265 void translateFunctions(); | 307 void translateFunctions(); |
266 | 308 |
(...skipping 27 matching lines...) Expand all Loading... |
294 ErrorCode ErrorStatus; | 336 ErrorCode ErrorStatus; |
295 | 337 |
296 ICE_CACHELINE_BOUNDARY; | 338 ICE_CACHELINE_BOUNDARY; |
297 // Managed by getStatsCumulative() | 339 // Managed by getStatsCumulative() |
298 GlobalLockType StatsLock; | 340 GlobalLockType StatsLock; |
299 CodeStats StatsCumulative; | 341 CodeStats StatsCumulative; |
300 | 342 |
301 ICE_CACHELINE_BOUNDARY; | 343 ICE_CACHELINE_BOUNDARY; |
302 // Managed by getTimers() | 344 // Managed by getTimers() |
303 GlobalLockType TimerLock; | 345 GlobalLockType TimerLock; |
304 std::vector<TimerStack> Timers; | 346 TimerList Timers; |
305 | 347 |
306 ICE_CACHELINE_BOUNDARY; | 348 ICE_CACHELINE_BOUNDARY; |
307 // StrLock is a global lock on the dump and emit output streams. | 349 // StrLock is a global lock on the dump and emit output streams. |
308 typedef std::mutex StrLockType; | 350 typedef std::mutex StrLockType; |
309 StrLockType StrLock; | 351 StrLockType StrLock; |
310 Ostream *StrDump; // Stream for dumping / diagnostics | 352 Ostream *StrDump; // Stream for dumping / diagnostics |
311 Ostream *StrEmit; // Stream for code emission | 353 Ostream *StrEmit; // Stream for code emission |
312 | 354 |
313 ICE_CACHELINE_BOUNDARY; | 355 ICE_CACHELINE_BOUNDARY; |
314 | 356 |
315 const VerboseMask VMask; | 357 const VerboseMask VMask; |
316 Intrinsics IntrinsicsInfo; | 358 Intrinsics IntrinsicsInfo; |
317 const TargetArch Arch; | 359 const TargetArch Arch; |
318 const OptLevel Opt; | 360 const OptLevel Opt; |
319 const IceString TestPrefix; | 361 const IceString TestPrefix; |
320 const ClFlags &Flags; | 362 const ClFlags &Flags; |
321 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. | 363 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. |
322 std::unique_ptr<ELFObjectWriter> ObjectWriter; | 364 std::unique_ptr<ELFObjectWriter> ObjectWriter; |
323 BoundedProducerConsumerQueue<Cfg> CfgQ; | 365 BoundedProducerConsumerQueue<Cfg> CfgQ; |
324 | 366 |
325 LockedPtr<ArenaAllocator<>> getAllocator() { | 367 LockedPtr<ArenaAllocator<>> getAllocator() { |
326 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); | 368 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); |
327 } | 369 } |
328 LockedPtr<ConstantPool> getConstPool() { | 370 LockedPtr<ConstantPool> getConstPool() { |
329 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); | 371 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); |
330 } | 372 } |
331 LockedPtr<CodeStats> getStatsCumulative() { | 373 LockedPtr<CodeStats> getStatsCumulative() { |
332 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); | 374 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); |
333 } | 375 } |
334 LockedPtr<std::vector<TimerStack>> getTimers() { | 376 LockedPtr<TimerList> getTimers() { |
335 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock); | 377 return LockedPtr<TimerList>(&Timers, &TimerLock); |
336 } | 378 } |
337 | 379 |
338 std::vector<ThreadContext *> AllThreadContexts; | 380 std::vector<ThreadContext *> AllThreadContexts; |
339 std::vector<std::thread> TranslationThreads; | 381 std::vector<std::thread> TranslationThreads; |
340 // Each thread has its own TLS pointer which is also held in | 382 // Each thread has its own TLS pointer which is also held in |
341 // AllThreadContexts. | 383 // AllThreadContexts. |
342 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); | 384 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); |
343 | 385 |
344 // Private helpers for mangleName() | 386 // Private helpers for mangleName() |
345 typedef llvm::SmallVector<char, 32> ManglerVector; | 387 typedef llvm::SmallVector<char, 32> ManglerVector; |
346 void incrementSubstitutions(ManglerVector &OldName) const; | 388 void incrementSubstitutions(ManglerVector &OldName) const; |
347 | 389 |
348 public: | 390 public: |
349 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } | 391 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } |
350 }; | 392 }; |
351 | 393 |
352 // Helper class to push and pop a timer marker. The constructor | 394 // Helper class to push and pop a timer marker. The constructor |
353 // pushes a marker, and the destructor pops it. This is for | 395 // pushes a marker, and the destructor pops it. This is for |
354 // convenient timing of regions of code. | 396 // convenient timing of regions of code. |
355 class TimerMarker { | 397 class TimerMarker { |
356 TimerMarker(const TimerMarker &) = delete; | 398 TimerMarker(const TimerMarker &) = delete; |
357 TimerMarker &operator=(const TimerMarker &) = delete; | 399 TimerMarker &operator=(const TimerMarker &) = delete; |
358 | 400 |
359 public: | 401 public: |
360 TimerMarker(TimerIdT ID, GlobalContext *Ctx) | 402 TimerMarker(TimerIdT ID, GlobalContext *Ctx, |
361 : ID(ID), Ctx(Ctx), Active(false) { | 403 TimerStackIdT StackID = GlobalContext::TSK_Default) |
362 if (ALLOW_DUMP) { | 404 : ID(ID), Ctx(Ctx), StackID(StackID), Active(false) { |
363 Active = Ctx->getFlags().SubzeroTimingEnabled; | 405 if (ALLOW_DUMP) |
364 if (Active) | 406 push(); |
365 Ctx->pushTimer(ID); | |
366 } | |
367 } | 407 } |
368 TimerMarker(TimerIdT ID, const Cfg *Func); | 408 TimerMarker(TimerIdT ID, const Cfg *Func, |
| 409 TimerStackIdT StackID = GlobalContext::TSK_Default) |
| 410 : ID(ID), Ctx(nullptr), StackID(StackID), Active(false) { |
| 411 // Ctx gets set at the beginning of pushCfg(). |
| 412 if (ALLOW_DUMP) |
| 413 pushCfg(Func); |
| 414 } |
369 | 415 |
370 ~TimerMarker() { | 416 ~TimerMarker() { |
371 if (ALLOW_DUMP && Active) | 417 if (ALLOW_DUMP && Active) |
372 Ctx->popTimer(ID); | 418 Ctx->popTimer(ID, StackID); |
373 } | 419 } |
374 | 420 |
375 private: | 421 private: |
376 TimerIdT ID; | 422 void push(); |
377 GlobalContext *const Ctx; | 423 void pushCfg(const Cfg *Func); |
| 424 const TimerIdT ID; |
| 425 GlobalContext *Ctx; |
| 426 const TimerStackIdT StackID; |
378 bool Active; | 427 bool Active; |
379 }; | 428 }; |
380 | 429 |
381 // Helper class for locking the streams and then automatically | 430 // Helper class for locking the streams and then automatically |
382 // unlocking them. | 431 // unlocking them. |
383 class OstreamLocker { | 432 class OstreamLocker { |
384 private: | 433 private: |
385 OstreamLocker() = delete; | 434 OstreamLocker() = delete; |
386 OstreamLocker(const OstreamLocker &) = delete; | 435 OstreamLocker(const OstreamLocker &) = delete; |
387 OstreamLocker &operator=(const OstreamLocker &) = delete; | 436 OstreamLocker &operator=(const OstreamLocker &) = delete; |
388 | 437 |
389 public: | 438 public: |
390 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } | 439 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } |
391 ~OstreamLocker() { Ctx->unlockStr(); } | 440 ~OstreamLocker() { Ctx->unlockStr(); } |
392 | 441 |
393 private: | 442 private: |
394 GlobalContext *const Ctx; | 443 GlobalContext *const Ctx; |
395 }; | 444 }; |
396 | 445 |
397 } // end of namespace Ice | 446 } // end of namespace Ice |
398 | 447 |
399 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H | 448 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H |
OLD | NEW |