| OLD | NEW |
| 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// | 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file declares aspects of the compilation that persist across | 10 // This file declares aspects of the compilation that persist across |
| (...skipping 65 matching lines...) | (...skipping 65 matching lines...) |
| 76 void dump(const IceString &Name, Ostream &Str); | 76 void dump(const IceString &Name, Ostream &Str); |
| 77 | 77 |
| 78 private: | 78 private: |
| 79 uint32_t InstructionsEmitted; | 79 uint32_t InstructionsEmitted; |
| 80 uint32_t RegistersSaved; | 80 uint32_t RegistersSaved; |
| 81 uint32_t FrameBytes; | 81 uint32_t FrameBytes; |
| 82 uint32_t Spills; | 82 uint32_t Spills; |
| 83 uint32_t Fills; | 83 uint32_t Fills; |
| 84 }; | 84 }; |
| 85 | 85 |
| 86 // TimerList is a vector of TimerStack objects, with extra methods |
| 87 // to initialize and merge these vectors. |
| 88 class TimerList : public std::vector<TimerStack> { |
| 89 public: |
| 90 // initInto() initializes a target list of timers based on the |
| 91 // current list. In particular, it creates the same number of |
| 92 // timers, in the same order, with the same names, but initially |
| 93 // empty of timing data. |
| 94 void initInto(TimerList &Dest) const { |
| 95 if (!ALLOW_DUMP) |
| 96 return; |
| 97 Dest.clear(); |
| 98 for (const TimerStack &Stack : *this) { |
| 99 Dest.push_back(TimerStack(Stack.getName())); |
| 100 } |
| 101 } |
| 102 void mergeFrom(TimerList &Src) { |
| 103 if (!ALLOW_DUMP) |
| 104 return; |
| 105 assert(size() == Src.size()); |
| 106 size_type i = 0; |
| 107 for (TimerStack &Stack : *this) { |
| 108 assert(Stack.getName() == Src[i].getName()); |
| 109 Stack.mergeFrom(Src[i]); |
| 110 ++i; |
| 111 } |
| 112 } |
| 113 }; |
| 114 |
| 86 // ThreadContext contains thread-local data. This data can be | 115 // ThreadContext contains thread-local data. This data can be |
| 87 // combined/reduced as needed after all threads complete. | 116 // combined/reduced as needed after all threads complete. |
| 88 class ThreadContext { | 117 class ThreadContext { |
| 89 ThreadContext(const ThreadContext &) = delete; | 118 ThreadContext(const ThreadContext &) = delete; |
| 90 ThreadContext &operator=(const ThreadContext &) = delete; | 119 ThreadContext &operator=(const ThreadContext &) = delete; |
| 91 | 120 |
| 92 public: | 121 public: |
| 93 ThreadContext() {} | 122 ThreadContext() {} |
| 94 CodeStats StatsFunction; | 123 CodeStats StatsFunction; |
| 95 std::vector<TimerStack> Timers; | 124 TimerList Timers; |
| 96 }; | 125 }; |
| 97 | 126 |
| 98 public: | 127 public: |
| 99 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, | 128 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, |
| 100 VerboseMask Mask, TargetArch Arch, OptLevel Opt, | 129 VerboseMask Mask, TargetArch Arch, OptLevel Opt, |
| 101 IceString TestPrefix, const ClFlags &Flags); | 130 IceString TestPrefix, const ClFlags &Flags); |
| 102 ~GlobalContext(); | 131 ~GlobalContext(); |
| 103 | 132 |
| 104 VerboseMask getVerbose() const { return VMask; } | 133 VerboseMask getVerbose() const { return VMask; } |
| 105 | 134 |
| (...skipping 98 matching lines...) | (...skipping 98 matching lines...) |
| 204 void statsUpdateFills() { | 233 void statsUpdateFills() { |
| 205 if (!ALLOW_DUMP || !getFlags().DumpStats) | 234 if (!ALLOW_DUMP || !getFlags().DumpStats) |
| 206 return; | 235 return; |
| 207 ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateFills(); | 236 ICE_TLS_GET_FIELD(TLS)->StatsFunction.updateFills(); |
| 208 getStatsCumulative()->updateFills(); | 237 getStatsCumulative()->updateFills(); |
| 209 } | 238 } |
| 210 | 239 |
| 211 // These are predefined TimerStackIdT values. | 240 // These are predefined TimerStackIdT values. |
| 212 enum TimerStackKind { TSK_Default = 0, TSK_Funcs, TSK_Num }; | 241 enum TimerStackKind { TSK_Default = 0, TSK_Funcs, TSK_Num }; |
| 213 | 242 |
| 243 // newTimerStackID() creates a new TimerStack in the global space. |
| 244 // It does not affect any TimerStack objects in TLS. |
| 214 TimerStackIdT newTimerStackID(const IceString &Name); | 245 TimerStackIdT newTimerStackID(const IceString &Name); |
| 246 // dumpTimers() dumps the global timer data. As such, one probably |
| 247 // wants to call mergeTimerStacks() as a prerequisite. |
| 248 void dumpTimers(TimerStackIdT StackID = TSK_Default, |
| 249 bool DumpCumulative = true); |
| 250 // The following methods affect only the calling thread's TLS timer |
| 251 // data. |
| 215 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); | 252 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); |
| 216 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 253 void pushTimer(TimerIdT ID, TimerStackIdT StackID); |
| 217 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 254 void popTimer(TimerIdT ID, TimerStackIdT StackID); |
| 218 void resetTimer(TimerStackIdT StackID); | 255 void resetTimer(TimerStackIdT StackID); |
| 219 void setTimerName(TimerStackIdT StackID, const IceString &NewName); | 256 void setTimerName(TimerStackIdT StackID, const IceString &NewName); |
| 220 void dumpTimers(TimerStackIdT StackID = TSK_Default, | |
| 221 bool DumpCumulative = true); | |
| 222 | 257 |
| 223 // Adds a newly parsed and constructed function to the Cfg work | 258 // Adds a newly parsed and constructed function to the Cfg work |
| 224 // queue. Notifies any idle workers that a new function is | 259 // queue. Notifies any idle workers that a new function is |
| 225 // available for translating. May block if the work queue is too | 260 // available for translating. May block if the work queue is too |
| 226 // large, in order to control memory footprint. | 261 // large, in order to control memory footprint. |
| 227 void cfgQueueBlockingPush(Cfg *Func) { CfgQ.blockingPush(Func); } | 262 void cfgQueueBlockingPush(Cfg *Func) { CfgQ.blockingPush(Func); } |
| 228 // Takes a Cfg from the work queue for translating. May block if | 263 // Takes a Cfg from the work queue for translating. May block if |
| 229 // the work queue is currently empty. Returns nullptr if there is | 264 // the work queue is currently empty. Returns nullptr if there is |
| 230 // no more work - the queue is empty and either end() has been | 265 // no more work - the queue is empty and either end() has been |
| 231 // called or the Sequential flag was set. | 266 // called or the Sequential flag was set. |
| 232 Cfg *cfgQueueBlockingPop() { return CfgQ.blockingPop(); } | 267 Cfg *cfgQueueBlockingPop() { return CfgQ.blockingPop(); } |
| 233 // Notifies that no more work will be added to the work queue. | 268 // Notifies that no more work will be added to the work queue. |
| 234 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } | 269 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } |
| 235 | 270 |
| 236 void startWorkerThreads() { | 271 void startWorkerThreads() { |
| 237 size_t NumWorkers = getFlags().NumTranslationThreads; | 272 size_t NumWorkers = getFlags().NumTranslationThreads; |
| 273 auto Timers = getTimers(); |
| 238 for (size_t i = 0; i < NumWorkers; ++i) { | 274 for (size_t i = 0; i < NumWorkers; ++i) { |
| 239 ThreadContext *WorkerTLS = new ThreadContext(); | 275 ThreadContext *WorkerTLS = new ThreadContext(); |
| 276 Timers->initInto(WorkerTLS->Timers); |
| 240 AllThreadContexts.push_back(WorkerTLS); | 277 AllThreadContexts.push_back(WorkerTLS); |
| 241 TranslationThreads.push_back(std::thread( | 278 TranslationThreads.push_back(std::thread( |
| 242 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); | 279 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); |
| 243 } | 280 } |
| 244 if (NumWorkers) { | 281 if (NumWorkers) { |
| 245 // TODO(stichnot): start a new thread for the emitter queue worker. | 282 // TODO(stichnot): start a new thread for the emitter queue worker. |
| 246 } | 283 } |
| 247 } | 284 } |
| 248 | 285 |
| 249 void waitForWorkerThreads() { | 286 void waitForWorkerThreads() { |
| 250 cfgQueueNotifyEnd(); | 287 cfgQueueNotifyEnd(); |
| 251 // TODO(stichnot): call end() on the emitter work queue. | 288 // TODO(stichnot): call end() on the emitter work queue. |
| 252 for (std::thread &Worker : TranslationThreads) { | 289 for (std::thread &Worker : TranslationThreads) { |
| 253 Worker.join(); | 290 Worker.join(); |
| 254 } | 291 } |
| 255 TranslationThreads.clear(); | 292 TranslationThreads.clear(); |
| 256 // TODO(stichnot): join the emitter thread. | 293 // TODO(stichnot): join the emitter thread. |
| 294 if (ALLOW_DUMP) { |
| 295 auto Timers = getTimers(); |
| 296 for (ThreadContext *TLS : AllThreadContexts) |
| 297 Timers->mergeFrom(TLS->Timers); |
| 298 } |
| 257 } | 299 } |
| 258 | 300 |
| 259 // Translation thread startup routine. | 301 // Translation thread startup routine. |
| 260 void translateFunctionsWrapper(ThreadContext *MyTLS) { | 302 void translateFunctionsWrapper(ThreadContext *MyTLS) { |
| 261 ICE_TLS_SET_FIELD(TLS, MyTLS); | 303 ICE_TLS_SET_FIELD(TLS, MyTLS); |
| 262 translateFunctions(); | 304 translateFunctions(); |
| 263 } | 305 } |
| 264 // Translate functions from the Cfg queue until the queue is empty. | 306 // Translate functions from the Cfg queue until the queue is empty. |
| 265 void translateFunctions(); | 307 void translateFunctions(); |
| 266 | 308 |
| (...skipping 27 matching lines...) | (...skipping 27 matching lines...) |
| 294 ErrorCode ErrorStatus; | 336 ErrorCode ErrorStatus; |
| 295 | 337 |
| 296 ICE_CACHELINE_BOUNDARY; | 338 ICE_CACHELINE_BOUNDARY; |
| 297 // Managed by getStatsCumulative() | 339 // Managed by getStatsCumulative() |
| 298 GlobalLockType StatsLock; | 340 GlobalLockType StatsLock; |
| 299 CodeStats StatsCumulative; | 341 CodeStats StatsCumulative; |
| 300 | 342 |
| 301 ICE_CACHELINE_BOUNDARY; | 343 ICE_CACHELINE_BOUNDARY; |
| 302 // Managed by getTimers() | 344 // Managed by getTimers() |
| 303 GlobalLockType TimerLock; | 345 GlobalLockType TimerLock; |
| 304 std::vector<TimerStack> Timers; | 346 TimerList Timers; |
| 305 | 347 |
| 306 ICE_CACHELINE_BOUNDARY; | 348 ICE_CACHELINE_BOUNDARY; |
| 307 // StrLock is a global lock on the dump and emit output streams. | 349 // StrLock is a global lock on the dump and emit output streams. |
| 308 typedef std::mutex StrLockType; | 350 typedef std::mutex StrLockType; |
| 309 StrLockType StrLock; | 351 StrLockType StrLock; |
| 310 Ostream *StrDump; // Stream for dumping / diagnostics | 352 Ostream *StrDump; // Stream for dumping / diagnostics |
| 311 Ostream *StrEmit; // Stream for code emission | 353 Ostream *StrEmit; // Stream for code emission |
| 312 | 354 |
| 313 ICE_CACHELINE_BOUNDARY; | 355 ICE_CACHELINE_BOUNDARY; |
| 314 | 356 |
| 315 const VerboseMask VMask; | 357 const VerboseMask VMask; |
| 316 Intrinsics IntrinsicsInfo; | 358 Intrinsics IntrinsicsInfo; |
| 317 const TargetArch Arch; | 359 const TargetArch Arch; |
| 318 const OptLevel Opt; | 360 const OptLevel Opt; |
| 319 const IceString TestPrefix; | 361 const IceString TestPrefix; |
| 320 const ClFlags &Flags; | 362 const ClFlags &Flags; |
| 321 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. | 363 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. |
| 322 std::unique_ptr<ELFObjectWriter> ObjectWriter; | 364 std::unique_ptr<ELFObjectWriter> ObjectWriter; |
| 323 BoundedProducerConsumerQueue<Cfg> CfgQ; | 365 BoundedProducerConsumerQueue<Cfg> CfgQ; |
| 324 | 366 |
| 325 LockedPtr<ArenaAllocator<>> getAllocator() { | 367 LockedPtr<ArenaAllocator<>> getAllocator() { |
| 326 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); | 368 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); |
| 327 } | 369 } |
| 328 LockedPtr<ConstantPool> getConstPool() { | 370 LockedPtr<ConstantPool> getConstPool() { |
| 329 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); | 371 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); |
| 330 } | 372 } |
| 331 LockedPtr<CodeStats> getStatsCumulative() { | 373 LockedPtr<CodeStats> getStatsCumulative() { |
| 332 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); | 374 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); |
| 333 } | 375 } |
| 334 LockedPtr<std::vector<TimerStack>> getTimers() { | 376 LockedPtr<TimerList> getTimers() { |
| 335 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock); | 377 return LockedPtr<TimerList>(&Timers, &TimerLock); |
| 336 } | 378 } |
| 337 | 379 |
| 338 std::vector<ThreadContext *> AllThreadContexts; | 380 std::vector<ThreadContext *> AllThreadContexts; |
| 339 std::vector<std::thread> TranslationThreads; | 381 std::vector<std::thread> TranslationThreads; |
| 340 // Each thread has its own TLS pointer which is also held in | 382 // Each thread has its own TLS pointer which is also held in |
| 341 // AllThreadContexts. | 383 // AllThreadContexts. |
| 342 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); | 384 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); |
| 343 | 385 |
| 344 // Private helpers for mangleName() | 386 // Private helpers for mangleName() |
| 345 typedef llvm::SmallVector<char, 32> ManglerVector; | 387 typedef llvm::SmallVector<char, 32> ManglerVector; |
| 346 void incrementSubstitutions(ManglerVector &OldName) const; | 388 void incrementSubstitutions(ManglerVector &OldName) const; |
| 347 | 389 |
| 348 public: | 390 public: |
| 349 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } | 391 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } |
| 350 }; | 392 }; |
| 351 | 393 |
| 352 // Helper class to push and pop a timer marker. The constructor | 394 // Helper class to push and pop a timer marker. The constructor |
| 353 // pushes a marker, and the destructor pops it. This is for | 395 // pushes a marker, and the destructor pops it. This is for |
| 354 // convenient timing of regions of code. | 396 // convenient timing of regions of code. |
| 355 class TimerMarker { | 397 class TimerMarker { |
| 356 TimerMarker(const TimerMarker &) = delete; | 398 TimerMarker(const TimerMarker &) = delete; |
| 357 TimerMarker &operator=(const TimerMarker &) = delete; | 399 TimerMarker &operator=(const TimerMarker &) = delete; |
| 358 | 400 |
| 359 public: | 401 public: |
| 360 TimerMarker(TimerIdT ID, GlobalContext *Ctx) | 402 TimerMarker(TimerIdT ID, GlobalContext *Ctx, |
| 361 : ID(ID), Ctx(Ctx), Active(false) { | 403 TimerStackIdT StackID = GlobalContext::TSK_Default) |
| 362 if (ALLOW_DUMP) { | 404 : ID(ID), Ctx(Ctx), StackID(StackID), Active(false) { |
| 363 Active = Ctx->getFlags().SubzeroTimingEnabled; | 405 if (ALLOW_DUMP) |
| 364 if (Active) | 406 push(); |
| 365 Ctx->pushTimer(ID); | |
| 366 } | |
| 367 } | 407 } |
| 368 TimerMarker(TimerIdT ID, const Cfg *Func); | 408 TimerMarker(TimerIdT ID, const Cfg *Func, |
| 409 TimerStackIdT StackID = GlobalContext::TSK_Default) |
| 410 : ID(ID), Ctx(nullptr), StackID(StackID), Active(false) { |
| 411 // Ctx gets set at the beginning of pushCfg(). |
| 412 if (ALLOW_DUMP) |
| 413 pushCfg(Func); |
| 414 } |
| 369 | 415 |
| 370 ~TimerMarker() { | 416 ~TimerMarker() { |
| 371 if (ALLOW_DUMP && Active) | 417 if (ALLOW_DUMP && Active) |
| 372 Ctx->popTimer(ID); | 418 Ctx->popTimer(ID, StackID); |
| 373 } | 419 } |
| 374 | 420 |
| 375 private: | 421 private: |
| 376 TimerIdT ID; | 422 void push(); |
| 377 GlobalContext *const Ctx; | 423 void pushCfg(const Cfg *Func); |
| 424 const TimerIdT ID; |
| 425 GlobalContext *Ctx; |
| 426 const TimerStackIdT StackID; |
| 378 bool Active; | 427 bool Active; |
| 379 }; | 428 }; |
| 380 | 429 |
| 381 // Helper class for locking the streams and then automatically | 430 // Helper class for locking the streams and then automatically |
| 382 // unlocking them. | 431 // unlocking them. |
| 383 class OstreamLocker { | 432 class OstreamLocker { |
| 384 private: | 433 private: |
| 385 OstreamLocker() = delete; | 434 OstreamLocker() = delete; |
| 386 OstreamLocker(const OstreamLocker &) = delete; | 435 OstreamLocker(const OstreamLocker &) = delete; |
| 387 OstreamLocker &operator=(const OstreamLocker &) = delete; | 436 OstreamLocker &operator=(const OstreamLocker &) = delete; |
| 388 | 437 |
| 389 public: | 438 public: |
| 390 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } | 439 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } |
| 391 ~OstreamLocker() { Ctx->unlockStr(); } | 440 ~OstreamLocker() { Ctx->unlockStr(); } |
| 392 | 441 |
| 393 private: | 442 private: |
| 394 GlobalContext *const Ctx; | 443 GlobalContext *const Ctx; |
| 395 }; | 444 }; |
| 396 | 445 |
| 397 } // end of namespace Ice | 446 } // end of namespace Ice |
| 398 | 447 |
| 399 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H | 448 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H |
| OLD | NEW |