| OLD | NEW |
| 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// | 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file declares aspects of the compilation that persist across | 10 // This file declares aspects of the compilation that persist across |
| 11 // multiple functions. | 11 // multiple functions. |
| 12 // | 12 // |
| 13 //===----------------------------------------------------------------------===// | 13 //===----------------------------------------------------------------------===// |
| 14 | 14 |
| 15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H | 15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H |
| 16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H | 16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H |
| 17 | 17 |
| 18 #include <memory> | |
| 19 #include <mutex> | 18 #include <mutex> |
| 19 #include <thread> |
| 20 | 20 |
| 21 #include "IceDefs.h" | 21 #include "IceDefs.h" |
| 22 #include "IceClFlags.h" | 22 #include "IceClFlags.h" |
| 23 #include "IceIntrinsics.h" | 23 #include "IceIntrinsics.h" |
| 24 #include "IceRNG.h" | 24 #include "IceRNG.h" |
| 25 #include "IceTimerTree.h" | 25 #include "IceTimerTree.h" |
| 26 #include "IceTypes.h" | 26 #include "IceTypes.h" |
| 27 #include "IceUtils.h" |
| 27 | 28 |
| 28 namespace Ice { | 29 namespace Ice { |
| 29 | 30 |
| 30 class ClFlags; | 31 class ClFlags; |
| 31 class ConstantPool; | 32 class ConstantPool; |
| 32 class FuncSigType; | 33 class FuncSigType; |
| 33 | 34 |
| 34 typedef std::mutex GlobalLockType; | |
| 35 | |
| 36 // LockedPtr is a way to provide automatically locked access to some object. | 35 // LockedPtr is a way to provide automatically locked access to some object. |
| 37 template <typename T> class LockedPtr { | 36 template <typename T> class LockedPtr { |
| 38 LockedPtr() = delete; | 37 LockedPtr() = delete; |
| 39 LockedPtr(const LockedPtr &) = delete; | 38 LockedPtr(const LockedPtr &) = delete; |
| 40 LockedPtr &operator=(const LockedPtr &) = delete; | 39 LockedPtr &operator=(const LockedPtr &) = delete; |
| 41 | 40 |
| 42 public: | 41 public: |
| 43 LockedPtr(T *Value, GlobalLockType *Lock) : Value(Value), Lock(Lock) { | 42 LockedPtr(T *Value, GlobalLockType *Lock) : Value(Value), Lock(Lock) { |
| 44 Lock->lock(); | 43 Lock->lock(); |
| 45 } | 44 } |
| (...skipping 49 matching lines...) |
| 95 CodeStats StatsFunction; | 94 CodeStats StatsFunction; |
| 96 std::vector<TimerStack> Timers; | 95 std::vector<TimerStack> Timers; |
| 97 }; | 96 }; |
| 98 | 97 |
| 99 public: | 98 public: |
| 100 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, | 99 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, |
| 101 VerboseMask Mask, TargetArch Arch, OptLevel Opt, | 100 VerboseMask Mask, TargetArch Arch, OptLevel Opt, |
| 102 IceString TestPrefix, const ClFlags &Flags); | 101 IceString TestPrefix, const ClFlags &Flags); |
| 103 ~GlobalContext(); | 102 ~GlobalContext(); |
| 104 | 103 |
| 105 // Returns true if any of the specified options in the verbose mask | |
| 106 // are set. If the argument is omitted, it checks if any verbose | |
| 107 // options at all are set. | |
| 108 VerboseMask getVerbose() const { return VMask; } | 104 VerboseMask getVerbose() const { return VMask; } |
| 109 bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; } | |
| 110 void setVerbose(VerboseMask Mask) { VMask = Mask; } | |
| 111 void addVerbose(VerboseMask Mask) { VMask |= Mask; } | |
| 112 void subVerbose(VerboseMask Mask) { VMask &= ~Mask; } | |
| 113 | 105 |
| 114 // The dump and emit streams need to be used by only one thread at a | 106 // The dump and emit streams need to be used by only one thread at a |
| 115 // time. This is done by exclusively reserving the streams via | 107 // time. This is done by exclusively reserving the streams via |
| 116 // lockStr() and unlockStr(). The OstreamLocker class can be used | 108 // lockStr() and unlockStr(). The OstreamLocker class can be used |
| 117 // to conveniently manage this. | 109 // to conveniently manage this. |
| 118 // | 110 // |
| 119 // The model is that a thread grabs the stream lock, then does an | 111 // The model is that a thread grabs the stream lock, then does an |
| 120 // arbitrary amount of work during which far-away callees may grab | 112 // arbitrary amount of work during which far-away callees may grab |
| 121 // the stream and do something with it, and finally the thread | 113 // the stream and do something with it, and finally the thread |
| 122 // releases the stream lock. This allows large chunks of output to | 114 // releases the stream lock. This allows large chunks of output to |
| 123 // be dumped or emitted without risking interleaving from multiple | 115 // be dumped or emitted without risking interleaving from multiple |
| 124 // threads. | 116 // threads. |
| 125 void lockStr() { StrLock.lock(); } | 117 void lockStr() { StrLock.lock(); } |
| 126 void unlockStr() { StrLock.unlock(); } | 118 void unlockStr() { StrLock.unlock(); } |
| 127 Ostream &getStrDump() { return *StrDump; } | 119 Ostream &getStrDump() { return *StrDump; } |
| 128 Ostream &getStrEmit() { return *StrEmit; } | 120 Ostream &getStrEmit() { return *StrEmit; } |
| 129 | 121 |
| 130 TargetArch getTargetArch() const { return Arch; } | 122 TargetArch getTargetArch() const { return Arch; } |
| 131 OptLevel getOptLevel() const { return Opt; } | 123 OptLevel getOptLevel() const { return Opt; } |
| 124 LockedPtr<ErrorCode> getErrorStatus() { |
| 125 return LockedPtr<ErrorCode>(&ErrorStatus, &ErrorStatusLock); |
| 126 } |
| 132 | 127 |
| 133 // When emitting assembly, we allow a string to be prepended to | 128 // When emitting assembly, we allow a string to be prepended to |
| 134 // names of translated functions. This makes it easier to create an | 129 // names of translated functions. This makes it easier to create an |
| 135 // execution test against a reference translator like llc, with both | 130 // execution test against a reference translator like llc, with both |
| 136 // translators using the same bitcode as input. | 131 // translators using the same bitcode as input. |
| 137 IceString getTestPrefix() const { return TestPrefix; } | 132 IceString getTestPrefix() const { return TestPrefix; } |
| 138 IceString mangleName(const IceString &Name) const; | 133 IceString mangleName(const IceString &Name) const; |
| 139 | 134 |
| 140 // Manage Constants. | 135 // Manage Constants. |
| 141 // getConstant*() functions are not const because they might add | 136 // getConstant*() functions are not const because they might add |
| (...skipping 80 matching lines...) |
| 222 | 217 |
| 223 TimerStackIdT newTimerStackID(const IceString &Name); | 218 TimerStackIdT newTimerStackID(const IceString &Name); |
| 224 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); | 219 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); |
| 225 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 220 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); |
| 226 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 221 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); |
| 227 void resetTimer(TimerStackIdT StackID); | 222 void resetTimer(TimerStackIdT StackID); |
| 228 void setTimerName(TimerStackIdT StackID, const IceString &NewName); | 223 void setTimerName(TimerStackIdT StackID, const IceString &NewName); |
| 229 void dumpTimers(TimerStackIdT StackID = TSK_Default, | 224 void dumpTimers(TimerStackIdT StackID = TSK_Default, |
| 230 bool DumpCumulative = true); | 225 bool DumpCumulative = true); |
| 231 | 226 |
| 227 // Adds a newly parsed and constructed function to the Cfg work |
| 228 // queue. Notifies any idle workers that a new function is |
| 229 // available for translating. May block if the work queue is too |
| 230 // large, in order to control memory footprint. |
| 231 void cfgQueueBlockingPush(Cfg *Func) { CfgQ.blockingPush(Func); } |
| 232 // Takes a Cfg from the work queue for translating. May block if |
| 233 // the work queue is currently empty. Returns nullptr if there is |
| 234 // no more work - the queue is empty and either cfgQueueNotifyEnd() |
| 235 // has been called or the Sequential flag was set. |
| 236 Cfg *cfgQueueBlockingPop() { return CfgQ.blockingPop(); } |
| 237 // Notifies that no more work will be added to the work queue. |
| 238 void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); } |
| 239 |
| 240 void startWorkerThreads() { |
| 241 size_t NumWorkers = getFlags().NumTranslationThreads; |
| 242 for (size_t i = 0; i < NumWorkers; ++i) { |
| 243 ThreadContext *WorkerTLS = new ThreadContext(); |
| 244 AllThreadContexts.push_back(WorkerTLS); |
| 245 TranslationThreads.push_back(std::thread( |
| 246 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); |
| 247 } |
| 248 if (NumWorkers) { |
| 249 // TODO(stichnot): start a new thread for the emitter queue worker. |
| 250 } |
| 251 } |
| 252 |
| 253 void waitForWorkerThreads() { |
| 254 cfgQueueNotifyEnd(); |
| 255 // TODO(stichnot): call end() on the emitter work queue. |
| 256 for (std::thread &Worker : TranslationThreads) { |
| 257 Worker.join(); |
| 258 } |
| 259 TranslationThreads.clear(); |
| 260 // TODO(stichnot): join the emitter thread. |
| 261 } |
| 262 |
| 263 // Translation thread startup routine. |
| 264 void translateFunctionsWrapper(ThreadContext *MyTLS) { |
| 265 ICE_TLS_SET_FIELD(TLS, MyTLS); |
| 266 translateFunctions(); |
| 267 } |
| 268 // Translate functions from the Cfg queue until the queue is empty. |
| 269 void translateFunctions(); |
| 270 |
| 271 // Utility function to match a symbol name against a match string. |
| 272 // This is used in a few cases where we want to take some action on |
| 273 // a particular function or symbol based on a command-line argument, |
| 274 // such as changing the verbose level for a particular function. An |
| 275 // empty Match argument means match everything. Returns true if |
| 276 // there is a match. |
| 277 static bool matchSymbolName(const IceString &SymbolName, |
| 278 const IceString &Match) { |
| 279 return Match.empty() || Match == SymbolName; |
| 280 } |
| 281 |
| 232 private: | 282 private: |
| 233 // Try to make sure the mutexes are allocated on separate cache | 283 // Try to ensure mutexes are allocated on separate cache lines. |
| 234 // lines, assuming the maximum cache line size is 64. | |
| 235 const static size_t MaxCacheLineSize = 64; | |
| 236 alignas(MaxCacheLineSize) GlobalLockType AllocLock; | |
| 237 alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock; | |
| 238 alignas(MaxCacheLineSize) GlobalLockType StatsLock; | |
| 239 alignas(MaxCacheLineSize) GlobalLockType TimerLock; | |
| 240 | 284 |
| 285 ICE_CACHELINE_BOUNDARY; |
| 286 // Managed by getAllocator() |
| 287 GlobalLockType AllocLock; |
| 288 ArenaAllocator<> Allocator; |
| 289 |
| 290 ICE_CACHELINE_BOUNDARY; |
| 291 // Managed by getConstantPool() |
| 292 GlobalLockType ConstPoolLock; |
| 293 std::unique_ptr<ConstantPool> ConstPool; |
| 294 |
| 295 ICE_CACHELINE_BOUNDARY; |
| 296 // Managed by getErrorStatus() |
| 297 GlobalLockType ErrorStatusLock; |
| 298 ErrorCode ErrorStatus; |
| 299 |
| 300 ICE_CACHELINE_BOUNDARY; |
| 301 // Managed by getStatsCumulative() |
| 302 GlobalLockType StatsLock; |
| 303 CodeStats StatsCumulative; |
| 304 |
| 305 ICE_CACHELINE_BOUNDARY; |
| 306 // Managed by getTimers() |
| 307 GlobalLockType TimerLock; |
| 308 std::vector<TimerStack> Timers; |
| 309 |
| 310 ICE_CACHELINE_BOUNDARY; |
| 241 // StrLock is a global lock on the dump and emit output streams. | 311 // StrLock is a global lock on the dump and emit output streams. |
| 242 typedef std::mutex StrLockType; | 312 typedef std::mutex StrLockType; |
| 243 StrLockType StrLock; | 313 StrLockType StrLock; |
| 244 | |
| 245 Ostream *StrDump; // Stream for dumping / diagnostics | 314 Ostream *StrDump; // Stream for dumping / diagnostics |
| 246 Ostream *StrEmit; // Stream for code emission | 315 Ostream *StrEmit; // Stream for code emission |
| 247 | 316 |
| 248 ArenaAllocator<> Allocator; | 317 ICE_CACHELINE_BOUNDARY; |
| 249 VerboseMask VMask; | 318 |
| 250 std::unique_ptr<ConstantPool> ConstPool; | 319 const VerboseMask VMask; |
| 251 Intrinsics IntrinsicsInfo; | 320 Intrinsics IntrinsicsInfo; |
| 252 const TargetArch Arch; | 321 const TargetArch Arch; |
| 253 const OptLevel Opt; | 322 const OptLevel Opt; |
| 254 const IceString TestPrefix; | 323 const IceString TestPrefix; |
| 255 const ClFlags &Flags; | 324 const ClFlags &Flags; |
| 256 RandomNumberGenerator RNG; | 325 RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg. |
| 257 std::unique_ptr<ELFObjectWriter> ObjectWriter; | 326 std::unique_ptr<ELFObjectWriter> ObjectWriter; |
| 258 CodeStats StatsCumulative; | 327 BoundedProducerConsumerQueue<Cfg> CfgQ; |
| 259 std::vector<TimerStack> Timers; | |
| 260 | 328 |
| 261 LockedPtr<ArenaAllocator<>> getAllocator() { | 329 LockedPtr<ArenaAllocator<>> getAllocator() { |
| 262 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); | 330 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); |
| 263 } | 331 } |
| 264 LockedPtr<ConstantPool> getConstPool() { | 332 LockedPtr<ConstantPool> getConstPool() { |
| 265 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); | 333 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); |
| 266 } | 334 } |
| 267 LockedPtr<CodeStats> getStatsCumulative() { | 335 LockedPtr<CodeStats> getStatsCumulative() { |
| 268 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); | 336 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); |
| 269 } | 337 } |
| 270 LockedPtr<std::vector<TimerStack>> getTimers() { | 338 LockedPtr<std::vector<TimerStack>> getTimers() { |
| 271 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock); | 339 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock); |
| 272 } | 340 } |
| 273 | 341 |
| 274 std::vector<ThreadContext *> AllThreadContexts; | 342 std::vector<ThreadContext *> AllThreadContexts; |
| 343 std::vector<std::thread> TranslationThreads; |
| 275 // Each thread has its own TLS pointer which is also held in | 344 // Each thread has its own TLS pointer which is also held in |
| 276 // AllThreadContexts. | 345 // AllThreadContexts. |
| 277 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); | 346 ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS); |
| 278 | 347 |
| 279 // Private helpers for mangleName() | 348 // Private helpers for mangleName() |
| 280 typedef llvm::SmallVector<char, 32> ManglerVector; | 349 typedef llvm::SmallVector<char, 32> ManglerVector; |
| 281 void incrementSubstitutions(ManglerVector &OldName) const; | 350 void incrementSubstitutions(ManglerVector &OldName) const; |
| 282 | 351 |
| 283 public: | 352 public: |
| 284 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } | 353 static void TlsInit() { ICE_TLS_INIT_FIELD(TLS); } |
| (...skipping 40 matching lines...) |
| 325 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } | 394 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } |
| 326 ~OstreamLocker() { Ctx->unlockStr(); } | 395 ~OstreamLocker() { Ctx->unlockStr(); } |
| 327 | 396 |
| 328 private: | 397 private: |
| 329 GlobalContext *const Ctx; | 398 GlobalContext *const Ctx; |
| 330 }; | 399 }; |
| 331 | 400 |
| 332 } // end of namespace Ice | 401 } // end of namespace Ice |
| 333 | 402 |
| 334 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H | 403 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H |
| OLD | NEW |