Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// | 1 //===- subzero/src/IceGlobalContext.h - Global context defs -----*- C++ -*-===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file declares aspects of the compilation that persist across | 10 // This file declares aspects of the compilation that persist across |
| 11 // multiple functions. | 11 // multiple functions. |
| 12 // | 12 // |
| 13 //===----------------------------------------------------------------------===// | 13 //===----------------------------------------------------------------------===// |
| 14 | 14 |
| 15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H | 15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H |
| 16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H | 16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H |
| 17 | 17 |
| 18 #include <memory> | |
| 19 #include <mutex> | 18 #include <mutex> |
| 19 #include <queue> | |
| 20 #include <thread> | |
| 20 | 21 |
| 21 #include "IceDefs.h" | 22 #include "IceDefs.h" |
| 22 #include "IceClFlags.h" | 23 #include "IceClFlags.h" |
| 23 #include "IceIntrinsics.h" | 24 #include "IceIntrinsics.h" |
| 24 #include "IceRNG.h" | 25 #include "IceRNG.h" |
| 25 #include "IceTimerTree.h" | 26 #include "IceTimerTree.h" |
| 26 #include "IceTypes.h" | 27 #include "IceTypes.h" |
| 27 | 28 |
| 28 namespace Ice { | 29 namespace Ice { |
| 29 | 30 |
| (...skipping 59 matching lines...) | |
| 89 class ThreadContext { | 90 class ThreadContext { |
| 90 ThreadContext(const ThreadContext &) = delete; | 91 ThreadContext(const ThreadContext &) = delete; |
| 91 ThreadContext &operator=(const ThreadContext &) = delete; | 92 ThreadContext &operator=(const ThreadContext &) = delete; |
| 92 | 93 |
| 93 public: | 94 public: |
| 94 ThreadContext() {} | 95 ThreadContext() {} |
| 95 CodeStats StatsFunction; | 96 CodeStats StatsFunction; |
| 96 std::vector<TimerStack> Timers; | 97 std::vector<TimerStack> Timers; |
| 97 }; | 98 }; |
| 98 | 99 |
| 100 // CfgQueue is the translation work queue. It allows multiple | |
| 101 // producers and multiple consumers (though currently only a single | |
| 102 // producer is used). The producer adds entries using add(), and | |
| 103 // may block if the queue is "full" to control Cfg memory footprint. | |
| 104 // The producer uses end() to indicate that no more entries will be | |
| 105 // added. The consumer removes an item using get(), which will | |
| 106 // return nullptr if end() has been called and the queue is empty. | |
| 107 // | |
| 108 // The MaxSize ctor arg controls the maximum size the queue can grow | |
| 109 // to. The Sequential arg indicates purely sequential execution in | |
| 110 // which the single thread should never wait(). | |
|
JF
2015/01/23 23:01:47
Shouldn't the locks be entirely bypassed when sequ[ential] [… comment truncated]
Jim Stichnoth
2015/01/25 07:29:38
Nice, done.
| |
| 111 // | |
| 112 // Two condition variables are used in the implementation. | |
| 113 // GrewOrEnded signals waiting workers that the producer has changed | |
| 114 // the state of the queue. Shrunk signals a blocked producer that a | |
| 115 // consumer has changed the state of the queue. | |
| 116 class CfgQueue { | |
| 117 public: | |
| 118 CfgQueue(size_t MaxSize, bool Sequential) | |
| 119 : IsEnded(false), MaxSize(MaxSize), Sequential(Sequential) {} | |
|
JF
2015/01/23 23:01:47
Should the CfgQueue assert that the queue is empty [… comment truncated]
Jim Stichnoth
2015/01/25 07:29:38
I don't see how the WorkQueue could ever be non-em[pty] [… comment truncated]
| |
| 120 void add(Cfg *Func) { | |
| 121 std::unique_lock<GlobalLockType> L(Lock); | |
| 122 // If the work queue is already "full", wait for a consumer to | |
| 123 // grab an element and shrink the queue. | |
| 124 while (!Sequential && WorkQueue.size() >= MaxSize) { | |
| 125 Shrunk.wait(L); | |
| 126 } | |
| 127 WorkQueue.push(Func); | |
| 128 L.unlock(); | |
| 129 GrewOrEnded.notify_one(); | |
| 130 } | |
| 131 Cfg *get() { | |
| 132 std::unique_lock<GlobalLockType> L(Lock); | |
| 133 while (!IsEnded || !WorkQueue.empty()) { | |
| 134 if (!WorkQueue.empty()) { | |
| 135 Cfg *Func = WorkQueue.front(); | |
| 136 WorkQueue.pop(); | |
| 137 L.unlock(); | |
| 138 Shrunk.notify_one(); | |
| 139 return Func; | |
| 140 } | |
| 141 // If the work queue is empty, and this is pure sequential | |
| 142 // execution, then return nullptr. | |
| 143 if (Sequential) | |
| 144 return nullptr; | |
| 145 GrewOrEnded.wait(L); | |
| 146 } | |
| 147 return nullptr; | |
| 148 } | |
| 149 void end() { | |
| 150 std::unique_lock<GlobalLockType> L(Lock); | |
| 151 IsEnded = true; | |
| 152 L.unlock(); | |
| 153 GrewOrEnded.notify_all(); | |
| 154 } | |
| 155 | |
| 156 private: | |
| 157 // WorkQueue and Lock are read/written by all. | |
| 158 // TODO(stichnot): Since WorkQueue has an enforced maximum size, | |
| 159 // implement it on top of something like std::array to minimize | |
| 160 // contention. | |
| 161 alignas(MaxCacheLineSize) std::queue<Cfg *> WorkQueue; | |
| 162 // Lock guards access to WorkQueue and IsEnded. | |
| 163 alignas(MaxCacheLineSize) GlobalLockType Lock; | |
| 164 | |
| 165 // IsEnded and GrewOrEnded are written by the producer and read by | |
| 166 // the consumers. | |
| 167 alignas(MaxCacheLineSize) bool IsEnded; | |
|
JF
2015/01/23 22:22:11
Move to end with Sequential: it's only written to [… comment truncated]
Jim Stichnoth
2015/01/25 07:29:38
Done.
| |
| 168 // GrewOrEnded is notified (by the producer) when something is | |
| 169 // added to the queue, in case consumers are waiting for a | |
| 170 // non-empty queue. | |
| 171 std::condition_variable GrewOrEnded; | |
| 172 | |
| 173 // Shrunk is notified (by the consumer) when something is removed | |
| 174 // from the queue, in case the producer is waiting for the queue | |
| 175 // to drop below maximum capacity. It is written by the consumers | |
| 176 // and read by the producer. | |
| 177 alignas(MaxCacheLineSize) std::condition_variable Shrunk; | |
| 178 | |
| 179 // MaxSize and Sequential are read by all and written by none. | |
| 180 alignas(MaxCacheLineSize) const size_t MaxSize; | |
| 181 const bool Sequential; | |
| 182 }; | |
| 183 | |
| 99 public: | 184 public: |
| 100 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, | 185 GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer, |
| 101 VerboseMask Mask, TargetArch Arch, OptLevel Opt, | 186 VerboseMask Mask, TargetArch Arch, OptLevel Opt, |
| 102 IceString TestPrefix, const ClFlags &Flags); | 187 IceString TestPrefix, const ClFlags &Flags); |
| 103 ~GlobalContext(); | 188 ~GlobalContext(); |
| 104 | 189 |
| 105 // Returns true if any of the specified options in the verbose mask | |
| 106 // are set. If the argument is omitted, it checks if any verbose | |
| 107 // options at all are set. | |
| 108 VerboseMask getVerbose() const { return VMask; } | 190 VerboseMask getVerbose() const { return VMask; } |
| 109 bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; } | |
| 110 void setVerbose(VerboseMask Mask) { VMask = Mask; } | |
| 111 void addVerbose(VerboseMask Mask) { VMask |= Mask; } | |
| 112 void subVerbose(VerboseMask Mask) { VMask &= ~Mask; } | |
| 113 | 191 |
| 114 // The dump and emit streams need to be used by only one thread at a | 192 // The dump and emit streams need to be used by only one thread at a |
| 115 // time. This is done by exclusively reserving the streams via | 193 // time. This is done by exclusively reserving the streams via |
| 116 // lockStr() and unlockStr(). The OstreamLocker class can be used | 194 // lockStr() and unlockStr(). The OstreamLocker class can be used |
| 117 // to conveniently manage this. | 195 // to conveniently manage this. |
| 118 // | 196 // |
| 119 // The model is that a thread grabs the stream lock, then does an | 197 // The model is that a thread grabs the stream lock, then does an |
| 120 // arbitrary amount of work during which far-away callees may grab | 198 // arbitrary amount of work during which far-away callees may grab |
| 121 // the stream and do something with it, and finally the thread | 199 // the stream and do something with it, and finally the thread |
| 122 // releases the stream lock. This allows large chunks of output to | 200 // releases the stream lock. This allows large chunks of output to |
| 123 // be dumped or emitted without risking interleaving from multiple | 201 // be dumped or emitted without risking interleaving from multiple |
| 124 // threads. | 202 // threads. |
| 125 void lockStr() { StrLock.lock(); } | 203 void lockStr() { StrLock.lock(); } |
| 126 void unlockStr() { StrLock.unlock(); } | 204 void unlockStr() { StrLock.unlock(); } |
| 127 Ostream &getStrDump() { return *StrDump; } | 205 Ostream &getStrDump() { return *StrDump; } |
| 128 Ostream &getStrEmit() { return *StrEmit; } | 206 Ostream &getStrEmit() { return *StrEmit; } |
| 129 | 207 |
| 130 TargetArch getTargetArch() const { return Arch; } | 208 TargetArch getTargetArch() const { return Arch; } |
| 131 OptLevel getOptLevel() const { return Opt; } | 209 OptLevel getOptLevel() const { return Opt; } |
| 210 std::error_code getErrorStatus() const { return ErrorStatus; } | |
| 132 | 211 |
| 133 // When emitting assembly, we allow a string to be prepended to | 212 // When emitting assembly, we allow a string to be prepended to |
| 134 // names of translated functions. This makes it easier to create an | 213 // names of translated functions. This makes it easier to create an |
| 135 // execution test against a reference translator like llc, with both | 214 // execution test against a reference translator like llc, with both |
| 136 // translators using the same bitcode as input. | 215 // translators using the same bitcode as input. |
| 137 IceString getTestPrefix() const { return TestPrefix; } | 216 IceString getTestPrefix() const { return TestPrefix; } |
| 138 IceString mangleName(const IceString &Name) const; | 217 IceString mangleName(const IceString &Name) const; |
| 139 | 218 |
| 140 // Manage Constants. | 219 // Manage Constants. |
| 141 // getConstant*() functions are not const because they might add | 220 // getConstant*() functions are not const because they might add |
| (...skipping 80 matching lines...) | |
| 222 | 301 |
| 223 TimerStackIdT newTimerStackID(const IceString &Name); | 302 TimerStackIdT newTimerStackID(const IceString &Name); |
| 224 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); | 303 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name); |
| 225 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 304 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); |
| 226 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); | 305 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default); |
| 227 void resetTimer(TimerStackIdT StackID); | 306 void resetTimer(TimerStackIdT StackID); |
| 228 void setTimerName(TimerStackIdT StackID, const IceString &NewName); | 307 void setTimerName(TimerStackIdT StackID, const IceString &NewName); |
| 229 void dumpTimers(TimerStackIdT StackID = TSK_Default, | 308 void dumpTimers(TimerStackIdT StackID = TSK_Default, |
| 230 bool DumpCumulative = true); | 309 bool DumpCumulative = true); |
| 231 | 310 |
| 311 // Adds a newly parsed and constructed function to the Cfg work | |
| 312 // queue. Notifies any idle workers that a new function is | |
| 313 // available for translating. May block if the work queue is too | |
| 314 // large, in order to control memory footprint. | |
| 315 void cfgQueueAdd(Cfg *Func) { CfgQ.add(Func); } | |
| 316 // Takes a Cfg from the work queue for translating. May block if | |
| 317 // the work queue is currently empty. Returns nullptr if there is | |
| 318 // no more work - the queue is empty and either end() has been | |
| 319 // called or the Sequential flag was set. | |
| 320 Cfg *cfgQueueGet() { return CfgQ.get(); } | |
| 321 // Notifies that no more work will be added to the work queue. | |
| 322 void cfgQueueEnd() { CfgQ.end(); } | |
| 323 | |
| 324 void startWorkerThreads() { | |
| 325 size_t NumWorkers = getFlags().NumTranslationThreads; | |
| 326 for (size_t i = 0; i < NumWorkers; ++i) { | |
| 327 ThreadContext *WorkerTLS = new ThreadContext(); | |
| 328 AllThreadContexts.push_back(WorkerTLS); | |
| 329 TranslationThreads.push_back(std::thread( | |
| 330 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); | |
| 331 } | |
| 332 if (NumWorkers) { | |
| 333 // TODO(stichnot): start a new thread for the emitter queue worker. | |
| 334 } | |
| 335 } | |
| 336 | |
| 337 void waitForWorkerThreads() { | |
| 338 cfgQueueEnd(); | |
| 339 // TODO(stichnot): call end() on the emitter work queue. | |
| 340 for (std::thread &Worker : TranslationThreads) { | |
| 341 Worker.join(); | |
| 342 } | |
| 343 TranslationThreads.clear(); | |
| 344 // TODO(stichnot): join the emitter thread. | |
| 345 } | |
| 346 | |
| 347 // Translation thread startup routine. | |
| 348 void translateFunctionsWrapper(ThreadContext *MyTLS) { | |
| 349 TLS = MyTLS; | |
| 350 translateFunctions(); | |
| 351 } | |
| 352 // Translate functions from the Cfg queue until the queue is empty. | |
| 353 void translateFunctions(); | |
| 354 | |
| 355 // Utility function to match a symbol name against a match string. | |
| 356 // This is used in a few cases where we want to take some action on | |
| 357 // a particular function or symbol based on a command-line argument, | |
| 358 // such as changing the verbose level for a particular function. An | |
| 359 // empty Match argument means match everything. Returns true if | |
| 360 // there is a match. | |
| 361 static bool matchSymbolName(const IceString &SymbolName, | |
| 362 const IceString &Match) { | |
| 363 return Match.empty() || Match == SymbolName; | |
| 364 } | |
| 365 | |
| 232 private: | 366 private: |
| 233 // Try to make sure the mutexes are allocated on separate cache | 367 // Try to ensure mutexes are allocated on separate cache lines. |
| 234 // lines, assuming the maximum cache line size is 64. | |
| 235 const static size_t MaxCacheLineSize = 64; | |
| 236 alignas(MaxCacheLineSize) GlobalLockType AllocLock; | 368 alignas(MaxCacheLineSize) GlobalLockType AllocLock; |
| 237 alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock; | 369 alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock; |
| 238 alignas(MaxCacheLineSize) GlobalLockType StatsLock; | 370 alignas(MaxCacheLineSize) GlobalLockType StatsLock; |
| 239 alignas(MaxCacheLineSize) GlobalLockType TimerLock; | 371 alignas(MaxCacheLineSize) GlobalLockType TimerLock; |
| 240 | 372 |
| 241 // StrLock is a global lock on the dump and emit output streams. | 373 // StrLock is a global lock on the dump and emit output streams. |
| 242 typedef std::mutex StrLockType; | 374 typedef std::mutex StrLockType; |
| 243 StrLockType StrLock; | 375 StrLockType StrLock; |
| 244 | 376 |
| 245 Ostream *StrDump; // Stream for dumping / diagnostics | 377 Ostream *StrDump; // Stream for dumping / diagnostics |
| 246 Ostream *StrEmit; // Stream for code emission | 378 Ostream *StrEmit; // Stream for code emission |
| 247 | 379 |
| 248 ArenaAllocator<> Allocator; | 380 ArenaAllocator<> Allocator; |
| 249 VerboseMask VMask; | 381 VerboseMask VMask; |
| 250 std::unique_ptr<ConstantPool> ConstPool; | 382 std::unique_ptr<ConstantPool> ConstPool; |
| 251 Intrinsics IntrinsicsInfo; | 383 Intrinsics IntrinsicsInfo; |
| 252 const TargetArch Arch; | 384 const TargetArch Arch; |
| 253 const OptLevel Opt; | 385 const OptLevel Opt; |
| 254 const IceString TestPrefix; | 386 const IceString TestPrefix; |
| 255 const ClFlags &Flags; | 387 const ClFlags &Flags; |
| 256 RandomNumberGenerator RNG; | 388 RandomNumberGenerator RNG; |
| 257 std::unique_ptr<ELFObjectWriter> ObjectWriter; | 389 std::unique_ptr<ELFObjectWriter> ObjectWriter; |
| 258 CodeStats StatsCumulative; | 390 CodeStats StatsCumulative; |
| 259 std::vector<TimerStack> Timers; | 391 std::vector<TimerStack> Timers; |
| 392 CfgQueue CfgQ; | |
| 393 std::error_code ErrorStatus; | |
| 260 | 394 |
| 261 LockedPtr<ArenaAllocator<>> getAllocator() { | 395 LockedPtr<ArenaAllocator<>> getAllocator() { |
| 262 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); | 396 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock); |
| 263 } | 397 } |
| 264 LockedPtr<ConstantPool> getConstPool() { | 398 LockedPtr<ConstantPool> getConstPool() { |
| 265 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); | 399 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock); |
| 266 } | 400 } |
| 267 LockedPtr<CodeStats> getStatsCumulative() { | 401 LockedPtr<CodeStats> getStatsCumulative() { |
| 268 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); | 402 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock); |
| 269 } | 403 } |
| 270 LockedPtr<std::vector<TimerStack>> getTimers() { | 404 LockedPtr<std::vector<TimerStack>> getTimers() { |
| 271 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock); | 405 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock); |
| 272 } | 406 } |
| 273 | 407 |
| 274 std::vector<ThreadContext *> AllThreadContexts; | 408 std::vector<ThreadContext *> AllThreadContexts; |
| 409 std::vector<std::thread> TranslationThreads; | |
| 275 // Each thread has its own TLS pointer which is also held in | 410 // Each thread has its own TLS pointer which is also held in |
| 276 // AllThreadContexts. | 411 // AllThreadContexts. |
| 277 ICE_ATTRIBUTE_TLS static ThreadContext *TLS; | 412 ICE_ATTRIBUTE_TLS static ThreadContext *TLS; |
| 278 | 413 |
| 279 // Private helpers for mangleName() | 414 // Private helpers for mangleName() |
| 280 typedef llvm::SmallVector<char, 32> ManglerVector; | 415 typedef llvm::SmallVector<char, 32> ManglerVector; |
| 281 void incrementSubstitutions(ManglerVector &OldName) const; | 416 void incrementSubstitutions(ManglerVector &OldName) const; |
| 282 }; | 417 }; |
| 283 | 418 |
| 284 // Helper class to push and pop a timer marker. The constructor | 419 // Helper class to push and pop a timer marker. The constructor |
| (...skipping 37 matching lines...) | |
| 322 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } | 457 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); } |
| 323 ~OstreamLocker() { Ctx->unlockStr(); } | 458 ~OstreamLocker() { Ctx->unlockStr(); } |
| 324 | 459 |
| 325 private: | 460 private: |
| 326 GlobalContext *const Ctx; | 461 GlobalContext *const Ctx; |
| 327 }; | 462 }; |
| 328 | 463 |
| 329 } // end of namespace Ice | 464 } // end of namespace Ice |
| 330 | 465 |
| 331 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H | 466 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H |
| OLD | NEW |