| Index: src/IceGlobalContext.h
|
| diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
|
| index 35d5493e9d6133a7b4493c70fce01de26b3688bf..dfa665b25da23ce2475040899d89143153363934 100644
|
| --- a/src/IceGlobalContext.h
|
| +++ b/src/IceGlobalContext.h
|
| @@ -15,8 +15,8 @@
|
| #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H
|
| #define SUBZERO_SRC_ICEGLOBALCONTEXT_H
|
|
|
| -#include <memory>
|
| #include <mutex>
|
| +#include <thread>
|
|
|
| #include "IceDefs.h"
|
| #include "IceClFlags.h"
|
| @@ -24,6 +24,7 @@
|
| #include "IceRNG.h"
|
| #include "IceTimerTree.h"
|
| #include "IceTypes.h"
|
| +#include "IceUtils.h"
|
|
|
| namespace Ice {
|
|
|
| @@ -31,8 +32,6 @@ class ClFlags;
|
| class ConstantPool;
|
| class FuncSigType;
|
|
|
| -typedef std::mutex GlobalLockType;
|
| -
|
| // LockedPtr is a way to provide automatically locked access to some object.
|
| template <typename T> class LockedPtr {
|
| LockedPtr() = delete;
|
| @@ -102,14 +101,7 @@ public:
|
| IceString TestPrefix, const ClFlags &Flags);
|
| ~GlobalContext();
|
|
|
| - // Returns true if any of the specified options in the verbose mask
|
| - // are set. If the argument is omitted, it checks if any verbose
|
| - // options at all are set.
|
| VerboseMask getVerbose() const { return VMask; }
|
| - bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; }
|
| - void setVerbose(VerboseMask Mask) { VMask = Mask; }
|
| - void addVerbose(VerboseMask Mask) { VMask |= Mask; }
|
| - void subVerbose(VerboseMask Mask) { VMask &= ~Mask; }
|
|
|
| // The dump and emit streams need to be used by only one thread at a
|
| // time. This is done by exclusively reserving the streams via
|
| @@ -129,6 +121,9 @@ public:
|
|
|
| TargetArch getTargetArch() const { return Arch; }
|
| OptLevel getOptLevel() const { return Opt; }
|
| + LockedPtr<ErrorCode> getErrorStatus() { // Locked access to ErrorStatus.
|
| + return LockedPtr<ErrorCode>(&ErrorStatus, &ErrorStatusLock);
|
| + }
|
|
|
| // When emitting assembly, we allow a string to be prepended to
|
| // names of translated functions. This makes it easier to create an
|
| @@ -229,34 +224,107 @@ public:
|
| void dumpTimers(TimerStackIdT StackID = TSK_Default,
|
| bool DumpCumulative = true);
|
|
|
| + // Adds a newly parsed and constructed function to the Cfg work
|
| + // queue. Notifies any idle workers that a new function is
|
| + // available for translating. May block if the work queue is too
|
| + // large, in order to control memory footprint.
|
| + void cfgQueueBlockingPush(Cfg *Func) { CfgQ.blockingPush(Func); }
|
| + // Takes a Cfg from the work queue for translating. May block if
|
| + // the work queue is currently empty. Returns nullptr if there is
|
| + // no more work - the queue is empty and either cfgQueueNotifyEnd()
|
| + // has been called or the Sequential flag was set.
|
| + Cfg *cfgQueueBlockingPop() { return CfgQ.blockingPop(); }
|
| + // Notifies that no more work will be added to the work queue.
|
| + void cfgQueueNotifyEnd() { CfgQ.notifyEnd(); }
|
| +
|
| + void startWorkerThreads() { // Spawns NumTranslationThreads workers.
|
| + const size_t WorkerCount = getFlags().NumTranslationThreads;
|
| + for (size_t Idx = 0; Idx != WorkerCount; ++Idx) {
|
| + ThreadContext *Context = new ThreadContext();
|
| + AllThreadContexts.push_back(Context);
|
| + TranslationThreads.emplace_back(
|
| + &GlobalContext::translateFunctionsWrapper, this, Context);
|
| + }
|
| + if (WorkerCount != 0) {
|
| + // TODO(stichnot): start a new thread for the emitter queue worker.
|
| + }
|
| + }
|
| +
|
| + void waitForWorkerThreads() { // Signals end of work, then joins workers.
|
| + cfgQueueNotifyEnd();
|
| + // TODO(stichnot): call end() on the emitter work queue.
|
| + for (auto &Translator : TranslationThreads) {
|
| + Translator.join();
|
| + }
|
| + TranslationThreads.clear();
|
| + // TODO(stichnot): join the emitter thread.
|
| + }
|
| +
|
| + // Translation thread startup routine: sets this thread's TLS to MyTLS first.
|
| + void translateFunctionsWrapper(ThreadContext *MyTLS) {
|
| + ICE_TLS_SET_FIELD(TLS, MyTLS);
|
| + translateFunctions();
|
| + }
|
| + // Translate functions from the Cfg queue until the queue is empty.
|
| + void translateFunctions();
|
| +
|
| + // Tests whether SymbolName is selected by the Match string. Used
|
| + // where a command-line argument targets a particular function or
|
| + // symbol, e.g. to change the verbose level for one function only.
|
| + // An empty Match acts as a wildcard and matches every name;
|
| + // otherwise the two strings must be identical. Returns true on a
|
| + // match.
|
| + static bool matchSymbolName(const IceString &SymbolName,
|
| + const IceString &Match) {
|
| + return Match.empty() ? true : (Match == SymbolName);
|
| + }
|
| +
|
| private:
|
| - // Try to make sure the mutexes are allocated on separate cache
|
| - // lines, assuming the maximum cache line size is 64.
|
| - const static size_t MaxCacheLineSize = 64;
|
| - alignas(MaxCacheLineSize) GlobalLockType AllocLock;
|
| - alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock;
|
| - alignas(MaxCacheLineSize) GlobalLockType StatsLock;
|
| - alignas(MaxCacheLineSize) GlobalLockType TimerLock;
|
| + // Try to ensure mutexes are allocated on separate cache lines.
|
| +
|
| + ICE_CACHELINE_BOUNDARY;
|
| + // Managed by getAllocator()
|
| + GlobalLockType AllocLock;
|
| + ArenaAllocator<> Allocator;
|
|
|
| + ICE_CACHELINE_BOUNDARY;
|
| + // Managed by getConstantPool()
|
| + GlobalLockType ConstPoolLock;
|
| + std::unique_ptr<ConstantPool> ConstPool;
|
| +
|
| + ICE_CACHELINE_BOUNDARY;
|
| + // Managed by getErrorStatus()
|
| + GlobalLockType ErrorStatusLock;
|
| + ErrorCode ErrorStatus;
|
| +
|
| + ICE_CACHELINE_BOUNDARY;
|
| + // Managed by getStatsCumulative()
|
| + GlobalLockType StatsLock;
|
| + CodeStats StatsCumulative;
|
| +
|
| + ICE_CACHELINE_BOUNDARY;
|
| + // Managed by getTimers()
|
| + GlobalLockType TimerLock;
|
| + std::vector<TimerStack> Timers;
|
| +
|
| + ICE_CACHELINE_BOUNDARY;
|
| // StrLock is a global lock on the dump and emit output streams.
|
| typedef std::mutex StrLockType;
|
| StrLockType StrLock;
|
| -
|
| Ostream *StrDump; // Stream for dumping / diagnostics
|
| Ostream *StrEmit; // Stream for code emission
|
|
|
| - ArenaAllocator<> Allocator;
|
| - VerboseMask VMask;
|
| - std::unique_ptr<ConstantPool> ConstPool;
|
| + ICE_CACHELINE_BOUNDARY;
|
| +
|
| + const VerboseMask VMask;
|
| Intrinsics IntrinsicsInfo;
|
| const TargetArch Arch;
|
| const OptLevel Opt;
|
| const IceString TestPrefix;
|
| const ClFlags &Flags;
|
| - RandomNumberGenerator RNG;
|
| + RandomNumberGenerator RNG; // TODO(stichnot): Move into Cfg.
|
| std::unique_ptr<ELFObjectWriter> ObjectWriter;
|
| - CodeStats StatsCumulative;
|
| - std::vector<TimerStack> Timers;
|
| + BoundedProducerConsumerQueue<Cfg> CfgQ;
|
|
|
| LockedPtr<ArenaAllocator<>> getAllocator() {
|
| return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock);
|
| @@ -272,6 +340,7 @@ private:
|
| }
|
|
|
| std::vector<ThreadContext *> AllThreadContexts;
|
| + std::vector<std::thread> TranslationThreads;
|
| // Each thread has its own TLS pointer which is also held in
|
| // AllThreadContexts.
|
| ICE_TLS_DECLARE_FIELD(ThreadContext *, TLS);
|
|
|