Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Unified Diff: src/IceGlobalContext.h

Issue 870653002: Subzero: Initial implementation of multithreaded translation. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Cleanup Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/IceGlobalContext.h
diff --git a/src/IceGlobalContext.h b/src/IceGlobalContext.h
index 751147ee48502811e0f37eaf21c92d0dae0ad17c..3583fbf26ecc7b9f4430f0dc47800244c2d68d63 100644
--- a/src/IceGlobalContext.h
+++ b/src/IceGlobalContext.h
@@ -15,8 +15,9 @@
#ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H
#define SUBZERO_SRC_ICEGLOBALCONTEXT_H
-#include <memory>
#include <mutex>
+#include <queue>
+#include <thread>
#include "IceDefs.h"
#include "IceClFlags.h"
@@ -96,20 +97,69 @@ class GlobalContext {
std::vector<TimerStack> Timers;
};
+ class CfgQueue {
+ public:
+ explicit CfgQueue(uint32_t NumWorkers) // Creates an empty queue that has not been ended; NumWorkers is the bound add() waits on.
+ : NumWorkers(NumWorkers), IsEnded(false) {} // A NumWorkers of 0 makes get() return nullptr on an empty queue instead of waiting.
+ void add(Cfg *Func) { // Appends Func to the queue, first waiting on Shrunk while the queue holds more than NumWorkers entries.
+ std::unique_lock<GlobalLockType> L(Lock); // Held for the size check and the push; released explicitly before GrewOrEnded is notified.
JF 2015/01/22 20:50:56 Can you add a comment on CfgQueue explaining the l
jvoung (off chromium) 2015/01/22 23:06:06 Yeah, for pnacl-llc, it dequeues N functions at a
Jim Stichnoth 2015/01/23 07:55:54 Done.
Jim Stichnoth 2015/01/23 07:55:55 Even for small functions, my sense is that it take
jvoung (off chromium) 2015/01/23 17:49:09 Sorry I don't have the numbers anymore, but it can
JF 2015/01/23 17:51:02 Code review was: https://codereview.chromium.org
+ // If the work queue is already "full", wait for a consumer to
+ // grab an element and shrink the queue.
+ while (WorkQueue.size() > NumWorkers) {
+ Shrunk.wait(L);
+ }
+ WorkQueue.push(Func);
+ L.unlock();
+ GrewOrEnded.notify_one();
+ }
+ Cfg *get() { // Removes and returns the front Cfg; returns nullptr once IsEnded is set and the queue is empty.
+ std::unique_lock<GlobalLockType> L(Lock);
+ while (!IsEnded || !WorkQueue.empty()) { // Keep going until the queue is both ended and drained.
+ if (!WorkQueue.empty()) {
+ Cfg *Func = WorkQueue.front();
+ WorkQueue.pop();
+ L.unlock(); // Drop the lock before notifying.
+ Shrunk.notify_one(); // Wakes a caller of add() that is waiting for the queue to shrink.
+ return Func;
+ }
+ // If the work queue is empty, and this is pure sequential
+ // execution, then return nullptr.
+ if (NumWorkers == 0)
+ return nullptr;
+ GrewOrEnded.wait(L); // Sleep until add() or end() notifies, then re-check the loop condition.
+ }
+ return nullptr;
+ }
+ void end() { // Marks the queue as ended and wakes every thread waiting in get().
+ std::unique_lock<GlobalLockType> L(Lock);
+ IsEnded = true; // Written under Lock; get() reads it under the same lock.
+ L.unlock();
+ GrewOrEnded.notify_all(); // notify_all rather than notify_one, so every waiting consumer re-checks IsEnded.
+ }
+
+ private:
+ std::queue<Cfg *> WorkQueue; // Pending Cfg pointers in FIFO order: add() pushes, get() pops the front.
JF 2015/01/22 20:50:56 This should probably be an std::array if the size
Jim Stichnoth 2015/01/23 07:55:55 There is just one add() and one get() per function
+ // Lock guards access to WorkQueue and IsEnded.
+ GlobalLockType Lock; // NOTE(review): used with std::condition_variable::wait, so presumably std::mutex -- confirm.
+ // GrewOrEnded is notified (by the producer) when something is
+ // added to the queue or when end() is called, in case consumers
+ // are waiting for a non-empty queue.
+ std::condition_variable GrewOrEnded; // NOTE(review): declared in <condition_variable>; the include lines shown in this diff do not add it -- confirm it is available.
+ // Shrunk is notified (by the consumer) when something is removed
+ // from the queue, in case the producer is waiting for the queue
+ // to drop below maximum capacity.
+ std::condition_variable Shrunk;
+ const uint32_t NumWorkers; // Bound checked by add(); a value of 0 makes get() return nullptr rather than wait.
JF 2015/01/22 20:50:56 I'd make this a size_t.
Jim Stichnoth 2015/01/23 07:55:55 Done, here and in IceClFlags.h and llvm2ice.cpp.
+ bool IsEnded;
+ };
+
public:
GlobalContext(Ostream *OsDump, Ostream *OsEmit, ELFStreamer *ELFStreamer,
VerboseMask Mask, TargetArch Arch, OptLevel Opt,
IceString TestPrefix, const ClFlags &Flags);
~GlobalContext();
- // Returns true if any of the specified options in the verbose mask
- // are set. If the argument is omitted, it checks if any verbose
- // options at all are set.
VerboseMask getVerbose() const { return VMask; } // Returns the stored verbosity mask (VMask).
- bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; }
- void setVerbose(VerboseMask Mask) { VMask = Mask; }
- void addVerbose(VerboseMask Mask) { VMask |= Mask; }
- void subVerbose(VerboseMask Mask) { VMask &= ~Mask; }
// The dump and emit streams need to be used by only one thread at a
// time. This is done by exclusively reserving the streams via
@@ -129,6 +179,7 @@ public:
TargetArch getTargetArch() const { return Arch; } // Returns the stored target architecture (Arch).
OptLevel getOptLevel() const { return Opt; } // Returns the stored optimization level (Opt).
+ bool getErrorStatus() const { return ErrorStatus; } // Returns the ErrorStatus flag. NOTE(review): read without a lock here -- confirm writers are synchronized if worker threads can set it.
// When emitting assembly, we allow a string to be prepended to
// names of translated functions. This makes it easier to create an
@@ -229,6 +280,58 @@ public:
void dumpTimers(TimerStackIdT StackID = TSK_Default,
bool DumpCumulative = true);
+ // Adds a newly parsed and constructed function to the Cfg work
+ // queue. Notifies any idle workers that a new function is
+ // available for translating. May block if the work queue is too
+ // large, in order to control memory footprint.
+ void cfgQueueAdd(Cfg *Func) { CfgQ.add(Func); } // Forwards to CfgQueue::add() on the CfgQ member.
+ // Takes a Cfg from the work queue for translating. May block if
+ // the work queue is currently empty. Returns nullptr if there is
+ // no more work (in which case the translation thread will probably
+ // just exit).
JF 2015/01/22 20:50:56 "probably"? That seems mostly accurate.
Jim Stichnoth 2015/01/23 07:55:54 Done.
+ Cfg *cfgQueueGet() { return CfgQ.get(); } // Forwards to CfgQueue::get(); with zero workers an empty queue yields nullptr instead of blocking.
+ // Notifies that no more work will be added to the work queue.
+ void cfgQueueEnd() { CfgQ.end(); } // Forwards to CfgQueue::end(), which wakes all threads waiting in CfgQueue::get().
+
+ void startWorkerThreads() { // Creates one ThreadContext and one std::thread per configured translation thread (getFlags().NumTranslationThreads).
+ uint32_t NumWorkers = getFlags().NumTranslationThreads;
+ for (uint32_t i = 0; i < NumWorkers; ++i) {
+ ThreadContext *WorkerTLS = new ThreadContext(); // Raw allocation recorded in AllThreadContexts. NOTE(review): the matching delete is not visible here -- confirm the owner.
+ AllThreadContexts.push_back(WorkerTLS);
+ TranslationThreads.push_back(std::thread(
+ &GlobalContext::translateFunctionsWrapper, this, WorkerTLS)); // Each thread starts in translateFunctionsWrapper with its own context.
+ }
+ if (NumWorkers) {
+ // TODO(stichnot): start emitter thread
JF 2015/01/22 20:50:56 ?
Jim Stichnoth 2015/01/23 07:55:54 Done.
+ }
+ }
+
+ void waitForWorkerThreads() { // Ends the Cfg queue, then joins and discards every thread in TranslationThreads.
+ cfgQueueEnd(); // Ended first so CfgQueue::get() returns nullptr to any waiting thread before the joins below.
+ // TODO(stichnot): end the emitter queue
+ for (std::thread &Worker : TranslationThreads) {
+ Worker.join();
+ }
+ TranslationThreads.clear(); // Drops the joined std::thread objects.
+ // TODO(stichnot): join the emitter queue
+ }
+
+ // Translation thread startup routine. MyTLS is the ThreadContext
+ void translateFunctionsWrapper(ThreadContext *MyTLS) { // that startWorkerThreads() allocated for this thread.
+ TLS = MyTLS; // Install this thread's context in the thread_local TLS pointer before translating.
+ translateFunctions();
+ }
+ // Translate functions from the Cfg queue until the queue is empty.
+ void translateFunctions();
+
+ // Utility function to match a symbol name against a match string.
+ // An empty match string means match everything. Returns true if
+ // there is a match; a non-empty Match must equal SymbolName.
+ static bool matchSymbolName(const IceString &SymbolName,
+ const IceString &Match) {
+ return Match.empty() || Match == SymbolName; // Compared with operator== only; no wildcard handling in this function.
+}
JF 2015/01/22 20:50:56 I don't understand what this is for.
Jim Stichnoth 2015/01/23 07:55:54 Hopefully documented better.
+
private:
// Try to make sure the mutexes are allocated on separate cache
// lines, assuming the maximum cache line size is 64.
@@ -257,6 +360,8 @@ private:
std::unique_ptr<ELFObjectWriter> ObjectWriter;
CodeStats StatsCumulative;
std::vector<TimerStack> Timers;
+ CfgQueue CfgQ;
+ bool ErrorStatus;
JF 2015/01/22 20:50:56 Use std::error_code?
Jim Stichnoth 2015/01/23 07:55:54 Done.
LockedPtr<ArenaAllocator<>> getAllocator() {
return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock);
@@ -272,6 +377,7 @@ private:
}
std::vector<ThreadContext *> AllThreadContexts;
+ std::vector<std::thread> TranslationThreads;
// Each thread has its own TLS pointer which is also held in
// AllThreadContexts.
thread_local static ThreadContext *TLS;

Powered by Google App Engine
This is Rietveld 408576698