src/IceGlobalContext.h - Issue 870653002: Subzero: Initial implementation of multithreaded translation.

Side by Side Diff: src/IceGlobalContext.h

Issue 870653002: Subzero: Initial implementation of multithreaded translation. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Code review changes, continued Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceGlobalContext.h - Global context defs ------ C++ --===//	1 //===- subzero/src/IceGlobalContext.h - Global context defs ------ C++ --===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 //	9 //

10 // This file declares aspects of the compilation that persist across	10 // This file declares aspects of the compilation that persist across

11 // multiple functions.	11 // multiple functions.

12 //	12 //

13 //===----------------------------------------------------------------------===//	13 //===----------------------------------------------------------------------===//

14	14

15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H	15 #ifndef SUBZERO_SRC_ICEGLOBALCONTEXT_H

16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H	16 #define SUBZERO_SRC_ICEGLOBALCONTEXT_H

17	17

18 #include <memory>

19 #include <mutex>	18 #include <mutex>

	19 #include <queue>

	20 #include <thread>

20	21

21 #include "IceDefs.h"	22 #include "IceDefs.h"

22 #include "IceClFlags.h"	23 #include "IceClFlags.h"

23 #include "IceIntrinsics.h"	24 #include "IceIntrinsics.h"

24 #include "IceRNG.h"	25 #include "IceRNG.h"

25 #include "IceTimerTree.h"	26 #include "IceTimerTree.h"

26 #include "IceTypes.h"	27 #include "IceTypes.h"

27	28

28 namespace Ice {	29 namespace Ice {

29	30

(...skipping 59 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
89 class ThreadContext {	90 class ThreadContext {

90 ThreadContext(const ThreadContext &) = delete;	91 ThreadContext(const ThreadContext &) = delete;

91 ThreadContext &operator=(const ThreadContext &) = delete;	92 ThreadContext &operator=(const ThreadContext &) = delete;

92	93

93 public:	94 public:

94 ThreadContext() {}	95 ThreadContext() {}

95 CodeStats StatsFunction;	96 CodeStats StatsFunction;

96 std::vector<TimerStack> Timers;	97 std::vector<TimerStack> Timers;

97 };	98 };

98	99

	100 // CfgQueue is the translation work queue. It allows multiple

	101 // producers and multiple consumers (though currently only a single

	102 // producer is used). The producer adds entries using add(), and

	103 // may block if the queue is "full" to control Cfg memory footprint.

	104 // The producer uses end() to indicate that no more entries will be

	105 // added. The consumer removes an item using get(), which will

	106 // return nullptr if end() has been called and the queue is empty.

	107 //

	108 // The MaxSize ctor arg controls the maximum size the queue can grow

	109 // to. The Sequential arg indicates purely sequential execution in

	110 // which the single thread should never wait().
	JF 2015/01/23 23:01:47: Shouldn't the locks be entirely bypassed when sequential? That would make the non-sequential code easier to read, and you'd probably have sequential and non-sequential tsan tests.
	Jim Stichnoth 2015/01/25 07:29:38 (in reply): Nice, done.
	111 //

	112 // Two condition variables are used in the implementation.

	113 // GrewOrEnded signals waiting workers that the producer has changed

	114 // the state of the queue. Shrunk signals a blocked producer that a

	115 // consumer has changed the state of the queue.

	116 class CfgQueue {

	117 public:

	118 CfgQueue(size_t MaxSize, bool Sequential)

	119 : IsEnded(false), MaxSize(MaxSize), Sequential(Sequential) {}
	JF 2015/01/23 23:01:47: Should the CfgQueue assert that the queue is empty (I guess it would need to grab the lock for this to work)? It seems unlikely to get messed up, but it would be tricky to find if it is!
	Jim Stichnoth 2015/01/25 07:29:38 (in reply): I don't see how the WorkQueue could ever be non-empty in the CfgQueue ctor?
	120 void add(Cfg *Func) {

	121 std::unique_lock<GlobalLockType> L(Lock);

	122 // If the work queue is already "full", wait for a consumer to

	123 // grab an element and shrink the queue.

	124 while (!Sequential && WorkQueue.size() >= MaxSize) {

	125 Shrunk.wait(L);

	126 }

	127 WorkQueue.push(Func);

	128 L.unlock();

	129 GrewOrEnded.notify_one();

	130 }

	131 Cfg *get() {

	132 std::unique_lock<GlobalLockType> L(Lock);

	133 while (!IsEnded || !WorkQueue.empty()) {

	134 if (!WorkQueue.empty()) {

	135 Cfg *Func = WorkQueue.front();

	136 WorkQueue.pop();

	137 L.unlock();

	138 Shrunk.notify_one();

	139 return Func;

	140 }

	141 // If the work queue is empty, and this is pure sequential

	142 // execution, then return nullptr.

	143 if (Sequential)

	144 return nullptr;

	145 GrewOrEnded.wait(L);

	146 }

	147 return nullptr;

	148 }

	149 void end() {

	150 std::unique_lock<GlobalLockType> L(Lock);

	151 IsEnded = true;

	152 L.unlock();

	153 GrewOrEnded.notify_all();

	154 }

	155

	156 private:

	157 // WorkQueue and Lock are read/written by all.

	158 // TODO(stichnot): Since WorkQueue has an enforced maximum size,

	159 // implement it on top of something like std::array to minimize

	160 // contention.

	161 alignas(MaxCacheLineSize) std::queue<Cfg *> WorkQueue;

	162 // Lock guards access to WorkQueue and IsEnded.

	163 alignas(MaxCacheLineSize) GlobalLockType Lock;

	164

	165 // IsEnded and GrewOrEnded are written by the producer and read by

	166 // the consumers.

	167 alignas(MaxCacheLineSize) bool IsEnded;
	JF 2015/01/23 22:22:11: Move to end with Sequential: it's only written to once, during normal operation it's only read.
	Jim Stichnoth 2015/01/25 07:29:38 (in reply): Done.
	168 // GrewOrEnded is notified (by the producer) when something is

	169 // added to the queue, in case consumers are waiting for a

	170 // non-empty queue.

	171 std::condition_variable GrewOrEnded;

	172

	173 // Shrunk is notified (by the consumer) when something is removed

	174 // from the queue, in case the producer is waiting for the queue

	175 // to drop below maximum capacity. It is written by the consumers

	176 // and read by the producer.

	177 alignas(MaxCacheLineSize) std::condition_variable Shrunk;

	178

	179 // MaxSize and Sequential are read by all and written by none.

	180 alignas(MaxCacheLineSize) const size_t MaxSize;

	181 const bool Sequential;

	182 };

	183

99 public:	184 public:

100 GlobalContext(Ostream OsDump, Ostream OsEmit, ELFStreamer *ELFStreamer,	185 GlobalContext(Ostream OsDump, Ostream OsEmit, ELFStreamer *ELFStreamer,

101 VerboseMask Mask, TargetArch Arch, OptLevel Opt,	186 VerboseMask Mask, TargetArch Arch, OptLevel Opt,

102 IceString TestPrefix, const ClFlags &Flags);	187 IceString TestPrefix, const ClFlags &Flags);

103 ~GlobalContext();	188 ~GlobalContext();

104	189

105 // Returns true if any of the specified options in the verbose mask

106 // are set. If the argument is omitted, it checks if any verbose

107 // options at all are set.

108 VerboseMask getVerbose() const { return VMask; }	190 VerboseMask getVerbose() const { return VMask; }

109 bool isVerbose(VerboseMask Mask = IceV_All) const { return VMask & Mask; }

110 void setVerbose(VerboseMask Mask) { VMask = Mask; }

111 void addVerbose(VerboseMask Mask) { VMask |= Mask; }

112 void subVerbose(VerboseMask Mask) { VMask &= ~Mask; }

113	191

114 // The dump and emit streams need to be used by only one thread at a	192 // The dump and emit streams need to be used by only one thread at a

115 // time. This is done by exclusively reserving the streams via	193 // time. This is done by exclusively reserving the streams via

116 // lockStr() and unlockStr(). The OstreamLocker class can be used	194 // lockStr() and unlockStr(). The OstreamLocker class can be used

117 // to conveniently manage this.	195 // to conveniently manage this.

118 //	196 //

119 // The model is that a thread grabs the stream lock, then does an	197 // The model is that a thread grabs the stream lock, then does an

120 // arbitrary amount of work during which far-away callees may grab	198 // arbitrary amount of work during which far-away callees may grab

121 // the stream and do something with it, and finally the thread	199 // the stream and do something with it, and finally the thread

122 // releases the stream lock. This allows large chunks of output to	200 // releases the stream lock. This allows large chunks of output to

123 // be dumped or emitted without risking interleaving from multiple	201 // be dumped or emitted without risking interleaving from multiple

124 // threads.	202 // threads.

125 void lockStr() { StrLock.lock(); }	203 void lockStr() { StrLock.lock(); }

126 void unlockStr() { StrLock.unlock(); }	204 void unlockStr() { StrLock.unlock(); }

127 Ostream &getStrDump() { return *StrDump; }	205 Ostream &getStrDump() { return *StrDump; }

128 Ostream &getStrEmit() { return *StrEmit; }	206 Ostream &getStrEmit() { return *StrEmit; }

129	207

130 TargetArch getTargetArch() const { return Arch; }	208 TargetArch getTargetArch() const { return Arch; }

131 OptLevel getOptLevel() const { return Opt; }	209 OptLevel getOptLevel() const { return Opt; }

	210 std::error_code getErrorStatus() const { return ErrorStatus; }

132	211

133 // When emitting assembly, we allow a string to be prepended to	212 // When emitting assembly, we allow a string to be prepended to

134 // names of translated functions. This makes it easier to create an	213 // names of translated functions. This makes it easier to create an

135 // execution test against a reference translator like llc, with both	214 // execution test against a reference translator like llc, with both

136 // translators using the same bitcode as input.	215 // translators using the same bitcode as input.

137 IceString getTestPrefix() const { return TestPrefix; }	216 IceString getTestPrefix() const { return TestPrefix; }

138 IceString mangleName(const IceString &Name) const;	217 IceString mangleName(const IceString &Name) const;

139	218

140 // Manage Constants.	219 // Manage Constants.

141 // getConstant*() functions are not const because they might add	220 // getConstant*() functions are not const because they might add

(...skipping 80 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
222	301

223 TimerStackIdT newTimerStackID(const IceString &Name);	302 TimerStackIdT newTimerStackID(const IceString &Name);

224 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);	303 TimerIdT getTimerID(TimerStackIdT StackID, const IceString &Name);

225 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);	304 void pushTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);

226 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);	305 void popTimer(TimerIdT ID, TimerStackIdT StackID = TSK_Default);

227 void resetTimer(TimerStackIdT StackID);	306 void resetTimer(TimerStackIdT StackID);

228 void setTimerName(TimerStackIdT StackID, const IceString &NewName);	307 void setTimerName(TimerStackIdT StackID, const IceString &NewName);

229 void dumpTimers(TimerStackIdT StackID = TSK_Default,	308 void dumpTimers(TimerStackIdT StackID = TSK_Default,

230 bool DumpCumulative = true);	309 bool DumpCumulative = true);

231	310

	311 // Adds a newly parsed and constructed function to the Cfg work

	312 // queue. Notifies any idle workers that a new function is

	313 // available for translating. May block if the work queue is too

	314 // large, in order to control memory footprint.

	315 void cfgQueueAdd(Cfg *Func) { CfgQ.add(Func); }

	316 // Takes a Cfg from the work queue for translating. May block if

	317 // the work queue is currently empty. Returns nullptr if there is

	318 // no more work - the queue is empty and either end() has been

	319 // called or the Sequential flag was set.

	320 Cfg *cfgQueueGet() { return CfgQ.get(); }

	321 // Notifies that no more work will be added to the work queue.

	322 void cfgQueueEnd() { CfgQ.end(); }

	323

	324 void startWorkerThreads() {

	325 size_t NumWorkers = getFlags().NumTranslationThreads;

	326 for (size_t i = 0; i < NumWorkers; ++i) {

	327 ThreadContext *WorkerTLS = new ThreadContext();

	328 AllThreadContexts.push_back(WorkerTLS);

	329 TranslationThreads.push_back(std::thread(

	330 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS));

	331 }

	332 if (NumWorkers) {

	333 // TODO(stichnot): start a new thread for the emitter queue worker.

	334 }

	335 }

	336

	337 void waitForWorkerThreads() {

	338 cfgQueueEnd();

	339 // TODO(stichnot): call end() on the emitter work queue.

	340 for (std::thread &Worker : TranslationThreads) {

	341 Worker.join();

	342 }

	343 TranslationThreads.clear();

	344 // TODO(stichnot): join the emitter thread.

	345 }

	346

	347 // Translation thread startup routine.

	348 void translateFunctionsWrapper(ThreadContext *MyTLS) {

	349 TLS = MyTLS;

	350 translateFunctions();

	351 }

	352 // Translate functions from the Cfg queue until the queue is empty.

	353 void translateFunctions();

	354

	355 // Utility function to match a symbol name against a match string.

	356 // This is used in a few cases where we want to take some action on

	357 // a particular function or symbol based on a command-line argument,

	358 // such as changing the verbose level for a particular function. An

	359 // empty Match argument means match everything. Returns true if

	360 // there is a match.

	361 static bool matchSymbolName(const IceString &SymbolName,

	362 const IceString &Match) {

	363 return Match.empty() || Match == SymbolName;

	364 }

	365

232 private:	366 private:

233 // Try to make sure the mutexes are allocated on separate cache	367 // Try to ensure mutexes are allocated on separate cache lines.

234 // lines, assuming the maximum cache line size is 64.

235 const static size_t MaxCacheLineSize = 64;

236 alignas(MaxCacheLineSize) GlobalLockType AllocLock;	368 alignas(MaxCacheLineSize) GlobalLockType AllocLock;

237 alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock;	369 alignas(MaxCacheLineSize) GlobalLockType ConstPoolLock;

238 alignas(MaxCacheLineSize) GlobalLockType StatsLock;	370 alignas(MaxCacheLineSize) GlobalLockType StatsLock;

239 alignas(MaxCacheLineSize) GlobalLockType TimerLock;	371 alignas(MaxCacheLineSize) GlobalLockType TimerLock;

240	372

241 // StrLock is a global lock on the dump and emit output streams.	373 // StrLock is a global lock on the dump and emit output streams.

242 typedef std::mutex StrLockType;	374 typedef std::mutex StrLockType;

243 StrLockType StrLock;	375 StrLockType StrLock;

244	376

245 Ostream *StrDump; // Stream for dumping / diagnostics	377 Ostream *StrDump; // Stream for dumping / diagnostics

246 Ostream *StrEmit; // Stream for code emission	378 Ostream *StrEmit; // Stream for code emission

247	379

248 ArenaAllocator<> Allocator;	380 ArenaAllocator<> Allocator;

249 VerboseMask VMask;	381 VerboseMask VMask;

250 std::unique_ptr<ConstantPool> ConstPool;	382 std::unique_ptr<ConstantPool> ConstPool;

251 Intrinsics IntrinsicsInfo;	383 Intrinsics IntrinsicsInfo;

252 const TargetArch Arch;	384 const TargetArch Arch;

253 const OptLevel Opt;	385 const OptLevel Opt;

254 const IceString TestPrefix;	386 const IceString TestPrefix;

255 const ClFlags &Flags;	387 const ClFlags &Flags;

256 RandomNumberGenerator RNG;	388 RandomNumberGenerator RNG;

257 std::unique_ptr<ELFObjectWriter> ObjectWriter;	389 std::unique_ptr<ELFObjectWriter> ObjectWriter;

258 CodeStats StatsCumulative;	390 CodeStats StatsCumulative;

259 std::vector<TimerStack> Timers;	391 std::vector<TimerStack> Timers;

	392 CfgQueue CfgQ;

	393 std::error_code ErrorStatus;

260	394

261 LockedPtr<ArenaAllocator<>> getAllocator() {	395 LockedPtr<ArenaAllocator<>> getAllocator() {

262 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock);	396 return LockedPtr<ArenaAllocator<>>(&Allocator, &AllocLock);

263 }	397 }

264 LockedPtr<ConstantPool> getConstPool() {	398 LockedPtr<ConstantPool> getConstPool() {

265 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock);	399 return LockedPtr<ConstantPool>(ConstPool.get(), &ConstPoolLock);

266 }	400 }

267 LockedPtr<CodeStats> getStatsCumulative() {	401 LockedPtr<CodeStats> getStatsCumulative() {

268 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock);	402 return LockedPtr<CodeStats>(&StatsCumulative, &StatsLock);

269 }	403 }

270 LockedPtr<std::vector<TimerStack>> getTimers() {	404 LockedPtr<std::vector<TimerStack>> getTimers() {

271 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock);	405 return LockedPtr<std::vector<TimerStack>>(&Timers, &TimerLock);

272 }	406 }

273	407

274 std::vector<ThreadContext *> AllThreadContexts;	408 std::vector<ThreadContext *> AllThreadContexts;

	409 std::vector<std::thread> TranslationThreads;

275 // Each thread has its own TLS pointer which is also held in	410 // Each thread has its own TLS pointer which is also held in

276 // AllThreadContexts.	411 // AllThreadContexts.

277 ICE_ATTRIBUTE_TLS static ThreadContext *TLS;	412 ICE_ATTRIBUTE_TLS static ThreadContext *TLS;

278	413

279 // Private helpers for mangleName()	414 // Private helpers for mangleName()

280 typedef llvm::SmallVector<char, 32> ManglerVector;	415 typedef llvm::SmallVector<char, 32> ManglerVector;

281 void incrementSubstitutions(ManglerVector &OldName) const;	416 void incrementSubstitutions(ManglerVector &OldName) const;

282 };	417 };

283	418

284 // Helper class to push and pop a timer marker. The constructor	419 // Helper class to push and pop a timer marker. The constructor

(...skipping 37 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
322 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); }	457 explicit OstreamLocker(GlobalContext *Ctx) : Ctx(Ctx) { Ctx->lockStr(); }

323 ~OstreamLocker() { Ctx->unlockStr(); }	458 ~OstreamLocker() { Ctx->unlockStr(); }

324	459

325 private:	460 private:

326 GlobalContext *const Ctx;	461 GlobalContext *const Ctx;

327 };	462 };

328	463

329 } // end of namespace Ice	464 } // end of namespace Ice

330	465

331 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H	466 #endif // SUBZERO_SRC_ICEGLOBALCONTEXT_H

OLD	NEW

« no previous file with comments | « src/IceDefs.h ('k') | src/IceGlobalContext.cpp » ('j') | src/IceTranslator.h » ('J')