Chromium Code Reviews| Index: tools/pnacl-llc/ThreadedFunctionQueue.h |
| diff --git a/tools/pnacl-llc/ThreadedFunctionQueue.h b/tools/pnacl-llc/ThreadedFunctionQueue.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..54a0e8a378fc6ee58eecb35b5de1e81d307cd3cb |
| --- /dev/null |
| +++ b/tools/pnacl-llc/ThreadedFunctionQueue.h |
| @@ -0,0 +1,89 @@ |
| +//===- ThreadedFunctionQueue.h - Function work units for threads -*- C++ -*-==// |
| +// |
| +// The LLVM Compiler Infrastructure |
| +// |
| +// This file is distributed under the University of Illinois Open Source |
| +// License. See LICENSE.TXT for details. |
| +// |
| +//===----------------------------------------------------------------------===// |
| + |
| +#ifndef THREADEDFUNCTIONQUEUE_H |
| +#define THREADEDFUNCTIONQUEUE_H |
| + |
| +#include "llvm/IR/Module.h" |
| + |
| +// A "queue" that keeps track of which functions have been assigned to |
| +// threads and which functions have not yet been assigned. It does not |
| +// actually use a queue data structure and instead uses a number which |
| +// tracks the minimum unassigned function ID, expecting each thread |
| +// to have the same view of function IDs. |
| +class ThreadedFunctionQueue { |
| + public: |
| + ThreadedFunctionQueue(Module *mod, unsigned SplitModuleCount) |
| + : SplitModuleCount(SplitModuleCount), |
|
JF
2014/03/19 04:47:05
Silly corner case, but checking that SplitModuleCo
jvoung (off chromium)
2014/03/19 18:44:37
Done.
|
| + NumFunctions(mod->getFunctionList().size()), |
| + CurrentFunction(0) { |
| + // NOTE: NumFunctions is only the approximate number of functions. |
| + // Some additional declarations will be added by other passes: |
| + // AddPNaClExternalDeclsPass (for setjmp/longjmp and nacl variants), |
| + // and the NaCl Atomics pass adds declarations of NaCl atomic intrinsics. |
| + } |
| + ~ThreadedFunctionQueue() {} |
| + |
| + // Assign functions in a static manner between threads. |
| + bool GrabFunctionStatic(unsigned FuncIndex, unsigned ThreadIndex) const { |
| + return FuncIndex % SplitModuleCount == ThreadIndex; |
|
JF
2014/03/19 04:47:05
IIUC this won't work if SplitModuleCount < ThreadI
jvoung (off chromium)
2014/03/19 18:44:37
It's pretty unlikely SplitModuleCount < ThreadInde
JF
2014/03/19 20:26:08
It would be useful to assert on this value then, w
jvoung (off chromium)
2014/03/19 21:45:17
Done.
|
| + } |
| + |
| + // Assign functions between threads dynamically. |
| + // Returns true if FuncIndex is unassigned and the calling thread |
| + // is assigned functions [FuncIndex, FuncIndex + ChunkSize). |
| + // Returns false if the calling thread is not assigned functions |
| + // [FuncIndex, FuncIndex + ChunkSize). |
| + // |
| + // NextIndex will be set to the next unassigned function ID, so that |
| + // the calling thread will know which function ID to attempt to grab |
| + // next. Each thread may have a different value for the ideal ChunkSize |
| + // so it is hard to predict the next available function solely based |
| + // on incrementing by ChunkSize. |
| + bool GrabFunctionDynamic(unsigned FuncIndex, unsigned ChunkSize, |
| + unsigned &NextIndex) { |
| + unsigned Cur = CurrentFunction; |
| + if (FuncIndex < Cur) { |
| + NextIndex = Cur; |
| + return false; |
| + } |
| + NextIndex = Cur + ChunkSize; |
| + return __sync_bool_compare_and_swap(&CurrentFunction, Cur, NextIndex); |
|
JF
2014/03/19 04:47:05
The failure case here doesn't actually set NextInd
jvoung (off chromium)
2014/03/19 18:44:37
Good catch -- I'll see if the cmpxchange succeeded
JF
2014/03/19 20:26:08
That works, but it's cleaner to just use:
if (Cur
jvoung (off chromium)
2014/03/19 21:45:17
Done.
|
| + } |
| + |
| + // Returns a recommended ChunkSize for use in calling GrabFunctionDynamic(). |
| + // ChunkSize starts out "large" to reduce synchronization cost. However, |
| + // it cannot be too large, otherwise it will encompass too many bytes |
| + // and defeats streaming translation. Assigning too many functions to |
| + // a single thread also throws off load balancing, so the ChunkSize is |
| + // reduced when the remaining number of functions is low so that |
| + // load balancing can be achieved near the end. |
| + unsigned RecommendedChunkSize() const { |
| + // Since NumFunctions may be less than the actual number of functions |
| + // (see note in ctor), clamp remaining functions to 1. |
| + unsigned ApproxRemainingFuncs = |
| + std::max((int)(NumFunctions - CurrentFunction), 1); |
|
JF
2014/03/19 04:47:05
Converting an unsigned that's not representable as
jvoung (off chromium)
2014/03/19 18:44:37
Note that NumFunctions was unsigned to begin with
|
| + unsigned DynamicChunkSize = ApproxRemainingFuncs / (SplitModuleCount * 4); |
| + if (DynamicChunkSize > 8U) |
| + return 8U; |
| + else |
| + return std::max(1U, DynamicChunkSize); |
|
JF
2014/03/19 04:47:05
return max(1, min(8, DynamicChunkSize));
jvoung (off chromium)
2014/03/19 18:44:37
Done.
|
| + } |
| + |
| + private: |
| + const unsigned SplitModuleCount; |
| + const unsigned NumFunctions; |
| + volatile unsigned CurrentFunction; |
| + |
| + ThreadedFunctionQueue( |
| + const ThreadedFunctionQueue&) LLVM_DELETED_FUNCTION; |
| + void operator=(const ThreadedFunctionQueue&) LLVM_DELETED_FUNCTION; |
|
JF
2014/03/19 04:47:05
Not that it matters since it's deleted, but operat
jvoung (off chromium)
2014/03/19 18:44:37
LLVM's style is to have void, e.g.,
include/llvm/
|
| +}; |
| + |
| +#endif |