OLD | NEW |
---|---|
(Empty) | |
1 //===- ThreadedFunctionQueue.h - Function work units for threads -*- C++ -*-==// | |
2 // | |
3 // The LLVM Compiler Infrastructure | |
4 // | |
5 // This file is distributed under the University of Illinois Open Source | |
6 // License. See LICENSE.TXT for details. | |
7 // | |
8 //===----------------------------------------------------------------------===// | |
9 | |
10 #ifndef THREADEDFUNCTIONQUEUE_H | |
11 #define THREADEDFUNCTIONQUEUE_H | |
12 | |
13 #include "llvm/IR/Module.h" | |
14 | |
15 // A "queue" that keeps track of which functions have been assigned to | |
16 // threads and which functions have not yet been assigned. It does not | |
17 // actually use a queue data structure and instead uses a number which | |
18 // tracks the minimum unassigned function ID, expecting each thread | |
19 // to have the same view of function IDs. | |
20 class ThreadedFunctionQueue { | |
21 public: | |
22 ThreadedFunctionQueue(Module *mod, unsigned SplitModuleCount) | |
23 : SplitModuleCount(SplitModuleCount), | |
JF
2014/03/19 04:47:05
Silly corner case, but checking that SplitModuleCo
jvoung (off chromium)
2014/03/19 18:44:37
Done.
| |
24 NumFunctions(mod->getFunctionList().size()), | |
25 CurrentFunction(0) { | |
26 // NOTE: NumFunctions is only the approximate number of functions. | |
27 // Some additional declarations will be added by other passes: | |
28 // AddPNaClExternalDeclsPass (for setjmp/longjmp and nacl variants), | |
29 // and the NaCl Atomics pass adds declarations of NaCl atomic intrinsics. | |
30 } | |
31 ~ThreadedFunctionQueue() {} | |
32 | |
33 // Assign functions in a static manner between threads. | |
34 bool GrabFunctionStatic(unsigned FuncIndex, unsigned ThreadIndex) const { | |
35 return FuncIndex % SplitModuleCount == ThreadIndex; | |
JF
2014/03/19 04:47:05
IIUC this won't work if SplitModuleCount < ThreadI
jvoung (off chromium)
2014/03/19 18:44:37
It's pretty unlikely SplitModuleCount < ThreadInde
JF
2014/03/19 20:26:08
It would be useful to assert on this value then, w
jvoung (off chromium)
2014/03/19 21:45:17
Done.
| |
36 } | |
37 | |
38 // Assign functions between threads dynamically. | |
39 // Returns true if FuncIndex is unassigned and the calling thread | |
40 // is assigned functions [FuncIndex, FuncIndex + ChunkSize). | |
41 // Returns false if the calling thread is not assigned functions | |
42 // [FuncIndex, FuncIndex + ChunkSize). | |
43 // | |
44 // NextIndex will be set to the next unassigned function ID, so that | |
45 // the calling thread will know which function ID to attempt to grab | |
46 // next. Each thread may have a different value for the ideal ChunkSize | |
47 // so it is hard to predict the next available function solely based | |
48 // on incrementing by ChunkSize. | |
49 bool GrabFunctionDynamic(unsigned FuncIndex, unsigned ChunkSize, | |
50 unsigned &NextIndex) { | |
51 unsigned Cur = CurrentFunction; | |
52 if (FuncIndex < Cur) { | |
53 NextIndex = Cur; | |
54 return false; | |
55 } | |
56 NextIndex = Cur + ChunkSize; | |
57 return __sync_bool_compare_and_swap(&CurrentFunction, Cur, NextIndex); | |
JF
2014/03/19 04:47:05
The failure case here doesn't actually set NextInd
jvoung (off chromium)
2014/03/19 18:44:37
Good catch -- I'll see if the cmpxchange succeeded
JF
2014/03/19 20:26:08
That works, but it's cleaner to just use:
if (Cur
jvoung (off chromium)
2014/03/19 21:45:17
Done.
| |
58 } | |
59 | |
60 // Returns a recommended ChunkSize for use in calling GrabFunctionDynamic(). | |
61 // ChunkSize starts out "large" to reduce synchronization cost. However, | |
62 // it cannot be too large, otherwise it will encompass too many bytes | |
63 // and defeats streaming translation. Assigning too many functions to | |
64 // a single thread also throws off load balancing, so the ChunkSize is | |
65 // reduced when the remaining number of functions is low so that | |
66 // load balancing can be achieved near the end. | |
67 unsigned RecommendedChunkSize() const { | |
68 // Since NumFunctions may be less than the actual number of functions | |
69 // (see note in ctor), clamp remaining functions to 1. | |
70 unsigned ApproxRemainingFuncs = | |
71 std::max((int)(NumFunctions - CurrentFunction), 1); | |
JF
2014/03/19 04:47:05
Converting an unsigned that's not representable as
jvoung (off chromium)
2014/03/19 18:44:37
Note that NumFunctions was unsigned to begin with
| |
72 unsigned DynamicChunkSize = ApproxRemainingFuncs / (SplitModuleCount * 4); | |
73 if (DynamicChunkSize > 8U) | |
74 return 8U; | |
75 else | |
76 return std::max(1U, DynamicChunkSize); | |
JF
2014/03/19 04:47:05
return max(1, min(8, DynamicChunkSize));
jvoung (off chromium)
2014/03/19 18:44:37
Done.
| |
77 } | |
78 | |
79 private: | |
80 const unsigned SplitModuleCount; | |
81 const unsigned NumFunctions; | |
82 volatile unsigned CurrentFunction; | |
83 | |
84 ThreadedFunctionQueue( | |
85 const ThreadedFunctionQueue&) LLVM_DELETED_FUNCTION; | |
86 void operator=(const ThreadedFunctionQueue&) LLVM_DELETED_FUNCTION; | |
JF
2014/03/19 04:47:05
Not that it matters since it's deleted, but operat
jvoung (off chromium)
2014/03/19 18:44:37
LLVM's style is to have void, e.g.,
include/llvm/
| |
87 }; | |
88 | |
89 #endif | |
OLD | NEW |