//===- ThreadedFunctionQueue.h - Function work units for threads -*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef THREADEDFUNCTIONQUEUE_H
#define THREADEDFUNCTIONQUEUE_H

#include <algorithm>
#include <cassert>
#include <limits>

#include "llvm/IR/Module.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"

// A "queue" that keeps track of which functions have been assigned to
// threads and which have not yet been assigned. It does not actually use
// a queue data structure; instead it keeps a counter of the minimum
// unassigned function ID, and expects each thread to have the same view
// of function IDs.
class ThreadedFunctionQueue {
public:
  ThreadedFunctionQueue(llvm::Module *mod, unsigned NumThreads)
      : NumThreads(NumThreads), NumFunctions(0), CurrentFunction(0) {
    assert(NumThreads > 0);
    size_t size = mod->getFunctionList().size();
    // NOTE: NumFunctions is only the approximate number of functions,
    // because other passes add declarations later: AddPNaClExternalDeclsPass
    // (for setjmp/longjmp and the NaCl variants), and the NaCl Atomics pass
    // (for the NaCl atomic intrinsics). So make the unsigned -> signed
    // range check a little more conservative, because CurrentFunction may
    // be incremented beyond NumFunctions.
    const int kSlack = 2048;
    if (size > static_cast<size_t>(std::numeric_limits<int>::max() - kSlack))
      llvm::report_fatal_error("Too many functions");
    NumFunctions = static_cast<int>(size);
  }
  ~ThreadedFunctionQueue() {}

  // Statically assign functions to threads in round-robin fashion.
  bool GrabFunctionStatic(int FuncIndex, unsigned ThreadIndex) const {
    // Note: This assumes NumThreads == SplitModuleCount, so that
    // (a) every function of every module is covered by the NumThreads and
    // (b) no function is covered twice by the threads.
    assert(ThreadIndex < NumThreads);
    return FuncIndex % NumThreads == ThreadIndex;
  }
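
  // Example of the static pattern (a sketch; FuncQ, NumFuncs, MyIndex, and
  // Translate() are illustrative names, not part of this interface):
  //
  //   for (int I = 0; I < NumFuncs; ++I)
  //     if (FuncQ.GrabFunctionStatic(I, MyIndex))
  //       Translate(I);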

  // Assign functions to threads dynamically.
  // Returns true if FuncIndex was still unassigned, in which case the
  // calling thread is now assigned the functions
  // [FuncIndex, FuncIndex + ChunkSize). Returns false otherwise.
  //
  // In both cases NextIndex is set to the next unassigned function ID,
  // so the calling thread knows which function ID to attempt to grab
  // next. Each thread may use a different ChunkSize, so the next
  // available function cannot be predicted simply by incrementing
  // FuncIndex by ChunkSize.
  bool GrabFunctionDynamic(int FuncIndex, unsigned ChunkSize,
                           int &NextIndex) {
    int Cur = CurrentFunction;
    if (FuncIndex < Cur) {
      // Another thread already claimed this function; report where the
      // counter currently stands.
      NextIndex = Cur;
      return false;
    }
    NextIndex = Cur + ChunkSize;
    int Index = __sync_val_compare_and_swap(&CurrentFunction, Cur, NextIndex);
    if (Index == Cur)
      return true;
    // Another thread won the race to advance CurrentFunction. This thread's
    // idea of NextIndex may be incorrect since ChunkSize can vary between
    // threads; reset NextIndex to the value that was observed.
    NextIndex = Index;
    return false;
  }
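
  // Example worker loop for the dynamic pattern (a sketch; FuncQ, NumFuncs,
  // and Translate() are illustrative names, not part of this interface):
  //
  //   int FuncIndex = 0;
  //   while (FuncIndex < NumFuncs) {
  //     int NextIndex;
  //     if (FuncQ.GrabFunctionDynamic(FuncIndex,
  //                                   FuncQ.RecommendedChunkSize(),
  //                                   NextIndex)) {
  //       // This thread now owns [FuncIndex, NextIndex).
  //       for (int I = FuncIndex; I < std::min(NextIndex, NumFuncs); ++I)
  //         Translate(I);
  //     }
  //     FuncIndex = NextIndex;
  //   }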

  // Returns a recommended ChunkSize for use in calling GrabFunctionDynamic().
  // ChunkSize starts out "large" to reduce synchronization cost. It cannot
  // be too large, however: that would cover too many bytes and defeat
  // streaming translation, and assigning too many functions to a single
  // thread throws off load balancing. So the ChunkSize is reduced as the
  // number of remaining functions drops, to allow load balancing near the
  // end.
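  //
  // For example, with 4 threads: ~1000 functions remaining gives
  // min(8, 1000 / 16) = 8, while ~20 remaining gives min(8, 20 / 16) = 1,
  // so stragglers are handed out one at a time.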
  unsigned RecommendedChunkSize() const {
    // Since NumFunctions may be less than the actual number of functions
    // (see note in ctor), clamp the remaining-function count to at least 1.
    int ApproxRemainingFuncs = std::max(NumFunctions - CurrentFunction, 1);
    int DynamicChunkSize = ApproxRemainingFuncs / (NumThreads * 4);
    return std::max(1, std::min(8, DynamicChunkSize));
  }

private:
  const unsigned NumThreads;
  // Approximate function count for the module (see note in the ctor).
  int NumFunctions;
  // Minimum unassigned function ID; advanced via compare-and-swap in
  // GrabFunctionDynamic().
  volatile int CurrentFunction;

  ThreadedFunctionQueue(const ThreadedFunctionQueue&) LLVM_DELETED_FUNCTION;
  void operator=(const ThreadedFunctionQueue&) LLVM_DELETED_FUNCTION;
};

#endif // THREADEDFUNCTIONQUEUE_H