Index: tools/pnacl-llc/ThreadedFunctionQueue.h |
diff --git a/tools/pnacl-llc/ThreadedFunctionQueue.h b/tools/pnacl-llc/ThreadedFunctionQueue.h |
new file mode 100644 |
index 0000000000000000000000000000000000000000..c199b9cf6b6517c2143c6d409363fd07a57ef620 |
--- /dev/null |
+++ b/tools/pnacl-llc/ThreadedFunctionQueue.h |
@@ -0,0 +1,109 @@ |
+//===- ThreadedFunctionQueue.h - Function work units for threads -*- C++ -*-==// |
+// |
+// The LLVM Compiler Infrastructure |
+// |
+// This file is distributed under the University of Illinois Open Source |
+// License. See LICENSE.TXT for details. |
+// |
+//===----------------------------------------------------------------------===// |
+ |
+#ifndef THREADEDFUNCTIONQUEUE_H |
+#define THREADEDFUNCTIONQUEUE_H |
+ |
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <limits>
+ |
+// A "queue" that keeps track of which functions have been assigned to |
+// threads and which functions have not yet been assigned. It does not |
+// actually use a queue data structure and instead uses a number which |
+// tracks the minimum unassigned function ID, expecting each thread |
+// to have the same view of function IDs. |
+class ThreadedFunctionQueue { |
+ public: |
+ ThreadedFunctionQueue(Module *mod, unsigned NumThreads) |
+ : NumThreads(NumThreads), |
+ NumFunctions(mod->getFunctionList().size()), |
+ CurrentFunction(0) { |
+ assert(NumThreads > 0); |
+ size_t size = mod->getFunctionList().size(); |
+ // NOTE: NumFunctions is only the approximate number of functions. |
+ // Some additional declarations will be added by other passes: |
+ // AddPNaClExternalDeclsPass (for setjmp/longjmp and nacl variants), |
+ // and the NaCl Atomics pass adds declarations of NaCl atomic intrinsics. |
+ // So make the unsigned -> signed range check a little more conservative, |
+ // because CurrentFunction may be incremented beyond NumFunctions. |
+ const int kSlack = 2048; |
+ if (size > static_cast<size_t>(std::numeric_limits<int>::max() - kSlack)) |
+ report_fatal_error("Too many functions"); |
+ NumFunctions = size; |
+ } |
+ ~ThreadedFunctionQueue() {} |
+ |
+ // Assign functions in a static manner between threads. |
+ bool GrabFunctionStatic(int FuncIndex, unsigned ThreadIndex) const { |
+ // Note: This assumes NumThreads == SplitModuleCount, so that |
+ // (a) every function of every module is covered by the NumThreads and |
+ // (b) no function is covered twice by the threads. |
+ assert(ThreadIndex < NumThreads); |
+ return FuncIndex % NumThreads == ThreadIndex; |
+ } |
+ |
+ // Assign functions between threads dynamically. |
+ // Returns true if FuncIndex is unassigned and the calling thread |
+ // is assigned functions [FuncIndex, FuncIndex + ChunkSize). |
+ // Returns false if the calling thread is not assigned functions |
+ // [FuncIndex, FuncIndex + ChunkSize). |
+ // |
+ // NextIndex will be set to the next unassigned function ID, so that |
+ // the calling thread will know which function ID to attempt to grab |
+ // next. Each thread may have a different value for the ideal ChunkSize |
+ // so it is hard to predict the next available function solely based |
+ // on incrementing by ChunkSize. |
+ bool GrabFunctionDynamic(int FuncIndex, unsigned ChunkSize, |
+ int &NextIndex) { |
+ int Cur = CurrentFunction; |
+ if (FuncIndex < Cur) { |
+ NextIndex = Cur; |
+ return false; |
+ } |
+ NextIndex = Cur + ChunkSize; |
+ int Index; |
+ if (Cur == (Index = __sync_val_compare_and_swap(&CurrentFunction, |
+ Cur, NextIndex))) { |
+ return true; |
+ } |
+ // If this thread did not grab the function, its idea of NextIndex |
+ // may be incorrect since ChunkSize can vary between threads. |
+ // Reset NextIndex in that case. |
+ NextIndex = Index; |
+ return false; |
+ } |
+ |
+ // Returns a recommended ChunkSize for use in calling GrabFunctionDynamic(). |
+ // ChunkSize starts out "large" to reduce synchronization cost. However, |
+ // it cannot be too large, otherwise it will encompass too many bytes |
+ // and defeats streaming translation. Assigning too many functions to |
+ // a single thread also throws off load balancing, so the ChunkSize is |
+ // reduced when the remaining number of functions is low so that |
+ // load balancing can be achieved near the end. |
+ unsigned RecommendedChunkSize() const { |
+ // Since NumFunctions may be less than the actual number of functions |
+ // (see note in ctor), clamp remaining functions to 1. |
+ int ApproxRemainingFuncs = std::max(NumFunctions - CurrentFunction, 1); |
+ int DynamicChunkSize = ApproxRemainingFuncs / (NumThreads * 4); |
+ return std::max(1, std::min(8, DynamicChunkSize)); |
+ } |
+ |
+ private: |
+ const unsigned NumThreads; |
+ int NumFunctions; |
+ volatile int CurrentFunction; |
+ |
+ ThreadedFunctionQueue( |
+ const ThreadedFunctionQueue&) LLVM_DELETED_FUNCTION; |
+ void operator=(const ThreadedFunctionQueue&) LLVM_DELETED_FUNCTION; |
+}; |
+ |
+#endif |