| Index: tools/pnacl-llc/ThreadedFunctionQueue.h
|
| diff --git a/tools/pnacl-llc/ThreadedFunctionQueue.h b/tools/pnacl-llc/ThreadedFunctionQueue.h
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..c199b9cf6b6517c2143c6d409363fd07a57ef620
|
| --- /dev/null
|
| +++ b/tools/pnacl-llc/ThreadedFunctionQueue.h
|
| @@ -0,0 +1,111 @@
|
| +//===- ThreadedFunctionQueue.h - Function work units for threads -*- C++ -*-==//
|
| +//
|
| +// The LLVM Compiler Infrastructure
|
| +//
|
| +// This file is distributed under the University of Illinois Open Source
|
| +// License. See LICENSE.TXT for details.
|
| +//
|
| +//===----------------------------------------------------------------------===//
|
| +
|
| +#ifndef THREADEDFUNCTIONQUEUE_H
|
| +#define THREADEDFUNCTIONQUEUE_H
|
| +
|
| +#include <algorithm>
|
| +#include <cassert>
|
| +#include <limits>
|
| +
|
| +#include "llvm/IR/Module.h"
|
| +#include "llvm/Support/ErrorHandling.h"
|
| +
|
| +// A "queue" that keeps track of which functions have been assigned to
|
| +// threads and which functions have not yet been assigned. It does not
|
| +// actually use a queue data structure; instead it keeps a single counter
|
| +// that tracks the minimum unassigned function ID, expecting each thread
|
| +// to have the same view of function IDs.
|
| +class ThreadedFunctionQueue {
|
| + public:
|
| +  ThreadedFunctionQueue(llvm::Module *mod, unsigned NumThreads)
|
| +      : NumThreads(NumThreads),
|
| +        NumFunctions(0),
|
| + CurrentFunction(0) {
|
| + assert(NumThreads > 0);
|
| + size_t size = mod->getFunctionList().size();
|
| + // NOTE: NumFunctions is only the approximate number of functions.
|
| + // Some additional declarations will be added by other passes:
|
| + // AddPNaClExternalDeclsPass (for setjmp/longjmp and nacl variants),
|
| + // and the NaCl Atomics pass adds declarations of NaCl atomic intrinsics.
|
| + // So make the unsigned -> signed range check a little more conservative,
|
| + // because CurrentFunction may be incremented beyond NumFunctions.
|
| + const int kSlack = 2048;
|
| + if (size > static_cast<size_t>(std::numeric_limits<int>::max() - kSlack))
|
| +      llvm::report_fatal_error("Too many functions");
|
| +    NumFunctions = static_cast<int>(size);
|
| + }
|
| + ~ThreadedFunctionQueue() {}
|
| +
|
| + // Assign functions in a static manner between threads.
|
| + bool GrabFunctionStatic(int FuncIndex, unsigned ThreadIndex) const {
|
| + // Note: This assumes NumThreads == SplitModuleCount, so that
|
| +    // (a) every function of every module is covered by some thread, and
|
| + // (b) no function is covered twice by the threads.
|
| + assert(ThreadIndex < NumThreads);
|
| + return FuncIndex % NumThreads == ThreadIndex;
|
| + }
|
| +
|
| + // Assign functions between threads dynamically.
|
| + // Returns true if FuncIndex is unassigned and the calling thread
|
| + // is assigned functions [FuncIndex, FuncIndex + ChunkSize).
|
| + // Returns false if the calling thread is not assigned functions
|
| + // [FuncIndex, FuncIndex + ChunkSize).
|
| + //
|
| + // NextIndex will be set to the next unassigned function ID, so that
|
| + // the calling thread will know which function ID to attempt to grab
|
| + // next. Each thread may have a different value for the ideal ChunkSize
|
| + // so it is hard to predict the next available function solely based
|
| + // on incrementing by ChunkSize.
|
| + bool GrabFunctionDynamic(int FuncIndex, unsigned ChunkSize,
|
| + int &NextIndex) {
|
| + int Cur = CurrentFunction;
|
| + if (FuncIndex < Cur) {
|
| + NextIndex = Cur;
|
| + return false;
|
| + }
|
| + NextIndex = Cur + ChunkSize;
|
| + int Index;
|
| + if (Cur == (Index = __sync_val_compare_and_swap(&CurrentFunction,
|
| + Cur, NextIndex))) {
|
| + return true;
|
| + }
|
| + // If this thread did not grab the function, its idea of NextIndex
|
| + // may be incorrect since ChunkSize can vary between threads.
|
| + // Reset NextIndex in that case.
|
| + NextIndex = Index;
|
| + return false;
|
| + }
|
| +
|
| + // Returns a recommended ChunkSize for use in calling GrabFunctionDynamic().
|
| + // ChunkSize starts out "large" to reduce synchronization cost. However,
|
| + // it cannot be too large, otherwise it will encompass too many bytes
|
| +  // and defeat streaming translation. Assigning too many functions to
|
| +  // a single thread also throws off load balancing, so the ChunkSize is
|
| +  // reduced when the number of remaining functions is low, allowing
|
| +  // better load balancing near the end.
|
| + unsigned RecommendedChunkSize() const {
|
| + // Since NumFunctions may be less than the actual number of functions
|
| + // (see note in ctor), clamp remaining functions to 1.
|
| + int ApproxRemainingFuncs = std::max(NumFunctions - CurrentFunction, 1);
|
| + int DynamicChunkSize = ApproxRemainingFuncs / (NumThreads * 4);
|
| + return std::max(1, std::min(8, DynamicChunkSize));
|
| + }
|
| +
|
| + private:
|
| + const unsigned NumThreads;
|
| + int NumFunctions;
|
| + volatile int CurrentFunction;
|
| +
|
| + ThreadedFunctionQueue(
|
| + const ThreadedFunctionQueue&) LLVM_DELETED_FUNCTION;
|
| + void operator=(const ThreadedFunctionQueue&) LLVM_DELETED_FUNCTION;
|
| +};
|
| +
|
| +#endif
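
For context, here is a minimal sketch of how a per-thread translation loop might drive GrabFunctionDynamic() and RecommendedChunkSize(). It is not part of the patch and not the actual pnacl-llc driver code; WorkerThread and CompileFunction are hypothetical names, and the only assumption is the one the class itself documents, namely that every thread walks the module's function list in the same order.

    #include "ThreadedFunctionQueue.h"

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"

    // Placeholder for the real per-function code generation (which would
    // presumably skip declarations).
    static void CompileFunction(llvm::Function &F);

    // Hypothetical worker: functions are numbered in module order, and the
    // thread only compiles the indices it manages to grab.  Declarations
    // also occupy an index, so every thread counts functions identically.
    static void WorkerThread(llvm::Module *M, ThreadedFunctionQueue *Queue) {
      int Index = 0;      // Position of the current function in module order.
      int NextIndex = 0;  // Next index worth attempting to grab.
      int OwnedEnd = 0;   // One past the last index owned by this thread.
      for (llvm::Module::iterator F = M->begin(), E = M->end(); F != E;
           ++F, ++Index) {
        if (Index < OwnedEnd) {
          CompileFunction(*F);  // Still inside the chunk we already grabbed.
          continue;
        }
        if (Index < NextIndex)
          continue;             // Known to be assigned to another thread.
        unsigned ChunkSize = Queue->RecommendedChunkSize();
        if (Queue->GrabFunctionDynamic(Index, ChunkSize, NextIndex)) {
          OwnedEnd = NextIndex;  // We now own [Index, Index + ChunkSize).
          CompileFunction(*F);
        }
        // On failure, GrabFunctionDynamic() resets NextIndex to the queue's
        // current head, so the loop skips ahead until it reaches an index
        // that is still worth grabbing.
      }
    }

Note how NextIndex serves double duty, exactly as the comments in the header describe: on a successful grab it marks the end of this thread's chunk, and on a failed grab it tells the thread how far to skip before trying again.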
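The static schedule needs no shared state at all: assuming NumThreads equals the split-module count, thread T simply takes every function whose module-order index is congruent to T modulo NumThreads. A correspondingly simpler (and again hypothetical) loop, reusing the CompileFunction placeholder from the sketch above, might look like this:

    // Hypothetical static worker: thread ThreadIndex compiles exactly the
    // functions whose index satisfies Index % NumThreads == ThreadIndex.
    static void StaticWorkerThread(llvm::Module *M,
                                   ThreadedFunctionQueue *Queue,
                                   unsigned ThreadIndex) {
      int Index = 0;
      for (llvm::Module::iterator F = M->begin(), E = M->end(); F != E;
           ++F, ++Index) {
        if (Queue->GrabFunctionStatic(Index, ThreadIndex))
          CompileFunction(*F);
      }
    }

To get a feel for the dynamic heuristic: with 4 threads and roughly 1000 functions still unassigned, RecommendedChunkSize() returns min(8, 1000 / 16) = 8, so threads grab 8 functions per synchronization; once only about 20 functions remain it returns 20 / 16 = 1, so the tail of the module is handed out one function at a time and the threads finish at roughly the same time.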