//===- ThreadedFunctionQueue.h - Function work units for threads -*- C++ -*-==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#ifndef THREADEDFUNCTIONQUEUE_H
#define THREADEDFUNCTIONQUEUE_H

#include <algorithm>
#include <cassert>
#include <limits>

#include "llvm/IR/Module.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"

// A "queue" that keeps track of which functions have been assigned to
// threads and which have not yet been assigned. It does not actually use
// a queue data structure; instead it keeps a counter of the minimum
// unassigned function ID, and expects each thread to have the same view
// of function IDs.
class ThreadedFunctionQueue {
public:
  ThreadedFunctionQueue(llvm::Module *mod, unsigned NumThreads)
      : NumThreads(NumThreads), NumFunctions(0), CurrentFunction(0) {
    assert(NumThreads > 0);
    size_t size = mod->getFunctionList().size();
    // NOTE: NumFunctions is only the approximate number of functions,
    // because other passes add declarations later: AddPNaClExternalDeclsPass
    // (for setjmp/longjmp and the NaCl variants), and the NaCl Atomics pass
    // (for the NaCl atomic intrinsics). So make the unsigned -> signed
    // range check a little more conservative, because CurrentFunction may
    // be incremented beyond NumFunctions.
    const int kSlack = 2048;
    if (size > static_cast<size_t>(std::numeric_limits<int>::max() - kSlack))
      llvm::report_fatal_error("Too many functions");
    NumFunctions = static_cast<int>(size);
  }
  ~ThreadedFunctionQueue() {}

  // Statically assign functions to threads in round-robin fashion.
  bool GrabFunctionStatic(int FuncIndex, unsigned ThreadIndex) const {
    // Note: This assumes NumThreads == SplitModuleCount, so that
    // (a) every function of every module is covered by the NumThreads and
    // (b) no function is covered twice by the threads.
    assert(ThreadIndex < NumThreads);
    return FuncIndex % NumThreads == ThreadIndex;
  }
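
  // Example of the static pattern (a sketch; FuncQ, NumFuncs, MyIndex, and
  // Translate() are illustrative names, not part of this interface):
  //
  //   for (int I = 0; I < NumFuncs; ++I)
  //     if (FuncQ.GrabFunctionStatic(I, MyIndex))
  //       Translate(I);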

  // Assign functions to threads dynamically.
  // Returns true if FuncIndex was still unassigned, in which case the
  // calling thread is now assigned the functions
  // [FuncIndex, FuncIndex + ChunkSize). Returns false otherwise.
  //
  // In both cases NextIndex is set to the next unassigned function ID,
  // so the calling thread knows which function ID to attempt to grab
  // next. Each thread may use a different ChunkSize, so the next
  // available function cannot be predicted simply by incrementing
  // FuncIndex by ChunkSize.
  bool GrabFunctionDynamic(int FuncIndex, unsigned ChunkSize,
                           int &NextIndex) {
    int Cur = CurrentFunction;
    if (FuncIndex < Cur) {
      // Another thread already claimed this function; report where the
      // counter currently stands.
      NextIndex = Cur;
      return false;
    }
    NextIndex = Cur + ChunkSize;
    int Index = __sync_val_compare_and_swap(&CurrentFunction, Cur, NextIndex);
    if (Index == Cur)
      return true;
    // Another thread won the race to advance CurrentFunction. This thread's
    // idea of NextIndex may be incorrect since ChunkSize can vary between
    // threads; reset NextIndex to the value that was observed.
    NextIndex = Index;
    return false;
  }
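
  // Example worker loop for the dynamic pattern (a sketch; FuncQ, NumFuncs,
  // and Translate() are illustrative names, not part of this interface):
  //
  //   int FuncIndex = 0;
  //   while (FuncIndex < NumFuncs) {
  //     int NextIndex;
  //     if (FuncQ.GrabFunctionDynamic(FuncIndex,
  //                                   FuncQ.RecommendedChunkSize(),
  //                                   NextIndex)) {
  //       // This thread now owns [FuncIndex, NextIndex).
  //       for (int I = FuncIndex; I < std::min(NextIndex, NumFuncs); ++I)
  //         Translate(I);
  //     }
  //     FuncIndex = NextIndex;
  //   }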

  // Returns a recommended ChunkSize for use in calling GrabFunctionDynamic().
  // ChunkSize starts out "large" to reduce synchronization cost. It cannot
  // be too large, however: that would cover too many bytes and defeat
  // streaming translation, and assigning too many functions to a single
  // thread throws off load balancing. So the ChunkSize is reduced as the
  // number of remaining functions drops, to allow load balancing near the
  // end.
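  //
  // For example, with 4 threads: ~1000 functions remaining gives
  // min(8, 1000 / 16) = 8, while ~20 remaining gives min(8, 20 / 16) = 1,
  // so stragglers are handed out one at a time.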
  unsigned RecommendedChunkSize() const {
    // Since NumFunctions may be less than the actual number of functions
    // (see note in ctor), clamp the remaining-function count to at least 1.
    int ApproxRemainingFuncs = std::max(NumFunctions - CurrentFunction, 1);
    int DynamicChunkSize = ApproxRemainingFuncs / (NumThreads * 4);
    return std::max(1, std::min(8, DynamicChunkSize));
  }

private:
  const unsigned NumThreads;
  // Approximate function count for the module (see note in the ctor).
  int NumFunctions;
  // Minimum unassigned function ID; advanced via compare-and-swap in
  // GrabFunctionDynamic().
  volatile int CurrentFunction;

  ThreadedFunctionQueue(const ThreadedFunctionQueue&) LLVM_DELETED_FUNCTION;
  void operator=(const ThreadedFunctionQueue&) LLVM_DELETED_FUNCTION;
};

#endif // THREADEDFUNCTIONQUEUE_H