Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1570)

Unified Diff: tools/pnacl-llc/pnacl-llc.cpp

Issue 196793026: Add self-scheduling to threaded translation (vs static) (Closed) Base URL: http://git.chromium.org/native_client/pnacl-llvm.git@master
Patch Set: reinstate check Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/pnacl-llc/ThreadedFunctionQueue.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/pnacl-llc/pnacl-llc.cpp
diff --git a/tools/pnacl-llc/pnacl-llc.cpp b/tools/pnacl-llc/pnacl-llc.cpp
index 39f0ce314810038c5db18a2ae7f8375b7b0aa02e..ff15c55c8807f5e14c967421e087522adce434d3 100644
--- a/tools/pnacl-llc/pnacl-llc.cpp
+++ b/tools/pnacl-llc/pnacl-llc.cpp
@@ -45,11 +45,11 @@
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/NaCl.h"
+#include "ThreadedFunctionQueue.h"
#include "ThreadedStreamingCache.h"
#include <pthread.h>
#include <memory>
-
using namespace llvm;
// NOTE: When __native_client__ is defined it means pnacl-llc is built as a
@@ -131,6 +131,23 @@ cl::opt<unsigned>
SplitModuleCount("split-module",
cl::desc("Split PNaCl module"), cl::init(1U));
+enum SplitModuleSchedulerKind {
+ SplitModuleDynamic,
+ SplitModuleStatic
+};
+
+cl::opt<SplitModuleSchedulerKind>
+SplitModuleSched(
+ "split-module-sched",
+ cl::desc("Choose thread scheduler for split module compilation."),
+ cl::values(
+ clEnumValN(SplitModuleDynamic, "dynamic",
+ "Dynamic thread scheduling (default)"),
+ clEnumValN(SplitModuleStatic, "static",
+ "Static thread scheduling"),
+ clEnumValEnd),
+ cl::init(SplitModuleDynamic));
+
/// Compile the module provided to pnacl-llc. The file name for reading the
/// module and other options are taken from globals populated by command-line
/// option parsing.
@@ -335,6 +352,7 @@ static Module* getModule(StringRef ProgramName, LLVMContext &Context,
static int runCompilePasses(Module *mod,
unsigned ModuleIndex,
+ ThreadedFunctionQueue *FuncQueue,
const Triple &TheTriple,
TargetMachine &Target,
StringRef ProgramName,
@@ -431,13 +449,49 @@ static int runCompilePasses(Module *mod,
if (LazyBitcode) {
FunctionPassManager* P = static_cast<FunctionPassManager*>(PM.get());
P->doInitialization();
- unsigned FuncIndex = 0;
- for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ++I) {
- if (FuncIndex++ % SplitModuleCount == ModuleIndex) {
- P->run(*I);
- CheckABIVerifyErrors(ABIErrorReporter, "Function " + I->getName());
- I->Dematerialize();
+ int FuncIndex = 0;
+ switch (SplitModuleSched) {
+ case SplitModuleStatic:
+ for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ++I) {
+ if (FuncQueue->GrabFunctionStatic(FuncIndex, ModuleIndex)) {
+ P->run(*I);
+ CheckABIVerifyErrors(ABIErrorReporter, "Function " + I->getName());
+ I->Dematerialize();
+ }
+ ++FuncIndex;
+ }
+ break;
+ case SplitModuleDynamic:
+ unsigned ChunkSize = 0;
+ for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ) {
+ ChunkSize = FuncQueue->RecommendedChunkSize();
+ int NextIndex;
+ bool grabbed = FuncQueue->GrabFunctionDynamic(FuncIndex, ChunkSize,
+ NextIndex);
+ if (grabbed) {
+ while (FuncIndex < NextIndex && I != E) {
+ P->run(*I);
+ CheckABIVerifyErrors(ABIErrorReporter, "Function " + I->getName());
+ I->Dematerialize();
+ ++FuncIndex;
+ ++I;
+ }
+ } else {
+ // Currently the ResolvePNaClIntrinsics function pass may
+ // add more declarations as we iterate. Some threads may get
+ // "lucky" and not add the related declarations, so those
+ // threads would have an earlier endpoint than other
+ // threads. the final NextIndex established by another
+ // thread would then be out of the bounds of the current thread.
+ // TODO(jvoung): Ensure that all declarations are added up front
+ // and uniformly so that we don't need this I != E check.
+ while (FuncIndex < NextIndex && I != E) {
+ ++FuncIndex;
+ ++I;
+ }
+ }
}
+ break;
}
P->doFinalization();
} else {
@@ -455,7 +509,8 @@ static int compileSplitModule(const TargetOptions &Options,
const StringRef &ProgramName,
Module *GlobalModule,
StreamingMemoryObject *StreamingObject,
- unsigned ModuleIndex) {
+ unsigned ModuleIndex,
+ ThreadedFunctionQueue *FuncQueue) {
std::auto_ptr<TargetMachine>
target(TheTarget->createTargetMachine(TheTriple.getTriple(),
MCPU, FeaturesStr, Options,
@@ -495,8 +550,8 @@ static int compileSplitModule(const TargetOptions &Options,
if (ModuleIndex > 0)
OutFileName << ".module" << ModuleIndex;
OwningPtr<tool_output_file> Out
- (GetOutputStream(TheTarget->getName(), TheTriple.getOS(),
- OutFileName.str()));
+ (GetOutputStream(TheTarget->getName(), TheTriple.getOS(),
+ OutFileName.str()));
if (!Out) return 1;
formatted_raw_ostream FOS(Out->os());
#else
@@ -504,7 +559,8 @@ static int compileSplitModule(const TargetOptions &Options,
ROS.SetBufferSize(1 << 20);
formatted_raw_ostream FOS(ROS);
#endif
- int ret = runCompilePasses(mod, ModuleIndex, TheTriple, Target, ProgramName,
+ int ret = runCompilePasses(mod, ModuleIndex, FuncQueue,
+ TheTriple, Target, ProgramName,
FOS);
if (ret)
return ret;
@@ -529,6 +585,7 @@ struct ThreadData {
Module *GlobalModule;
StreamingMemoryObject *StreamingObject;
unsigned ModuleIndex;
+ ThreadedFunctionQueue *FuncQueue;
};
@@ -542,7 +599,8 @@ static void *runCompileThread(void *arg) {
Data->ProgramName,
Data->GlobalModule,
Data->StreamingObject,
- Data->ModuleIndex);
+ Data->ModuleIndex,
+ Data->FuncQueue);
return reinterpret_cast<void *>(static_cast<intptr_t>(ret));
}
@@ -652,10 +710,14 @@ static int compileModule(StringRef ProgramName) {
SmallVector<pthread_t, 4> Pthreads(SplitModuleCount);
SmallVector<ThreadData, 4> ThreadDatas(SplitModuleCount);
+ ThreadedFunctionQueue FuncQueue(mod.get(), SplitModuleCount);
if (SplitModuleCount == 1) {
+ // No need for dynamic scheduling with one thread.
+ SplitModuleSched = SplitModuleStatic;
return compileSplitModule(Options, TheTriple, TheTarget, FeaturesStr,
- OLvl, ProgramName, mod.get(), NULL, 0);
+ OLvl, ProgramName, mod.get(), NULL, 0,
+ &FuncQueue);
}
for(unsigned ModuleIndex = 0; ModuleIndex < SplitModuleCount; ++ModuleIndex) {
@@ -668,6 +730,7 @@ static int compileModule(StringRef ProgramName) {
ThreadDatas[ModuleIndex].GlobalModule = mod.get();
ThreadDatas[ModuleIndex].StreamingObject = StreamingObject.get();
ThreadDatas[ModuleIndex].ModuleIndex = ModuleIndex;
+ ThreadDatas[ModuleIndex].FuncQueue = &FuncQueue;
if (pthread_create(&Pthreads[ModuleIndex], NULL, runCompileThread,
&ThreadDatas[ModuleIndex])) {
report_fatal_error("Failed to create thread");
« no previous file with comments | « tools/pnacl-llc/ThreadedFunctionQueue.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698