Chromium Code Reviews| Index: tools/pnacl-llc/pnacl-llc.cpp |
| diff --git a/tools/pnacl-llc/pnacl-llc.cpp b/tools/pnacl-llc/pnacl-llc.cpp |
| index 39f0ce314810038c5db18a2ae7f8375b7b0aa02e..54b7d4983f5b20aeb56809a8f927c71b65e52156 100644 |
| --- a/tools/pnacl-llc/pnacl-llc.cpp |
| +++ b/tools/pnacl-llc/pnacl-llc.cpp |
| @@ -45,11 +45,11 @@ |
| #include "llvm/Target/TargetLibraryInfo.h" |
| #include "llvm/Target/TargetMachine.h" |
| #include "llvm/Transforms/NaCl.h" |
| +#include "ThreadedFunctionQueue.h" |
| #include "ThreadedStreamingCache.h" |
| #include <pthread.h> |
| #include <memory> |
| - |
| using namespace llvm; |
| // NOTE: When __native_client__ is defined it means pnacl-llc is built as a |
| @@ -131,6 +131,23 @@ cl::opt<unsigned> |
| SplitModuleCount("split-module", |
| cl::desc("Split PNaCl module"), cl::init(1U)); |
| +enum SplitModuleSchedulerKind { |
| + SplitModuleDynamic, |
| + SplitModuleStatic |
| +}; |
| + |
| +cl::opt<SplitModuleSchedulerKind> |
| +SplitModuleSched( |
| + "split-module-sched", |
| + cl::desc("Choose thread scheduler for split module compilation."), |
| + cl::values( |
| + clEnumValN(SplitModuleDynamic, "dynamic", |
| + "Dynamic thread scheduling (default)"), |
| + clEnumValN(SplitModuleStatic, "static", |
| + "Static thread scheduling"), |
| + clEnumValEnd), |
| + cl::init(SplitModuleDynamic)); |
| + |
| /// Compile the module provided to pnacl-llc. The file name for reading the |
| /// module and other options are taken from globals populated by command-line |
| /// option parsing. |
| @@ -335,6 +352,7 @@ static Module* getModule(StringRef ProgramName, LLVMContext &Context, |
| static int runCompilePasses(Module *mod, |
| unsigned ModuleIndex, |
| + ThreadedFunctionQueue *FuncQueue, |
| const Triple &TheTriple, |
| TargetMachine &Target, |
| StringRef ProgramName, |
| @@ -432,11 +450,36 @@ static int runCompilePasses(Module *mod, |
| FunctionPassManager* P = static_cast<FunctionPassManager*>(PM.get()); |
| P->doInitialization(); |
| unsigned FuncIndex = 0; |
| - for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ++I) { |
| - if (FuncIndex++ % SplitModuleCount == ModuleIndex) { |
| - P->run(*I); |
| - CheckABIVerifyErrors(ABIErrorReporter, "Function " + I->getName()); |
| - I->Dematerialize(); |
| + if (SplitModuleSched == SplitModuleStatic) { |
|
JF
2014/03/19 04:47:05
It would be nicer if this were a switch on SplitModuleSched…
jvoung (off chromium)
2014/03/19 18:44:37
Done.
|
| + for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ++I) { |
| + if (FuncQueue->GrabFunctionStatic(FuncIndex, ModuleIndex)) { |
| + P->run(*I); |
| + CheckABIVerifyErrors(ABIErrorReporter, "Function " + I->getName()); |
| + I->Dematerialize(); |
| + } |
| + ++FuncIndex; |
| + } |
| + } else { |
| + unsigned ChunkSize = 0; |
| + for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ) { |
| + ChunkSize = FuncQueue->RecommendedChunkSize(); |
| + unsigned NextIndex; |
| + bool grabbed = FuncQueue->GrabFunctionDynamic(FuncIndex, ChunkSize, |
| + NextIndex); |
| + if (grabbed) { |
| + while (FuncIndex < NextIndex && I != E) { |
|
JF
2014/03/19 04:47:05
Can it happen that I == E? Shouldn't we have failed…
jvoung (off chromium)
2014/03/19 18:44:37
It happened back when I was experimenting with ChunkSize…
jvoung (off chromium)
2014/03/20 15:50:31
Looks like not all threads agree on how many functions…
|
| + P->run(*I); |
| + CheckABIVerifyErrors(ABIErrorReporter, "Function " + I->getName()); |
| + I->Dematerialize(); |
| + ++FuncIndex; |
| + ++I; |
| + } |
| + } else { |
| + while (FuncIndex < NextIndex && I != E) { |
| + ++FuncIndex; |
| + ++I; |
| + } |
| + } |
| } |
| } |
| P->doFinalization(); |
| @@ -455,7 +498,8 @@ static int compileSplitModule(const TargetOptions &Options, |
| const StringRef &ProgramName, |
| Module *GlobalModule, |
| StreamingMemoryObject *StreamingObject, |
| - unsigned ModuleIndex) { |
| + unsigned ModuleIndex, |
| + ThreadedFunctionQueue *FuncQueue) { |
| std::auto_ptr<TargetMachine> |
| target(TheTarget->createTargetMachine(TheTriple.getTriple(), |
| MCPU, FeaturesStr, Options, |
| @@ -495,8 +539,8 @@ static int compileSplitModule(const TargetOptions &Options, |
| if (ModuleIndex > 0) |
| OutFileName << ".module" << ModuleIndex; |
| OwningPtr<tool_output_file> Out |
| - (GetOutputStream(TheTarget->getName(), TheTriple.getOS(), |
| - OutFileName.str())); |
| + (GetOutputStream(TheTarget->getName(), TheTriple.getOS(), |
| + OutFileName.str())); |
| if (!Out) return 1; |
| formatted_raw_ostream FOS(Out->os()); |
| #else |
| @@ -504,7 +548,8 @@ static int compileSplitModule(const TargetOptions &Options, |
| ROS.SetBufferSize(1 << 20); |
| formatted_raw_ostream FOS(ROS); |
| #endif |
| - int ret = runCompilePasses(mod, ModuleIndex, TheTriple, Target, ProgramName, |
| + int ret = runCompilePasses(mod, ModuleIndex, FuncQueue, |
| + TheTriple, Target, ProgramName, |
| FOS); |
| if (ret) |
| return ret; |
| @@ -529,6 +574,7 @@ struct ThreadData { |
| Module *GlobalModule; |
| StreamingMemoryObject *StreamingObject; |
| unsigned ModuleIndex; |
| + ThreadedFunctionQueue *FuncQueue; |
| }; |
| @@ -542,7 +588,8 @@ static void *runCompileThread(void *arg) { |
| Data->ProgramName, |
| Data->GlobalModule, |
| Data->StreamingObject, |
| - Data->ModuleIndex); |
| + Data->ModuleIndex, |
| + Data->FuncQueue); |
| return reinterpret_cast<void *>(static_cast<intptr_t>(ret)); |
| } |
| @@ -652,10 +699,14 @@ static int compileModule(StringRef ProgramName) { |
| SmallVector<pthread_t, 4> Pthreads(SplitModuleCount); |
| SmallVector<ThreadData, 4> ThreadDatas(SplitModuleCount); |
| + ThreadedFunctionQueue FuncQueue(mod.get(), SplitModuleCount); |
| if (SplitModuleCount == 1) { |
| + // No need for dynamic scheduling with one thread. |
| + SplitModuleSched = SplitModuleStatic; |
| return compileSplitModule(Options, TheTriple, TheTarget, FeaturesStr, |
| - OLvl, ProgramName, mod.get(), NULL, 0); |
| + OLvl, ProgramName, mod.get(), NULL, 0, |
| + &FuncQueue); |
| } |
| for(unsigned ModuleIndex = 0; ModuleIndex < SplitModuleCount; ++ModuleIndex) { |
| @@ -668,6 +719,7 @@ static int compileModule(StringRef ProgramName) { |
| ThreadDatas[ModuleIndex].GlobalModule = mod.get(); |
| ThreadDatas[ModuleIndex].StreamingObject = StreamingObject.get(); |
| ThreadDatas[ModuleIndex].ModuleIndex = ModuleIndex; |
| + ThreadDatas[ModuleIndex].FuncQueue = &FuncQueue; |
| if (pthread_create(&Pthreads[ModuleIndex], NULL, runCompileThread, |
| &ThreadDatas[ModuleIndex])) { |
| report_fatal_error("Failed to create thread"); |