| Index: tools/pnacl-llc/pnacl-llc.cpp
|
| diff --git a/tools/pnacl-llc/pnacl-llc.cpp b/tools/pnacl-llc/pnacl-llc.cpp
|
| index 39f0ce314810038c5db18a2ae7f8375b7b0aa02e..ff15c55c8807f5e14c967421e087522adce434d3 100644
|
| --- a/tools/pnacl-llc/pnacl-llc.cpp
|
| +++ b/tools/pnacl-llc/pnacl-llc.cpp
|
| @@ -45,11 +45,11 @@
|
| #include "llvm/Target/TargetLibraryInfo.h"
|
| #include "llvm/Target/TargetMachine.h"
|
| #include "llvm/Transforms/NaCl.h"
|
| +#include "ThreadedFunctionQueue.h"
|
| #include "ThreadedStreamingCache.h"
|
| #include <pthread.h>
|
| #include <memory>
|
|
|
| -
|
| using namespace llvm;
|
|
|
| // NOTE: When __native_client__ is defined it means pnacl-llc is built as a
|
| @@ -131,6 +131,23 @@ cl::opt<unsigned>
|
| SplitModuleCount("split-module",
|
| cl::desc("Split PNaCl module"), cl::init(1U));
|
|
|
| +enum SplitModuleSchedulerKind {
|
| + SplitModuleDynamic,
|
| + SplitModuleStatic
|
| +};
|
| +
|
| +cl::opt<SplitModuleSchedulerKind>
|
| +SplitModuleSched(
|
| + "split-module-sched",
|
| + cl::desc("Choose thread scheduler for split module compilation."),
|
| + cl::values(
|
| + clEnumValN(SplitModuleDynamic, "dynamic",
|
| + "Dynamic thread scheduling (default)"),
|
| + clEnumValN(SplitModuleStatic, "static",
|
| + "Static thread scheduling"),
|
| + clEnumValEnd),
|
| + cl::init(SplitModuleDynamic));
|
| +
|
| /// Compile the module provided to pnacl-llc. The file name for reading the
|
| /// module and other options are taken from globals populated by command-line
|
| /// option parsing.
|
| @@ -335,6 +352,7 @@ static Module* getModule(StringRef ProgramName, LLVMContext &Context,
|
|
|
| static int runCompilePasses(Module *mod,
|
| unsigned ModuleIndex,
|
| + ThreadedFunctionQueue *FuncQueue,
|
| const Triple &TheTriple,
|
| TargetMachine &Target,
|
| StringRef ProgramName,
|
| @@ -431,13 +449,49 @@ static int runCompilePasses(Module *mod,
|
| if (LazyBitcode) {
|
| FunctionPassManager* P = static_cast<FunctionPassManager*>(PM.get());
|
| P->doInitialization();
|
| - unsigned FuncIndex = 0;
|
| - for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ++I) {
|
| - if (FuncIndex++ % SplitModuleCount == ModuleIndex) {
|
| - P->run(*I);
|
| - CheckABIVerifyErrors(ABIErrorReporter, "Function " + I->getName());
|
| - I->Dematerialize();
|
| + int FuncIndex = 0;
|
| + switch (SplitModuleSched) {
|
| + case SplitModuleStatic:
|
| + for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ++I) {
|
| + if (FuncQueue->GrabFunctionStatic(FuncIndex, ModuleIndex)) {
|
| + P->run(*I);
|
| + CheckABIVerifyErrors(ABIErrorReporter, "Function " + I->getName());
|
| + I->Dematerialize();
|
| + }
|
| + ++FuncIndex;
|
| + }
|
| + break;
|
| + case SplitModuleDynamic:
|
| + unsigned ChunkSize = 0;
|
| + for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ) {
|
| + ChunkSize = FuncQueue->RecommendedChunkSize();
|
| + int NextIndex;
|
| + bool grabbed = FuncQueue->GrabFunctionDynamic(FuncIndex, ChunkSize,
|
| + NextIndex);
|
| + if (grabbed) {
|
| + while (FuncIndex < NextIndex && I != E) {
|
| + P->run(*I);
|
| + CheckABIVerifyErrors(ABIErrorReporter, "Function " + I->getName());
|
| + I->Dematerialize();
|
| + ++FuncIndex;
|
| + ++I;
|
| + }
|
| + } else {
|
| + // Currently the ResolvePNaClIntrinsics function pass may
|
| + // add more declarations as we iterate. Some threads may get
|
| + // "lucky" and not add the related declarations, so those
|
| + // threads would have an earlier endpoint than other
|
| + // threads. The final NextIndex established by another
|
| + // thread would then be out of the bounds of the current thread.
|
| + // TODO(jvoung): Ensure that all declarations are added up front
|
| + // and uniformly so that we don't need this I != E check.
|
| + while (FuncIndex < NextIndex && I != E) {
|
| + ++FuncIndex;
|
| + ++I;
|
| + }
|
| + }
|
| }
|
| + break;
|
| }
|
| P->doFinalization();
|
| } else {
|
| @@ -455,7 +509,8 @@ static int compileSplitModule(const TargetOptions &Options,
|
| const StringRef &ProgramName,
|
| Module *GlobalModule,
|
| StreamingMemoryObject *StreamingObject,
|
| - unsigned ModuleIndex) {
|
| + unsigned ModuleIndex,
|
| + ThreadedFunctionQueue *FuncQueue) {
|
| std::auto_ptr<TargetMachine>
|
| target(TheTarget->createTargetMachine(TheTriple.getTriple(),
|
| MCPU, FeaturesStr, Options,
|
| @@ -495,8 +550,8 @@ static int compileSplitModule(const TargetOptions &Options,
|
| if (ModuleIndex > 0)
|
| OutFileName << ".module" << ModuleIndex;
|
| OwningPtr<tool_output_file> Out
|
| - (GetOutputStream(TheTarget->getName(), TheTriple.getOS(),
|
| - OutFileName.str()));
|
| + (GetOutputStream(TheTarget->getName(), TheTriple.getOS(),
|
| + OutFileName.str()));
|
| if (!Out) return 1;
|
| formatted_raw_ostream FOS(Out->os());
|
| #else
|
| @@ -504,7 +559,8 @@ static int compileSplitModule(const TargetOptions &Options,
|
| ROS.SetBufferSize(1 << 20);
|
| formatted_raw_ostream FOS(ROS);
|
| #endif
|
| - int ret = runCompilePasses(mod, ModuleIndex, TheTriple, Target, ProgramName,
|
| + int ret = runCompilePasses(mod, ModuleIndex, FuncQueue,
|
| + TheTriple, Target, ProgramName,
|
| FOS);
|
| if (ret)
|
| return ret;
|
| @@ -529,6 +585,7 @@ struct ThreadData {
|
| Module *GlobalModule;
|
| StreamingMemoryObject *StreamingObject;
|
| unsigned ModuleIndex;
|
| + ThreadedFunctionQueue *FuncQueue;
|
| };
|
|
|
|
|
| @@ -542,7 +599,8 @@ static void *runCompileThread(void *arg) {
|
| Data->ProgramName,
|
| Data->GlobalModule,
|
| Data->StreamingObject,
|
| - Data->ModuleIndex);
|
| + Data->ModuleIndex,
|
| + Data->FuncQueue);
|
| return reinterpret_cast<void *>(static_cast<intptr_t>(ret));
|
| }
|
|
|
| @@ -652,10 +710,14 @@ static int compileModule(StringRef ProgramName) {
|
|
|
| SmallVector<pthread_t, 4> Pthreads(SplitModuleCount);
|
| SmallVector<ThreadData, 4> ThreadDatas(SplitModuleCount);
|
| + ThreadedFunctionQueue FuncQueue(mod.get(), SplitModuleCount);
|
|
|
| if (SplitModuleCount == 1) {
|
| + // No need for dynamic scheduling with one thread.
|
| + SplitModuleSched = SplitModuleStatic;
|
| return compileSplitModule(Options, TheTriple, TheTarget, FeaturesStr,
|
| - OLvl, ProgramName, mod.get(), NULL, 0);
|
| + OLvl, ProgramName, mod.get(), NULL, 0,
|
| + &FuncQueue);
|
| }
|
|
|
| for(unsigned ModuleIndex = 0; ModuleIndex < SplitModuleCount; ++ModuleIndex) {
|
| @@ -668,6 +730,7 @@ static int compileModule(StringRef ProgramName) {
|
| ThreadDatas[ModuleIndex].GlobalModule = mod.get();
|
| ThreadDatas[ModuleIndex].StreamingObject = StreamingObject.get();
|
| ThreadDatas[ModuleIndex].ModuleIndex = ModuleIndex;
|
| + ThreadDatas[ModuleIndex].FuncQueue = &FuncQueue;
|
| if (pthread_create(&Pthreads[ModuleIndex], NULL, runCompileThread,
|
| &ThreadDatas[ModuleIndex])) {
|
| report_fatal_error("Failed to create thread");
|
|
|