src/wasm/wasm-module.cc - Issue 1961973002: [wasm] Implement parallel compilation.

Unified Diff: src/wasm/wasm-module.cc

Issue 1961973002: [wasm] Implement parallel compilation. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Add tests. Created 4 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/wasm/wasm-module.cc

diff --git a/src/wasm/wasm-module.cc b/src/wasm/wasm-module.cc

index 28fdb105dc834cd7a0caf8f26657fa4c5970692d..cb979d4fddf018854e5ed5df2f83d3649ce7b1bd 100644

--- a/src/wasm/wasm-module.cc

+++ b/src/wasm/wasm-module.cc

@@ -130,11 +130,11 @@ class WasmLinker {

// Create a placeholder code object and encode the corresponding index in

// the {constant_pool_offset} field of the code object.

// TODO(titzer): placeholder code objects are somewhat dangerous.

- Handle<Code> self(nullptr, isolate_);

byte buffer[] = {0, 0, 0, 0, 0, 0, 0, 0}; // fake instructions.

CodeDesc desc = {buffer, 8, 8, 0, 0, nullptr};

Handle<Code> code = isolate_->factory()->NewCode(

- desc, Code::KindField::encode(Code::WASM_FUNCTION), self);

+ desc, Code::KindField::encode(Code::WASM_FUNCTION),

+ Handle<Object>::null());

code->set_constant_pool_offset(index + kPlaceholderMarker);

placeholder_code_[index] = code;

function_code_[index] = code;

@@ -398,6 +398,68 @@ static MaybeHandle<JSFunction> LookupFunction(

return Handle<JSFunction>::cast(function);

}

+// Fetches the compilation unit of a wasm function and executes its parallel

+// phase.

+bool FetchAndExecuteCompilation(

+ Isolate* isolate,

+ std::vector<compiler::WasmCompilationUnit*>* compilation_units,

+ std::queue<compiler::WasmCompilationUnit*>* executed_units,

+ base::Mutex* result_mutex, base::Atomic32* next_unit) {

+ DisallowHeapAllocation no_allocation;

+ DisallowHandleAllocation no_handles;

+ DisallowHandleDereference no_deref;

+ DisallowCodeDependencyChange no_dependency_change;

+ // - 1 because AtomicIncrement returns the value after the atomic increment.

+ size_t index =

+ static_cast<size_t>(base::NoBarrier_AtomicIncrement(next_unit, 1)) - 1;

Michael Lippautz 2016/05/10 07:36:41 nit: Don't know if you require the use of a 32bit

ahaas 2016/05/10 09:45:49 Done, thanks.

+ if (index >= compilation_units->size()) {

+ return false;

+ }

+ compiler::WasmCompilationUnit* unit = compilation_units->at(index);

+ if (unit != nullptr) {

+ compiler::ExecuteCompilation(unit);

+ {

+ base::LockGuard<base::Mutex> guard(result_mutex);

+ executed_units->push(unit);

+ }

+ return true;

+class WasmCompilationTask : public CancelableTask {

+ public:

+ WasmCompilationTask(

+ Isolate* isolate,

+ std::vector<compiler::WasmCompilationUnit*>* compilation_units,

+ std::queue<compiler::WasmCompilationUnit*>* executed_units,

+ base::Semaphore* on_finished, base::Mutex* result_mutex,

+ base::Atomic32* next_unit)

+ : CancelableTask(isolate),

+ isolate_(isolate),

+ compilation_units_(compilation_units),

+ executed_units_(executed_units),

+ on_finished_(on_finished),

+ result_mutex_(result_mutex),

+ next_unit_(next_unit) {}

+ void RunInternal() override {

+ while (FetchAndExecuteCompilation(isolate_, compilation_units_,

+ executed_units_, result_mutex_,

+ next_unit_)) {

+ }

+ on_finished_->Signal();

+ }

+ Isolate* isolate_;

+ std::vector<compiler::WasmCompilationUnit*>* compilation_units_;

+ std::queue<compiler::WasmCompilationUnit*>* executed_units_;

+ base::Semaphore* on_finished_;

+ base::Mutex* result_mutex_;

+ base::Atomic32* next_unit_;

+};

// Instantiates a wasm module as a JSObject.

// * allocates a backing store of {mem_size} bytes.

// * installs a named property "memory" for that buffer if exported

@@ -507,46 +569,109 @@ MaybeHandle<JSObject> WasmModule::Instantiate(Isolate* isolate,

isolate->counters()->wasm_functions_per_module()->AddSample(

static_cast<int>(functions.size()));

+ //-----------------------------------------------------------------------

+ // For parallel compilation:

+ // 1) The main thread allocates a compilation unit for each wasm function

+ // and stores them in the vector compilation_units.

+ // 2) The main thread spawns WasmCompilationTasks which run on the

+ // background threads.

+ // 3.a) The background threads and the main thread pick one compilation unit

+ // at a time and execute the parallel phase of the compilation unit.

+ // After finishing the execution of the parallel phase, the result is

+ // enqueued in executed_units.

+ // 3.b) If executed_units contains a compilation unit, the main thread

+ // dequeues it and finishes the compilation.

+ // 4) After the parallel phase of all compilation units has started, the

+ // main thread waits for all WasmCompilationTasks to finish.

+ // 5) The main thread finishes the compilation.

std::vector<compiler::WasmCompilationUnit*> compilation_units(

functions.size());

std::queue<compiler::WasmCompilationUnit*> executed_units;

std::vector<Handle<Code>> results(functions.size());

- if (FLAG_wasm_parallel_compilation) {

+ if (FLAG_wasm_num_compilation_tasks != 0) {

+ CanonicalHandleScope canonical(isolate);

// Create a placeholder code object for all functions.

// TODO(ahaas): Maybe we could skip this for external functions.

for (uint32_t i = 0; i < functions.size(); i++) {

linker.GetFunctionCode(i);

}

+ // 1) The main thread allocates a compilation unit for each wasm function

+ // and stores them in the vector compilation_units.

for (uint32_t i = FLAG_skip_compiling_wasm_funcs; i < functions.size();

i++) {

if (!functions[i].external) {

compilation_units[i] = compiler::CreateWasmCompilationUnit(

&thrower, isolate, &module_env, &functions[i], i);

+ } else {

+ compilation_units[i] = nullptr;

}

+ // 2) The main thread spawns WasmCompilationTasks which run on the

+ // background threads.

+ const size_t max_num_tasks =

+ Min(static_cast<size_t>(FLAG_wasm_num_compilation_tasks),

+ V8::GetCurrentPlatform()->NumberOfAvailableBackgroundThreads());

+ base::SmartArrayPointer<uint32_t> task_ids(new uint32_t[max_num_tasks]);

+ base::Semaphore pending_tasks(0);

Michael Lippautz 2016/05/10 07:36:41 base::Semaphore and friends do not make sure the in

ahaas 2016/05/10 09:45:49 Done. I allocate the Semaphore on the heap now.

+ base::Mutex result_mutex;

+ base::Atomic32 next_unit = FLAG_skip_compiling_wasm_funcs;

+ for (size_t i = 0; i < max_num_tasks; i++) {

+ WasmCompilationTask* task = new WasmCompilationTask(

+ isolate, &compilation_units, &executed_units, &pending_tasks,

+ &result_mutex, &next_unit);

+ task_ids[i] = task->id();

+ V8::GetCurrentPlatform()->CallOnBackgroundThread(

+ task, v8::Platform::kShortRunningTask);

+ }

index = FLAG_skip_compiling_wasm_funcs;

- while (true) {

- while (!executed_units.empty()) {

- compiler::WasmCompilationUnit* unit = executed_units.front();

- executed_units.pop();

- int i = compiler::GetIndexOfWasmCompilationUnit(unit);

- results[i] = compiler::FinishCompilation(unit);

+ bool done = false;

Michael Lippautz 2016/05/10 07:36:41 nit: Wouldn't it make more sense to clearly separa

ahaas 2016/05/10 09:45:48 I don't see a way to clearly separate these phases

Michael Lippautz 2016/05/10 10:35:51 Alright, then memory consumption is the reason to

+ while (!done) {

+ // 3.a) The background threads and the main thread pick one compilation

+ // unit at a time and execute the parallel phase of the compilation

+ // unit. After finishing the execution of the parallel phase, the

+ // result is enqueued in executed_units.

+ if (!FetchAndExecuteCompilation(isolate, &compilation_units,

+ &executed_units, &result_mutex,

+ &next_unit)) {

+ // 4) After the parallel phase of all compilation units has started,

+ // the main thread waits for all WasmCompilationTasks to finish.

+ for (size_t i = 0; i < max_num_tasks; i++) {

+ if (!isolate->cancelable_task_manager()->TryAbort(task_ids[i])) {

+ pending_tasks.Wait();

+ }

+ done = true;

}

- if (index < functions.size()) {

- if (!functions[index].external) {

- compiler::ExecuteCompilation(compilation_units[index]);

- executed_units.push(compilation_units[index]);

- index++;

+ // 3.b) If executed_units contains a compilation unit, the main thread

+ // dequeues it and finishes the compilation.

+ while (!executed_units.empty()) {

+ compiler::WasmCompilationUnit* unit = nullptr;

+ {

+ base::LockGuard<base::Mutex> guard(&result_mutex);

+ if (!executed_units.empty()) {

+ unit = executed_units.front();

+ executed_units.pop();

+ }

+ if (unit != nullptr) {

+ int j = compiler::GetIndexOfWasmCompilationUnit(unit);

+ if (!functions[j].external) {

+ results[j] = compiler::FinishCompilation(unit);

+ }

}

- } else {

- break;

}

+ // 5) The main thread finishes the compilation.

// First pass: compile each function and initialize the code table.

for (uint32_t i = FLAG_skip_compiling_wasm_funcs; i < functions.size();

i++) {

@@ -568,10 +693,11 @@ MaybeHandle<JSObject> WasmModule::Instantiate(Isolate* isolate,

function.ToHandleChecked(),

func.sig, str, str_null);

} else {

- if (FLAG_wasm_parallel_compilation) {

+ if (FLAG_wasm_num_compilation_tasks != 0) {

code = results[i];

} else {

// Compile the function.

+ CanonicalHandleScope canonical(isolate);

code = compiler::CompileWasmFunction(&thrower, isolate, &module_env,

&func);

}

« src/compiler/wasm-compiler.cc ('K') | « src/flag-definitions.h ('k') | test/mjsunit/wasm/calls.js » ('j') | no next file with comments »