chrome/browser/conflicts/module_database_win.cc - Issue 2576843002: [win] Create ModuleDatabase and ModuleEventSinkImpl.

Unified Diff: chrome/browser/conflicts/module_database_win.cc

Issue 2576843002: [win] Create ModuleDatabase and ModuleEventSinkImpl. (Closed)

Patch Set: Rework OnProcessStarted. Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« chrome/browser/conflicts/module_database_win.h ('K') | « chrome/browser/conflicts/module_database_win.h ('k') | chrome/browser/conflicts/module_database_win_unittest.cc » ('j') | chrome/browser/conflicts/module_database_win_unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/conflicts/module_database_win.cc

diff --git a/chrome/browser/conflicts/module_database_win.cc b/chrome/browser/conflicts/module_database_win.cc

new file mode 100644

index 0000000000000000000000000000000000000000..98872bd7eb2fafbcff8f048a73c320c00e86a886

--- /dev/null

+++ b/chrome/browser/conflicts/module_database_win.cc

@@ -0,0 +1,331 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "chrome/browser/conflicts/module_database_win.h"

+#include "base/bind.h"

+#include "base/debug/leak_annotations.h"

grt (UTC plus 2) 2016/12/20 11:11:33 unused?

chrisha 2016/12/20 19:46:22 Done.

+#include "base/lazy_instance.h"

grt (UTC plus 2) 2016/12/20 11:11:33 unused

chrisha 2016/12/20 19:46:23 Done.

+#include "base/strings/utf_string_conversions.h"

grt (UTC plus 2) 2016/12/20 11:11:33 unused

chrisha 2016/12/20 19:46:23 Done. (I really wish we had automated analysis fo

grt (UTC plus 2) 2016/12/20 21:09:52 Amen to that, brother.

+namespace {

+// Document the assumptions made on the ProcessType enum in order to convert

+// them to bits.

+static_assert(1 == content::PROCESS_TYPE_UNKNOWN,

grt (UTC plus 2) 2016/12/20 11:11:34 reverse these!

chrisha 2016/12/20 19:46:22 Done.

+ "assumes unknown process type has value 1");

+static_assert(2 == content::PROCESS_TYPE_BROWSER,

+ "assumes browser process type has value 2");

+static constexpr uint32_t kMinProcessType = content::PROCESS_TYPE_BROWSER;

grt (UTC plus 2) 2016/12/20 11:11:34 omit "static" in an unnamed namespace

chrisha 2016/12/20 19:46:23 Done.

+} // namespace

+ModuleDatabase::ModuleDatabase(

+ scoped_refptr<base::SequencedTaskRunner> task_runner)

+ : task_runner_(task_runner), weak_ptr_factory_(this) {}

grt (UTC plus 2) 2016/12/20 11:11:33 std::move(task_runner)?

chrisha 2016/12/20 19:46:23 Done.

+ModuleDatabase::~ModuleDatabase() = default;

+void ModuleDatabase::OnProcessStarted(uint32_t process_id,

+ content::ProcessType process_type) {

+ DCHECK(task_runner_->RunsTasksOnCurrentThread());

+ auto* process_info = CreateProcessInfo(process_id, process_type);

+ // If each client is sending messages in the appropriate order then this

+ // should always be true.

+ DCHECK(process_info);

+void ModuleDatabase::OnModuleEvent(uint32_t process_id,

+ const ModuleWatcher::ModuleEvent& event) {

+ // Messages can arrive from any thread (UI thread for calls over IPC, and

+ // anywhere at all for calls from ModuleWatcher), so bounce if necessary.

+ if (!task_runner_->RunsTasksOnCurrentThread()) {

+ task_runner_->PostTask(FROM_HERE, base::Bind(&ModuleDatabase::OnModuleEvent,

+ weak_ptr_factory_.GetWeakPtr(),

+ process_id, event));

+ return;

+ }

+ // In theory this should always succeed. However, it is possible for a client

+ // to misbehave and sent out-of-order messages. It is easy to be tolerant of

grt (UTC plus 2) 2016/12/20 11:11:33 sent -> send

chrisha 2016/12/20 19:46:23 Done.

+ // this by simply not updating the process info in this case. It's not worth

+ // crashing if this data is slightly out of sync as this is purely

+ // informational.

+ auto* process_info = GetProcessInfo(process_id);

+ DCHECK(process_info);

+ if (!process_info)

+ return;

+ auto* module_info = FindOrCreateModuleInfo(event.module_path);

+ // Update the list of process types that this module has been seen in.

+ module_info->process_types |= ProcessTypeToBit(process_info->process_type);

+ uintptr_t load_address =

+ reinterpret_cast<uintptr_t>(event.module_load_address);

grt (UTC plus 2) 2016/12/20 11:11:34 wdyt of making module_load_address a uintptr_t thr

chrisha 2016/12/20 19:46:23 There's one boundary I can't get rid of: the IPC m

grt (UTC plus 2) 2016/12/20 21:09:52 Naah. Thanks for clarifying the reasoning.

+ // Update the module lists for this process.

+ switch (event.event_type) {

+ case mojom::ModuleEventType::MODULE_ALREADY_LOADED:

+ case mojom::ModuleEventType::MODULE_LOADED:

+ InsertLoadAddress(module_info->module_id, load_address,

+ &process_info->loaded_modules);

+ RemoveLoadAddressById(module_info->module_id,

+ &process_info->unloaded_modules);

+ break;

+ case mojom::ModuleEventType::MODULE_UNLOADED:

grt (UTC plus 2) 2016/12/20 11:11:33 i find it a bit confusing that unloads are handled

chrisha 2016/12/20 19:46:23 OnModuleUnload is expensive, costing an O(n) searc

grt (UTC plus 2) 2016/12/20 21:09:52 Can you clarify in comments which is trusted and w

chrisha 2016/12/21 20:14:59 I've reworked the API so there's only one way to l

+ RemoveLoadAddressById(module_info->module_id,

+ &process_info->loaded_modules);

+ InsertLoadAddress(module_info->module_id, load_address,

+ &process_info->unloaded_modules);

+ break;

+ }

+void ModuleDatabase::OnModuleUnload(uint32_t process_id,

+ uintptr_t load_address) {

+ // Messages can arrive from any thread (UI thread for calls over IPC, and

+ // anywhere at all for calls from ModuleWatcher), so bounce if necessary.

+ if (!task_runner_->RunsTasksOnCurrentThread()) {

+ task_runner_->PostTask(

+ FROM_HERE,

+ base::Bind(&ModuleDatabase::OnModuleUnload,

+ weak_ptr_factory_.GetWeakPtr(), process_id, load_address));

+ return;

+ }

+ // See the long-winded comment in OnModuleEvent about reasons why this can

+ // fail (but shouldn't normally).

+ auto* process_info = GetProcessInfo(process_id);

+ DCHECK(process_info);

grt (UTC plus 2) 2016/12/20 11:11:34 remove this one, too?

chrisha 2016/12/20 19:46:23 Oops, should have been deleted. Done.

+ if (!process_info)

+ return;

+ // Find the module corresponding to this load address.

+ int i =

+ FindLoadAddressIndexByAddress(load_address, process_info->loaded_modules);

+ // No such module found. This shouldn't happen either, unless messages are

+ // malformed or out of order. Gracefully fail in this case.

+ if (i == kInvalidIndex)

+ return;

+ ModuleId module_id = process_info->loaded_modules[i].first;

+ // Remove from the loaded module list and insert into the unloaded module

+ // list.

+ RemoveLoadAddressByIndex(i, &process_info->loaded_modules);

+ InsertLoadAddress(module_id, load_address, &process_info->unloaded_modules);

+void ModuleDatabase::OnProcessEnded(uint32_t process_id) {

+ // Messages can arrive from any thread (UI thread for calls over IPC, and

+ // anywhere at all for calls from ModuleWatcher), so bounce if necessary.

+ if (!task_runner_->RunsTasksOnCurrentThread()) {

+ task_runner_->PostTask(

+ FROM_HERE, base::Bind(&ModuleDatabase::OnProcessEnded,

+ weak_ptr_factory_.GetWeakPtr(), process_id));

+ return;

+ }

+ bool deleted = DeleteProcessInfo(process_id);

+ // If each client is sending messages in the appropriate order then this

+ // should always be true.

+ DCHECK(deleted);

+// static

+uint32_t ModuleDatabase::ProcessTypeToBit(content::ProcessType process_type) {

+ uint32_t bit_index = static_cast<uint32_t>(process_type) - kMinProcessType;

+ DCHECK_LE(0u, bit_index);

+ DCHECK_GE(31u, bit_index);

+ uint32_t bit = (1 << bit_index);

+ return bit;

+// static

+content::ProcessType ModuleDatabase::BitIndexToProcessType(uint32_t bit_index) {

+ DCHECK_LE(0u, bit_index);

+ DCHECK_GE(31u, bit_index);

+ return static_cast<content::ProcessType>(bit_index + kMinProcessType);

+// static

+int ModuleDatabase::FindLoadAddressIndexById(

grt (UTC plus 2) 2016/12/20 11:11:33 looks like using size_t for the index would be muc

chrisha 2016/12/20 19:46:23 Indeed it would be. Done.

+ ModuleId module_id,

+ const ModuleLoadAddresses& load_addresses) {

+ for (size_t i = 0; i < load_addresses.size(); ++i) {

+ if (load_addresses[i].first == module_id)

+ return static_cast<int>(i);

+ }

+ return kInvalidIndex;

+// static

+int ModuleDatabase::FindLoadAddressIndexByAddress(

+ uintptr_t load_address,

+ const ModuleLoadAddresses& load_addresses) {

+ for (size_t i = 0; i < load_addresses.size(); ++i) {

+ if (load_addresses[i].second == load_address)

+ return static_cast<int>(i);

+ }

+ return kInvalidIndex;

+// static

+void ModuleDatabase::InsertLoadAddress(ModuleId module_id,

chrisha 2016/12/19 20:15:37 grt: I was bored, so went a little overboard here

+ uintptr_t load_address,

+ ModuleLoadAddresses* load_addresses) {

+ // A very small optimization: the largest module_id is always placed at the

+ // end of the array. This is the most common case, and allows O(1)

+ // determination that a |module_id| isn't present when it's bigger than the

+ // maximum already in the array. This keeps insertions to O(1) in the usual

+ // case.

+ if (load_addresses->size() == 0 || module_id > load_addresses->back().first) {

grt (UTC plus 2) 2016/12/20 11:11:34 load_addresses->size() == 0 -> load_addresses->emp

chrisha 2016/12/20 19:46:23 Done.

+ load_addresses->push_back(std::make_pair(module_id, load_address));

grt (UTC plus 2) 2016/12/20 11:11:34 load_addresses->emplace_back(module_id, load_addre

chrisha 2016/12/20 19:46:23 Holy jebus batman, I've never seen "emplace". TIL.

grt (UTC plus 2) 2016/12/20 21:09:52 emplace was the toy I got on my birthday from the

+ return;

+ }

+ // If the module exists in the collection then update the load address and

+ // return.

+ int i = FindLoadAddressIndexById(module_id, *load_addresses);

+ if (i != kInvalidIndex) {

+ load_addresses->at(i).second = load_address;

grt (UTC plus 2) 2016/12/20 11:11:34 curious: why would a module's load address change?

chrisha 2016/12/20 19:46:23 A vain attempt at maintaining long term consistenc

grt (UTC plus 2) 2016/12/20 21:09:52 If unload/reload could potentially be processed ou

chrisha 2016/12/21 20:14:59 I did some experiments here. 1. LoadLibrary("foo.

+ return;

+ }

+ // The module does not exist, and by definition is smaller in value than

+ // the largest module ID already present. Add it, and ensure the largest

+ // module stays at the end.

+ load_addresses->push_back(load_addresses->back());

grt (UTC plus 2) 2016/12/20 11:11:33 is this the same thing? load_addresses->emplace(

chrisha 2016/12/20 19:46:23 It is indeed.

+ auto it = load_addresses->rbegin();

+ it++;

+ it->first = module_id;

+ it->second = load_address;

+ return;

+// static

+void ModuleDatabase::RemoveLoadAddressById(

+ ModuleId module_id,

+ ModuleLoadAddresses* load_addresses) {

+ if (load_addresses->empty())

+ return;

+ // Special case: removing the maximum index module. Need to find the new

grt (UTC plus 2) 2016/12/20 11:11:34 silly idea: how about changing FindLoadAddressInde

chrisha 2016/12/20 19:46:22 No, not silly at all! Done.

+ // maximum element and ensure it goes to the end.

+ if (load_addresses->size() > 2 && load_addresses->back().first == module_id)

+ return RemoveLoadAddressByIndex(load_addresses->size() - 1, load_addresses);

+ // The element to be removed is not the last one.

+ int i = FindLoadAddressIndexById(module_id, *load_addresses);

+ RemoveLoadAddressByIndex(i, load_addresses);

+// static

+void ModuleDatabase::RemoveLoadAddressByIndex(

+ int index,

+ ModuleLoadAddresses* load_addresses) {

+ DCHECK_LE(0, index);

+ DCHECK_GT(load_addresses->size(), static_cast<size_t>(index));

+ // Special case: removing the last module (with maximum id). Need to find the

+ // new maximum element and ensure it goes to the end.

+ if (load_addresses->size() > 2 &&

+ static_cast<size_t>(index + 1) == load_addresses->size()) {

+ // Find the index of the new maximum element.

+ ModuleId max_id = -1; // These start at zero.

+ int max_index = kInvalidIndex;

+ for (size_t i = 0; i < load_addresses->size() - 1; ++i) {

+ if (load_addresses->at(i).first > max_id) {

+ max_id = load_addresses->at(i).first;

grt (UTC plus 2) 2016/12/20 11:11:34 avoid at() -- it does range checking and throws ex

chrisha 2016/12/20 19:46:23 Another TIL... Done.

+ max_index = i;

+ }

+ // Remove the max element.

+ load_addresses->resize(load_addresses->size() - 1);

+ // If the new max element isn't in the last position, then swap it so it is.

+ int last_index = load_addresses->size() - 1;

+ if (max_index != last_index)

+ std::swap((*load_addresses)[max_index], (*load_addresses)[last_index]);

grt (UTC plus 2) 2016/12/20 11:11:34 #include <algorithm>

chrisha 2016/12/20 19:46:23 Done.

+ return;

+ }

+ // If the element to be removed is second last then a single swap is

grt (UTC plus 2) 2016/12/20 11:11:33 swap -> copy

chrisha 2016/12/20 19:46:23 Done.

+ // sufficient.

+ if (static_cast<size_t>(index + 2) == load_addresses->size()) {

+ (*load_addresses)[index] = (*load_addresses)[index + 1];

+ } else {

+ // In the general case two swaps are necessary.

grt (UTC plus 2) 2016/12/20 11:11:33 swaps -> copies

chrisha 2016/12/20 19:46:23 Done.

+ int max_index = load_addresses->size() - 1;

+ (*load_addresses)[index] = (*load_addresses)[max_index - 1];

+ (*load_addresses)[max_index - 1] = (*load_addresses)[max_index];

+ }

+ // Remove the last element, which is now duplicated.

+ load_addresses->resize(load_addresses->size() - 1);

+ModuleDatabase::ModuleInfo* ModuleDatabase::FindOrCreateModuleInfo(

+ const base::FilePath& module_path) {

+ ModuleInfo key(module_path, modules_.size());

+ auto result = modules_.insert(key);

+ return const_cast<ModuleInfo*>(&(*result.first));

grt (UTC plus 2) 2016/12/20 11:11:34 is this cast really needed? |this| isn't const, an

chrisha 2016/12/20 19:46:23 std::set always returns elements as "const", as th

grt (UTC plus 2) 2016/12/20 21:09:52 Ah. Grn. The critical piece, then, is that the obj

chrisha 2016/12/21 20:14:59 Tried to make this more clear.

+ModuleDatabase::ProcessInfo* ModuleDatabase::GetProcessInfo(

+ uint32_t process_id) {

+ ProcessInfo key(process_id, content::PROCESS_TYPE_UNKNOWN);

+ auto it = processes_.find(key);

+ if (it == processes_.end())

+ return nullptr;

+ return const_cast<ProcessInfo*>(&(*it));

grt (UTC plus 2) 2016/12/20 11:11:34 same comment about cast

chrisha 2016/12/20 19:46:23 Ditto.

+const ModuleDatabase::ProcessInfo* ModuleDatabase::CreateProcessInfo(

grt (UTC plus 2) 2016/12/20 11:11:34 the only caller DCHECKS the result. how about doin

chrisha 2016/12/20 19:46:22 Actually, that DCHECK is meant to be elided. Made

+ uint32_t process_id,

+ content::ProcessType process_type) {

+ ProcessInfo key(process_id, process_type);

+ auto result = processes_.insert(key);

+ // If the element was inserted then return it.

+ if (result.second)

+ return &(*result.first);

+ // Otherwise it already existed, so return nullptr.

+ return nullptr;

+bool ModuleDatabase::DeleteProcessInfo(uint32_t process_id) {

grt (UTC plus 2) 2016/12/20 11:11:34 similar comment: why not DCHECK in here?

chrisha 2016/12/20 19:46:23 Made void, removed the unneeded DCHECK.

+ ProcessInfo key(process_id, content::PROCESS_TYPE_UNKNOWN);

+ auto it = processes_.find(key);

+ if (it == processes_.end())

+ return false;

+ processes_.erase(it);

+ return true;

+// ModuleDatabase::ModuleInfo --------------------------------------------------

+ModuleDatabase::ModuleInfo::ModuleInfo(const base::FilePath& module_path,

+ uint32_t module_id)

+ : module_path(module_path), module_id(module_id), process_types(0) {}

+bool ModuleDatabase::ModuleInfo::operator<(const ModuleInfo& mi) const {

+ return module_path < mi.module_path;

+// ModuleDatabase::ProcessInfo -------------------------------------------------

+ModuleDatabase::ProcessInfo::ProcessInfo(uint32_t process_id,

+ content::ProcessType process_type)

+ : process_id(process_id), process_type(process_type) {}

+bool ModuleDatabase::ProcessInfo::operator<(const ProcessInfo& pi) const {

+ return process_id < pi.process_id;