chrome/browser/conflicts/module_database_win.cc - Issue 2576843002: [win] Create ModuleDatabase and ModuleEventSinkImpl.

Unified Diff: chrome/browser/conflicts/module_database_win.cc

Issue 2576843002: [win] Create ModuleDatabase and ModuleEventSinkImpl. (Closed)

Patch Set: Fix grt's nits on patchset 8. Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« chrome/browser/conflicts/module_database_win.h ('K') | « chrome/browser/conflicts/module_database_win.h ('k') | chrome/browser/conflicts/module_database_win_unittest.cc » ('j') | chrome/browser/conflicts/module_database_win_unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/browser/conflicts/module_database_win.cc

diff --git a/chrome/browser/conflicts/module_database_win.cc b/chrome/browser/conflicts/module_database_win.cc

new file mode 100644

index 0000000000000000000000000000000000000000..a336affbdf5b3bff2094a9adb65ac2a7235803e8

--- /dev/null

+++ b/chrome/browser/conflicts/module_database_win.cc

@@ -0,0 +1,335 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "chrome/browser/conflicts/module_database_win.h"

+#include <algorithm>

+#include <tuple>

+#include "base/bind.h"

+namespace {

+// Document the assumptions made on the ProcessType enum in order to convert

+// them to bits.

+static_assert(content::PROCESS_TYPE_UNKNOWN == 1,

+ "assumes unknown process type has value 1");

+static_assert(content::PROCESS_TYPE_BROWSER == 2,

+ "assumes browser process type has value 2");

+constexpr uint32_t kMinProcessType = content::PROCESS_TYPE_BROWSER;

+} // namespace

+ModuleDatabase::ModuleDatabase(

+ scoped_refptr<base::SequencedTaskRunner> task_runner)

+ : task_runner_(std::move(task_runner)), weak_ptr_factory_(this) {}

+ModuleDatabase::~ModuleDatabase() = default;

+void ModuleDatabase::OnProcessStarted(uint32_t process_id,

+ uint64_t creation_time,

+ content::ProcessType process_type) {

+ DCHECK(task_runner_->RunsTasksOnCurrentThread());

+ CreateProcessInfo(process_id, creation_time, process_type);

+void ModuleDatabase::OnModuleLoad(uint32_t process_id,

+ uint64_t creation_time,

+ const base::FilePath& module_path,

+ uint32_t module_size,

+ uint32_t module_time_date_stamp,

+ uintptr_t module_load_address) {

+ // Messages can arrive from any thread (UI thread for calls over IPC, and

+ // anywhere at all for calls from ModuleWatcher), so bounce if necessary.

+ if (!task_runner_->RunsTasksOnCurrentThread()) {

+ task_runner_->PostTask(

+ FROM_HERE, base::Bind(&ModuleDatabase::OnModuleLoad,

+ weak_ptr_factory_.GetWeakPtr(), process_id,

+ creation_time, module_path, module_size,

+ module_time_date_stamp, module_load_address));

+ return;

+ }

+ // In theory this should always succeed. However, it is possible for a client

+ // to misbehave and send out-of-order messages. It is easy to be tolerant of

+ // this by simply not updating the process info in this case. It's not worth

+ // crashing if this data is slightly out of sync as this is purely

+ // informational.

+ auto* process_info = GetProcessInfo(process_id, creation_time);

+ if (!process_info)

+ return;

+ auto* module_info =

+ FindOrCreateModuleInfo(module_path, module_size, module_time_date_stamp);

+ // Update the list of process types that this module has been seen in.

+ module_info->process_types |= ProcessTypeToBit(process_info->process_type);

+ // Update the load address maps.

+ InsertLoadAddress(module_info->module_id, module_load_address,

+ &process_info->loaded_modules);

+ RemoveLoadAddressById(module_info->module_id,

+ &process_info->unloaded_modules);

+void ModuleDatabase::OnModuleUnload(uint32_t process_id,

+ uint64_t creation_time,

+ uintptr_t module_load_address) {

+ // Messages can arrive from any thread (UI thread for calls over IPC, and

+ // anywhere at all for calls from ModuleWatcher), so bounce if necessary.

+ if (!task_runner_->RunsTasksOnCurrentThread()) {

+ task_runner_->PostTask(

+ FROM_HERE, base::Bind(&ModuleDatabase::OnModuleUnload,

+ weak_ptr_factory_.GetWeakPtr(), process_id,

+ creation_time, module_load_address));

+ return;

+ }

+ // See the long-winded comment in OnModuleLoad about reasons why this can

+ // fail (but shouldn't normally).

+ auto* process_info = GetProcessInfo(process_id, creation_time);

+ if (!process_info)

+ return;

+ // Find the module corresponding to this load address. This is O(1) in the

+ // common case of removing a recently removed module, but O(n) worst case.

+ // Thankfully, unload events occur far less often and n is quite small.

+ size_t i = FindLoadAddressIndexByAddress(module_load_address,

sky 2017/01/05 00:30:44 Did you consider returning an iterator? It would m

chrisha 2017/01/06 20:35:47 I did consider using an iterator. About the only t

+ process_info->loaded_modules);

+ // No such module found. This shouldn't happen either, unless messages are

+ // malformed or out of order. Gracefully fail in this case.

+ if (i == kInvalidIndex)

+ return;

+ ModuleId module_id = process_info->loaded_modules[i].first;

+ // Remove from the loaded module list and insert into the unloaded module

+ // list.

+ RemoveLoadAddressByIndex(i, &process_info->loaded_modules);

+ InsertLoadAddress(module_id, module_load_address,

+ &process_info->unloaded_modules);

+void ModuleDatabase::OnProcessEnded(uint32_t process_id,

+ uint64_t creation_time) {

+ // Messages can arrive from any thread (UI thread for calls over IPC, and

+ // anywhere at all for calls from ModuleWatcher), so bounce if necessary.

+ if (!task_runner_->RunsTasksOnCurrentThread()) {

+ task_runner_->PostTask(

+ FROM_HERE,

+ base::Bind(&ModuleDatabase::OnProcessEnded,

+ weak_ptr_factory_.GetWeakPtr(), process_id, creation_time));

+ return;

+ }

+ DeleteProcessInfo(process_id, creation_time);

+// static

+uint32_t ModuleDatabase::ProcessTypeToBit(content::ProcessType process_type) {

+ uint32_t bit_index = static_cast<uint32_t>(process_type) - kMinProcessType;

+ DCHECK_LE(0u, bit_index);

sky 2017/01/05 00:30:45 How about a compile time assert that PROCESS_TYPE_

chrisha 2017/01/06 20:35:47 The less than 0 check is actually completely bogus

sky 2017/01/06 21:05:40 Good idea.

+ DCHECK_GE(31u, bit_index);

+ uint32_t bit = (1 << bit_index);

+ return bit;

+// static

+content::ProcessType ModuleDatabase::BitIndexToProcessType(uint32_t bit_index) {

+ DCHECK_LE(0u, bit_index);

+ DCHECK_GE(31u, bit_index);

+ return static_cast<content::ProcessType>(bit_index + kMinProcessType);

+// static

+size_t ModuleDatabase::FindLoadAddressIndexById(

+ ModuleId module_id,

+ const ModuleLoadAddresses& load_addresses) {

+ // Process elements in reverse order so that RemoveLoadAddressById can handle

+ // the more common case of removing the maximum element in O(1).

+ for (size_t i = load_addresses.size() - 1; i < load_addresses.size(); --i) {

+ if (load_addresses[i].first == module_id)

+ return i;

+ }

+ return kInvalidIndex;

+// static

+size_t ModuleDatabase::FindLoadAddressIndexByAddress(

+ uintptr_t load_address,

+ const ModuleLoadAddresses& load_addresses) {

+ for (size_t i = 0; i < load_addresses.size(); ++i) {

+ if (load_addresses[i].second == load_address)

+ return i;

+ }

+ return kInvalidIndex;

+// static

+void ModuleDatabase::InsertLoadAddress(ModuleId module_id,

+ uintptr_t load_address,

+ ModuleLoadAddresses* load_addresses) {

+ // A very small optimization: the largest module_id is always placed at the

+ // end of the array. This is the most common case, and allows O(1)

+ // determination that a |module_id| isn't present when it's bigger than the

+ // maximum already in the array. This keeps insertions to O(1) in the usual

+ // case.

+ if (load_addresses->empty() || module_id > load_addresses->back().first) {

+ load_addresses->emplace_back(module_id, load_address);

+ return;

+ }

+ // If the module exists in the collection then update the load address and

+ // return. This should never really occur, unless the client is deliberately

+ // misbehaving or a race causes a reload event (at a different address) to be

+ // processed before the corresponding unload. This is very unlikely.

+ size_t i = FindLoadAddressIndexById(module_id, *load_addresses);

+ if (i != kInvalidIndex) {

+ (*load_addresses)[i].second = load_address;

+ return;

+ }

+ // The module does not exist, and by definition is smaller in value than

+ // the largest module ID already present. Add it, ensuring that the largest

+ // module ID stays at the end.

+ load_addresses->emplace(--load_addresses->end(), module_id, load_address);

+// static

+void ModuleDatabase::RemoveLoadAddressById(

+ ModuleId module_id,

+ ModuleLoadAddresses* load_addresses) {

+ if (load_addresses->empty())

+ return;

+ // This handles the special case of removing the max element in O(1), as

+ // FindLoadAddressIndexById processes the elements in reverse order.

+ size_t i = FindLoadAddressIndexById(module_id, *load_addresses);

+ RemoveLoadAddressByIndex(i, load_addresses);

+// static

+void ModuleDatabase::RemoveLoadAddressByIndex(

+ size_t index,

+ ModuleLoadAddresses* load_addresses) {

+ DCHECK_LT(index, load_addresses->size());

+ // Special case: removing the last module (with maximum id). Need to find the

+ // new maximum element and ensure it goes to the end.

+ if (load_addresses->size() > 2 && index + 1 == load_addresses->size()) {

+ // Note that |index| == load_addresses->size() - 1, and is the last

+ // indexable element in the vector.

+ // Find the index of the new maximum element.

+ ModuleId max_id = -1; // These start at zero.

+ size_t max_index = kInvalidIndex;

+ for (size_t i = 0; i < load_addresses->size() - 1; ++i) {

+ if ((*load_addresses)[i].first > max_id) {

+ max_id = (*load_addresses)[i].first;

+ max_index = i;

+ }

+ // Remove the last (max) element.

+ load_addresses->resize(index);

+ // If the new max element isn't in the last position, then swap it so it is.

+ size_t last_index = load_addresses->size() - 1;

+ if (max_index != last_index)

+ std::swap((*load_addresses)[max_index], (*load_addresses)[last_index]);

+ return;

+ }

+ // If the element to be removed is second last then a single copy is

+ // sufficient.

+ if (index + 2 == load_addresses->size()) {

+ (*load_addresses)[index] = (*load_addresses)[index + 1];

+ } else {

+ // In the general case two copies are necessary.

+ int max_index = load_addresses->size() - 1;

+ (*load_addresses)[index] = (*load_addresses)[max_index - 1];

+ (*load_addresses)[max_index - 1] = (*load_addresses)[max_index];

+ }

+ // Remove the last element, which is now duplicated.

+ load_addresses->resize(load_addresses->size() - 1);

+ModuleDatabase::ModuleInfo* ModuleDatabase::FindOrCreateModuleInfo(

+ const base::FilePath& module_path,

+ uint32_t module_size,

+ uint32_t module_time_date_stamp) {

+ auto result = modules_.emplace(module_path, module_size,

+ module_time_date_stamp, modules_.size());

+ // Cast away constness so that the non-key portions of the object can be

+ // modified. The key portions of the object are themselves marked const, so

+ // this causes no trouble with std::set.

+ return const_cast<ModuleInfo*>(&(*result.first));

sky 2017/01/05 00:30:45 Similar comment has as with the other const_cast.

chrisha 2017/01/06 20:35:47 Acknowledged.

+ModuleDatabase::ProcessInfo* ModuleDatabase::GetProcessInfo(

+ uint32_t process_id,

+ uint64_t creation_time) {

+ ProcessInfo key(process_id, creation_time, content::PROCESS_TYPE_UNKNOWN);

+ auto it = processes_.find(key);

+ if (it == processes_.end())

+ return nullptr;

+ // Cast away constness so that the non-key portions of the object can be

+ // modified. The key portions of the object are themselves marked const, so

+ // this causes no trouble with std::set.

+ return const_cast<ProcessInfo*>(&(*it));

sky 2017/01/05 00:30:45 IMO this is a good indication you shouldn't be usi

chrisha 2017/01/06 20:35:47 As you point out, I'm effectively manually doing w

sky 2017/01/06 21:05:40 The problem is this code is being clever and while

chrisha 2017/01/11 16:34:17 The "first" and "second" refer to the std::map::va

The "first" and "second" refer to the std::map::value_type, so there's no way around it. I bit the bullet and refactored things this way. It's much less "clever", but slightly more onerous to read. The impact wasn't as bad as I as expecting, mostly confined to the unit tests.

grt (UTC plus 2) 2017/01/12 10:54:29 My $.02: the "first." and "second." sprinkled arou

On 2017/01/11 16:34:17, chrisha (slow) wrote: > On 2017/01/06 21:05:40, sky wrote: > > On 2017/01/06 20:35:47, chrisha (slow) wrote: > > > On 2017/01/05 00:30:45, sky wrote: > > > > IMO this is a good indication you shouldn't be using set. Once you resort > to > > a > > > > const_cast you can't ensure you don't accidentally do the wrong thing. Did > > you > > > > consider separating out the key aspects from the data that changes and > using > > a > > > > map? That way you wouldn't need a const_cast and you are better enforcing > > the > > > > key won't change. > > > > > > As you point out, I'm effectively manually doing what a std::map would do > for > > > me. Because my keys are actually tuples, I would need two structures for > each > > > entry: ProcessInfoKey, ProcessInfoData, ModuleInfoKey and ModuleInfoData. > And > > > then internally I'd be dealing with either iterators or *Info = > > std::pair<const > > > *InfoKey, *InfoData>, with mildly opaque ->first.some_key_member and > > > ->second.some_data_member accesses. > > > > > > Since the access to these things is behind two gateway functions, I figured > > the > > > mental overhead of the const_cast was less than the overhead of dealing with > > > std::pair, and two structs rather than one. > > > > > > grt@ had also suggested using "mutable" for the things that are able to be > > > modified, but I dislike that as well as it means you can't have > > > const-correctness for simple non-modifying accesses. > > > > > > So yes, I did consider it, and preferred the std::set approach, with liberal > > > comments and use of "const" for members that are indeed part of the key. > > > > The problem is this code is being clever and while it works now, subtlety like > > this is hard to maintain going forward. If you don't like > > ->first.some_key_member and ->second.some_other_member couldn't you define a > > struct? > > The "first" and "second" refer to the std::map::value_type, so there's no way > around it. I bit the bullet and refactored things this way. It's much less > "clever", but slightly more onerous to read. The impact wasn't as bad as I as > expecting, mostly confined to the unit tests.

My $.02: the "first." and "second." sprinkled around to make this work are somewhat opaque -- it takes a far amount of scrolling around to understand what's being accessed. That said, I like that std::map is being used appropriately now.

+void ModuleDatabase::CreateProcessInfo(uint32_t process_id,

+ uint64_t creation_time,

+ content::ProcessType process_type) {

+ processes_.emplace(process_id, creation_time, process_type);

sky 2017/01/05 00:30:45 Do you care if there was a ProcessInfo that matche

chrisha 2017/01/06 20:35:47 On windows a (pid, creation_time) pair uniquely id

+void ModuleDatabase::DeleteProcessInfo(uint32_t process_id,

+ uint64_t creation_time) {

+ ProcessInfo key(process_id, creation_time, content::PROCESS_TYPE_UNKNOWN);

+ auto it = processes_.find(key);

sky 2017/01/05 00:30:44 processes_.erase(key) ?

chrisha 2017/01/06 20:35:47 Errr... yeah. I'm sure there's some logical explan

+ if (it != processes_.end())

+ processes_.erase(it);

+// ModuleDatabase::ModuleInfo --------------------------------------------------

+ModuleDatabase::ModuleInfo::ModuleInfo(const base::FilePath& module_path,

+ uint32_t module_size,

+ uint32_t module_time_date_stamp,

+ uint32_t module_id)

+ : module_path(module_path),

+ module_size(module_size),

+ module_time_date_stamp(module_time_date_stamp),

+ module_id(module_id),

+ process_types(0) {}

+bool ModuleDatabase::ModuleInfo::operator<(const ModuleInfo& mi) const {

+ // The key consists of the triplet of

+ // (module_path, module_size, module_time_date_stamp).

+ // Use the std::tuple lexicographic comparison operator.

+ return std::make_tuple(module_path, module_size, module_time_date_stamp) <

+ std::make_tuple(mi.module_path, mi.module_size,

+ mi.module_time_date_stamp);

+// ModuleDatabase::ProcessInfo -------------------------------------------------

+ModuleDatabase::ProcessInfo::ProcessInfo(uint32_t process_id,

+ uint64_t creation_time,

+ content::ProcessType process_type)

+ : process_id(process_id),

+ creation_time(creation_time),

+ process_type(process_type) {}

+bool ModuleDatabase::ProcessInfo::operator<(const ProcessInfo& pi) const {

+ // The key consists of the pair of (process_id, creation_time).

+ // Use the std::tuple lexicographic comparison operator.

+ return std::make_tuple(process_id, creation_time) <

+ std::make_tuple(pi.process_id, pi.creation_time);