chrome/browser/conflicts/module_database_win.cc - Issue 2576843002: [win] Create ModuleDatabase and ModuleEventSinkImpl.

Side by Side Diff: chrome/browser/conflicts/module_database_win.cc

Issue 2576843002: [win] Create ModuleDatabase and ModuleEventSinkImpl. (Closed)

Patch Set: Rework OnProcessStarted. Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« chrome/browser/conflicts/module_database_win.h ('K') | « chrome/browser/conflicts/module_database_win.h ('k') | chrome/browser/conflicts/module_database_win_unittest.cc » ('j') | chrome/browser/conflicts/module_database_win_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright 2016 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "chrome/browser/conflicts/module_database_win.h"

	6

	7 #include "base/bind.h"

	8 #include "base/debug/leak_annotations.h"
	grt (UTC plus 2) 2016/12/20 11:11:33 unused? unused? chrisha 2016/12/20 19:46:22 Done. Show quoted text On 2016/12/20 11:11:33, grt (UTC plus 1) wrote: > unused? Done.
	9 #include "base/lazy_instance.h"
	grt (UTC plus 2) 2016/12/20 11:11:33 unused unused chrisha 2016/12/20 19:46:23 Done. Show quoted text On 2016/12/20 11:11:33, grt (UTC plus 1) wrote: > unused Done.
	10 #include "base/strings/utf_string_conversions.h"
	grt (UTC plus 2) 2016/12/20 11:11:33 unused unused chrisha 2016/12/20 19:46:23 Done. (I really wish we had automated analysis fo Show quoted text On 2016/12/20 11:11:33, grt (UTC plus 1) wrote: > unused Done. (I really wish we had automated analysis for this kind of thing... why oh why do we need to manually do this kind of crap in 2016!?!) grt (UTC plus 2) 2016/12/20 21:09:52 Amen to that, brother. Show quoted text On 2016/12/20 19:46:23, chrisha (slow) wrote: > On 2016/12/20 11:11:33, grt (UTC plus 1) wrote: > > unused > > Done. > > (I really wish we had automated analysis for this kind of thing... why oh why do > we need to manually do this kind of crap in 2016!?!) Amen to that, brother.
	11

	12 namespace {

	13

	14 // Document the assumptions made on the ProcessType enum in order to convert

	15 // them to bits.

	16 static_assert(1 == content::PROCESS_TYPE_UNKNOWN,
	grt (UTC plus 2) 2016/12/20 11:11:34 reverse these! reverse these! chrisha 2016/12/20 19:46:22 Done. Show quoted text On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > reverse these! Done.
	17 "assumes unknown process type has value 1");

	18 static_assert(2 == content::PROCESS_TYPE_BROWSER,

	19 "assumes browser process type has value 2");

	20 static constexpr uint32_t kMinProcessType = content::PROCESS_TYPE_BROWSER;
	grt (UTC plus 2) 2016/12/20 11:11:34 omit "static" in an unnamed namespace omit "static" in an unnamed namespace chrisha 2016/12/20 19:46:23 Done. Show quoted text On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > omit "static" in an unnamed namespace Done.
	21

	22 } // namespace

	23

	// Stores |task_runner| and binds the weak pointer factory to this instance.
	// NOTE(review): |task_runner| arrives by value and is then copied into the
	// member; std::move(task_runner) would presumably avoid the extra copy of the
	// scoped_refptr -- confirm and add <utility> if adopted.
	24 ModuleDatabase::ModuleDatabase(

	25 scoped_refptr<base::SequencedTaskRunner> task_runner)

	26 : task_runner_(task_runner), weak_ptr_factory_(this) {}
	grt (UTC plus 2) 2016/12/20 11:11:33 std::move(task_runner)? std::move(task_runner)? chrisha 2016/12/20 19:46:23 Done. Show quoted text On 2016/12/20 11:11:33, grt (UTC plus 1) wrote: > std::move(task_runner)? Done.
	27

	// Defaulted: no cleanup beyond what the members' own destructors perform.
	28 ModuleDatabase::~ModuleDatabase() = default;

	29

	30 void ModuleDatabase::OnProcessStarted(uint32_t process_id,

	31 content::ProcessType process_type) {

	32 DCHECK(task_runner_->RunsTasksOnCurrentThread());

	33

	34 auto* process_info = CreateProcessInfo(process_id, process_type);

	35 // If each client is sending messages in the appropriate order then this

	36 // should always be true.

	37 DCHECK(process_info);

	38 }

	39

	40 void ModuleDatabase::OnModuleEvent(uint32_t process_id,

	41 const ModuleWatcher::ModuleEvent& event) {

	42 // Messages can arrive from any thread (UI thread for calls over IPC, and

	43 // anywhere at all for calls from ModuleWatcher), so bounce if necessary.

	44 if (!task_runner_->RunsTasksOnCurrentThread()) {

	45 task_runner_->PostTask(FROM_HERE, base::Bind(&ModuleDatabase::OnModuleEvent,

	46 weak_ptr_factory_.GetWeakPtr(),

	47 process_id, event));

	48 return;

	49 }

	50

	51 // In theory this should always succeed. However, it is possible for a client

	52 // to misbehave and sent out-of-order messages. It is easy to be tolerant of
	grt (UTC plus 2) 2016/12/20 11:11:33 sent -> send sent -> send chrisha 2016/12/20 19:46:23 Done. Show quoted text On 2016/12/20 11:11:33, grt (UTC plus 1) wrote: > sent -> send Done.
	53 // this by simply not updating the process info in this case. It's not worth

	54 // crashing if this data is slightly out of sync as this is purely

	55 // informational.

	56 auto* process_info = GetProcessInfo(process_id);

	57 DCHECK(process_info);

	58 if (!process_info)

	59 return;

	60

	61 auto* module_info = FindOrCreateModuleInfo(event.module_path);

	62

	63 // Update the list of process types that this module has been seen in.

	64 module_info->process_types \|= ProcessTypeToBit(process_info->process_type);

	65

	66 uintptr_t load_address =

	67 reinterpret_cast<uintptr_t>(event.module_load_address);
	grt (UTC plus 2) 2016/12/20 11:11:34 wdyt of making module_load_address a uintptr_t thr wdyt of making module_load_address a uintptr_t throughout so that casting isn't needed? chrisha 2016/12/20 19:46:23 There's one boundary I can't get rid of: the IPC m Show quoted text On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > wdyt of making module_load_address a uintptr_t throughout so that casting isn't > needed? There's one boundary I can't get rid of: the IPC mechanism uses a uint64, as there's no "uintptr_t" or naked pointer equivalent in Mojo. IMO, ModuleWatcher makes sense to use pointers because in the context of it, these are real pointers that can be dereferenced, as well as fed into various OS functions as HMODULEs. However, in the ModuleDatabase, these aren't semantically pointers anymore, hence the desire to not store them as void*. I felt this mechanism slightly more externally consistent from the point of view of each class and its individual role. I don't find the casts too onerous, but if you feel strongly about this let me know. grt (UTC plus 2) 2016/12/20 21:09:52 Naah. Thanks for clarifying the reasoning. Show quoted text On 2016/12/20 19:46:23, chrisha (slow) wrote: > On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > > wdyt of making module_load_address a uintptr_t throughout so that casting > isn't > > needed? > > There's one boundary I can't get rid of: the IPC mechanism uses a uint64, as > there's no "uintptr_t" or naked pointer equivalent in Mojo. > > IMO, ModuleWatcher makes sense to use pointers because in the context of it, > these are real pointers that can be dereferenced, as well as fed into various OS > functions as HMODULEs. However, in the ModuleDatabase, these aren't semantically > pointers anymore, hence the desire to not store them as void*. > > I felt this mechanism slightly more externally consistent from the point of view > of each class and its individual role. 
I don't find the casts too onerous, but > if you feel strongly about this let me know. Naah. Thanks for clarifying the reasoning.
	68

	69 // Update the module lists for this process.

	70 switch (event.event_type) {

	71 case mojom::ModuleEventType::MODULE_ALREADY_LOADED:

	72 case mojom::ModuleEventType::MODULE_LOADED:

	73 InsertLoadAddress(module_info->module_id, load_address,

	74 &process_info->loaded_modules);

	75 RemoveLoadAddressById(module_info->module_id,

	76 &process_info->unloaded_modules);

	77 break;

	78 case mojom::ModuleEventType::MODULE_UNLOADED:
	grt (UTC plus 2) 2016/12/20 11:11:33 i find it a bit confusing that unloads are handled i find it a bit confusing that unloads are handled here and in OnModuleUnload. can there be only one codepath? chrisha 2016/12/20 19:46:23 OnModuleUnload is expensive, costing an O(n) searc Show quoted text On 2016/12/20 11:11:33, grt (UTC plus 1) wrote: > i find it a bit confusing that unloads are handled here and in OnModuleUnload. > can there be only one codepath? OnModuleUnload is expensive, costing an O(n) search. It is meant to handle untrusted data from out of process that has to be validated. OnModuleEvent handles trusted data from in process, and can be handled in O(lg n) without additional lookup. So, slightly annoying, but both more efficient and more secure. grt (UTC plus 2) 2016/12/20 21:09:52 Can you clarify in comments which is trusted and w Show quoted text On 2016/12/20 19:46:23, chrisha (slow) wrote: > On 2016/12/20 11:11:33, grt (UTC plus 1) wrote: > > i find it a bit confusing that unloads are handled here and in OnModuleUnload. > > can there be only one codepath? > > OnModuleUnload is expensive, costing an O(n) search. It is meant to handle > untrusted data from out of process that has to be validated. > > OnModuleEvent handles trusted data from in process, and can be handled in O(lg > n) without additional lookup. > > So, slightly annoying, but both more efficient and more secure. Can you clarify in comments which is trusted and which isn't? As it is, each function has a comment saying that messages can come over IPC, so it's not really clear to me without looking into how they're called. chrisha 2016/12/21 20:14:59 I've reworked the API so there's only one way to l Show quoted text On 2016/12/20 21:09:52, grt (UTC plus 1) wrote: > On 2016/12/20 19:46:23, chrisha (slow) wrote: > > On 2016/12/20 11:11:33, grt (UTC plus 1) wrote: > > > i find it a bit confusing that unloads are handled here and in > OnModuleUnload. > > > can there be only one codepath? 
> > > > OnModuleUnload is expensive, costing an O(n) search. It is meant to handle > > untrusted data from out of process that has to be validated. > > > > OnModuleEvent handles trusted data from in process, and can be handled in O(lg > > n) without additional lookup. > > > > So, slightly annoying, but both more efficient and more secure. > > Can you clarify in comments which is trusted and which isn't? As it is, each > function has a comment saying that messages can come over IPC, so it's not > really clear to me without looking into how they're called. I've reworked the API so there's only one way to load, and one way to unload. This is slightly less efficient for "local" module events where the metadata is fully trusted and could be used for guaranteed O(1) lookup, but easier to reason about. I've also tried to rework the comments.
	79 RemoveLoadAddressById(module_info->module_id,

	80 &process_info->loaded_modules);

	81 InsertLoadAddress(module_info->module_id, load_address,

	82 &process_info->unloaded_modules);

	83 break;

	84 }

	85 }

	86

	87 void ModuleDatabase::OnModuleUnload(uint32_t process_id,

	88 uintptr_t load_address) {

	89 // Messages can arrive from any thread (UI thread for calls over IPC, and

	90 // anywhere at all for calls from ModuleWatcher), so bounce if necessary.

	91 if (!task_runner_->RunsTasksOnCurrentThread()) {

	92 task_runner_->PostTask(

	93 FROM_HERE,

	94 base::Bind(&ModuleDatabase::OnModuleUnload,

	95 weak_ptr_factory_.GetWeakPtr(), process_id, load_address));

	96 return;

	97 }

	98

	99 // See the long-winded comment in OnModuleEvent about reasons why this can

	100 // fail (but shouldn't normally).

	101 auto* process_info = GetProcessInfo(process_id);

	102 DCHECK(process_info);
	grt (UTC plus 2) 2016/12/20 11:11:34 remove this one, too? remove this one, too? chrisha 2016/12/20 19:46:23 Oops, should have been deleted. Done. Show quoted text On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > remove this one, too? Oops, should have been deleted. Done.
	103 if (!process_info)

	104 return;

	105

	106 // Find the module corresponding to this load address.

	107 int i =

	108 FindLoadAddressIndexByAddress(load_address, process_info->loaded_modules);

	109

	110 // No such module found. This shouldn't happen either, unless messages are

	111 // malformed or out of order. Gracefully fail in this case.

	112 if (i == kInvalidIndex)

	113 return;

	114

	115 ModuleId module_id = process_info->loaded_modules[i].first;

	116

	117 // Remove from the loaded module list and insert into the unloaded module

	118 // list.

	119 RemoveLoadAddressByIndex(i, &process_info->loaded_modules);

	120 InsertLoadAddress(module_id, load_address, &process_info->unloaded_modules);

	121 }

	122

	123 void ModuleDatabase::OnProcessEnded(uint32_t process_id) {

	124 // Messages can arrive from any thread (UI thread for calls over IPC, and

	125 // anywhere at all for calls from ModuleWatcher), so bounce if necessary.

	126 if (!task_runner_->RunsTasksOnCurrentThread()) {

	127 task_runner_->PostTask(

	128 FROM_HERE, base::Bind(&ModuleDatabase::OnProcessEnded,

	129 weak_ptr_factory_.GetWeakPtr(), process_id));

	130 return;

	131 }

	132

	133 bool deleted = DeleteProcessInfo(process_id);

	134 // If each client is sending messages in the appropriate order then this

	135 // should always be true.

	136 DCHECK(deleted);

	137 }

	138

	139 // static

	140 uint32_t ModuleDatabase::ProcessTypeToBit(content::ProcessType process_type) {

	141 uint32_t bit_index = static_cast<uint32_t>(process_type) - kMinProcessType;

	142 DCHECK_LE(0u, bit_index);

	143 DCHECK_GE(31u, bit_index);

	144 uint32_t bit = (1 << bit_index);

	145 return bit;

	146 }

	147

	148 // static

	149 content::ProcessType ModuleDatabase::BitIndexToProcessType(uint32_t bit_index) {

	150 DCHECK_LE(0u, bit_index);

	151 DCHECK_GE(31u, bit_index);

	152 return static_cast<content::ProcessType>(bit_index + kMinProcessType);

	153 }

	154

	155 // static

	156 int ModuleDatabase::FindLoadAddressIndexById(
	grt (UTC plus 2) 2016/12/20 11:11:33 looks like using size_t for the index would be muc looks like using size_t for the index would be much more convenient than int. this is allowed by my interpretation of the style guide... chrisha 2016/12/20 19:46:23 Indeed it would be. Done. Show quoted text On 2016/12/20 11:11:33, grt (UTC plus 1) wrote: > looks like using size_t for the index would be much more convenient than int. > this is allowed by my interpretation of the style guide... Indeed it would be. Done.
	157 ModuleId module_id,

	158 const ModuleLoadAddresses& load_addresses) {

	159 for (size_t i = 0; i < load_addresses.size(); ++i) {

	160 if (load_addresses[i].first == module_id)

	161 return static_cast<int>(i);

	162 }

	163 return kInvalidIndex;

	164 }

	165

	166 // static

	167 int ModuleDatabase::FindLoadAddressIndexByAddress(

	168 uintptr_t load_address,

	169 const ModuleLoadAddresses& load_addresses) {

	170 for (size_t i = 0; i < load_addresses.size(); ++i) {

	171 if (load_addresses[i].second == load_address)

	172 return static_cast<int>(i);

	173 }

	174 return kInvalidIndex;

	175 }

	176

	177 // static

	178 void ModuleDatabase::InsertLoadAddress(ModuleId module_id,
	chrisha 2016/12/19 20:15:37 grt: I was bored, so went a little overboard here grt: I was bored, so went a little overboard here to try to guarantee O(1) insertion in the usual case of a module being inserted for the first time. Let me know if you wish me to simplify.
	179 uintptr_t load_address,

	180 ModuleLoadAddresses* load_addresses) {

	181 // A very small optimization: the largest module_id is always placed at the

	182 // end of the array. This is the most common case, and allows O(1)

	183 // determination that a \|module_id\| isn't present when it's bigger than the

	184 // maximum already in the array. This keeps insertions to O(1) in the usual

	185 // case.

	186 if (load_addresses->size() == 0 \|\| module_id > load_addresses->back().first) {
	grt (UTC plus 2) 2016/12/20 11:11:34 load_addresses->size() == 0 -> load_addresses->emp load_addresses->size() == 0 -> load_addresses->empty() chrisha 2016/12/20 19:46:23 Done. Show quoted text On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > load_addresses->size() == 0 -> load_addresses->empty() Done.
	187 load_addresses->push_back(std::make_pair(module_id, load_address));
	grt (UTC plus 2) 2016/12/20 11:11:34 load_addresses->emplace_back(module_id, load_addre load_addresses->emplace_back(module_id, load_address); chrisha 2016/12/20 19:46:23 Holy jebus batman, I've never seen "emplace". TIL. Show quoted text On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > load_addresses->emplace_back(module_id, load_address); Holy jebus batman, I've never seen "emplace". TIL... Done. grt (UTC plus 2) 2016/12/20 21:09:52 emplace was the toy I got on my birthday from the Show quoted text On 2016/12/20 19:46:23, chrisha (slow) wrote: > On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > > load_addresses->emplace_back(module_id, load_address); > > Holy jebus batman, I've never seen "emplace". TIL... emplace was the toy I got on my birthday from the overwhelming amount of "please can I create an object in a vector's memory" wishes I silently sent to the standards gods. I've wanted this for almost as long as I've touched the STL. Show quoted text > Done.
	188 return;

	189 }

	190

	191 // If the module exists in the collection then update the load address and

	192 // return.

	193 int i = FindLoadAddressIndexById(module_id, *load_addresses);

	194 if (i != kInvalidIndex) {

	195 load_addresses->at(i).second = load_address;
	grt (UTC plus 2) 2016/12/20 11:11:34 curious: why would a module's load address change? curious: why would a module's load address change? chrisha 2016/12/20 19:46:23 A vain attempt at maintaining long term consistenc Show quoted text On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > curious: why would a module's load address change? A vain attempt at maintaining long term consistency? This could happen if a module was loaded, unloaded and reloaded, and forced to load at a different address because something else occupied it's previous address. And if the unload/reload were processed out of order. This is admittedly very very unlikely. I think there are 3 options: 1. Keep the existing address. If a reload really did occur before the unload was processed, then this means that the module will appear to be unloaded when it is in fact in memory. Unlikely, but strictly possible. 2. Update the address. If the same race happens, this is slightly more consistent in that the module will appear to be loaded, and the out-of-order unload will be silently ignored. 3. Expect things to be strictly in order and explode. I've already decided this isn't realistic, and it's not worth tearing down the browser if this "informational" service is slightly incorrect. So I think (1) is the best option (what I've gone with). I've added more comments. grt (UTC plus 2) 2016/12/20 21:09:52 If unload/reload could potentially be processed ou Show quoted text On 2016/12/20 19:46:23, chrisha (slow) wrote: > On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > > curious: why would a module's load address change? > > A vain attempt at maintaining long term consistency? > > This could happen if a module was loaded, unloaded and reloaded, and forced to > load at a different address because something else occupied it's previous > address. And if the unload/reload were processed out of order. 
If unload/reload could potentially be processed out of order, could process destroy/create also be processed out of order? I'm still thinking about PID reuse. I don't think the OS makes any guarantee beyond "a PID is valid as long as a process is alive", so I'm scared about aggressive reuse. Is there a statement somewhere saying "I swear I will never reuse a PID for the next N CreateProcess's"? Show quoted text > This is admittedly very very unlikely. > > I think there are 3 options: > > 1. Keep the existing address. If a reload really did occur before the unload was > processed, then this means that the module will appear to be unloaded when it is > in fact in memory. Unlikely, but strictly possible. Is it possible for a module to be loaded into process twice? LoadLibrary on a symlink? I suppose the paths would be different in that case. Could a module be loaded twice with the same path? UwS/malware? Show quoted text > 2. Update the address. If the same race happens, this is slightly more > consistent in that the module will appear to be loaded, and the out-of-order > unload will be silently ignored. > 3. Expect things to be strictly in order and explode. I've already decided this > isn't realistic, and it's not worth tearing down the browser if this > "informational" service is slightly incorrect. > > So I think (1) is the best option (what I've gone with). Ack. Show quoted text > I've added more comments. Tak. chrisha 2016/12/21 20:14:59 I did some experiments here. 1. LoadLibrary("foo. Show quoted text > Is it possible for a module to be loaded into process twice? LoadLibrary on a > symlink? I suppose the paths would be different in that case. Could a module be > loaded twice with the same path? UwS/malware? I did some experiments here. 1. LoadLibrary("foo.dll") twice in a row. Get the same HMODULE both times. Enumerating and there's one occurrence. 2. LoadLibrary("foo.dll"). Rename "foo.dll" to "bar.dll", and copy another module to "foo.dll". 
LoadLibrary("foo.dll") again. I get the same HMODULE back. Enumerating and there's one occurrence. So modules are unique up to their filename as far as the loader is concerned. 3. LoadLibrary("foo.dll"). Rename "foo.dll" to "bar.dll". LoadLibrary("bar.dll"). I get the same HMODULE back, and only "foo.dll" is listed when enumerating modules. So modules are unique up to their "inode" equivalent in the filesystem. Malware may have ways of working around this, but we can't possibly be robust to them all. The best I can do is work correctly for expected use, and be resilient otherwise. Since you can load different multiple modules (not at the same time) with the same name, I've also added module_size and module_time_date_stamp to the key that uniquely identifies a module. This should reduce the probability of collisions, and is the same data that MS uses for things like symbol servers.
	196 return;

	197 }

	198

	199 // The module does not exist, and by definition is smaller in value than

	200 // the largest module ID already present. Add it, and ensure the largest

	201 // module stays at the end.

	202 load_addresses->push_back(load_addresses->back());
	grt (UTC plus 2) 2016/12/20 11:11:33 is this the same thing? load_addresses->emplace( is this the same thing? load_addresses->emplace(--load_addresses->end(), module_id, load_address); chrisha 2016/12/20 19:46:23 It is indeed. Show quoted text On 2016/12/20 11:11:33, grt (UTC plus 1) wrote: > is this the same thing? > load_addresses->emplace(--load_addresses->end(), module_id, load_address); It is indeed.
	203 auto it = load_addresses->rbegin();

	204 it++;

	205 it->first = module_id;

	206 it->second = load_address;

	207

	208 return;

	209 }

	210

	211 // static

	212 void ModuleDatabase::RemoveLoadAddressById(

	213 ModuleId module_id,

	214 ModuleLoadAddresses* load_addresses) {

	215 if (load_addresses->empty())

	216 return;

	217

	218 // Special case: removing the maximum index module. Need to find the new
	grt (UTC plus 2) 2016/12/20 11:11:34 silly idea: how about changing FindLoadAddressInde silly idea: how about changing FindLoadAddressIndexById so that it goes back to front. then this special case is handled automagically, no? chrisha 2016/12/20 19:46:22 No, not silly at all! Done. Show quoted text On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > silly idea: how about changing FindLoadAddressIndexById so that it goes back to > front. then this special case is handled automagically, no? No, not silly at all! Done.
	219 // maximum element and ensure it goes to the end.

	220 if (load_addresses->size() > 2 && load_addresses->back().first == module_id)

	221 return RemoveLoadAddressByIndex(load_addresses->size() - 1, load_addresses);

	222

	223 // The element to be removed is not the last one.

	224 int i = FindLoadAddressIndexById(module_id, *load_addresses);

	225 RemoveLoadAddressByIndex(i, load_addresses);

	226 }

	227

	228 // static

	229 void ModuleDatabase::RemoveLoadAddressByIndex(

	230 int index,

	231 ModuleLoadAddresses* load_addresses) {

	232 DCHECK_LE(0, index);

	233 DCHECK_GT(load_addresses->size(), static_cast<size_t>(index));

	234

	235 // Special case: removing the last module (with maximum id). Need to find the

	236 // new maximum element and ensure it goes to the end.

	237 if (load_addresses->size() > 2 &&

	238 static_cast<size_t>(index + 1) == load_addresses->size()) {

	239 // Find the index of the new maximum element.

	240 ModuleId max_id = -1; // These start at zero.

	241 int max_index = kInvalidIndex;

	242 for (size_t i = 0; i < load_addresses->size() - 1; ++i) {

	243 if (load_addresses->at(i).first > max_id) {

	244 max_id = load_addresses->at(i).first;
	grt (UTC plus 2) 2016/12/20 11:11:34 avoid at() -- it does range checking and throws ex avoid at() -- it does range checking and throws exceptions chrisha 2016/12/20 19:46:23 Another TIL... Done. Show quoted text On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > avoid at() -- it does range checking and throws exceptions Another TIL... Done.
	245 max_index = i;

	246 }

	247 }

	248

	249 // Remove the max element.

	250 load_addresses->resize(load_addresses->size() - 1);

	251

	252 // If the new max element isn't in the last position, then swap it so it is.

	253 int last_index = load_addresses->size() - 1;

	254 if (max_index != last_index)

	255 std::swap((load_addresses)[max_index], (load_addresses)[last_index]);
	grt (UTC plus 2) 2016/12/20 11:11:34 #include <algorithm> #include <algorithm> chrisha 2016/12/20 19:46:23 Done. Show quoted text On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > #include <algorithm> Done.
	256

	257 return;

	258 }

	259

	260 // If the element to be removed is second last then a single swap is
	grt (UTC plus 2) 2016/12/20 11:11:33 swap -> copy swap -> copy chrisha 2016/12/20 19:46:23 Done. Show quoted text On 2016/12/20 11:11:33, grt (UTC plus 1) wrote: > swap -> copy Done.
	261 // sufficient.

	262 if (static_cast<size_t>(index + 2) == load_addresses->size()) {

	263 (load_addresses)[index] = (load_addresses)[index + 1];

	264 } else {

	265 // In the general case two swaps are necessary.
	grt (UTC plus 2) 2016/12/20 11:11:33 swaps -> copies swaps -> copies chrisha 2016/12/20 19:46:23 Done. Show quoted text On 2016/12/20 11:11:33, grt (UTC plus 1) wrote: > swaps -> copies Done.
	266 int max_index = load_addresses->size() - 1;

	267 (load_addresses)[index] = (load_addresses)[max_index - 1];

	268 (load_addresses)[max_index - 1] = (load_addresses)[max_index];

	269 }

	270

	271 // Remove the last element, which is now duplicated.

	272 load_addresses->resize(load_addresses->size() - 1);

	273 }

	274

	275 ModuleDatabase::ModuleInfo* ModuleDatabase::FindOrCreateModuleInfo(

	276 const base::FilePath& module_path) {

	277 ModuleInfo key(module_path, modules_.size());

	278 auto result = modules_.insert(key);

	279 return const_cast<ModuleInfo>(&(result.first));
	grt (UTC plus 2) 2016/12/20 11:11:34 is this cast really needed? \|this\| isn't const, an is this cast really needed? \|this\| isn't const, and ModuleSet is not a set of const ModuleInfos. chrisha 2016/12/20 19:46:23 std::set always returns elements as "const", as th Show quoted text On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > is this cast really needed? \|this\| isn't const, and ModuleSet is not a set of > const ModuleInfos. std::set always returns elements as "const", as they are considered immutable, so yes, this is needed. grt (UTC plus 2) 2016/12/20 21:09:52 Ah. Grn. The critical piece, then, is that the obj Show quoted text On 2016/12/20 19:46:23, chrisha (slow) wrote: > On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > > is this cast really needed? \|this\| isn't const, and ModuleSet is not a set of > > const ModuleInfos. > > std::set always returns elements as "const", as they are considered immutable, > so yes, this is needed. Ah. Grn. The critical piece, then, is that the object not be mutated in a way that would change the ordering in the container, correct? Could you make this clear in the struct's doc comments so that an unwitting future committer doesn't modify the path/pid (comment applies to both containers)? chrisha 2016/12/21 20:14:59 Tried to make this more clear. Show quoted text On 2016/12/20 21:09:52, grt (UTC plus 1) wrote: > On 2016/12/20 19:46:23, chrisha (slow) wrote: > > On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > > > is this cast really needed? \|this\| isn't const, and ModuleSet is not a set > of > > > const ModuleInfos. > > > > std::set always returns elements as "const", as they are considered immutable, > > so yes, this is needed. > > Ah. Grn. The critical piece, then, is that the object not be mutated in a way > that would change the ordering in the container, correct? 
Could you make this > clear in the struct's doc comments so that an unwitting future committer doesn't > modify the path/pid (comment applies to both containers)? Tried to make this more clear.
	280 }

	281

	282 ModuleDatabase::ProcessInfo* ModuleDatabase::GetProcessInfo(

	283 uint32_t process_id) {

	284 ProcessInfo key(process_id, content::PROCESS_TYPE_UNKNOWN);

	285 auto it = processes_.find(key);

	286 if (it == processes_.end())

	287 return nullptr;

	288 return const_cast<ProcessInfo>(&(it));
	grt (UTC plus 2) 2016/12/20 11:11:34 same comment about cast same comment about cast chrisha 2016/12/20 19:46:23 Ditto. Show quoted text On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > same comment about cast Ditto.
	289 }

	290

	291 const ModuleDatabase::ProcessInfo* ModuleDatabase::CreateProcessInfo(
	grt (UTC plus 2) 2016/12/20 11:11:34 the only caller DCHECKS the result. how about doin the only caller DCHECKS the result. how about doing that in here and making this a void func? chrisha 2016/12/20 19:46:22 Actually, that DCHECK is meant to be elided. Made Show quoted text On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > the only caller DCHECKS the result. how about doing that in here and making this > a void func? Actually, that DCHECK is meant to be elided. Made this void and removed it altogether.
	292 uint32_t process_id,

	293 content::ProcessType process_type) {

	294 ProcessInfo key(process_id, process_type);

	295 auto result = processes_.insert(key);

	296

	297 // If the element was inserted then return it.

	298 if (result.second)

	299 return &(*result.first);

	300 // Otherwise it already existed, so return nullptr.

	301 return nullptr;

	302 }

	303

	304 bool ModuleDatabase::DeleteProcessInfo(uint32_t process_id) {
	grt (UTC plus 2) 2016/12/20 11:11:34 similar comment: why not DCHECK in here? similar comment: why not DCHECK in here? chrisha 2016/12/20 19:46:23 Made void, removed the unneeded DCHECK. Show quoted text On 2016/12/20 11:11:34, grt (UTC plus 1) wrote: > similar comment: why not DCHECK in here? Made void, removed the unneeded DCHECK.
	305 ProcessInfo key(process_id, content::PROCESS_TYPE_UNKNOWN);

	306 auto it = processes_.find(key);

	307 if (it == processes_.end())

	308 return false;

	309 processes_.erase(it);

	310 return true;

	311 }

	312

	313 // ModuleDatabase::ModuleInfo --------------------------------------------------

	314

	// Creates the record for the module at |module_path|, tagged with
	// |module_id|. |process_types| starts at 0, i.e. with no process-type bits
	// set.
	315 ModuleDatabase::ModuleInfo::ModuleInfo(const base::FilePath& module_path,

	316 uint32_t module_id)

	317 : module_path(module_path), module_id(module_id), process_types(0) {}

	318

	// Orders ModuleInfo objects by |module_path| alone; |module_id| and
	// |process_types| take no part in the comparison.
	319 bool ModuleDatabase::ModuleInfo::operator<(const ModuleInfo& mi) const {

	320 return module_path < mi.module_path;

	321 }

	322

	323 // ModuleDatabase::ProcessInfo -------------------------------------------------

	324

	// Creates the record for process |process_id| of type |process_type|.
	325 ModuleDatabase::ProcessInfo::ProcessInfo(uint32_t process_id,

	326 content::ProcessType process_type)

	327 : process_id(process_id), process_type(process_type) {}

	328

	// Orders ProcessInfo objects by |process_id| alone; |process_type| takes no
	// part in the comparison, so a lookup key may carry any process type.
	329 bool ModuleDatabase::ProcessInfo::operator<(const ProcessInfo& pi) const {

	330 return process_id < pi.process_id;

	331 }

OLD	NEW