components/safe_browsing_db/v4_store.cc - Issue 2128173003: Merge the hash prefixes from the old store and the additions in the partial

Side by Side Diff: components/safe_browsing_db/v4_store.cc

Issue 2128173003: Merge the hash prefixes from the old store and the additions in the partial (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Minor changes to the test inputs Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« components/safe_browsing_db/v4_store.h ('K') | « components/safe_browsing_db/v4_store.h ('k') | components/safe_browsing_db/v4_store_unittest.cc » ('j') | components/safe_browsing_db/v4_store_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved.	1 // Copyright 2016 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "base/base64.h"	5 #include "base/base64.h"

6 #include "base/bind.h"	6 #include "base/bind.h"

7 #include "base/files/file_util.h"	7 #include "base/files/file_util.h"

8 #include "base/metrics/histogram_macros.h"	8 #include "base/metrics/histogram_macros.h"

9 #include "base/metrics/sparse_histogram.h"	9 #include "base/metrics/sparse_histogram.h"

10 #include "base/strings/stringprintf.h"	10 #include "base/strings/stringprintf.h"

11 #include "components/safe_browsing_db/v4_store.h"	11 #include "components/safe_browsing_db/v4_store.h"

12 #include "components/safe_browsing_db/v4_store.pb.h"	12 #include "components/safe_browsing_db/v4_store.pb.h"

13	13

14 namespace safe_browsing {	14 namespace safe_browsing {

15	15

16 namespace {	16 namespace {

17 const uint32_t kFileMagic = 0x600D71FE;	17 const uint32_t kFileMagic = 0x600D71FE;

18	18

19 const uint32_t kFileVersion = 9;	19 const uint32_t kFileVersion = 9;

20	20

	21 // The minimum expected size of a hash-prefix.

	22 const uint32_t kMinHashPrefixLength = 4;
	Nathan Parker 2016/07/11 18:09:58 Nit: Would it be easier to read if the code didn't Nit: Would it be easier to read if the code didn't use the word prefix, and just called things "hashes" (of different length)? vakh (use Gerrit instead) 2016/07/12 07:34:19 Yes, it would be slightly more compact but at the Show quoted text On 2016/07/11 18:09:58, Nathan Parker wrote: > Nit: Would it be easier to read if the code didn't use the word prefix, and just > called things "hashes" (of different length)? Yes, it would be slightly more compact but at the risk of being misleading since these are not hashes (which conveys full hashes). I feel that it is better to be more explicit with this. In fact, I think it might be better to avoid just "hashes" altogether to make it clear whether we mean hash prefixes or full hashes.
	23

	24 // The maximum expected size of a hash-prefix. This represents a full SHA256

	25 // hash.

	26 const uint32_t kMaxHashPrefixLength = 32;

	27

21 void RecordStoreReadResult(StoreReadResult result) {	28 void RecordStoreReadResult(StoreReadResult result) {

22 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreReadResult", result,	29 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreReadResult", result,

23 STORE_READ_RESULT_MAX);	30 STORE_READ_RESULT_MAX);

24 }	31 }

25	32

26 void RecordStoreWriteResult(StoreWriteResult result) {	33 void RecordStoreWriteResult(StoreWriteResult result) {

27 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreWriteResult", result,	34 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreWriteResult", result,

28 STORE_WRITE_RESULT_MAX);	35 STORE_WRITE_RESULT_MAX);

29 }	36 }

30	37

	38 void RecordMergeUpdateResult(MergeUpdateResult result) {

	39 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4MergeUpdateResult", result,

	40 MERGE_UPDATE_RESULT_MAX);

	41 }

	42

31 // Returns the name of the temporary file used to buffer data for	43 // Returns the name of the temporary file used to buffer data for

32 // \|filename\|. Exported for unit tests.	44 // \|filename\|. Exported for unit tests.

33 const base::FilePath TemporaryFileForFilename(const base::FilePath& filename) {	45 const base::FilePath TemporaryFileForFilename(const base::FilePath& filename) {

34 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new"));	46 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new"));

35 }	47 }

36	48

	49 // Returns true if that \|first\| string is lexicographically smaller than

	50 // \|second\|.

	51 bool CompareHashPrefixes(const char* first,

	52 const PrefixSize first_size,

	53 const char* second,

	54 const PrefixSize second_size) {

	55 return std::lexicographical_compare(first, first + first_size, second,

	56 second + second_size);

	57 }

	58

37 } // namespace	59 } // namespace

38	60

39 std::ostream& operator<<(std::ostream& os, const V4Store& store) {	61 std::ostream& operator<<(std::ostream& os, const V4Store& store) {

40 os << store.DebugString();	62 os << store.DebugString();

41 return os;	63 return os;

42 }	64 }

43	65

44 V4Store* V4StoreFactory::CreateV4Store(	66 V4Store* V4StoreFactory::CreateV4Store(

45 const scoped_refptr<base::SequencedTaskRunner>& task_runner,	67 const scoped_refptr<base::SequencedTaskRunner>& task_runner,

46 const base::FilePath& store_path) {	68 const base::FilePath& store_path) {

(...skipping 29 matching lines...) Expand all Loading...
76 state_ = "";	98 state_ = "";

77 return true;	99 return true;

78 }	100 }

79	101

80 void V4Store::ApplyUpdate(	102 void V4Store::ApplyUpdate(

81 std::unique_ptr<ListUpdateResponse> response,	103 std::unique_ptr<ListUpdateResponse> response,

82 const scoped_refptr<base::SingleThreadTaskRunner>& callback_task_runner,	104 const scoped_refptr<base::SingleThreadTaskRunner>& callback_task_runner,

83 UpdatedStoreReadyCallback callback) {	105 UpdatedStoreReadyCallback callback) {

84 std::unique_ptr<V4Store> new_store(	106 std::unique_ptr<V4Store> new_store(

85 new V4Store(this->task_runner_, this->store_path_));	107 new V4Store(this->task_runner_, this->store_path_));

86

87 new_store->state_ = response->new_client_state();	108 new_store->state_ = response->new_client_state();

88 // TODO(vakh):	109 // TODO(vakh):

89 // 1. Merge the old store and the new update in new_store.	110 // 1. Merge the old store and the new update in new_store.

90 // 2. Create a ListUpdateResponse containing RICE encoded hash-prefixes and	111 // 2. Create a ListUpdateResponse containing RICE encoded hash-prefixes and

91 // response_type as FULL_UPDATE, and write that to disk.	112 // response_type as FULL_UPDATE, and write that to disk.

92 // 3. Remove this if condition after completing 1. and 2.	113 // 3. Remove this if condition after completing 1. and 2.

93 if (response->has_response_type() &&	114 if (response->response_type() == ListUpdateResponse::PARTIAL_UPDATE) {

94 response->response_type() == ListUpdateResponse::FULL_UPDATE) {	115 for (const auto& removal : response->removals()) {

	116 // TODO(vakh): Allow other compression types.

	117 // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567

	118 DCHECK_EQ(RAW, removal.compression_type());

	119 }

	120

	121 HashPrefixMap additions_map =

	122 GetHashPrefixMapFromAdditions(response->additions());

	123

	124 new_store->MergeUpdate(hash_prefix_map_, additions_map);

	125

	126 // TODO(vakh): Generate the updated ListUpdateResponse to write to disk.

	127 } else if (response->response_type() == ListUpdateResponse::FULL_UPDATE) {

95 StoreWriteResult result = new_store->WriteToDisk(std::move(response));	128 StoreWriteResult result = new_store->WriteToDisk(std::move(response));

96 RecordStoreWriteResult(result);	129 RecordStoreWriteResult(result);

97 }	130 }
	Nathan Parker 2016/07/11 18:09:58 todo: Do something reasonable if response_type is todo: Do something reasonable if response_type is something else. vakh (use Gerrit instead) 2016/07/12 07:34:18 Done. Show quoted text On 2016/07/11 18:09:58, Nathan Parker wrote: > todo: Do something reasonable if response_type is something else. Done.
98	131

99 // new_store is done updating, pass it to the callback.	132 // new_store is done updating, pass it to the callback.

100 callback_task_runner->PostTask(	133 callback_task_runner->PostTask(

101 FROM_HERE, base::Bind(callback, base::Passed(&new_store)));	134 FROM_HERE, base::Bind(callback, base::Passed(&new_store)));

102 }	135 }

103	136

	137 // static

	138 HashPrefixMap V4Store::GetHashPrefixMapFromAdditions(

	139 const ::google::protobuf::RepeatedPtrField<ThreatEntrySet>& additions) {

	140 HashPrefixMap additions_map;

	141 for (const auto& addition : additions) {

	142 // TODO(vakh): Allow other compression types.

	143 // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567

	144 DCHECK_EQ(RAW, addition.compression_type());

	145

	146 DCHECK(addition.has_raw_hashes());

	147 DCHECK(addition.raw_hashes().has_raw_hashes());

	148

	149 PrefixSize prefix_size = addition.raw_hashes().prefix_size();

	150 DCHECK(kMinHashPrefixLength <= prefix_size);
	Nathan Parker 2016/07/11 18:09:58 todo: skip additions with invalid metadata (i.e. m todo: skip additions with invalid metadata (i.e. more than just dcheck). vakh (use Gerrit instead) 2016/07/12 07:34:19 Done. Show quoted text On 2016/07/11 18:09:58, Nathan Parker wrote: > todo: skip additions with invalid metadata (i.e. more than just dcheck). Done.
	151 DCHECK(kMaxHashPrefixLength >= prefix_size);

	152

	153 MergeUpdateResult result = AddUnlumpedHashes(

	154 prefix_size, addition.raw_hashes().raw_hashes(), &additions_map);

	155 if (result != MERGE_SUCCESS) {

	156 RecordMergeUpdateResult(result);
	Nathan Parker 2016/07/11 18:09:58 Might as well record successes as well, unless tha Might as well record successes as well, unless that's recorded elsewhere. vakh (use Gerrit instead) 2016/07/12 07:34:18 Done. Show quoted text On 2016/07/11 18:09:58, Nathan Parker wrote: > Might as well record successes as well, unless that's recorded elsewhere. Done.
	157 }

	158 }

	159

	160 return additions_map;

	161 }

	162

	163 // static

	164 MergeUpdateResult V4Store::AddUnlumpedHashes(PrefixSize prefix_size,

	165 const std::string& lumped_hashes,

	166 HashPrefixMap* prefix_map) {

	167 if (lumped_hashes.size() % prefix_size != 0) {

	168 return ADDITIONS_SIZE_UNEXPECTED_FAILURE;

	169 }

	170

	171 for (std::string::const_iterator iter = lumped_hashes.begin();

	172 iter != lumped_hashes.end(); iter += prefix_size) {

	173 HashPrefix prefix(new char[prefix_size + 1]);

	174 memcpy(prefix.get(), std::string(iter, iter + prefix_size).data(),

	175 prefix_size + 1);

	176 (*prefix_map)[prefix_size].push_back(std::move(prefix));

	177 }

	178

	179 return MERGE_SUCCESS;

	180 }

	181

	182 // static

	183 bool V4Store::GetNextSmallestPrefixSize(const HashPrefixMap& hash_prefix_map,

	184 const CounterMap& counter_map,

	185 PrefixSize* smallest_prefix_size) {

	186 bool found = false;

	187 char* smallest_prefix = nullptr;

	188

	189 for (const auto& counter_pair : counter_map) {

	190 PrefixSize prefix_size = counter_pair.first;

	191 size_t index = counter_pair.second;

	192

	193 const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);

	194 if (index < hash_prefixes.size()) {

	195 const HashPrefix& this_prefix = hash_prefixes[index];

	196 if (!found \|\|
	Nathan Parker 2016/07/11 18:09:58 Replace !found with !smallest_prefix, and then you Replace !found with !smallest_prefix, and then you don't need found. vakh (use Gerrit instead) 2016/07/12 07:34:18 Done. Show quoted text On 2016/07/11 18:09:58, Nathan Parker wrote: > Replace !found with !smallest_prefix, and then you don't need found. Done.
	197 !CompareHashPrefixes(smallest_prefix, *smallest_prefix_size,

	198 this_prefix.get(), prefix_size)) {

	199 found = true;

	200 smallest_prefix = this_prefix.get();

	201 *smallest_prefix_size = prefix_size;

	202 }

	203 }

	204 }

	205 return found;

	206 }

	207

	208 // static

	209 CounterMap V4Store::GetInitializedCounterMap(

	210 const HashPrefixMap& hash_prefix_map) {

	211 CounterMap counter_map;

	212 for (const auto& map_pair : hash_prefix_map) {

	213 counter_map[map_pair.first] = 0;

	214 }

	215 return counter_map;

	216 }

	217

	218 // static

	219 HashPrefix& V4Store::GetNextUnmergedPrefixForSize(

	220 PrefixSize prefix_size,

	221 HashPrefixMap& hash_prefix_map,

	222 const CounterMap& counter_map) {

	223 HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);
	Nathan Parker 2016/07/11 18:09:58 nit (Feel free to ignore): Might be a bit more rea nit (Feel free to ignore): Might be a bit more readable just all stuck together: hash_prefix_map.at(prefix_size).at( counter_map.at(prefix_size)) vakh (use Gerrit instead) 2016/07/12 07:34:18 This code has changed so NA. Show quoted text On 2016/07/11 18:09:58, Nathan Parker wrote: > nit (Feel free to ignore): Might be a bit more readable just all stuck together: > > hash_prefix_map.at(prefix_size).at( > counter_map.at(prefix_size)) This code has changed so NA.
	224 size_t index_within_list = counter_map.at(prefix_size);

	225 HashPrefix& next_unmerged_prefix = hash_prefixes[index_within_list];

	226 return next_unmerged_prefix;

	227 }

	228

	229 void V4Store::MergeUpdate(HashPrefixMap& old_prefixes_map,

	230 HashPrefixMap& additions_map) {

	231 PrefixSize next_size_for_old;

	232 CounterMap old_counter_map = GetInitializedCounterMap(old_prefixes_map);

	233 bool found_in_old = GetNextSmallestPrefixSize(

	234 old_prefixes_map, old_counter_map, &next_size_for_old);

	235

	236 PrefixSize next_size_for_additions;

	237 CounterMap additions_counter_map = GetInitializedCounterMap(additions_map);

	238 bool found_in_additions = GetNextSmallestPrefixSize(

	239 additions_map, additions_counter_map, &next_size_for_additions);

	240

	241 // Classical merge sort.

	242 // The two constructs to merge are maps: old_prefixes_map, additions_map.

	243

	244 // At least one of the maps still has elements that need to be merged into the

	245 // new store.

	246 while (found_in_old \|\| found_in_additions) {

	247 // Both maps still have elements that need to be merged into the new store.

	248 if (found_in_old && found_in_additions) {
	Nathan Parker 2016/07/11 18:09:58 To remove repeated code here, could you do if (fo To remove repeated code here, could you do if (found_in_old) next_smallest_prefix_old = GetNextUnmergedPrefixForSize( next_size_for_old, old_prefixes_map, old_counter_map); if (found_in_additions) next_smallest_prefix_additions = GetNextUnmergedPrefixForSize( next_size_for_additions, additions_map, additions_counter_map); Then you can de-dup the moving/advancing by computing a variable that says which set to pull from: bool take_from_old = found_in_old && found_in_additions && compare(..); if (take_from_old) { //move from old set, increment, get next smallest. } else { //move from additions set, increment, get next smallest. } I might s/found_in_old/old_has_next/, and similar for additions. "Found" sounds like you're searching for something. Or flip it and call it "old_is_empty"? vakh (use Gerrit instead) 2016/07/12 21:57:02 Done. Show quoted text On 2016/07/11 18:09:58, Nathan Parker wrote: > To remove repeated code here, could you do > > if (found_in_old) > next_smallest_prefix_old = GetNextUnmergedPrefixForSize( > next_size_for_old, old_prefixes_map, old_counter_map); > > if (found_in_additions) > next_smallest_prefix_additions = GetNextUnmergedPrefixForSize( > next_size_for_additions, additions_map, additions_counter_map); > > Then you can de-dup the moving/advancing by computing a variable that says which > set to pull from: > > bool take_from_old = found_in_old && found_in_additions && compare(..); > if (take_from_old) { > //move from old set, increment, get next smallest. > } else { > //move from additions set, increment, get next smallest. > } > > I might s/found_in_old/old_has_next/, and similar for additions. "Found" sounds > like you're searching for something. Or flip it and call it "old_is_empty"? > > > Done.
	249 // Get the lexographically smallest hash prefix from the old store.

	250 HashPrefix& next_smallest_prefix_old = GetNextUnmergedPrefixForSize(

	251 next_size_for_old, old_prefixes_map, old_counter_map);

	252 // Get the lexographically smallest hash prefix from the additions in the

	253 // latest update from the server.

	254 HashPrefix& next_smallest_prefix_additions = GetNextUnmergedPrefixForSize(

	255 next_size_for_additions, additions_map, additions_counter_map);

	256

	257 // If the smallest unmerged hash prefix in old store is smaller than that

	258 // in the update, add that to the new store.

	259 if (CompareHashPrefixes(next_smallest_prefix_old.get(), next_size_for_old,

	260 next_smallest_prefix_additions.get(),

	261 next_size_for_additions)) {

	262 hash_prefix_map_[next_size_for_old].push_back(

	263 std::move(next_smallest_prefix_old));

	264

	265 // Update the counter map, which means that we have merged one hash

	266 // prefix of size \|next_size_for_old\| from the old store.

	267 old_counter_map[next_size_for_old]++;

	268

	269 // Find the next smallest unmerged element in the old store's map.

	270 found_in_old = GetNextSmallestPrefixSize(

	271 old_prefixes_map, old_counter_map, &next_size_for_old);

	272 } else {

	273 // This means that the smallest unmerged hash prefix in the update was

	274 // smaller than that in the old store, so add that hash prefix (from the

	275 // update) into the new store.

	276 hash_prefix_map_[next_size_for_additions].push_back(

	277 std::move(next_smallest_prefix_additions));

	278

	279 // Update the counter map, which means that we have merged one hash

	280 // prefix of size \|next_size_for_additions\| from the update.

	281 additions_counter_map[next_size_for_additions]++;

	282

	283 // Find the next smallest unmerged element in the additions map.

	284 found_in_additions = GetNextSmallestPrefixSize(

	285 additions_map, additions_counter_map, &next_size_for_additions);

	286 }

	287 } else {

	288 // At least one of the maps has become empty.

	289 if (!found_in_old) {

	290 // The map for the old store has been completely merged. Now only merge

	291 // the hash prefixes from the additions map.

	292 HashPrefix& next_smallest_prefix_additions =

	293 GetNextUnmergedPrefixForSize(next_size_for_additions, additions_map,

	294 additions_counter_map);

	295 hash_prefix_map_[next_size_for_additions].push_back(

	296 std::move(next_smallest_prefix_additions));

	297 additions_counter_map[next_size_for_additions]++;

	298 found_in_additions = GetNextSmallestPrefixSize(

	299 additions_map, additions_counter_map, &next_size_for_additions);

	300 } else {

	301 // The additions map has been completely merged. Now only merge

	302 // the hash prefixes from the old store map.

	303 HashPrefix& next_smallest_prefix_old = GetNextUnmergedPrefixForSize(

	304 next_size_for_old, old_prefixes_map, old_counter_map);

	305 hash_prefix_map_[next_size_for_old].push_back(

	306 std::move(next_smallest_prefix_old));

	307 old_counter_map[next_size_for_old]++;

	308 found_in_old = GetNextSmallestPrefixSize(

	309 old_prefixes_map, old_counter_map, &next_size_for_old);

	310 }

	311 }

	312 }

	313 }

	314

104 StoreReadResult V4Store::ReadFromDisk() {	315 StoreReadResult V4Store::ReadFromDisk() {

105 DCHECK(task_runner_->RunsTasksOnCurrentThread());	316 DCHECK(task_runner_->RunsTasksOnCurrentThread());

106	317

107 std::string contents;	318 std::string contents;

108 bool read_success = base::ReadFileToString(store_path_, &contents);	319 bool read_success = base::ReadFileToString(store_path_, &contents);

109 if (!read_success) {	320 if (!read_success) {

110 return FILE_UNREADABLE_FAILURE;	321 return FILE_UNREADABLE_FAILURE;

111 }	322 }

112	323

113 if (contents.empty()) {	324 if (contents.empty()) {

(...skipping 60 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
174	385

175 if (!base::Move(new_filename, store_path_)) {	386 if (!base::Move(new_filename, store_path_)) {

176 DVLOG(1) << "store_path_: " << store_path_.value();	387 DVLOG(1) << "store_path_: " << store_path_.value();

177 return UNABLE_TO_RENAME_FAILURE;	388 return UNABLE_TO_RENAME_FAILURE;

178 }	389 }

179	390

180 return WRITE_SUCCESS;	391 return WRITE_SUCCESS;

181 }	392 }

182	393

183 } // namespace safe_browsing	394 } // namespace safe_browsing

OLD	NEW