Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(133)

Side by Side Diff: components/safe_browsing_db/v4_store.cc

Issue 2128173003: Merge the hash prefixes from the old store and the additions in the partial (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Minor changes to the test inputs Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/base64.h" 5 #include "base/base64.h"
6 #include "base/bind.h" 6 #include "base/bind.h"
7 #include "base/files/file_util.h" 7 #include "base/files/file_util.h"
8 #include "base/metrics/histogram_macros.h" 8 #include "base/metrics/histogram_macros.h"
9 #include "base/metrics/sparse_histogram.h" 9 #include "base/metrics/sparse_histogram.h"
10 #include "base/strings/stringprintf.h" 10 #include "base/strings/stringprintf.h"
11 #include "components/safe_browsing_db/v4_store.h" 11 #include "components/safe_browsing_db/v4_store.h"
12 #include "components/safe_browsing_db/v4_store.pb.h" 12 #include "components/safe_browsing_db/v4_store.pb.h"
13 13
14 namespace safe_browsing { 14 namespace safe_browsing {
15 15
16 namespace { 16 namespace {
17 const uint32_t kFileMagic = 0x600D71FE; 17 const uint32_t kFileMagic = 0x600D71FE;
18 18
19 const uint32_t kFileVersion = 9; 19 const uint32_t kFileVersion = 9;
20 20
21 // The minimum expected size of a hash-prefix.
22 const uint32_t kMinHashPrefixLength = 4;
Nathan Parker 2016/07/11 18:09:58 Nit: Would it be easier to read if the code didn't
vakh (use Gerrit instead) 2016/07/12 07:34:19 Yes, it would be slightly more compact but at the
23
24 // The maximum expected size of a hash-prefix. This represents a full SHA256
25 // hash.
26 const uint32_t kMaxHashPrefixLength = 32;
27
21 void RecordStoreReadResult(StoreReadResult result) { 28 void RecordStoreReadResult(StoreReadResult result) {
22 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreReadResult", result, 29 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreReadResult", result,
23 STORE_READ_RESULT_MAX); 30 STORE_READ_RESULT_MAX);
24 } 31 }
25 32
26 void RecordStoreWriteResult(StoreWriteResult result) { 33 void RecordStoreWriteResult(StoreWriteResult result) {
27 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreWriteResult", result, 34 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreWriteResult", result,
28 STORE_WRITE_RESULT_MAX); 35 STORE_WRITE_RESULT_MAX);
29 } 36 }
30 37
38 void RecordMergeUpdateResult(MergeUpdateResult result) {
39 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4MergeUpdateResult", result,
40 MERGE_UPDATE_RESULT_MAX);
41 }
42
31 // Returns the name of the temporary file used to buffer data for 43 // Returns the name of the temporary file used to buffer data for
32 // |filename|. Exported for unit tests. 44 // |filename|. Exported for unit tests.
33 const base::FilePath TemporaryFileForFilename(const base::FilePath& filename) { 45 const base::FilePath TemporaryFileForFilename(const base::FilePath& filename) {
34 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new")); 46 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new"));
35 } 47 }
36 48
49 // Returns true if that |first| string is lexicographically smaller than
50 // |second|.
51 bool CompareHashPrefixes(const char* first,
52 const PrefixSize first_size,
53 const char* second,
54 const PrefixSize second_size) {
55 return std::lexicographical_compare(first, first + first_size, second,
56 second + second_size);
57 }
58
37 } // namespace 59 } // namespace
38 60
39 std::ostream& operator<<(std::ostream& os, const V4Store& store) { 61 std::ostream& operator<<(std::ostream& os, const V4Store& store) {
40 os << store.DebugString(); 62 os << store.DebugString();
41 return os; 63 return os;
42 } 64 }
43 65
44 V4Store* V4StoreFactory::CreateV4Store( 66 V4Store* V4StoreFactory::CreateV4Store(
45 const scoped_refptr<base::SequencedTaskRunner>& task_runner, 67 const scoped_refptr<base::SequencedTaskRunner>& task_runner,
46 const base::FilePath& store_path) { 68 const base::FilePath& store_path) {
(...skipping 29 matching lines...) Expand all
76 state_ = ""; 98 state_ = "";
77 return true; 99 return true;
78 } 100 }
79 101
80 void V4Store::ApplyUpdate( 102 void V4Store::ApplyUpdate(
81 std::unique_ptr<ListUpdateResponse> response, 103 std::unique_ptr<ListUpdateResponse> response,
82 const scoped_refptr<base::SingleThreadTaskRunner>& callback_task_runner, 104 const scoped_refptr<base::SingleThreadTaskRunner>& callback_task_runner,
83 UpdatedStoreReadyCallback callback) { 105 UpdatedStoreReadyCallback callback) {
84 std::unique_ptr<V4Store> new_store( 106 std::unique_ptr<V4Store> new_store(
85 new V4Store(this->task_runner_, this->store_path_)); 107 new V4Store(this->task_runner_, this->store_path_));
86
87 new_store->state_ = response->new_client_state(); 108 new_store->state_ = response->new_client_state();
88 // TODO(vakh): 109 // TODO(vakh):
89 // 1. Merge the old store and the new update in new_store. 110 // 1. Merge the old store and the new update in new_store.
90 // 2. Create a ListUpdateResponse containing RICE encoded hash-prefixes and 111 // 2. Create a ListUpdateResponse containing RICE encoded hash-prefixes and
91 // response_type as FULL_UPDATE, and write that to disk. 112 // response_type as FULL_UPDATE, and write that to disk.
92 // 3. Remove this if condition after completing 1. and 2. 113 // 3. Remove this if condition after completing 1. and 2.
93 if (response->has_response_type() && 114 if (response->response_type() == ListUpdateResponse::PARTIAL_UPDATE) {
94 response->response_type() == ListUpdateResponse::FULL_UPDATE) { 115 for (const auto& removal : response->removals()) {
116 // TODO(vakh): Allow other compression types.
117 // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567
118 DCHECK_EQ(RAW, removal.compression_type());
119 }
120
121 HashPrefixMap additions_map =
122 GetHashPrefixMapFromAdditions(response->additions());
123
124 new_store->MergeUpdate(hash_prefix_map_, additions_map);
125
126 // TODO(vakh): Generate the updated ListUpdateResponse to write to disk.
127 } else if (response->response_type() == ListUpdateResponse::FULL_UPDATE) {
95 StoreWriteResult result = new_store->WriteToDisk(std::move(response)); 128 StoreWriteResult result = new_store->WriteToDisk(std::move(response));
96 RecordStoreWriteResult(result); 129 RecordStoreWriteResult(result);
97 } 130 }
Nathan Parker 2016/07/11 18:09:58 todo: Do something reasonable if response_type is
vakh (use Gerrit instead) 2016/07/12 07:34:18 Done.
98 131
99 // new_store is done updating, pass it to the callback. 132 // new_store is done updating, pass it to the callback.
100 callback_task_runner->PostTask( 133 callback_task_runner->PostTask(
101 FROM_HERE, base::Bind(callback, base::Passed(&new_store))); 134 FROM_HERE, base::Bind(callback, base::Passed(&new_store)));
102 } 135 }
103 136
137 // static
138 HashPrefixMap V4Store::GetHashPrefixMapFromAdditions(
139 const ::google::protobuf::RepeatedPtrField<ThreatEntrySet>& additions) {
140 HashPrefixMap additions_map;
141 for (const auto& addition : additions) {
142 // TODO(vakh): Allow other compression types.
143 // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567
144 DCHECK_EQ(RAW, addition.compression_type());
145
146 DCHECK(addition.has_raw_hashes());
147 DCHECK(addition.raw_hashes().has_raw_hashes());
148
149 PrefixSize prefix_size = addition.raw_hashes().prefix_size();
150 DCHECK(kMinHashPrefixLength <= prefix_size);
Nathan Parker 2016/07/11 18:09:58 todo: skip additions with invalid metadata (i.e. m
vakh (use Gerrit instead) 2016/07/12 07:34:19 Done.
151 DCHECK(kMaxHashPrefixLength >= prefix_size);
152
153 MergeUpdateResult result = AddUnlumpedHashes(
154 prefix_size, addition.raw_hashes().raw_hashes(), &additions_map);
155 if (result != MERGE_SUCCESS) {
156 RecordMergeUpdateResult(result);
Nathan Parker 2016/07/11 18:09:58 Might as well record successes as well, unless tha
vakh (use Gerrit instead) 2016/07/12 07:34:18 Done.
157 }
158 }
159
160 return additions_map;
161 }
162
163 // static
164 MergeUpdateResult V4Store::AddUnlumpedHashes(PrefixSize prefix_size,
165 const std::string& lumped_hashes,
166 HashPrefixMap* prefix_map) {
167 if (lumped_hashes.size() % prefix_size != 0) {
168 return ADDITIONS_SIZE_UNEXPECTED_FAILURE;
169 }
170
171 for (std::string::const_iterator iter = lumped_hashes.begin();
172 iter != lumped_hashes.end(); iter += prefix_size) {
173 HashPrefix prefix(new char[prefix_size + 1]);
174 memcpy(prefix.get(), std::string(iter, iter + prefix_size).data(),
175 prefix_size + 1);
176 (*prefix_map)[prefix_size].push_back(std::move(prefix));
177 }
178
179 return MERGE_SUCCESS;
180 }
181
182 // static
183 bool V4Store::GetNextSmallestPrefixSize(const HashPrefixMap& hash_prefix_map,
184 const CounterMap& counter_map,
185 PrefixSize* smallest_prefix_size) {
186 bool found = false;
187 char* smallest_prefix = nullptr;
188
189 for (const auto& counter_pair : counter_map) {
190 PrefixSize prefix_size = counter_pair.first;
191 size_t index = counter_pair.second;
192
193 const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);
194 if (index < hash_prefixes.size()) {
195 const HashPrefix& this_prefix = hash_prefixes[index];
196 if (!found ||
Nathan Parker 2016/07/11 18:09:58 Replace !found with !smallest_prefix, and then you
vakh (use Gerrit instead) 2016/07/12 07:34:18 Done.
197 !CompareHashPrefixes(smallest_prefix, *smallest_prefix_size,
198 this_prefix.get(), prefix_size)) {
199 found = true;
200 smallest_prefix = this_prefix.get();
201 *smallest_prefix_size = prefix_size;
202 }
203 }
204 }
205 return found;
206 }
207
208 // static
209 CounterMap V4Store::GetInitializedCounterMap(
210 const HashPrefixMap& hash_prefix_map) {
211 CounterMap counter_map;
212 for (const auto& map_pair : hash_prefix_map) {
213 counter_map[map_pair.first] = 0;
214 }
215 return counter_map;
216 }
217
218 // static
219 HashPrefix& V4Store::GetNextUnmergedPrefixForSize(
220 PrefixSize prefix_size,
221 HashPrefixMap& hash_prefix_map,
222 const CounterMap& counter_map) {
223 HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);
Nathan Parker 2016/07/11 18:09:58 nit (Feel free to ignore): Might be a bit more rea
vakh (use Gerrit instead) 2016/07/12 07:34:18 This code has changed so NA.
224 size_t index_within_list = counter_map.at(prefix_size);
225 HashPrefix& next_unmerged_prefix = hash_prefixes[index_within_list];
226 return next_unmerged_prefix;
227 }
228
229 void V4Store::MergeUpdate(HashPrefixMap& old_prefixes_map,
230 HashPrefixMap& additions_map) {
231 PrefixSize next_size_for_old;
232 CounterMap old_counter_map = GetInitializedCounterMap(old_prefixes_map);
233 bool found_in_old = GetNextSmallestPrefixSize(
234 old_prefixes_map, old_counter_map, &next_size_for_old);
235
236 PrefixSize next_size_for_additions;
237 CounterMap additions_counter_map = GetInitializedCounterMap(additions_map);
238 bool found_in_additions = GetNextSmallestPrefixSize(
239 additions_map, additions_counter_map, &next_size_for_additions);
240
241 // Classical merge sort.
242 // The two constructs to merge are maps: old_prefixes_map, additions_map.
243
244 // At least one of the maps still has elements that need to be merged into the
245 // new store.
246 while (found_in_old || found_in_additions) {
247 // Both maps still have elements that need to be merged into the new store.
248 if (found_in_old && found_in_additions) {
Nathan Parker 2016/07/11 18:09:58 To remove repeated code here, could you do if (fo
vakh (use Gerrit instead) 2016/07/12 21:57:02 Done.
249 // Get the lexographically smallest hash prefix from the old store.
250 HashPrefix& next_smallest_prefix_old = GetNextUnmergedPrefixForSize(
251 next_size_for_old, old_prefixes_map, old_counter_map);
252 // Get the lexographically smallest hash prefix from the additions in the
253 // latest update from the server.
254 HashPrefix& next_smallest_prefix_additions = GetNextUnmergedPrefixForSize(
255 next_size_for_additions, additions_map, additions_counter_map);
256
257 // If the smallest unmerged hash prefix in old store is smaller than that
258 // in the update, add that to the new store.
259 if (CompareHashPrefixes(next_smallest_prefix_old.get(), next_size_for_old,
260 next_smallest_prefix_additions.get(),
261 next_size_for_additions)) {
262 hash_prefix_map_[next_size_for_old].push_back(
263 std::move(next_smallest_prefix_old));
264
265 // Update the counter map, which means that we have merged one hash
266 // prefix of size |next_size_for_old| from the old store.
267 old_counter_map[next_size_for_old]++;
268
269 // Find the next smallest unmerged element in the old store's map.
270 found_in_old = GetNextSmallestPrefixSize(
271 old_prefixes_map, old_counter_map, &next_size_for_old);
272 } else {
273 // This means that the smallest unmerged hash prefix in the update was
274 // smaller than that in the old store, so add that hash prefix (from the
275 // update) into the new store.
276 hash_prefix_map_[next_size_for_additions].push_back(
277 std::move(next_smallest_prefix_additions));
278
279 // Update the counter map, which means that we have merged one hash
280 // prefix of size |next_size_for_additions| from the update.
281 additions_counter_map[next_size_for_additions]++;
282
283 // Find the next smallest unmerged element in the additions map.
284 found_in_additions = GetNextSmallestPrefixSize(
285 additions_map, additions_counter_map, &next_size_for_additions);
286 }
287 } else {
288 // At least one of the maps has become empty.
289 if (!found_in_old) {
290 // The map for the old store has been completely merged. Now only merge
291 // the hash prefixes from the additions map.
292 HashPrefix& next_smallest_prefix_additions =
293 GetNextUnmergedPrefixForSize(next_size_for_additions, additions_map,
294 additions_counter_map);
295 hash_prefix_map_[next_size_for_additions].push_back(
296 std::move(next_smallest_prefix_additions));
297 additions_counter_map[next_size_for_additions]++;
298 found_in_additions = GetNextSmallestPrefixSize(
299 additions_map, additions_counter_map, &next_size_for_additions);
300 } else {
301 // The additions map has been completely merged. Now only merge
302 // the hash prefixes from the old store map.
303 HashPrefix& next_smallest_prefix_old = GetNextUnmergedPrefixForSize(
304 next_size_for_old, old_prefixes_map, old_counter_map);
305 hash_prefix_map_[next_size_for_old].push_back(
306 std::move(next_smallest_prefix_old));
307 old_counter_map[next_size_for_old]++;
308 found_in_old = GetNextSmallestPrefixSize(
309 old_prefixes_map, old_counter_map, &next_size_for_old);
310 }
311 }
312 }
313 }
314
104 StoreReadResult V4Store::ReadFromDisk() { 315 StoreReadResult V4Store::ReadFromDisk() {
105 DCHECK(task_runner_->RunsTasksOnCurrentThread()); 316 DCHECK(task_runner_->RunsTasksOnCurrentThread());
106 317
107 std::string contents; 318 std::string contents;
108 bool read_success = base::ReadFileToString(store_path_, &contents); 319 bool read_success = base::ReadFileToString(store_path_, &contents);
109 if (!read_success) { 320 if (!read_success) {
110 return FILE_UNREADABLE_FAILURE; 321 return FILE_UNREADABLE_FAILURE;
111 } 322 }
112 323
113 if (contents.empty()) { 324 if (contents.empty()) {
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
174 385
175 if (!base::Move(new_filename, store_path_)) { 386 if (!base::Move(new_filename, store_path_)) {
176 DVLOG(1) << "store_path_: " << store_path_.value(); 387 DVLOG(1) << "store_path_: " << store_path_.value();
177 return UNABLE_TO_RENAME_FAILURE; 388 return UNABLE_TO_RENAME_FAILURE;
178 } 389 }
179 390
180 return WRITE_SUCCESS; 391 return WRITE_SUCCESS;
181 } 392 }
182 393
183 } // namespace safe_browsing 394 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698