Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(60)

Side by Side Diff: components/safe_browsing_db/v4_store.cc

Issue 2128173003: Merge the hash prefixes from the old store and the additions in the partial (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Simplified merge logic. +Nit: Fix a failing test Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/base64.h" 5 #include "base/base64.h"
6 #include "base/bind.h" 6 #include "base/bind.h"
7 #include "base/files/file_util.h" 7 #include "base/files/file_util.h"
8 #include "base/metrics/histogram_macros.h" 8 #include "base/metrics/histogram_macros.h"
9 #include "base/metrics/sparse_histogram.h" 9 #include "base/metrics/sparse_histogram.h"
10 #include "base/strings/stringprintf.h" 10 #include "base/strings/stringprintf.h"
11 #include "components/safe_browsing_db/v4_store.h" 11 #include "components/safe_browsing_db/v4_store.h"
12 #include "components/safe_browsing_db/v4_store.pb.h" 12 #include "components/safe_browsing_db/v4_store.pb.h"
13 13
14 namespace safe_browsing { 14 namespace safe_browsing {
15 15
16 namespace { 16 namespace {
// Magic number at the head of a V4 store file, used to recognize the file as
// a V4 store. NOTE(review): the value reads as "GOOD FILE" in hex —
// presumably chosen deliberately; confirm before relying on that reading.
17 const uint32_t kFileMagic = 0x600D71FE; 17 const uint32_t kFileMagic = 0x600D71FE;
18 18
// On-disk file format version. NOTE(review): presumably incremented whenever
// the serialized layout changes so older files are rejected — confirm against
// the read path.
19 const uint32_t kFileVersion = 9; 19 const uint32_t kFileVersion = 9;
20 20
21 // The minimum expected size of a hash-prefix.
22 const uint32_t kMinHashPrefixLength = 4;
23
24 // The maximum expected size of a hash-prefix. This represents a full SHA256
25 // hash.
26 const uint32_t kMaxHashPrefixLength = 32;
27
21 void RecordStoreReadResult(StoreReadResult result) { 28 void RecordStoreReadResult(StoreReadResult result) {
22 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreReadResult", result, 29 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreReadResult", result,
23 STORE_READ_RESULT_MAX); 30 STORE_READ_RESULT_MAX);
24 } 31 }
25 32
26 void RecordStoreWriteResult(StoreWriteResult result) { 33 void RecordStoreWriteResult(StoreWriteResult result) {
27 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreWriteResult", result, 34 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreWriteResult", result,
28 STORE_WRITE_RESULT_MAX); 35 STORE_WRITE_RESULT_MAX);
29 } 36 }
30 37
38 void RecordMergeResult(MergeResult result) {
39 UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4MergeResult", result,
40 MERGE_RESULT_MAX);
41 }
42
31 // Returns the name of the temporary file used to buffer data for 43 // Returns the name of the temporary file used to buffer data for
32 // |filename|. Exported for unit tests. 44 // |filename|. Exported for unit tests.
33 const base::FilePath TemporaryFileForFilename(const base::FilePath& filename) { 45 const base::FilePath TemporaryFileForFilename(const base::FilePath& filename) {
34 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new")); 46 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new"));
35 } 47 }
36 48
37 } // namespace 49 } // namespace
38 50
39 std::ostream& operator<<(std::ostream& os, const V4Store& store) { 51 std::ostream& operator<<(std::ostream& os, const V4Store& store) {
40 os << store.DebugString(); 52 os << store.DebugString();
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
76 state_ = ""; 88 state_ = "";
77 return true; 89 return true;
78 } 90 }
79 91
80 void V4Store::ApplyUpdate( 92 void V4Store::ApplyUpdate(
81 std::unique_ptr<ListUpdateResponse> response, 93 std::unique_ptr<ListUpdateResponse> response,
82 const scoped_refptr<base::SingleThreadTaskRunner>& callback_task_runner, 94 const scoped_refptr<base::SingleThreadTaskRunner>& callback_task_runner,
83 UpdatedStoreReadyCallback callback) { 95 UpdatedStoreReadyCallback callback) {
84 std::unique_ptr<V4Store> new_store( 96 std::unique_ptr<V4Store> new_store(
85 new V4Store(this->task_runner_, this->store_path_)); 97 new V4Store(this->task_runner_, this->store_path_));
86
87 new_store->state_ = response->new_client_state(); 98 new_store->state_ = response->new_client_state();
88 // TODO(vakh): 99 // TODO(vakh):
89 // 1. Merge the old store and the new update in new_store. 100 // 1. Merge the old store and the new update in new_store.
90 // 2. Create a ListUpdateResponse containing RICE encoded hash-prefixes and 101 // 2. Create a ListUpdateResponse containing RICE encoded hash-prefixes and
91 // response_type as FULL_UPDATE, and write that to disk. 102 // response_type as FULL_UPDATE, and write that to disk.
92 // 3. Remove this if condition after completing 1. and 2. 103 // 3. Remove this if condition after completing 1. and 2.
93 if (response->has_response_type() && 104 if (response->response_type() == ListUpdateResponse::PARTIAL_UPDATE) {
94 response->response_type() == ListUpdateResponse::FULL_UPDATE) { 105 for (const auto& removal : response->removals()) {
106 // TODO(vakh): Allow other compression types.
107 // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567
108 DCHECK_EQ(RAW, removal.compression_type());
109 }
110
111 HashPrefixMap additions_map;
112 UpdateHashPrefixMapFromAdditions(response->additions(), &additions_map);
113
114 new_store->MergeUpdate(hash_prefix_map_, additions_map);
115
116 // TODO(vakh): Generate the updated ListUpdateResponse to write to disk.
117 } else if (response->response_type() == ListUpdateResponse::FULL_UPDATE) {
95 StoreWriteResult result = new_store->WriteToDisk(std::move(response)); 118 StoreWriteResult result = new_store->WriteToDisk(std::move(response));
96 RecordStoreWriteResult(result); 119 RecordStoreWriteResult(result);
120 } else {
121 NOTREACHED() << "Unexpected response type: " << response->response_type();
97 } 122 }
98 123
99 // new_store is done updating, pass it to the callback. 124 // new_store is done updating, pass it to the callback.
100 callback_task_runner->PostTask( 125 callback_task_runner->PostTask(
101 FROM_HERE, base::Bind(callback, base::Passed(&new_store))); 126 FROM_HERE, base::Bind(callback, base::Passed(&new_store)));
102 } 127 }
103 128
129 // static
130 void V4Store::UpdateHashPrefixMapFromAdditions(
131 const ::google::protobuf::RepeatedPtrField<ThreatEntrySet>& additions,
132 HashPrefixMap* additions_map) {
133 for (const auto& addition : additions) {
134 // TODO(vakh): Allow other compression types.
135 // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567
136 DCHECK_EQ(RAW, addition.compression_type());
137
138 DCHECK(addition.has_raw_hashes());
139 DCHECK(addition.raw_hashes().has_raw_hashes());
140
141 PrefixSize prefix_size = addition.raw_hashes().prefix_size();
142 RecordMergeResult(AddUnlumpedHashes(
143 prefix_size, addition.raw_hashes().raw_hashes(), additions_map));
144 }
145 }
146
147 // static
148 MergeResult V4Store::AddUnlumpedHashes(PrefixSize prefix_size,
149 const std::string& lumped_hashes,
150 HashPrefixMap* additions_map) {
151 if (prefix_size < kMinHashPrefixLength) {
152 NOTREACHED();
153 return PREFIX_SIZE_TOO_SMALL_FAILURE;
154 } else if (prefix_size > kMaxHashPrefixLength) {
155 NOTREACHED();
156 return PREFIX_SIZE_TOO_LARGE_FAILURE;
157 } else {
158 if (lumped_hashes.size() % prefix_size != 0) {
159 return ADDITIONS_SIZE_UNEXPECTED_FAILURE;
160 } else {
161 (*additions_map)[prefix_size] = lumped_hashes;
162 }
163 }
164 return MERGE_SUCCESS;
165 }
166
167 // static
168 bool V4Store::GetNextSmallestPrefixSize(const HashPrefixMap& hash_prefix_map,
169 const CounterMap& counter_map,
170 PrefixSize* smallest_prefix_size) {
171 HashPrefix smallest_prefix;
172 for (const auto& counter_pair : counter_map) {
173 PrefixSize prefix_size = counter_pair.first;
174 size_t index = counter_pair.second;
175 size_t sized_index = prefix_size * index;
176
177 const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);
178 if (sized_index < hash_prefixes.size()) {
179 HashPrefix this_prefix = hash_prefixes.substr(sized_index, prefix_size);
180 if (smallest_prefix.empty() || smallest_prefix > this_prefix) {
181 smallest_prefix = this_prefix;
182 *smallest_prefix_size = prefix_size;
183 }
184 }
185 }
186 return !smallest_prefix.empty();
187 }
188
189 // static
190 void V4Store::InitializeCounterMap(const HashPrefixMap& hash_prefix_map,
191 CounterMap* counter_map) {
192 for (const auto& map_pair : hash_prefix_map) {
193 (*counter_map)[map_pair.first] = 0;
194 }
195 }
196
197 // static
198 HashPrefix V4Store::GetNextUnmergedPrefixForSize(
199 PrefixSize prefix_size,
200 const HashPrefixMap& hash_prefix_map,
201 const CounterMap& counter_map) {
202 HashPrefixes hash_prefixes = hash_prefix_map.at(prefix_size);
203 size_t index_within_list = counter_map.at(prefix_size);
204 size_t sized_index = prefix_size * index_within_list;
205 return hash_prefixes.substr(sized_index, prefix_size);
206 }
207
208 // static
209 void V4Store::ReserveSpaceInPrefixMap(const HashPrefixMap& other_prefixes_map,
210 HashPrefixMap* prefix_map_to_update) {
211 for (const auto& pair : other_prefixes_map) {
212 PrefixSize prefix_size = pair.first;
213 size_t prefix_length_to_add = pair.second.length();
214
215 HashPrefixes existing_prefixes = (*prefix_map_to_update)[prefix_size];
216 size_t existing_capacity = existing_prefixes.capacity();
217
218 (*prefix_map_to_update)[prefix_size].reserve(existing_capacity +
219 prefix_length_to_add);
220 }
221 }
222
223 void V4Store::MergeUpdate(const HashPrefixMap& old_prefixes_map,
Nathan Parker 2016/07/12 20:35:06 Nice, this function is easier to read!
vakh (use Gerrit instead) 2016/07/12 21:57:03 Acknowledged.
224 const HashPrefixMap& additions_map) {
225 ReserveSpaceInPrefixMap(old_prefixes_map, &hash_prefix_map_);
226 ReserveSpaceInPrefixMap(additions_map, &hash_prefix_map_);
227
228 PrefixSize next_size_for_old;
229 CounterMap old_counter_map;
230 InitializeCounterMap(old_prefixes_map, &old_counter_map);
231 bool old_has_unmerged = GetNextSmallestPrefixSize(
232 old_prefixes_map, old_counter_map, &next_size_for_old);
233
234 PrefixSize next_size_for_additions;
235 CounterMap additions_counter_map;
236 InitializeCounterMap(additions_map, &additions_counter_map);
237 bool additions_has_unmerged = GetNextSmallestPrefixSize(
238 additions_map, additions_counter_map, &next_size_for_additions);
239
240 // Classical merge sort.
241 // The two constructs to merge are maps: old_prefixes_map, additions_map.
242
243 // At least one of the maps still has elements that need to be merged into the
244 // new store.
245 while (old_has_unmerged || additions_has_unmerged) {
246 HashPrefix next_smallest_prefix_old, next_smallest_prefix_additions;
Nathan Parker 2016/07/12 20:35:06 I _think_ it's more efficient if you define these
vakh (use Gerrit instead) 2016/07/12 21:57:03 Done
247 // Both maps still have elements that need to be merged into the new store.
248 if (old_has_unmerged) {
249 // Get the lexographically smallest hash prefix from the old store.
250 next_smallest_prefix_old = GetNextUnmergedPrefixForSize(
251 next_size_for_old, old_prefixes_map, old_counter_map);
252 }
253
254 if (additions_has_unmerged) {
255 // Get the lexographically smallest hash prefix from the additions in the
256 // latest update from the server.
257 next_smallest_prefix_additions = GetNextUnmergedPrefixForSize(
258 next_size_for_additions, additions_map, additions_counter_map);
259 }
260
261 bool pick_from_old =
262 old_has_unmerged &&
263 (!additions_has_unmerged ||
264 (next_smallest_prefix_old < next_smallest_prefix_additions));
265
266 if (pick_from_old) {
267 hash_prefix_map_[next_size_for_old] += next_smallest_prefix_old;
268
269 // Update the counter map, which means that we have merged one hash
270 // prefix of size |next_size_for_old| from the old store.
271 old_counter_map[next_size_for_old]++;
272
273 // Find the next smallest unmerged element in the old store's map.
274 old_has_unmerged = GetNextSmallestPrefixSize(
275 old_prefixes_map, old_counter_map, &next_size_for_old);
276 } else {
277 // This means that the smallest unmerged hash prefix in the update was
278 // smaller than that in the old store, so add that hash prefix (from the
279 // update) into the new store.
280 hash_prefix_map_[next_size_for_additions] +=
281 next_smallest_prefix_additions;
282
283 // Update the counter map, which means that we have merged one hash
284 // prefix of size |next_size_for_additions| from the update.
285 additions_counter_map[next_size_for_additions]++;
286
287 // Find the next smallest unmerged element in the additions map.
288 additions_has_unmerged = GetNextSmallestPrefixSize(
289 additions_map, additions_counter_map, &next_size_for_additions);
290 }
291 }
292 }
293
104 StoreReadResult V4Store::ReadFromDisk() { 294 StoreReadResult V4Store::ReadFromDisk() {
105 DCHECK(task_runner_->RunsTasksOnCurrentThread()); 295 DCHECK(task_runner_->RunsTasksOnCurrentThread());
106 296
107 std::string contents; 297 std::string contents;
108 bool read_success = base::ReadFileToString(store_path_, &contents); 298 bool read_success = base::ReadFileToString(store_path_, &contents);
109 if (!read_success) { 299 if (!read_success) {
110 return FILE_UNREADABLE_FAILURE; 300 return FILE_UNREADABLE_FAILURE;
111 } 301 }
112 302
113 if (contents.empty()) { 303 if (contents.empty()) {
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
174 364
175 if (!base::Move(new_filename, store_path_)) { 365 if (!base::Move(new_filename, store_path_)) {
176 DVLOG(1) << "store_path_: " << store_path_.value(); 366 DVLOG(1) << "store_path_: " << store_path_.value();
177 return UNABLE_TO_RENAME_FAILURE; 367 return UNABLE_TO_RENAME_FAILURE;
178 } 368 }
179 369
180 return WRITE_SUCCESS; 370 return WRITE_SUCCESS;
181 } 371 }
182 372
183 } // namespace safe_browsing 373 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698