Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(434)

Side by Side Diff: components/safe_browsing_db/v4_store.cc

Issue 2145163003: PVer4: Keep track of the smallest hashes not their sizes. Replace counters with iterators. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@01_read_map_from_disk
Patch Set: Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/base64.h" 5 #include "base/base64.h"
6 #include "base/bind.h" 6 #include "base/bind.h"
7 #include "base/files/file_util.h" 7 #include "base/files/file_util.h"
8 #include "base/metrics/histogram_macros.h" 8 #include "base/metrics/histogram_macros.h"
9 #include "base/metrics/sparse_histogram.h" 9 #include "base/metrics/sparse_histogram.h"
10 #include "base/strings/stringprintf.h" 10 #include "base/strings/stringprintf.h"
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after
185 if (lumped_hashes.size() % prefix_size != 0) { 185 if (lumped_hashes.size() % prefix_size != 0) {
186 return ADDITIONS_SIZE_UNEXPECTED_FAILURE; 186 return ADDITIONS_SIZE_UNEXPECTED_FAILURE;
187 } 187 }
188 // TODO(vakh): Figure out a way to avoid the following copy operation. 188 // TODO(vakh): Figure out a way to avoid the following copy operation.
189 (*additions_map)[prefix_size] = lumped_hashes; 189 (*additions_map)[prefix_size] = lumped_hashes;
190 return APPLY_UPDATE_SUCCESS; 190 return APPLY_UPDATE_SUCCESS;
191 } 191 }
192 192
193 // static 193 // static
194 bool V4Store::GetNextSmallestPrefixSize(const HashPrefixMap& hash_prefix_map, 194 bool V4Store::GetNextSmallestPrefixSize(const HashPrefixMap& hash_prefix_map,
195 const CounterMap& counter_map, 195 const IteratorMap& iterator_map,
196 PrefixSize* smallest_prefix_size) { 196 PrefixSize* smallest_prefix_size) {
197 HashPrefix smallest_prefix, current_prefix; 197 HashPrefix smallest_prefix, current_prefix;
198 for (const auto& counter_pair : counter_map) { 198 for (const auto& iterator_pair : iterator_map) {
199 PrefixSize prefix_size = counter_pair.first; 199 PrefixSize prefix_size = iterator_pair.first;
200 size_t index = counter_pair.second; 200 HashPrefixes::const_iterator start = iterator_pair.second;
201 size_t sized_index = prefix_size * index; 201 HashPrefixes::const_iterator end = start + prefix_size;
202 202
203 const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size); 203 const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);
204 if (sized_index < hash_prefixes.size()) { 204 if (end <= hash_prefixes.end()) {
205 current_prefix = hash_prefixes.substr(sized_index, prefix_size); 205 current_prefix = std::string(start, end);
206 if (smallest_prefix.empty() || smallest_prefix > current_prefix) { 206 if (smallest_prefix.empty() || smallest_prefix > current_prefix) {
207 smallest_prefix = current_prefix; 207 smallest_prefix = current_prefix;
Nathan Parker 2016/07/14 22:09:32 could do smallest_prefix.swap(current_prefix)
vakh (use Gerrit instead) 2016/07/14 23:10:41 Done.
208 *smallest_prefix_size = prefix_size; 208 *smallest_prefix_size = prefix_size;
209 } 209 }
210 } 210 }
211 } 211 }
212 return !smallest_prefix.empty(); 212 return !smallest_prefix.empty();
213 } 213 }
214 214
215 // static 215 // static
216 void V4Store::InitializeCounterMap(const HashPrefixMap& hash_prefix_map, 216 void V4Store::InitializeIteratorMap(const HashPrefixMap& hash_prefix_map,
217 CounterMap* counter_map) { 217 IteratorMap* iterator_map) {
218 for (const auto& map_pair : hash_prefix_map) { 218 for (const auto& map_pair : hash_prefix_map) {
219 (*counter_map)[map_pair.first] = 0; 219 (*iterator_map)[map_pair.first] = map_pair.second.begin();
220 } 220 }
221 } 221 }
222 222
223 // static 223 // static
224 HashPrefix V4Store::GetNextUnmergedPrefixForSize( 224 HashPrefix V4Store::GetNextUnmergedPrefixForSize(
225 PrefixSize prefix_size, 225 PrefixSize prefix_size,
226 const HashPrefixMap& hash_prefix_map, 226 const HashPrefixMap& hash_prefix_map,
227 const CounterMap& counter_map) { 227 const IteratorMap& iterator_map) {
228 const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size); 228 HashPrefixes::const_iterator start = iterator_map.at(prefix_size);
229 size_t index_within_list = counter_map.at(prefix_size); 229 HashPrefixes::const_iterator end = start + prefix_size;
230 size_t sized_index = prefix_size * index_within_list; 230 return std::string(start, end);
Nathan Parker 2016/07/14 22:09:32 could add a dcheck that end <= hash_prefix_map.at(
vakh (use Gerrit instead) 2016/07/14 23:10:41 Even better. Removed the method entirely and made
231 return hash_prefixes.substr(sized_index, prefix_size);
232 } 231 }
233 232
234 // static 233 // static
235 void V4Store::ReserveSpaceInPrefixMap(const HashPrefixMap& other_prefixes_map, 234 void V4Store::ReserveSpaceInPrefixMap(const HashPrefixMap& other_prefixes_map,
236 HashPrefixMap* prefix_map_to_update) { 235 HashPrefixMap* prefix_map_to_update) {
237 for (const auto& pair : other_prefixes_map) { 236 for (const auto& pair : other_prefixes_map) {
238 PrefixSize prefix_size = pair.first; 237 PrefixSize prefix_size = pair.first;
239 size_t prefix_length_to_add = pair.second.length(); 238 size_t prefix_length_to_add = pair.second.length();
240 239
241 const HashPrefixes& existing_prefixes = 240 const HashPrefixes& existing_prefixes =
242 (*prefix_map_to_update)[prefix_size]; 241 (*prefix_map_to_update)[prefix_size];
243 size_t existing_capacity = existing_prefixes.capacity(); 242 size_t existing_capacity = existing_prefixes.capacity();
244 243
245 (*prefix_map_to_update)[prefix_size].reserve(existing_capacity + 244 (*prefix_map_to_update)[prefix_size].reserve(existing_capacity +
246 prefix_length_to_add); 245 prefix_length_to_add);
247 } 246 }
248 } 247 }
249 248
250 ApplyUpdateResult V4Store::MergeUpdate(const HashPrefixMap& old_prefixes_map, 249 ApplyUpdateResult V4Store::MergeUpdate(const HashPrefixMap& old_prefixes_map,
251 const HashPrefixMap& additions_map) { 250 const HashPrefixMap& additions_map) {
252 DCHECK(hash_prefix_map_.empty()); 251 DCHECK(hash_prefix_map_.empty());
253 hash_prefix_map_.clear(); 252 hash_prefix_map_.clear();
254 ReserveSpaceInPrefixMap(old_prefixes_map, &hash_prefix_map_); 253 ReserveSpaceInPrefixMap(old_prefixes_map, &hash_prefix_map_);
255 ReserveSpaceInPrefixMap(additions_map, &hash_prefix_map_); 254 ReserveSpaceInPrefixMap(additions_map, &hash_prefix_map_);
256 255
257 PrefixSize next_size_for_old; 256 PrefixSize next_size_for_old;
258 CounterMap old_counter_map; 257 IteratorMap old_iterator_map;
259 InitializeCounterMap(old_prefixes_map, &old_counter_map); 258 InitializeIteratorMap(old_prefixes_map, &old_iterator_map);
260 bool old_has_unmerged = GetNextSmallestPrefixSize( 259 bool old_has_unmerged = GetNextSmallestPrefixSize(
261 old_prefixes_map, old_counter_map, &next_size_for_old); 260 old_prefixes_map, old_iterator_map, &next_size_for_old);
262 261
263 PrefixSize next_size_for_additions; 262 PrefixSize next_size_for_additions;
264 CounterMap additions_counter_map; 263 IteratorMap additions_iterator_map;
265 InitializeCounterMap(additions_map, &additions_counter_map); 264 InitializeIteratorMap(additions_map, &additions_iterator_map);
266 bool additions_has_unmerged = GetNextSmallestPrefixSize( 265 bool additions_has_unmerged = GetNextSmallestPrefixSize(
267 additions_map, additions_counter_map, &next_size_for_additions); 266 additions_map, additions_iterator_map, &next_size_for_additions);
268 267
269 // Classical merge sort. 268 // Classical merge sort.
270 // The two constructs to merge are maps: old_prefixes_map, additions_map. 269 // The two constructs to merge are maps: old_prefixes_map, additions_map.
271 270
272 HashPrefix next_smallest_prefix_old, next_smallest_prefix_additions; 271 HashPrefix next_smallest_prefix_old, next_smallest_prefix_additions;
273 // At least one of the maps still has elements that need to be merged into the 272 // At least one of the maps still has elements that need to be merged into the
274 // new store. 273 // new store.
275 while (old_has_unmerged || additions_has_unmerged) { 274 while (old_has_unmerged || additions_has_unmerged) {
276 // Old map still has elements that need to be merged. 275 // Old map still has elements that need to be merged.
277 if (old_has_unmerged) { 276 if (old_has_unmerged) {
279       // Get the lexicographically smallest hash prefix from the old store. 277       // Get the lexicographically smallest hash prefix from the old store.
279 next_smallest_prefix_old = GetNextUnmergedPrefixForSize( 278 next_smallest_prefix_old = GetNextUnmergedPrefixForSize(
280 next_size_for_old, old_prefixes_map, old_counter_map); 279 next_size_for_old, old_prefixes_map, old_iterator_map);
281 } 280 }
282 281
283 // Additions map still has elements that need to be merged. 282 // Additions map still has elements that need to be merged.
284 if (additions_has_unmerged) { 283 if (additions_has_unmerged) {
285       // Get the lexicographically smallest hash prefix from the additions in the 284       // Get the lexicographically smallest hash prefix from the additions in the
286 // latest update from the server. 285 // latest update from the server.
287 next_smallest_prefix_additions = GetNextUnmergedPrefixForSize( 286 next_smallest_prefix_additions = GetNextUnmergedPrefixForSize(
288 next_size_for_additions, additions_map, additions_counter_map); 287 next_size_for_additions, additions_map, additions_iterator_map);
289 } 288 }
290 289
291 // If the same hash prefix appears in the existing store and the additions 290 // If the same hash prefix appears in the existing store and the additions
292 // list, something is clearly wrong. Discard the update. 291 // list, something is clearly wrong. Discard the update.
293 if (old_has_unmerged && additions_has_unmerged && 292 if (old_has_unmerged && additions_has_unmerged &&
294 next_smallest_prefix_old == next_smallest_prefix_additions) { 293 next_smallest_prefix_old == next_smallest_prefix_additions) {
295 return ADDITIONS_HAS_EXISTING_PREFIX_FAILURE; 294 return ADDITIONS_HAS_EXISTING_PREFIX_FAILURE;
296 } 295 }
297 296
298 // Select which map to pick the next hash prefix from to keep the result in 297 // Select which map to pick the next hash prefix from to keep the result in
299     // lexicographically sorted order. 298     // lexicographically sorted order.
300 bool pick_from_old = 299 bool pick_from_old =
301 old_has_unmerged && 300 old_has_unmerged &&
302 (!additions_has_unmerged || 301 (!additions_has_unmerged ||
303 (next_smallest_prefix_old < next_smallest_prefix_additions)); 302 (next_smallest_prefix_old < next_smallest_prefix_additions));
304 303
305 if (pick_from_old) { 304 if (pick_from_old) {
306 hash_prefix_map_[next_size_for_old] += next_smallest_prefix_old; 305 hash_prefix_map_[next_size_for_old] += next_smallest_prefix_old;
307 306
308 // Update the counter map, which means that we have merged one hash 307 // Update the iterator map, which means that we have merged one hash
309 // prefix of size |next_size_for_old| from the old store. 308 // prefix of size |next_size_for_old| from the old store.
310 old_counter_map[next_size_for_old]++; 309 old_iterator_map[next_size_for_old] += next_size_for_old;
311 310
312 // Find the next smallest unmerged element in the old store's map. 311 // Find the next smallest unmerged element in the old store's map.
313 old_has_unmerged = GetNextSmallestPrefixSize( 312 old_has_unmerged = GetNextSmallestPrefixSize(
314 old_prefixes_map, old_counter_map, &next_size_for_old); 313 old_prefixes_map, old_iterator_map, &next_size_for_old);
315 } else { 314 } else {
316 hash_prefix_map_[next_size_for_additions] += 315 hash_prefix_map_[next_size_for_additions] +=
317 next_smallest_prefix_additions; 316 next_smallest_prefix_additions;
318 317
319 // Update the counter map, which means that we have merged one hash 318 // Update the iterator map, which means that we have merged one hash
320 // prefix of size |next_size_for_additions| from the update. 319 // prefix of size |next_size_for_additions| from the update.
321 additions_counter_map[next_size_for_additions]++; 320 additions_iterator_map[next_size_for_additions] +=
321 next_size_for_additions;
322 322
323 // Find the next smallest unmerged element in the additions map. 323 // Find the next smallest unmerged element in the additions map.
324 additions_has_unmerged = GetNextSmallestPrefixSize( 324 additions_has_unmerged = GetNextSmallestPrefixSize(
325 additions_map, additions_counter_map, &next_size_for_additions); 325 additions_map, additions_iterator_map, &next_size_for_additions);
326 } 326 }
327 } 327 }
328 328
329 return APPLY_UPDATE_SUCCESS; 329 return APPLY_UPDATE_SUCCESS;
330 } 330 }
331 331
332 StoreReadResult V4Store::ReadFromDisk() { 332 StoreReadResult V4Store::ReadFromDisk() {
333 DCHECK(task_runner_->RunsTasksOnCurrentThread()); 333 DCHECK(task_runner_->RunsTasksOnCurrentThread());
334 334
335 std::string contents; 335 std::string contents;
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
409 409
410 if (!base::Move(new_filename, store_path_)) { 410 if (!base::Move(new_filename, store_path_)) {
411 DVLOG(1) << "store_path_: " << store_path_.value(); 411 DVLOG(1) << "store_path_: " << store_path_.value();
412 return UNABLE_TO_RENAME_FAILURE; 412 return UNABLE_TO_RENAME_FAILURE;
413 } 413 }
414 414
415 return WRITE_SUCCESS; 415 return WRITE_SUCCESS;
416 } 416 }
417 417
418 } // namespace safe_browsing 418 } // namespace safe_browsing
OLDNEW
« no previous file with comments | « components/safe_browsing_db/v4_store.h ('k') | components/safe_browsing_db/v4_store_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698