Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(574)

Unified Diff: components/safe_browsing_db/v4_store.cc

Issue 2128173003: Merge the hash prefixes from the old store and the additions in the partial (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Simplified merge logic. +Nit: Fix a failing test Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/safe_browsing_db/v4_store.cc
diff --git a/components/safe_browsing_db/v4_store.cc b/components/safe_browsing_db/v4_store.cc
index 2ef5c039683ef2bb14780aa67f8b69f066cc1470..d9842e5a069da70b837a2d411ec301a7405980c6 100644
--- a/components/safe_browsing_db/v4_store.cc
+++ b/components/safe_browsing_db/v4_store.cc
@@ -18,6 +18,13 @@ const uint32_t kFileMagic = 0x600D71FE;
const uint32_t kFileVersion = 9;
+// The minimum expected size of a hash-prefix.
+const uint32_t kMinHashPrefixLength = 4;
+
+// The maximum expected size of a hash-prefix. This represents a full SHA256
+// hash.
+const uint32_t kMaxHashPrefixLength = 32;
+
void RecordStoreReadResult(StoreReadResult result) {
UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreReadResult", result,
STORE_READ_RESULT_MAX);
@@ -28,6 +35,11 @@ void RecordStoreWriteResult(StoreWriteResult result) {
STORE_WRITE_RESULT_MAX);
}
+void RecordMergeResult(MergeResult result) {
+ UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4MergeResult", result,
+ MERGE_RESULT_MAX);
+}
+
// Returns the name of the temporary file used to buffer data for
// |filename|. Exported for unit tests.
const base::FilePath TemporaryFileForFilename(const base::FilePath& filename) {
@@ -83,17 +95,30 @@ void V4Store::ApplyUpdate(
UpdatedStoreReadyCallback callback) {
std::unique_ptr<V4Store> new_store(
new V4Store(this->task_runner_, this->store_path_));
-
new_store->state_ = response->new_client_state();
// TODO(vakh):
// 1. Merge the old store and the new update in new_store.
// 2. Create a ListUpdateResponse containing RICE encoded hash-prefixes and
// response_type as FULL_UPDATE, and write that to disk.
// 3. Remove this if condition after completing 1. and 2.
- if (response->has_response_type() &&
- response->response_type() == ListUpdateResponse::FULL_UPDATE) {
+ if (response->response_type() == ListUpdateResponse::PARTIAL_UPDATE) {
+ for (const auto& removal : response->removals()) {
+ // TODO(vakh): Allow other compression types.
+ // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567
+ DCHECK_EQ(RAW, removal.compression_type());
+ }
+
+ HashPrefixMap additions_map;
+ UpdateHashPrefixMapFromAdditions(response->additions(), &additions_map);
+
+ new_store->MergeUpdate(hash_prefix_map_, additions_map);
+
+ // TODO(vakh): Generate the updated ListUpdateResponse to write to disk.
+ } else if (response->response_type() == ListUpdateResponse::FULL_UPDATE) {
StoreWriteResult result = new_store->WriteToDisk(std::move(response));
RecordStoreWriteResult(result);
+ } else {
+ NOTREACHED() << "Unexpected response type: " << response->response_type();
}
// new_store is done updating, pass it to the callback.
@@ -101,6 +126,171 @@ void V4Store::ApplyUpdate(
FROM_HERE, base::Bind(callback, base::Passed(&new_store)));
}
+// static
+void V4Store::UpdateHashPrefixMapFromAdditions(
+ const ::google::protobuf::RepeatedPtrField<ThreatEntrySet>& additions,
+ HashPrefixMap* additions_map) {
+ for (const auto& addition : additions) {
+ // TODO(vakh): Allow other compression types.
+ // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567
+ DCHECK_EQ(RAW, addition.compression_type());
+
+ DCHECK(addition.has_raw_hashes());
+ DCHECK(addition.raw_hashes().has_raw_hashes());
+
+ PrefixSize prefix_size = addition.raw_hashes().prefix_size();
+ RecordMergeResult(AddUnlumpedHashes(
+ prefix_size, addition.raw_hashes().raw_hashes(), additions_map));
+ }
+}
+
+// static
+MergeResult V4Store::AddUnlumpedHashes(PrefixSize prefix_size,
+ const std::string& lumped_hashes,
+ HashPrefixMap* additions_map) {
+ if (prefix_size < kMinHashPrefixLength) {
+ NOTREACHED();
+ return PREFIX_SIZE_TOO_SMALL_FAILURE;
+ } else if (prefix_size > kMaxHashPrefixLength) {
+ NOTREACHED();
+ return PREFIX_SIZE_TOO_LARGE_FAILURE;
+ } else {
+ if (lumped_hashes.size() % prefix_size != 0) {
+ return ADDITIONS_SIZE_UNEXPECTED_FAILURE;
+ } else {
+ (*additions_map)[prefix_size] = lumped_hashes;
+ }
+ }
+ return MERGE_SUCCESS;
+}
+
+// static
+bool V4Store::GetNextSmallestPrefixSize(const HashPrefixMap& hash_prefix_map,
+ const CounterMap& counter_map,
+ PrefixSize* smallest_prefix_size) {
+ HashPrefix smallest_prefix;
+ for (const auto& counter_pair : counter_map) {
+ PrefixSize prefix_size = counter_pair.first;
+ size_t index = counter_pair.second;
+ size_t sized_index = prefix_size * index;
+
+ const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);
+ if (sized_index < hash_prefixes.size()) {
+ HashPrefix this_prefix = hash_prefixes.substr(sized_index, prefix_size);
+ if (smallest_prefix.empty() || smallest_prefix > this_prefix) {
+ smallest_prefix = this_prefix;
+ *smallest_prefix_size = prefix_size;
+ }
+ }
+ }
+ return !smallest_prefix.empty();
+}
+
+// static
+void V4Store::InitializeCounterMap(const HashPrefixMap& hash_prefix_map,
+ CounterMap* counter_map) {
+ for (const auto& map_pair : hash_prefix_map) {
+ (*counter_map)[map_pair.first] = 0;
+ }
+}
+
+// static
+HashPrefix V4Store::GetNextUnmergedPrefixForSize(
+ PrefixSize prefix_size,
+ const HashPrefixMap& hash_prefix_map,
+ const CounterMap& counter_map) {
+ HashPrefixes hash_prefixes = hash_prefix_map.at(prefix_size);
+ size_t index_within_list = counter_map.at(prefix_size);
+ size_t sized_index = prefix_size * index_within_list;
+ return hash_prefixes.substr(sized_index, prefix_size);
+}
+
+// static
+void V4Store::ReserveSpaceInPrefixMap(const HashPrefixMap& other_prefixes_map,
+ HashPrefixMap* prefix_map_to_update) {
+ for (const auto& pair : other_prefixes_map) {
+ PrefixSize prefix_size = pair.first;
+ size_t prefix_length_to_add = pair.second.length();
+
+ HashPrefixes existing_prefixes = (*prefix_map_to_update)[prefix_size];
+ size_t existing_capacity = existing_prefixes.capacity();
+
+ (*prefix_map_to_update)[prefix_size].reserve(existing_capacity +
+ prefix_length_to_add);
+ }
+}
+
+void V4Store::MergeUpdate(const HashPrefixMap& old_prefixes_map,
Nathan Parker 2016/07/12 20:35:06 Nice, this function is easier to read!
vakh (use Gerrit instead) 2016/07/12 21:57:03 Acknowledged.
+ const HashPrefixMap& additions_map) {
+ ReserveSpaceInPrefixMap(old_prefixes_map, &hash_prefix_map_);
+ ReserveSpaceInPrefixMap(additions_map, &hash_prefix_map_);
+
+ PrefixSize next_size_for_old;
+ CounterMap old_counter_map;
+ InitializeCounterMap(old_prefixes_map, &old_counter_map);
+ bool old_has_unmerged = GetNextSmallestPrefixSize(
+ old_prefixes_map, old_counter_map, &next_size_for_old);
+
+ PrefixSize next_size_for_additions;
+ CounterMap additions_counter_map;
+ InitializeCounterMap(additions_map, &additions_counter_map);
+ bool additions_has_unmerged = GetNextSmallestPrefixSize(
+ additions_map, additions_counter_map, &next_size_for_additions);
+
+ // Classical merge sort.
+ // The two constructs to merge are maps: old_prefixes_map, additions_map.
+
+ // At least one of the maps still has elements that need to be merged into the
+ // new store.
+ while (old_has_unmerged || additions_has_unmerged) {
+ HashPrefix next_smallest_prefix_old, next_smallest_prefix_additions;
Nathan Parker 2016/07/12 20:35:06 I _think_ it's more efficient if you define these
vakh (use Gerrit instead) 2016/07/12 21:57:03 Done
+ // Both maps still have elements that need to be merged into the new store.
+ if (old_has_unmerged) {
+ // Get the lexographically smallest hash prefix from the old store.
+ next_smallest_prefix_old = GetNextUnmergedPrefixForSize(
+ next_size_for_old, old_prefixes_map, old_counter_map);
+ }
+
+ if (additions_has_unmerged) {
+ // Get the lexographically smallest hash prefix from the additions in the
+ // latest update from the server.
+ next_smallest_prefix_additions = GetNextUnmergedPrefixForSize(
+ next_size_for_additions, additions_map, additions_counter_map);
+ }
+
+ bool pick_from_old =
+ old_has_unmerged &&
+ (!additions_has_unmerged ||
+ (next_smallest_prefix_old < next_smallest_prefix_additions));
+
+ if (pick_from_old) {
+ hash_prefix_map_[next_size_for_old] += next_smallest_prefix_old;
+
+ // Update the counter map, which means that we have merged one hash
+ // prefix of size |next_size_for_old| from the old store.
+ old_counter_map[next_size_for_old]++;
+
+ // Find the next smallest unmerged element in the old store's map.
+ old_has_unmerged = GetNextSmallestPrefixSize(
+ old_prefixes_map, old_counter_map, &next_size_for_old);
+ } else {
+ // This means that the smallest unmerged hash prefix in the update was
+ // smaller than that in the old store, so add that hash prefix (from the
+ // update) into the new store.
+ hash_prefix_map_[next_size_for_additions] +=
+ next_smallest_prefix_additions;
+
+ // Update the counter map, which means that we have merged one hash
+ // prefix of size |next_size_for_additions| from the update.
+ additions_counter_map[next_size_for_additions]++;
+
+ // Find the next smallest unmerged element in the additions map.
+ additions_has_unmerged = GetNextSmallestPrefixSize(
+ additions_map, additions_counter_map, &next_size_for_additions);
+ }
+ }
+}
+
StoreReadResult V4Store::ReadFromDisk() {
DCHECK(task_runner_->RunsTasksOnCurrentThread());

Powered by Google App Engine
This is Rietveld 408576698