Chromium Code Reviews

Index: components/safe_browsing_db/v4_store.cc
diff --git a/components/safe_browsing_db/v4_store.cc b/components/safe_browsing_db/v4_store.cc
index 2ef5c039683ef2bb14780aa67f8b69f066cc1470..b12279431da2c6da8ef0bfd97d08021f0f75d9a7 100644
--- a/components/safe_browsing_db/v4_store.cc
+++ b/components/safe_browsing_db/v4_store.cc
@@ -18,6 +18,13 @@ const uint32_t kFileMagic = 0x600D71FE;
 const uint32_t kFileVersion = 9;
+// The minimum expected size of a hash-prefix.

Nathan Parker (2016/07/12 20:35:05): ...size in bytes
vakh (use Gerrit instead) (2016/07/12 21:57:02): Done.

+const uint32_t kMinHashPrefixLength = 4;
+
+// The maximum expected size of a hash-prefix. This represents a full SHA256
+// hash.
+const uint32_t kMaxHashPrefixLength = 32;
+
 void RecordStoreReadResult(StoreReadResult result) {
   UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreReadResult", result,
                             STORE_READ_RESULT_MAX);
@@ -28,6 +35,11 @@ void RecordStoreWriteResult(StoreWriteResult result) {
                             STORE_WRITE_RESULT_MAX);
 }
+void RecordMergeResult(MergeResult result) {
+  UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4MergeResult", result,
+                            MERGE_RESULT_MAX);
+}
+
 // Returns the name of the temporary file used to buffer data for
 // |filename|. Exported for unit tests.
 const base::FilePath TemporaryFileForFilename(const base::FilePath& filename) {
@@ -83,17 +95,30 @@ void V4Store::ApplyUpdate(
     UpdatedStoreReadyCallback callback) {
   std::unique_ptr<V4Store> new_store(
       new V4Store(this->task_runner_, this->store_path_));
-
   new_store->state_ = response->new_client_state();
   // TODO(vakh):
   // 1. Merge the old store and the new update in new_store.
   // 2. Create a ListUpdateResponse containing RICE encoded hash-prefixes and
   //    response_type as FULL_UPDATE, and write that to disk.
   // 3. Remove this if condition after completing 1. and 2.
-  if (response->has_response_type() &&
-      response->response_type() == ListUpdateResponse::FULL_UPDATE) {
+  if (response->response_type() == ListUpdateResponse::PARTIAL_UPDATE) {
+    for (const auto& removal : response->removals()) {
+      // TODO(vakh): Allow other compression types.
+      // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567
+      DCHECK_EQ(RAW, removal.compression_type());
+    }
+
+    HashPrefixMap additions_map;
+    UpdateHashPrefixMapFromAdditions(response->additions(), &additions_map);
+
+    new_store->MergeUpdate(hash_prefix_map_, additions_map);
+
+    // TODO(vakh): Generate the updated ListUpdateResponse to write to disk.
+  } else if (response->response_type() == ListUpdateResponse::FULL_UPDATE) {
     StoreWriteResult result = new_store->WriteToDisk(std::move(response));
     RecordStoreWriteResult(result);
+  } else {
+    NOTREACHED() << "Unexpected response type: " << response->response_type();
   }
   // new_store is done updating, pass it to the callback.
@@ -101,6 +126,187 @@ void V4Store::ApplyUpdate(
       FROM_HERE, base::Bind(callback, base::Passed(&new_store)));
 }
+// static
+void V4Store::UpdateHashPrefixMapFromAdditions(
+    const ::google::protobuf::RepeatedPtrField<ThreatEntrySet>& additions,
+    HashPrefixMap* additions_map) {
+  for (const auto& addition : additions) {
+    // TODO(vakh): Allow other compression types.
+    // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567
+    DCHECK_EQ(RAW, addition.compression_type());
+
+    DCHECK(addition.has_raw_hashes());
+    DCHECK(addition.raw_hashes().has_raw_hashes());
+
+    PrefixSize prefix_size = addition.raw_hashes().prefix_size();
+    RecordMergeResult(AddUnlumpedHashes(
+        prefix_size, addition.raw_hashes().raw_hashes(), additions_map));
+  }
+}
+
+// static
+MergeResult V4Store::AddUnlumpedHashes(PrefixSize prefix_size,
+                                       const std::string& lumped_hashes,
+                                       HashPrefixMap* additions_map) {
+  if (prefix_size < kMinHashPrefixLength) {
+    NOTREACHED();
+    return PREFIX_SIZE_TOO_SMALL_FAILURE;

Nathan Parker (2016/07/12 20:35:06): You don't need any of the else's here.
vakh (use Gerrit instead) (2016/07/12 21:57:02): Done.

+  } else if (prefix_size > kMaxHashPrefixLength) {
+    NOTREACHED();
+    return PREFIX_SIZE_TOO_LARGE_FAILURE;
+  } else {
+    if (lumped_hashes.size() % prefix_size != 0) {
+      return ADDITIONS_SIZE_UNEXPECTED_FAILURE;
+    } else {
+      (*additions_map)[prefix_size] = lumped_hashes;

Nathan Parker (2016/07/12 20:35:06): To consider, or add a todo: This copies the string
Nathan Parker (2016/07/12 22:47:12): (Missed this comment) Though, I'm not sure how to
vakh (use Gerrit instead) (2016/07/13 00:29:17): Done.

+    }
+  }
+  return MERGE_SUCCESS;
+}
+
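For reference, the early-return shape Nathan asks for above could look roughly like this (same names as in this CL; a sketch, not the final patch set):

MergeResult V4Store::AddUnlumpedHashes(PrefixSize prefix_size,
                                       const std::string& lumped_hashes,
                                       HashPrefixMap* additions_map) {
  if (prefix_size < kMinHashPrefixLength) {
    NOTREACHED();
    return PREFIX_SIZE_TOO_SMALL_FAILURE;
  }
  if (prefix_size > kMaxHashPrefixLength) {
    NOTREACHED();
    return PREFIX_SIZE_TOO_LARGE_FAILURE;
  }
  if (lumped_hashes.size() % prefix_size != 0) {
    return ADDITIONS_SIZE_UNEXPECTED_FAILURE;
  }
  // Still copies |lumped_hashes| into the map; see the thread above about
  // avoiding that copy (e.g. by handing over ownership of the proto string).
  (*additions_map)[prefix_size] = lumped_hashes;
  return MERGE_SUCCESS;
}
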
+// static
+bool V4Store::GetNextSmallestPrefixSize(const HashPrefixMap& hash_prefix_map,
+                                        const CounterMap& counter_map,
+                                        PrefixSize* smallest_prefix_size) {
+  HashPrefix smallest_prefix;

Nathan Parker (2016/07/12 20:35:06): You could use a StringPiece here and within the loop
vakh (use Gerrit instead) (2016/07/12 21:57:02): That would break the consistency of hash prefixes
Nathan Parker (2016/07/12 22:47:12): They are small, but there is a dynamic allocation
vakh (use Gerrit instead) (2016/07/13 00:29:17): Done.

+  for (const auto& counter_pair : counter_map) {
+    PrefixSize prefix_size = counter_pair.first;
+    size_t index = counter_pair.second;
+    size_t sized_index = prefix_size * index;
+
+    const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);
+    if (sized_index < hash_prefixes.size()) {
+      HashPrefix this_prefix = hash_prefixes.substr(sized_index, prefix_size);
+      if (smallest_prefix.empty() || smallest_prefix > this_prefix) {
+        smallest_prefix = this_prefix;
+        *smallest_prefix_size = prefix_size;
+      }
+    }

Nathan Parker (2016/07/12 20:35:06): else dcheck
vakh (use Gerrit instead) (2016/07/12 21:57:02): That's a valid case. For instance, when one map has

+  }
+  return !smallest_prefix.empty();
+}
+
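A sketch of the StringPiece idea from the thread above, assuming base::StringPiece (base/strings/string_piece.h) and that HashPrefixes is a std::string, as its use of substr()/capacity() in this CL suggests; it compares candidates in place instead of allocating a new string for every substr() call:

bool V4Store::GetNextSmallestPrefixSize(const HashPrefixMap& hash_prefix_map,
                                        const CounterMap& counter_map,
                                        PrefixSize* smallest_prefix_size) {
  base::StringPiece smallest_prefix;
  for (const auto& counter_pair : counter_map) {
    PrefixSize prefix_size = counter_pair.first;
    size_t sized_index = prefix_size * counter_pair.second;

    const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);
    if (sized_index < hash_prefixes.size()) {
      // Points into |hash_prefixes|; no per-iteration heap allocation.
      base::StringPiece this_prefix(hash_prefixes.data() + sized_index,
                                    prefix_size);
      if (smallest_prefix.empty() || this_prefix < smallest_prefix) {
        smallest_prefix = this_prefix;
        *smallest_prefix_size = prefix_size;
      }
    }
  }
  return !smallest_prefix.empty();
}
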
+// static
+void V4Store::InitializeCounterMap(const HashPrefixMap& hash_prefix_map,
+                                   CounterMap* counter_map) {
+  for (const auto& map_pair : hash_prefix_map) {
+    (*counter_map)[map_pair.first] = 0;
+  }
+}
+
+// static
+HashPrefix V4Store::GetNextUnmergedPrefixForSize(
+    PrefixSize prefix_size,
+    const HashPrefixMap& hash_prefix_map,
+    const CounterMap& counter_map) {
+  HashPrefixes hash_prefixes = hash_prefix_map.at(prefix_size);

Nathan Parker (2016/07/12 20:35:06): This copies the whole string. You could use a const ref.
vakh (use Gerrit instead) (2016/07/12 21:57:03): Done.

+  size_t index_within_list = counter_map.at(prefix_size);
+  size_t sized_index = prefix_size * index_within_list;
+  return hash_prefixes.substr(sized_index, prefix_size);
+}
+
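Per the comment above, binding the map entry by const reference avoids copying the whole concatenated string; a sketch (not the final patch set):

HashPrefix V4Store::GetNextUnmergedPrefixForSize(
    PrefixSize prefix_size,
    const HashPrefixMap& hash_prefix_map,
    const CounterMap& counter_map) {
  const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);
  size_t sized_index = prefix_size * counter_map.at(prefix_size);
  // substr() still allocates the returned prefix, but the stored string is
  // no longer copied.
  return hash_prefixes.substr(sized_index, prefix_size);
}
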
+// static
+void V4Store::ReserveSpaceInPrefixMap(const HashPrefixMap& other_prefixes_map,
+                                      HashPrefixMap* prefix_map_to_update) {
+  for (const auto& pair : other_prefixes_map) {

Nathan Parker (2016/07/12 20:35:06): Add a comment about how this gets close to ideal,
vakh (use Gerrit instead) (2016/07/12 21:57:02): Updated the comment in header file (consistent with

+    PrefixSize prefix_size = pair.first;
+    size_t prefix_length_to_add = pair.second.length();
+
+    HashPrefixes existing_prefixes = (*prefix_map_to_update)[prefix_size];

Nathan Parker (2016/07/12 20:35:06): const HashPrefixes&. Otherwise it makes a copy.
vakh (use Gerrit instead) (2016/07/12 21:57:02): Done.

+    size_t existing_capacity = existing_prefixes.capacity();
+
+    (*prefix_map_to_update)[prefix_size].reserve(existing_capacity +
+                                                 prefix_length_to_add);
+  }
+}
+
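A sketch folding in both comments above: bind the existing entry by reference instead of copying it, and note why the reservation is only approximately ideal (it is based on capacity(), and MergeUpdate calls this once per source map, so the second call already sees the space reserved by the first):

void V4Store::ReserveSpaceInPrefixMap(const HashPrefixMap& other_prefixes_map,
                                      HashPrefixMap* prefix_map_to_update) {
  for (const auto& pair : other_prefixes_map) {
    PrefixSize prefix_size = pair.first;
    size_t prefix_length_to_add = pair.second.length();

    // Reference, not a copy, of the (possibly empty) destination string.
    HashPrefixes& existing_prefixes = (*prefix_map_to_update)[prefix_size];
    existing_prefixes.reserve(existing_prefixes.capacity() +
                              prefix_length_to_add);
  }
}
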
+void V4Store::MergeUpdate(const HashPrefixMap& old_prefixes_map,
+                          const HashPrefixMap& additions_map) {
+  ReserveSpaceInPrefixMap(old_prefixes_map, &hash_prefix_map_);

Nathan Parker (2016/07/12 20:35:06): Is the assumption that hash_prefix_map_ is empty here?
vakh (use Gerrit instead) (2016/07/12 21:57:02): Good map. However, for code reuse purposes, I think

+  ReserveSpaceInPrefixMap(additions_map, &hash_prefix_map_);
+
+  PrefixSize next_size_for_old;
+  CounterMap old_counter_map;
+  InitializeCounterMap(old_prefixes_map, &old_counter_map);
+  bool found_in_old = GetNextSmallestPrefixSize(
+      old_prefixes_map, old_counter_map, &next_size_for_old);
+
+  PrefixSize next_size_for_additions;
+  CounterMap additions_counter_map;
+  InitializeCounterMap(additions_map, &additions_counter_map);
+  bool found_in_additions = GetNextSmallestPrefixSize(
+      additions_map, additions_counter_map, &next_size_for_additions);
+
+  // Classical merge sort.
+  // The two constructs to merge are maps: old_prefixes_map, additions_map.
+
+  // At least one of the maps still has elements that need to be merged into
+  // the new store.
+  while (found_in_old || found_in_additions) {
+    // Both maps still have elements that need to be merged into the new store.
+    if (found_in_old && found_in_additions) {
+      // Get the lexicographically smallest hash prefix from the old store.
+      HashPrefix next_smallest_prefix_old = GetNextUnmergedPrefixForSize(
+          next_size_for_old, old_prefixes_map, old_counter_map);
+      // Get the lexicographically smallest hash prefix from the additions in
+      // the latest update from the server.
+      HashPrefix next_smallest_prefix_additions = GetNextUnmergedPrefixForSize(
+          next_size_for_additions, additions_map, additions_counter_map);
+
+      // If the smallest unmerged hash prefix in the old store is smaller than
+      // that in the update, add that to the new store.
+      if (next_smallest_prefix_old < next_smallest_prefix_additions) {
+        hash_prefix_map_[next_size_for_old] += next_smallest_prefix_old;
+
+        // Update the counter map, which means that we have merged one hash
+        // prefix of size |next_size_for_old| from the old store.
+        old_counter_map[next_size_for_old]++;
+
+        // Find the next smallest unmerged element in the old store's map.
+        found_in_old = GetNextSmallestPrefixSize(
+            old_prefixes_map, old_counter_map, &next_size_for_old);
+      } else {
+        // This means that the smallest unmerged hash prefix in the update was
+        // smaller than that in the old store, so add that hash prefix (from
+        // the update) into the new store.
+        hash_prefix_map_[next_size_for_additions] +=
+            next_smallest_prefix_additions;
+
+        // Update the counter map, which means that we have merged one hash
+        // prefix of size |next_size_for_additions| from the update.
+        additions_counter_map[next_size_for_additions]++;
+
+        // Find the next smallest unmerged element in the additions map.
+        found_in_additions = GetNextSmallestPrefixSize(
+            additions_map, additions_counter_map, &next_size_for_additions);
+      }
+    } else {
+      // At least one of the maps has become empty.
+      if (!found_in_old) {
+        // The map for the old store has been completely merged. Now only merge
+        // the hash prefixes from the additions map.
+        HashPrefix next_smallest_prefix_additions =
+            GetNextUnmergedPrefixForSize(next_size_for_additions, additions_map,
+                                         additions_counter_map);
+        hash_prefix_map_[next_size_for_additions] +=
+            next_smallest_prefix_additions;
+        additions_counter_map[next_size_for_additions]++;
+        found_in_additions = GetNextSmallestPrefixSize(
+            additions_map, additions_counter_map, &next_size_for_additions);
+      } else {
+        // The additions map has been completely merged. Now only merge
+        // the hash prefixes from the old store map.
+        HashPrefix next_smallest_prefix_old = GetNextUnmergedPrefixForSize(
+            next_size_for_old, old_prefixes_map, old_counter_map);
+        hash_prefix_map_[next_size_for_old] += next_smallest_prefix_old;
+        old_counter_map[next_size_for_old]++;
+        found_in_old = GetNextSmallestPrefixSize(
+            old_prefixes_map, old_counter_map, &next_size_for_old);
+      }
+    }
+  }
+}
+
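To make the merge concrete, a small hypothetical example (values invented for illustration, and assuming each per-size string is already lexicographically sorted, which the merge relies on):

// old_prefixes_map : {4: "abcdefgh"}            -> prefixes "abcd", "efgh"
// additions_map    : {4: "cdef", 5: "01234"}    -> prefixes "cdef", "01234"
//
// The loop always appends the globally smallest unmerged prefix, so the new
// store ends up with:
//
// hash_prefix_map_ : {4: "abcdcdefefgh", 5: "01234"}
//
// i.e. each per-size string in the merged store remains sorted.
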
 StoreReadResult V4Store::ReadFromDisk() {
   DCHECK(task_runner_->RunsTasksOnCurrentThread());