Chromium Code Reviews| Index: components/safe_browsing_db/v4_store.cc |
| diff --git a/components/safe_browsing_db/v4_store.cc b/components/safe_browsing_db/v4_store.cc |
| index 0920f2bf3e9cdac00c4c1a30750ef14e37e07141..95a53fefeb3f31230702079565a4e79c9c2161bb 100644 |
| --- a/components/safe_browsing_db/v4_store.cc |
| +++ b/components/safe_browsing_db/v4_store.cc |
| @@ -18,6 +18,13 @@ const uint32_t kFileMagic = 0x600D71FE; |
| const uint32_t kFileVersion = 9; |
| +// The minimum expected size, in bytes, of a hash-prefix. |
| +const uint32_t kMinHashPrefixLength = 4; |
|
Nathan Parker
2016/07/11 18:09:58
Nit: Would it be easier to read if the code didn't
vakh (use Gerrit instead)
2016/07/12 07:34:19
Yes, it would be slightly more compact but at the
|
| + |
| +// The maximum expected size, in bytes, of a hash-prefix. This represents a full |
| +// SHA256 hash. |
| +const uint32_t kMaxHashPrefixLength = 32; |
| + |
| void RecordStoreReadResult(StoreReadResult result) { |
| UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreReadResult", result, |
| STORE_READ_RESULT_MAX); |
| @@ -28,12 +35,27 @@ void RecordStoreWriteResult(StoreWriteResult result) { |
| STORE_WRITE_RESULT_MAX); |
| } |
| +// Logs |result| to the "SafeBrowsing.V4MergeUpdateResult" UMA enumeration |
| +// histogram, passing MERGE_UPDATE_RESULT_MAX as the enumeration's boundary. |
| +void RecordMergeUpdateResult(MergeUpdateResult result) { |
| + UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4MergeUpdateResult", result, |
| + MERGE_UPDATE_RESULT_MAX); |
| +} |
| + |
| // Builds the path of the temporary file that buffers data destined for |
| // |filename|: the value of |filename| with "_new" appended. Exported for unit |
| // tests. |
| const base::FilePath TemporaryFileForFilename(const base::FilePath& filename) { |
|   const auto temp_path_value = filename.value() + FILE_PATH_LITERAL("_new"); |
|   return base::FilePath(temp_path_value); |
| } |
| +// Returns true if the byte string |first| (|first_size| bytes long) is |
| +// lexicographically smaller than |second| (|second_size| bytes long). If one |
| +// string is a proper prefix of the other, the shorter one is the smaller. |
| +// |
| +// Bytes are compared as unsigned values. Plain |char| may be signed, in which |
| +// case a default comparison would order bytes >= 0x80 before bytes <= 0x7F and |
| +// disagree with the byte-wise (memcmp-style) order of the hash lists. |
| +// |
| +// Neither pointer is checked for null; callers must pass valid buffers. |
| +bool CompareHashPrefixes(const char* first, |
| +                         const PrefixSize first_size, |
| +                         const char* second, |
| +                         const PrefixSize second_size) { |
| +  return std::lexicographical_compare( |
| +      first, first + first_size, second, second + second_size, |
| +      [](char lhs, char rhs) { |
| +        return static_cast<unsigned char>(lhs) < |
| +               static_cast<unsigned char>(rhs); |
| +      }); |
| +} |
| + |
| + |
| } // namespace |
| std::ostream& operator<<(std::ostream& os, const V4Store& store) { |
| @@ -83,15 +105,26 @@ void V4Store::ApplyUpdate( |
| UpdatedStoreReadyCallback callback) { |
| std::unique_ptr<V4Store> new_store( |
| new V4Store(this->task_runner_, this->store_path_)); |
| - |
| new_store->state_ = response->new_client_state(); |
| // TODO(vakh): |
| // 1. Merge the old store and the new update in new_store. |
| // 2. Create a ListUpdateResponse containing RICE encoded hash-prefixes and |
| // response_type as FULL_UPDATE, and write that to disk. |
| // 3. Remove this if condition after completing 1. and 2. |
| - if (response->has_response_type() && |
| - response->response_type() == ListUpdateResponse::FULL_UPDATE) { |
| + if (response->response_type() == ListUpdateResponse::PARTIAL_UPDATE) { |
| + for (const auto& removal : response->removals()) { |
| + // TODO(vakh): Allow other compression types. |
| + // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567 |
| + DCHECK_EQ(RAW, removal.compression_type()); |
| + } |
| + |
| + HashPrefixMap additions_map = |
| + GetHashPrefixMapFromAdditions(response->additions()); |
| + |
| + new_store->MergeUpdate(hash_prefix_map_, additions_map); |
| + |
| + // TODO(vakh): Generate the updated ListUpdateResponse to write to disk. |
| + } else if (response->response_type() == ListUpdateResponse::FULL_UPDATE) { |
| StoreWriteResult result = new_store->WriteToDisk(std::move(response)); |
| RecordStoreWriteResult(result); |
| } |
|
Nathan Parker
2016/07/11 18:09:58
todo: Do something reasonable if response_type is
vakh (use Gerrit instead)
2016/07/12 07:34:18
Done.
|
| @@ -101,6 +134,184 @@ void V4Store::ApplyUpdate( |
| FROM_HERE, base::Bind(callback, base::Passed(&new_store))); |
| } |
| +// Builds a HashPrefixMap from the |additions| of an update response. For each |
| +// addition, its raw_hashes blob is split by AddUnlumpedHashes() using that |
| +// addition's prefix_size and stored in the returned map. A result other than |
| +// MERGE_SUCCESS is recorded via RecordMergeUpdateResult(); processing then |
| +// continues with the next addition. |
| +// static |
| +HashPrefixMap V4Store::GetHashPrefixMapFromAdditions( |
| + const ::google::protobuf::RepeatedPtrField<ThreatEntrySet>& additions) { |
| + HashPrefixMap additions_map; |
| + for (const auto& addition : additions) { |
| + // TODO(vakh): Allow other compression types. |
| + // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567 |
| + DCHECK_EQ(RAW, addition.compression_type()); |
| + |
| + DCHECK(addition.has_raw_hashes()); |
| + DCHECK(addition.raw_hashes().has_raw_hashes()); |
| + |
| + // NOTE(review): the bounds on |prefix_size| are only asserted with DCHECK; |
| + // presumably these are compiled out of release builds, so an out-of-range |
| + // size would still reach AddUnlumpedHashes() there -- confirm. |
| + PrefixSize prefix_size = addition.raw_hashes().prefix_size(); |
| + DCHECK(kMinHashPrefixLength <= prefix_size); |
|
Nathan Parker
2016/07/11 18:09:58
todo: skip additions with invalid metadata (i.e. m
vakh (use Gerrit instead)
2016/07/12 07:34:19
Done.
|
| + DCHECK(kMaxHashPrefixLength >= prefix_size); |
| + |
| + MergeUpdateResult result = AddUnlumpedHashes( |
| + prefix_size, addition.raw_hashes().raw_hashes(), &additions_map); |
| + // Only failures are recorded here; a success is not logged. |
| + if (result != MERGE_SUCCESS) { |
| + RecordMergeUpdateResult(result); |
|
Nathan Parker
2016/07/11 18:09:58
Might as well record successes as well, unless tha
vakh (use Gerrit instead)
2016/07/12 07:34:18
Done.
|
| + } |
| + } |
| + |
| + return additions_map; |
| +} |
| + |
| +// Splits |lumped_hashes|, a concatenation of equally sized hash prefixes, into |
| +// individual prefixes of |prefix_size| bytes each and appends them, in their |
| +// original order, to the list stored under key |prefix_size| in |prefix_map|. |
| +// Each stored prefix lives in a buffer of |prefix_size| + 1 bytes whose final |
| +// byte is '\0'. |
| +// |
| +// Returns ADDITIONS_SIZE_UNEXPECTED_FAILURE, leaving |prefix_map| untouched, if |
| +// |prefix_size| is 0 or the length of |lumped_hashes| is not a multiple of |
| +// |prefix_size|. Returns MERGE_SUCCESS otherwise. |
| +// static |
| +MergeUpdateResult V4Store::AddUnlumpedHashes(PrefixSize prefix_size, |
| +                                             const std::string& lumped_hashes, |
| +                                             HashPrefixMap* prefix_map) { |
| +  // A zero |prefix_size| must be rejected before the modulo below, which would |
| +  // otherwise divide by zero. |
| +  if (prefix_size == 0 || lumped_hashes.size() % prefix_size != 0) { |
| +    return ADDITIONS_SIZE_UNEXPECTED_FAILURE; |
| +  } |
| + |
| +  for (size_t offset = 0; offset < lumped_hashes.size(); |
| +       offset += prefix_size) { |
| +    HashPrefix prefix(new char[prefix_size + 1]); |
| +    // Copy straight out of |lumped_hashes| instead of going through a temporary |
| +    // string, then terminate the buffer explicitly. |
| +    memcpy(prefix.get(), lumped_hashes.data() + offset, prefix_size); |
| +    prefix.get()[prefix_size] = '\0'; |
| +    (*prefix_map)[prefix_size].push_back(std::move(prefix)); |
| +  } |
| + |
| +  return MERGE_SUCCESS; |
| +} |
| + |
| +// Finds which prefix size currently holds the lexicographically smallest |
| +// not-yet-consumed hash prefix. For every (size, index) pair in |counter_map|, |
| +// the prefix at |index| in the list for that size in |hash_prefix_map| is a |
| +// candidate; sizes whose index has reached the end of their list are skipped. |
| +// On success, writes the winning size to |smallest_prefix_size| and returns |
| +// true. Returns false, without writing to |smallest_prefix_size|, when no size |
| +// has a remaining prefix. |
| +// static |
| +bool V4Store::GetNextSmallestPrefixSize(const HashPrefixMap& hash_prefix_map, |
| + const CounterMap& counter_map, |
| + PrefixSize* smallest_prefix_size) { |
| + bool found = false; |
| + char* smallest_prefix = nullptr; |
| + |
| + for (const auto& counter_pair : counter_map) { |
| + PrefixSize prefix_size = counter_pair.first; |
| + size_t index = counter_pair.second; |
| + |
| + // at() is used for the lookup, so every size in |counter_map| is expected to |
| + // also be a key of |hash_prefix_map|. |
| + const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size); |
| + if (index < hash_prefixes.size()) { |
| + const HashPrefix& this_prefix = hash_prefixes[index]; |
| + // |!found| short-circuits, so |smallest_prefix| is never compared while it |
| + // is still null. The negated comparison also replaces the current best on |
| + // a tie, so among equal candidates the size visited last is reported. |
| + if (!found || |
|
Nathan Parker
2016/07/11 18:09:58
Replace !found with !smallest_prefix, and then you
vakh (use Gerrit instead)
2016/07/12 07:34:18
Done.
|
| + !CompareHashPrefixes(smallest_prefix, *smallest_prefix_size, |
| + this_prefix.get(), prefix_size)) { |
| + found = true; |
| + smallest_prefix = this_prefix.get(); |
| + *smallest_prefix_size = prefix_size; |
| + } |
| + } |
| + } |
| + return found; |
| +} |
| + |
| +// Returns a CounterMap holding one entry for each prefix size that is a key of |
| +// |hash_prefix_map|, with every counter starting at 0. |
| +// static |
| +CounterMap V4Store::GetInitializedCounterMap( |
| +    const HashPrefixMap& hash_prefix_map) { |
| +  CounterMap zeroed_counters; |
| +  for (auto entry = hash_prefix_map.begin(); entry != hash_prefix_map.end(); |
| +       ++entry) { |
| +    const auto& size_key = entry->first; |
| +    zeroed_counters[size_key] = 0; |
| +  } |
| +  return zeroed_counters; |
| +} |
| + |
| +// Returns a mutable reference to the hash prefix stored in |hash_prefix_map| |
| +// under |prefix_size|, at the index that |counter_map| holds for that size. |
| +// Both lookups use at(), so |prefix_size| is expected to be a key of both maps. |
| +// The index itself is not range-checked here. |
| +// static |
| +HashPrefix& V4Store::GetNextUnmergedPrefixForSize( |
| + PrefixSize prefix_size, |
| + HashPrefixMap& hash_prefix_map, |
| + const CounterMap& counter_map) { |
| + HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size); |
|
Nathan Parker
2016/07/11 18:09:58
nit (Feel free to ignore): Might be a bit more rea
vakh (use Gerrit instead)
2016/07/12 07:34:18
This code has changed so NA.
|
| + size_t index_within_list = counter_map.at(prefix_size); |
| + HashPrefix& next_unmerged_prefix = hash_prefixes[index_within_list]; |
| + return next_unmerged_prefix; |
| +} |
| + |
| +// Merges the hash prefixes of |old_prefixes_map| and |additions_map| into this |
| +// store's |hash_prefix_map_|. On every step the smallest remaining prefix of |
| +// each input (as chosen by GetNextSmallestPrefixSize()) is compared with |
| +// CompareHashPrefixes() and the smaller one is appended to the list for its |
| +// own prefix size. Prefixes are moved, not copied, so the consumed entries of |
| +// both input maps are left in a moved-from state. Equal prefixes present in |
| +// both inputs are both kept; no de-duplication is done here. |
| +void V4Store::MergeUpdate(HashPrefixMap& old_prefixes_map, |
| + HashPrefixMap& additions_map) { |
| + // |next_size_for_old| is only meaningful while |found_in_old| is true. |
| + PrefixSize next_size_for_old; |
| + CounterMap old_counter_map = GetInitializedCounterMap(old_prefixes_map); |
| + bool found_in_old = GetNextSmallestPrefixSize( |
| + old_prefixes_map, old_counter_map, &next_size_for_old); |
| + |
| + // |next_size_for_additions| is only meaningful while |found_in_additions| is |
| + // true. |
| + PrefixSize next_size_for_additions; |
| + CounterMap additions_counter_map = GetInitializedCounterMap(additions_map); |
| + bool found_in_additions = GetNextSmallestPrefixSize( |
| + additions_map, additions_counter_map, &next_size_for_additions); |
| + |
| + // Classical two-way merge (the merge step of merge sort). |
| + // The two constructs to merge are maps: old_prefixes_map, additions_map. |
| + |
| + // At least one of the maps still has elements that need to be merged into the |
| + // new store. |
| + while (found_in_old || found_in_additions) { |
| + // Both maps still have elements that need to be merged into the new store. |
| + if (found_in_old && found_in_additions) { |
|
Nathan Parker
2016/07/11 18:09:58
To remove repeated code here, could you do
if (fo
vakh (use Gerrit instead)
2016/07/12 21:57:02
Done.
|
| + // Get the lexicographically smallest hash prefix from the old store. |
| + HashPrefix& next_smallest_prefix_old = GetNextUnmergedPrefixForSize( |
| + next_size_for_old, old_prefixes_map, old_counter_map); |
| + // Get the lexicographically smallest hash prefix from the additions in the |
| + // latest update from the server. |
| + HashPrefix& next_smallest_prefix_additions = GetNextUnmergedPrefixForSize( |
| + next_size_for_additions, additions_map, additions_counter_map); |
| + |
| + // If the smallest unmerged hash prefix in old store is smaller than that |
| + // in the update, add that to the new store. |
| + if (CompareHashPrefixes(next_smallest_prefix_old.get(), next_size_for_old, |
| + next_smallest_prefix_additions.get(), |
| + next_size_for_additions)) { |
| + hash_prefix_map_[next_size_for_old].push_back( |
| + std::move(next_smallest_prefix_old)); |
| + |
| + // Update the counter map, which means that we have merged one hash |
| + // prefix of size |next_size_for_old| from the old store. |
| + old_counter_map[next_size_for_old]++; |
| + |
| + // Find the next smallest unmerged element in the old store's map. |
| + found_in_old = GetNextSmallestPrefixSize( |
| + old_prefixes_map, old_counter_map, &next_size_for_old); |
| + } else { |
| + // This means that the smallest unmerged hash prefix in the update was |
| + // smaller than or equal to that in the old store, so add that hash |
| + // prefix (from the update) into the new store. |
| + hash_prefix_map_[next_size_for_additions].push_back( |
| + std::move(next_smallest_prefix_additions)); |
| + |
| + // Update the counter map, which means that we have merged one hash |
| + // prefix of size |next_size_for_additions| from the update. |
| + additions_counter_map[next_size_for_additions]++; |
| + |
| + // Find the next smallest unmerged element in the additions map. |
| + found_in_additions = GetNextSmallestPrefixSize( |
| + additions_map, additions_counter_map, &next_size_for_additions); |
| + } |
| + } else { |
| + // Exactly one of the maps has been fully merged; drain the other one. |
| + if (!found_in_old) { |
| + // The map for the old store has been completely merged. Now only merge |
| + // the hash prefixes from the additions map. |
| + HashPrefix& next_smallest_prefix_additions = |
| + GetNextUnmergedPrefixForSize(next_size_for_additions, additions_map, |
| + additions_counter_map); |
| + hash_prefix_map_[next_size_for_additions].push_back( |
| + std::move(next_smallest_prefix_additions)); |
| + additions_counter_map[next_size_for_additions]++; |
| + found_in_additions = GetNextSmallestPrefixSize( |
| + additions_map, additions_counter_map, &next_size_for_additions); |
| + } else { |
| + // The additions map has been completely merged. Now only merge |
| + // the hash prefixes from the old store map. |
| + HashPrefix& next_smallest_prefix_old = GetNextUnmergedPrefixForSize( |
| + next_size_for_old, old_prefixes_map, old_counter_map); |
| + hash_prefix_map_[next_size_for_old].push_back( |
| + std::move(next_smallest_prefix_old)); |
| + old_counter_map[next_size_for_old]++; |
| + found_in_old = GetNextSmallestPrefixSize( |
| + old_prefixes_map, old_counter_map, &next_size_for_old); |
| + } |
| + } |
| + } |
| +} |
| + |
| StoreReadResult V4Store::ReadFromDisk() { |
| DCHECK(task_runner_->RunsTasksOnCurrentThread()); |