components/safe_browsing_db/v4_store.cc - Issue 2128173003: Merge the hash prefixes from the old store and the additions in the partial

Unified Diff: components/safe_browsing_db/v4_store.cc

Issue 2128173003: Merge the hash prefixes from the old store and the additions in the partial (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Minor changes to the test inputs Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« components/safe_browsing_db/v4_store.h ('K') | « components/safe_browsing_db/v4_store.h ('k') | components/safe_browsing_db/v4_store_unittest.cc » ('j') | components/safe_browsing_db/v4_store_unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: components/safe_browsing_db/v4_store.cc

diff --git a/components/safe_browsing_db/v4_store.cc b/components/safe_browsing_db/v4_store.cc

index 0920f2bf3e9cdac00c4c1a30750ef14e37e07141..95a53fefeb3f31230702079565a4e79c9c2161bb 100644

--- a/components/safe_browsing_db/v4_store.cc

+++ b/components/safe_browsing_db/v4_store.cc

@@ -18,6 +18,13 @@ const uint32_t kFileMagic = 0x600D71FE;

const uint32_t kFileVersion = 9;

+// The minimum expected size of a hash-prefix.

+const uint32_t kMinHashPrefixLength = 4;

Nathan Parker 2016/07/11 18:09:58 Nit: Would it be easier to read if the code didn't

vakh (use Gerrit instead) 2016/07/12 07:34:19 Yes, it would be slightly more compact but at the

+// The maximum expected size of a hash-prefix. This represents a full SHA256

+// hash.

+const uint32_t kMaxHashPrefixLength = 32;

void RecordStoreReadResult(StoreReadResult result) {

UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreReadResult", result,

STORE_READ_RESULT_MAX);

@@ -28,12 +35,27 @@ void RecordStoreWriteResult(StoreWriteResult result) {

STORE_WRITE_RESULT_MAX);

}

+void RecordMergeUpdateResult(MergeUpdateResult result) {

+ UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4MergeUpdateResult", result,

+ MERGE_UPDATE_RESULT_MAX);

// Returns the name of the temporary file used to buffer data for

// |filename|. Exported for unit tests.

const base::FilePath TemporaryFileForFilename(const base::FilePath& filename) {

return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new"));

}

+// Returns true if that |first| string is lexicographically smaller than

+// |second|.

+bool CompareHashPrefixes(const char* first,

+ const PrefixSize first_size,

+ const char* second,

+ const PrefixSize second_size) {

+ return std::lexicographical_compare(first, first + first_size, second,

+ second + second_size);

} // namespace

std::ostream& operator<<(std::ostream& os, const V4Store& store) {

@@ -83,15 +105,26 @@ void V4Store::ApplyUpdate(

UpdatedStoreReadyCallback callback) {

std::unique_ptr<V4Store> new_store(

new V4Store(this->task_runner_, this->store_path_));

new_store->state_ = response->new_client_state();

// TODO(vakh):

// 1. Merge the old store and the new update in new_store.

// 2. Create a ListUpdateResponse containing RICE encoded hash-prefixes and

// response_type as FULL_UPDATE, and write that to disk.

// 3. Remove this if condition after completing 1. and 2.

- if (response->has_response_type() &&

- response->response_type() == ListUpdateResponse::FULL_UPDATE) {

+ if (response->response_type() == ListUpdateResponse::PARTIAL_UPDATE) {

+ for (const auto& removal : response->removals()) {

+ // TODO(vakh): Allow other compression types.

+ // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567

+ DCHECK_EQ(RAW, removal.compression_type());

+ }

+ HashPrefixMap additions_map =

+ GetHashPrefixMapFromAdditions(response->additions());

+ new_store->MergeUpdate(hash_prefix_map_, additions_map);

+ // TODO(vakh): Generate the updated ListUpdateResponse to write to disk.

+ } else if (response->response_type() == ListUpdateResponse::FULL_UPDATE) {

StoreWriteResult result = new_store->WriteToDisk(std::move(response));

RecordStoreWriteResult(result);

}

Nathan Parker 2016/07/11 18:09:58 todo: Do something reasonable if response_type is

vakh (use Gerrit instead) 2016/07/12 07:34:18 Done.

@@ -101,6 +134,184 @@ void V4Store::ApplyUpdate(

FROM_HERE, base::Bind(callback, base::Passed(&new_store)));

}

+// static

+HashPrefixMap V4Store::GetHashPrefixMapFromAdditions(

+ const ::google::protobuf::RepeatedPtrField<ThreatEntrySet>& additions) {

+ HashPrefixMap additions_map;

+ for (const auto& addition : additions) {

+ // TODO(vakh): Allow other compression types.

+ // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567

+ DCHECK_EQ(RAW, addition.compression_type());

+ DCHECK(addition.has_raw_hashes());

+ DCHECK(addition.raw_hashes().has_raw_hashes());

+ PrefixSize prefix_size = addition.raw_hashes().prefix_size();

+ DCHECK(kMinHashPrefixLength <= prefix_size);

Nathan Parker 2016/07/11 18:09:58 todo: skip additions with invalid metadata (i.e. m

vakh (use Gerrit instead) 2016/07/12 07:34:19 Done.

+ DCHECK(kMaxHashPrefixLength >= prefix_size);

+ MergeUpdateResult result = AddUnlumpedHashes(

+ prefix_size, addition.raw_hashes().raw_hashes(), &additions_map);

+ if (result != MERGE_SUCCESS) {

+ RecordMergeUpdateResult(result);

Nathan Parker 2016/07/11 18:09:58 Might as well record successes as well, unless tha

vakh (use Gerrit instead) 2016/07/12 07:34:18 Done.

+ }

+ return additions_map;

+// static

+MergeUpdateResult V4Store::AddUnlumpedHashes(PrefixSize prefix_size,

+ const std::string& lumped_hashes,

+ HashPrefixMap* prefix_map) {

+ if (lumped_hashes.size() % prefix_size != 0) {

+ return ADDITIONS_SIZE_UNEXPECTED_FAILURE;

+ }

+ for (std::string::const_iterator iter = lumped_hashes.begin();

+ iter != lumped_hashes.end(); iter += prefix_size) {

+ HashPrefix prefix(new char[prefix_size + 1]);

+ memcpy(prefix.get(), std::string(iter, iter + prefix_size).data(),

+ prefix_size + 1);

+ (*prefix_map)[prefix_size].push_back(std::move(prefix));

+ }

+ return MERGE_SUCCESS;

+// static

+bool V4Store::GetNextSmallestPrefixSize(const HashPrefixMap& hash_prefix_map,

+ const CounterMap& counter_map,

+ PrefixSize* smallest_prefix_size) {

+ bool found = false;

+ char* smallest_prefix = nullptr;

+ for (const auto& counter_pair : counter_map) {

+ PrefixSize prefix_size = counter_pair.first;

+ size_t index = counter_pair.second;

+ const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);

+ if (index < hash_prefixes.size()) {

+ const HashPrefix& this_prefix = hash_prefixes[index];

+ if (!found ||

Nathan Parker 2016/07/11 18:09:58 Replace !found with !smallest_prefix, and then you

vakh (use Gerrit instead) 2016/07/12 07:34:18 Done.

+ !CompareHashPrefixes(smallest_prefix, *smallest_prefix_size,

+ this_prefix.get(), prefix_size)) {

+ found = true;

+ smallest_prefix = this_prefix.get();

+ *smallest_prefix_size = prefix_size;

+ }

+ return found;

+// static

+CounterMap V4Store::GetInitializedCounterMap(

+ const HashPrefixMap& hash_prefix_map) {

+ CounterMap counter_map;

+ for (const auto& map_pair : hash_prefix_map) {

+ counter_map[map_pair.first] = 0;

+ }

+ return counter_map;

+// static

+HashPrefix& V4Store::GetNextUnmergedPrefixForSize(

+ PrefixSize prefix_size,

+ HashPrefixMap& hash_prefix_map,

+ const CounterMap& counter_map) {

+ HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);

Nathan Parker 2016/07/11 18:09:58 nit (Feel free to ignore): Might be a bit more rea

vakh (use Gerrit instead) 2016/07/12 07:34:18 This code has changed so NA.

+ size_t index_within_list = counter_map.at(prefix_size);

+ HashPrefix& next_unmerged_prefix = hash_prefixes[index_within_list];

+ return next_unmerged_prefix;

+void V4Store::MergeUpdate(HashPrefixMap& old_prefixes_map,

+ HashPrefixMap& additions_map) {

+ PrefixSize next_size_for_old;

+ CounterMap old_counter_map = GetInitializedCounterMap(old_prefixes_map);

+ bool found_in_old = GetNextSmallestPrefixSize(

+ old_prefixes_map, old_counter_map, &next_size_for_old);

+ PrefixSize next_size_for_additions;

+ CounterMap additions_counter_map = GetInitializedCounterMap(additions_map);

+ bool found_in_additions = GetNextSmallestPrefixSize(

+ additions_map, additions_counter_map, &next_size_for_additions);

+ // Classical merge sort.

+ // The two constructs to merge are maps: old_prefixes_map, additions_map.

+ // At least one of the maps still has elements that need to be merged into the

+ // new store.

+ while (found_in_old || found_in_additions) {

+ // Both maps still have elements that need to be merged into the new store.

+ if (found_in_old && found_in_additions) {

Nathan Parker 2016/07/11 18:09:58 To remove repeated code here, could you do if (fo

vakh (use Gerrit instead) 2016/07/12 21:57:02 Done.

+ // Get the lexographically smallest hash prefix from the old store.

+ HashPrefix& next_smallest_prefix_old = GetNextUnmergedPrefixForSize(

+ next_size_for_old, old_prefixes_map, old_counter_map);

+ // Get the lexographically smallest hash prefix from the additions in the

+ // latest update from the server.

+ HashPrefix& next_smallest_prefix_additions = GetNextUnmergedPrefixForSize(

+ next_size_for_additions, additions_map, additions_counter_map);

+ // If the smallest unmerged hash prefix in old store is smaller than that

+ // in the update, add that to the new store.

+ if (CompareHashPrefixes(next_smallest_prefix_old.get(), next_size_for_old,

+ next_smallest_prefix_additions.get(),

+ next_size_for_additions)) {

+ hash_prefix_map_[next_size_for_old].push_back(

+ std::move(next_smallest_prefix_old));

+ // Update the counter map, which means that we have merged one hash

+ // prefix of size |next_size_for_old| from the old store.

+ old_counter_map[next_size_for_old]++;

+ // Find the next smallest unmerged element in the old store's map.

+ found_in_old = GetNextSmallestPrefixSize(

+ old_prefixes_map, old_counter_map, &next_size_for_old);

+ } else {

+ // This means that the smallest unmerged hash prefix in the update was

+ // smaller than that in the old store, so add that hash prefix (from the

+ // update) into the new store.

+ hash_prefix_map_[next_size_for_additions].push_back(

+ std::move(next_smallest_prefix_additions));

+ // Update the counter map, which means that we have merged one hash

+ // prefix of size |next_size_for_additions| from the update.

+ additions_counter_map[next_size_for_additions]++;

+ // Find the next smallest unmerged element in the additions map.

+ found_in_additions = GetNextSmallestPrefixSize(

+ additions_map, additions_counter_map, &next_size_for_additions);

+ }

+ } else {

+ // At least one of the maps has become empty.

+ if (!found_in_old) {

+ // The map for the old store has been completely merged. Now only merge

+ // the hash prefixes from the additions map.

+ HashPrefix& next_smallest_prefix_additions =

+ GetNextUnmergedPrefixForSize(next_size_for_additions, additions_map,

+ additions_counter_map);

+ hash_prefix_map_[next_size_for_additions].push_back(

+ std::move(next_smallest_prefix_additions));

+ additions_counter_map[next_size_for_additions]++;

+ found_in_additions = GetNextSmallestPrefixSize(

+ additions_map, additions_counter_map, &next_size_for_additions);

+ } else {

+ // The additions map has been completely merged. Now only merge

+ // the hash prefixes from the old store map.

+ HashPrefix& next_smallest_prefix_old = GetNextUnmergedPrefixForSize(

+ next_size_for_old, old_prefixes_map, old_counter_map);

+ hash_prefix_map_[next_size_for_old].push_back(

+ std::move(next_smallest_prefix_old));

+ old_counter_map[next_size_for_old]++;

+ found_in_old = GetNextSmallestPrefixSize(

+ old_prefixes_map, old_counter_map, &next_size_for_old);

+ }

StoreReadResult V4Store::ReadFromDisk() {

DCHECK(task_runner_->RunsTasksOnCurrentThread());