Chromium Code Reviews

Index: components/safe_browsing_db/v4_store.cc
diff --git a/components/safe_browsing_db/v4_store.cc b/components/safe_browsing_db/v4_store.cc
index 2ef5c039683ef2bb14780aa67f8b69f066cc1470..b12279431da2c6da8ef0bfd97d08021f0f75d9a7 100644
--- a/components/safe_browsing_db/v4_store.cc
+++ b/components/safe_browsing_db/v4_store.cc
@@ -18,6 +18,13 @@ const uint32_t kFileMagic = 0x600D71FE;
 const uint32_t kFileVersion = 9;
+// The minimum expected size of a hash-prefix.

Nathan Parker (2016/07/12 20:35:05): ...size in bytes
vakh (use Gerrit instead) (2016/07/12 21:57:02): Done.

+const uint32_t kMinHashPrefixLength = 4;
+
+// The maximum expected size of a hash-prefix. This represents a full SHA256
+// hash.
+const uint32_t kMaxHashPrefixLength = 32;
+
 void RecordStoreReadResult(StoreReadResult result) {
   UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4StoreReadResult", result,
                             STORE_READ_RESULT_MAX);
@@ -28,6 +35,11 @@ void RecordStoreWriteResult(StoreWriteResult result) {
                             STORE_WRITE_RESULT_MAX);
 }
+void RecordMergeResult(MergeResult result) {
+  UMA_HISTOGRAM_ENUMERATION("SafeBrowsing.V4MergeResult", result,
+                            MERGE_RESULT_MAX);
+}
+
 // Returns the name of the temporary file used to buffer data for
 // |filename|. Exported for unit tests.
 const base::FilePath TemporaryFileForFilename(const base::FilePath& filename) {
@@ -83,17 +95,30 @@ void V4Store::ApplyUpdate(
     UpdatedStoreReadyCallback callback) {
   std::unique_ptr<V4Store> new_store(
       new V4Store(this->task_runner_, this->store_path_));
-
   new_store->state_ = response->new_client_state();
   // TODO(vakh):
   // 1. Merge the old store and the new update in new_store.
   // 2. Create a ListUpdateResponse containing RICE encoded hash-prefixes and
   //    response_type as FULL_UPDATE, and write that to disk.
   // 3. Remove this if condition after completing 1. and 2.
-  if (response->has_response_type() &&
-      response->response_type() == ListUpdateResponse::FULL_UPDATE) {
+  if (response->response_type() == ListUpdateResponse::PARTIAL_UPDATE) {
+    for (const auto& removal : response->removals()) {
+      // TODO(vakh): Allow other compression types.
+      // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567
+      DCHECK_EQ(RAW, removal.compression_type());
+    }
+
+    HashPrefixMap additions_map;
+    UpdateHashPrefixMapFromAdditions(response->additions(), &additions_map);
+
+    new_store->MergeUpdate(hash_prefix_map_, additions_map);
+
+    // TODO(vakh): Generate the updated ListUpdateResponse to write to disk.
+  } else if (response->response_type() == ListUpdateResponse::FULL_UPDATE) {
     StoreWriteResult result = new_store->WriteToDisk(std::move(response));
     RecordStoreWriteResult(result);
+  } else {
+    NOTREACHED() << "Unexpected response type: " << response->response_type();
   }
   // new_store is done updating, pass it to the callback.
@@ -101,6 +126,187 @@ void V4Store::ApplyUpdate(
       FROM_HERE, base::Bind(callback, base::Passed(&new_store)));
 }
+// static
+void V4Store::UpdateHashPrefixMapFromAdditions(
+    const ::google::protobuf::RepeatedPtrField<ThreatEntrySet>& additions,
+    HashPrefixMap* additions_map) {
+  for (const auto& addition : additions) {
+    // TODO(vakh): Allow other compression types.
+    // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567
+    DCHECK_EQ(RAW, addition.compression_type());
+
+    DCHECK(addition.has_raw_hashes());
+    DCHECK(addition.raw_hashes().has_raw_hashes());
+
+    PrefixSize prefix_size = addition.raw_hashes().prefix_size();
+    RecordMergeResult(AddUnlumpedHashes(
+        prefix_size, addition.raw_hashes().raw_hashes(), additions_map));
+  }
+}
+
+// static
+MergeResult V4Store::AddUnlumpedHashes(PrefixSize prefix_size,
+                                       const std::string& lumped_hashes,
+                                       HashPrefixMap* additions_map) {
+  if (prefix_size < kMinHashPrefixLength) {
+    NOTREACHED();
+    return PREFIX_SIZE_TOO_SMALL_FAILURE;

Nathan Parker (2016/07/12 20:35:06): You don't need any of the else's here.
vakh (use Gerrit instead) (2016/07/12 21:57:02): Done.

+  } else if (prefix_size > kMaxHashPrefixLength) {
+    NOTREACHED();
+    return PREFIX_SIZE_TOO_LARGE_FAILURE;
+  } else {
+    if (lumped_hashes.size() % prefix_size != 0) {
+      return ADDITIONS_SIZE_UNEXPECTED_FAILURE;
+    } else {
+      (*additions_map)[prefix_size] = lumped_hashes;

Nathan Parker (2016/07/12 20:35:06): To consider, or add a todo: This copies the string
Nathan Parker (2016/07/12 22:47:12): (Missed this comment) Though, I'm not sure how to
vakh (use Gerrit instead) (2016/07/13 00:29:17): Done.

+    }
+  }
+  return MERGE_SUCCESS;
+}
+
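For reference, the early-return shape Nathan asks for above could look roughly like this (same names as in this CL; a sketch, not the final patch set):

MergeResult V4Store::AddUnlumpedHashes(PrefixSize prefix_size,
                                       const std::string& lumped_hashes,
                                       HashPrefixMap* additions_map) {
  if (prefix_size < kMinHashPrefixLength) {
    NOTREACHED();
    return PREFIX_SIZE_TOO_SMALL_FAILURE;
  }
  if (prefix_size > kMaxHashPrefixLength) {
    NOTREACHED();
    return PREFIX_SIZE_TOO_LARGE_FAILURE;
  }
  if (lumped_hashes.size() % prefix_size != 0) {
    return ADDITIONS_SIZE_UNEXPECTED_FAILURE;
  }
  // Still copies |lumped_hashes| into the map; see the thread above about
  // avoiding that copy (e.g. by handing over ownership of the proto string).
  (*additions_map)[prefix_size] = lumped_hashes;
  return MERGE_SUCCESS;
}
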
+// static
+bool V4Store::GetNextSmallestPrefixSize(const HashPrefixMap& hash_prefix_map,
+                                        const CounterMap& counter_map,
+                                        PrefixSize* smallest_prefix_size) {
+  HashPrefix smallest_prefix;

Nathan Parker (2016/07/12 20:35:06): You could use a StringPiece here and within the loop
vakh (use Gerrit instead) (2016/07/12 21:57:02): That would break the consistency of hash prefixes
Nathan Parker (2016/07/12 22:47:12): They are small, but there is a dynamic allocation
vakh (use Gerrit instead) (2016/07/13 00:29:17): Done.

+  for (const auto& counter_pair : counter_map) {
+    PrefixSize prefix_size = counter_pair.first;
+    size_t index = counter_pair.second;
+    size_t sized_index = prefix_size * index;
+
+    const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);
+    if (sized_index < hash_prefixes.size()) {
+      HashPrefix this_prefix = hash_prefixes.substr(sized_index, prefix_size);
+      if (smallest_prefix.empty() || smallest_prefix > this_prefix) {
+        smallest_prefix = this_prefix;
+        *smallest_prefix_size = prefix_size;
+      }
+    }

Nathan Parker (2016/07/12 20:35:06): else dcheck
vakh (use Gerrit instead) (2016/07/12 21:57:02): That's a valid case. For instance, when one map has

+  }
+  return !smallest_prefix.empty();
+}
+
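A sketch of the StringPiece idea from the thread above, assuming base::StringPiece (base/strings/string_piece.h) and that HashPrefixes is a std::string, as its use of substr()/capacity() in this CL suggests; it compares candidates in place instead of allocating a new string for every substr() call:

bool V4Store::GetNextSmallestPrefixSize(const HashPrefixMap& hash_prefix_map,
                                        const CounterMap& counter_map,
                                        PrefixSize* smallest_prefix_size) {
  base::StringPiece smallest_prefix;
  for (const auto& counter_pair : counter_map) {
    PrefixSize prefix_size = counter_pair.first;
    size_t sized_index = prefix_size * counter_pair.second;

    const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);
    if (sized_index < hash_prefixes.size()) {
      // Points into |hash_prefixes|; no per-iteration heap allocation.
      base::StringPiece this_prefix(hash_prefixes.data() + sized_index,
                                    prefix_size);
      if (smallest_prefix.empty() || this_prefix < smallest_prefix) {
        smallest_prefix = this_prefix;
        *smallest_prefix_size = prefix_size;
      }
    }
  }
  return !smallest_prefix.empty();
}
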
+// static
+void V4Store::InitializeCounterMap(const HashPrefixMap& hash_prefix_map,
+                                   CounterMap* counter_map) {
+  for (const auto& map_pair : hash_prefix_map) {
+    (*counter_map)[map_pair.first] = 0;
+  }
+}
+
+// static
+HashPrefix V4Store::GetNextUnmergedPrefixForSize(
+    PrefixSize prefix_size,
+    const HashPrefixMap& hash_prefix_map,
+    const CounterMap& counter_map) {
+  HashPrefixes hash_prefixes = hash_prefix_map.at(prefix_size);

Nathan Parker (2016/07/12 20:35:06): This copies the whole string. You could use a const ref.
vakh (use Gerrit instead) (2016/07/12 21:57:03): Done.

+  size_t index_within_list = counter_map.at(prefix_size);
+  size_t sized_index = prefix_size * index_within_list;
+  return hash_prefixes.substr(sized_index, prefix_size);
+}
+
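Per the comment above, binding the map entry by const reference avoids copying the whole concatenated string; a sketch (not the final patch set):

HashPrefix V4Store::GetNextUnmergedPrefixForSize(
    PrefixSize prefix_size,
    const HashPrefixMap& hash_prefix_map,
    const CounterMap& counter_map) {
  const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);
  size_t sized_index = prefix_size * counter_map.at(prefix_size);
  // substr() still allocates the returned prefix, but the stored string is
  // no longer copied.
  return hash_prefixes.substr(sized_index, prefix_size);
}
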
+// static
+void V4Store::ReserveSpaceInPrefixMap(const HashPrefixMap& other_prefixes_map,
+                                      HashPrefixMap* prefix_map_to_update) {
+  for (const auto& pair : other_prefixes_map) {

Nathan Parker (2016/07/12 20:35:06): Add a comment about how this gets close to ideal,
vakh (use Gerrit instead) (2016/07/12 21:57:02): Updated the comment in header file (consistent with

+    PrefixSize prefix_size = pair.first;
+    size_t prefix_length_to_add = pair.second.length();
+
+    HashPrefixes existing_prefixes = (*prefix_map_to_update)[prefix_size];

Nathan Parker (2016/07/12 20:35:06): const HashPrefixes&. Otherwise it makes a copy.
vakh (use Gerrit instead) (2016/07/12 21:57:02): Done.

+    size_t existing_capacity = existing_prefixes.capacity();
+
+    (*prefix_map_to_update)[prefix_size].reserve(existing_capacity +
+                                                 prefix_length_to_add);
+  }
+}
+
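A sketch folding in both comments above: bind the existing entry by reference instead of copying it, and note why the reservation is only approximately ideal (it is based on capacity(), and MergeUpdate calls this once per source map, so the second call already sees the space reserved by the first):

void V4Store::ReserveSpaceInPrefixMap(const HashPrefixMap& other_prefixes_map,
                                      HashPrefixMap* prefix_map_to_update) {
  for (const auto& pair : other_prefixes_map) {
    PrefixSize prefix_size = pair.first;
    size_t prefix_length_to_add = pair.second.length();

    // Reference, not a copy, of the (possibly empty) destination string.
    HashPrefixes& existing_prefixes = (*prefix_map_to_update)[prefix_size];
    existing_prefixes.reserve(existing_prefixes.capacity() +
                              prefix_length_to_add);
  }
}
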
+void V4Store::MergeUpdate(const HashPrefixMap& old_prefixes_map,
+                          const HashPrefixMap& additions_map) {
+  ReserveSpaceInPrefixMap(old_prefixes_map, &hash_prefix_map_);

Nathan Parker (2016/07/12 20:35:06): Is the assumption that hash_prefix_map_ is empty here?
vakh (use Gerrit instead) (2016/07/12 21:57:02): Good map. However, for code reuse purposes, I think

+  ReserveSpaceInPrefixMap(additions_map, &hash_prefix_map_);
+
+  PrefixSize next_size_for_old;
+  CounterMap old_counter_map;
+  InitializeCounterMap(old_prefixes_map, &old_counter_map);
+  bool found_in_old = GetNextSmallestPrefixSize(
+      old_prefixes_map, old_counter_map, &next_size_for_old);
+
+  PrefixSize next_size_for_additions;
+  CounterMap additions_counter_map;
+  InitializeCounterMap(additions_map, &additions_counter_map);
+  bool found_in_additions = GetNextSmallestPrefixSize(
+      additions_map, additions_counter_map, &next_size_for_additions);
+
+  // Classical merge sort.
+  // The two constructs to merge are maps: old_prefixes_map, additions_map.
+
+  // At least one of the maps still has elements that need to be merged into
+  // the new store.
+  while (found_in_old || found_in_additions) {
+    // Both maps still have elements that need to be merged into the new store.
+    if (found_in_old && found_in_additions) {
+      // Get the lexicographically smallest hash prefix from the old store.
+      HashPrefix next_smallest_prefix_old = GetNextUnmergedPrefixForSize(
+          next_size_for_old, old_prefixes_map, old_counter_map);
+      // Get the lexicographically smallest hash prefix from the additions in
+      // the latest update from the server.
+      HashPrefix next_smallest_prefix_additions = GetNextUnmergedPrefixForSize(
+          next_size_for_additions, additions_map, additions_counter_map);
+
+      // If the smallest unmerged hash prefix in the old store is smaller than
+      // that in the update, add that to the new store.
+      if (next_smallest_prefix_old < next_smallest_prefix_additions) {
+        hash_prefix_map_[next_size_for_old] += next_smallest_prefix_old;
+
+        // Update the counter map, which means that we have merged one hash
+        // prefix of size |next_size_for_old| from the old store.
+        old_counter_map[next_size_for_old]++;
+
+        // Find the next smallest unmerged element in the old store's map.
+        found_in_old = GetNextSmallestPrefixSize(
+            old_prefixes_map, old_counter_map, &next_size_for_old);
+      } else {
+        // This means that the smallest unmerged hash prefix in the update was
+        // smaller than that in the old store, so add that hash prefix (from
+        // the update) into the new store.
+        hash_prefix_map_[next_size_for_additions] +=
+            next_smallest_prefix_additions;
+
+        // Update the counter map, which means that we have merged one hash
+        // prefix of size |next_size_for_additions| from the update.
+        additions_counter_map[next_size_for_additions]++;
+
+        // Find the next smallest unmerged element in the additions map.
+        found_in_additions = GetNextSmallestPrefixSize(
+            additions_map, additions_counter_map, &next_size_for_additions);
+      }
+    } else {
+      // At least one of the maps has become empty.
+      if (!found_in_old) {
+        // The map for the old store has been completely merged. Now only merge
+        // the hash prefixes from the additions map.
+        HashPrefix next_smallest_prefix_additions =
+            GetNextUnmergedPrefixForSize(next_size_for_additions, additions_map,
+                                         additions_counter_map);
+        hash_prefix_map_[next_size_for_additions] +=
+            next_smallest_prefix_additions;
+        additions_counter_map[next_size_for_additions]++;
+        found_in_additions = GetNextSmallestPrefixSize(
+            additions_map, additions_counter_map, &next_size_for_additions);
+      } else {
+        // The additions map has been completely merged. Now only merge
+        // the hash prefixes from the old store map.
+        HashPrefix next_smallest_prefix_old = GetNextUnmergedPrefixForSize(
+            next_size_for_old, old_prefixes_map, old_counter_map);
+        hash_prefix_map_[next_size_for_old] += next_smallest_prefix_old;
+        old_counter_map[next_size_for_old]++;
+        found_in_old = GetNextSmallestPrefixSize(
+            old_prefixes_map, old_counter_map, &next_size_for_old);
+      }
+    }
+  }
+}
+
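To make the merge concrete, a small hypothetical example (values invented for illustration, and assuming each per-size string is already lexicographically sorted, which the merge relies on):

// old_prefixes_map : {4: "abcdefgh"}            -> prefixes "abcd", "efgh"
// additions_map    : {4: "cdef", 5: "01234"}    -> prefixes "cdef", "01234"
//
// The loop always appends the globally smallest unmerged prefix, so the new
// store ends up with:
//
// hash_prefix_map_ : {4: "abcdcdefefgh", 5: "01234"}
//
// i.e. each per-size string in the merged store remains sorted.
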
 StoreReadResult V4Store::ReadFromDisk() {
   DCHECK(task_runner_->RunsTasksOnCurrentThread());