 Chromium Code Reviews
 Chromium Code Reviews Issue 2961723003:
  [USS] Implement ApplySyncChanges and OnURLVisited/Modified/Deleted.  (Closed)
    
  
    Issue 2961723003:
  [USS] Implement ApplySyncChanges and OnURLVisited/Modified/Deleted.  (Closed) 
  | Index: components/history/core/browser/typed_url_sync_bridge.cc | 
| diff --git a/components/history/core/browser/typed_url_sync_bridge.cc b/components/history/core/browser/typed_url_sync_bridge.cc | 
| index 150ae7c50581b5cc8074b7a278843c15b29785aa..a7b25468d90b9d1649b4c6fd26875d6acafe0935 100644 | 
| --- a/components/history/core/browser/typed_url_sync_bridge.cc | 
| +++ b/components/history/core/browser/typed_url_sync_bridge.cc | 
| @@ -36,6 +36,14 @@ static const int kMaxTypedUrlVisits = 100; | 
| // RELOAD visits, which will be stripped. | 
| static const int kMaxVisitsToFetch = 1000; | 
| +// This is the threshold at which we start throttling sync updates for typed | 
| +// URLs - any URLs with a typed_count >= this threshold will be throttled. | 
| +static const int kTypedUrlVisitThrottleThreshold = 10; | 
| + | 
| +// This is the multiple we use when throttling sync updates. If the multiple is | 
| +// N, we sync up every Nth update (i.e. when typed_count % N == 0). | 
| +static const int kTypedUrlVisitThrottleMultiple = 10; | 
| + | 
| // Enforce oldest to newest visit order. | 
| static bool CheckVisitOrdering(const VisitVector& visits) { | 
| int64_t previous_visit_time = 0; | 
| @@ -51,6 +59,7 @@ static bool CheckVisitOrdering(const VisitVector& visits) { | 
| } | 
| std::string GetStorageKeyFromURLRow(const URLRow& row) { | 
| + DCHECK_NE(row.id(), 0); | 
| std::string storage_key(sizeof(row.id()), 0); | 
| base::WriteBigEndian<URLID>(&storage_key[0], row.id()); | 
| return storage_key; | 
| @@ -98,7 +107,7 @@ base::Optional<ModelError> TypedURLSyncBridge::MergeSyncData( | 
| DCHECK(sequence_checker_.CalledOnValidSequence()); | 
| // Create a mapping of all local data by URLID. These will be narrowed down | 
| - // by CreateOrUpdateUrl() to include only the entries different from sync | 
| + // by MergeURLWithSync() to include only the entries different from sync | 
| // server data. | 
| TypedURLMap new_db_urls; | 
| @@ -116,7 +125,7 @@ base::Optional<ModelError> TypedURLSyncBridge::MergeSyncData( | 
| TypedURLVisitVector new_synced_visits; | 
| // Iterate through entity_data and check for all the urls that | 
| - // sync already knows about. CreateOrUpdateUrl() will remove urls that | 
| + // sync already knows about. MergeURLWithSync() will remove urls that | 
| // are the same as the synced ones from |new_db_urls|. | 
| for (const EntityChange& entity_change : entity_data) { | 
| DCHECK(entity_change.data().specifics.has_typed_url()); | 
| @@ -137,9 +146,9 @@ base::Optional<ModelError> TypedURLSyncBridge::MergeSyncData( | 
| continue; | 
| } | 
| - UpdateUrlFromServer(specifics, &new_db_urls, &local_visit_vectors, | 
| - &new_synced_urls, &new_synced_visits, | 
| - &updated_synced_urls); | 
| + MergeURLWithSync(specifics, &new_db_urls, &local_visit_vectors, | 
| + &new_synced_urls, &new_synced_visits, | 
| + &updated_synced_urls); | 
| } | 
| for (const auto& kv : new_db_urls) { | 
| @@ -184,14 +193,82 @@ base::Optional<ModelError> TypedURLSyncBridge::ApplySyncChanges( | 
| std::unique_ptr<MetadataChangeList> metadata_change_list, | 
| EntityChangeList entity_changes) { | 
| DCHECK(sequence_checker_.CalledOnValidSequence()); | 
| - NOTIMPLEMENTED(); | 
| + DCHECK(sync_metadata_database_); | 
| + | 
| + std::vector<GURL> pending_deleted_urls; | 
| + TypedURLVisitVector new_synced_visits; | 
| + history::VisitVector deleted_visits; | 
| + history::URLRows updated_synced_urls; | 
| + history::URLRows new_synced_urls; | 
| + | 
| + for (const EntityChange& entity_change : entity_changes) { | 
| 
pavely
2017/07/06 19:28:29
You need to call UpdateStorageKey for ACTION_ADD s
 
Gang Wu
2017/07/10 19:53:25
Done.
 | 
| + if (entity_change.type() == EntityChange::ACTION_DELETE) { | 
| + URLRow url_row; | 
| + int64_t url_id = sync_metadata_database_->StorageKeyToURLID( | 
| + entity_change.storage_key()); | 
| + if (!history_backend_->GetURLByID(url_id, &url_row)) { | 
| + // Ignoring the case which no matching URLRow with URLID |url_id|. | 
| + continue; | 
| + } | 
| + if (url_row.url().is_empty()) { | 
| + // Ignoring empty URL in sync DB; | 
| 
pavely
2017/07/06 19:28:29
url_row comes from history, not sync db.
Dot at th
 
Gang Wu
2017/07/10 19:53:25
Done.
 | 
| + continue; | 
| + } | 
| + | 
| + pending_deleted_urls.push_back(url_row.url()); | 
| + continue; | 
| + } | 
| + | 
| + DCHECK(entity_change.data().specifics.has_typed_url()); | 
| + const TypedUrlSpecifics& specifics = | 
| + entity_change.data().specifics.typed_url(); | 
| + | 
| + GURL url(specifics.url()); | 
| + | 
| + if (ShouldIgnoreUrl(url)) | 
| + continue; | 
| + | 
| + DCHECK(specifics.visits_size()); | 
| + sync_pb::TypedUrlSpecifics filtered_url = FilterExpiredVisits(specifics); | 
| + if (filtered_url.visits_size() == 0) | 
| + continue; | 
| + | 
| + UpdateFromSync(filtered_url, &new_synced_visits, &deleted_visits, | 
| + &updated_synced_urls, &new_synced_urls); | 
| + } | 
| + | 
| + WriteToHistoryBackend(&new_synced_urls, &updated_synced_urls, | 
| + &pending_deleted_urls, &new_synced_visits, | 
| + &deleted_visits); | 
| return {}; | 
| } | 
| void TypedURLSyncBridge::GetData(StorageKeyList storage_keys, | 
| DataCallback callback) { | 
| DCHECK(sequence_checker_.CalledOnValidSequence()); | 
| - NOTIMPLEMENTED(); | 
| + DCHECK(sync_metadata_database_); | 
| + | 
| + auto batch = base::MakeUnique<MutableDataBatch>(); | 
| + for (const std::string& key : storage_keys) { | 
| + URLRow url_row; | 
| + URLID url_id = sync_metadata_database_->StorageKeyToURLID(key); | 
| + | 
| + ++num_db_accesses_; | 
| + if (!history_backend_->GetURLByID(url_id, &url_row)) { | 
| + // Ignoring the case which no matching URLRow with URLID |url_id|. | 
| 
pavely
2017/07/06 19:28:29
which => with?
 
Gang Wu
2017/07/10 19:53:24
Done.
 | 
| + continue; | 
| 
pavely
2017/07/06 19:28:29
Now that sync metadata lives in history db not fin
 
Gang Wu
2017/07/10 19:53:24
Done.
 | 
| + } | 
| + if (url_row.url().is_empty()) { | 
| + // Ignoring empty URL in sync DB; | 
| 
pavely
2017/07/06 19:28:29
Why do we expect and ignore rows with empty urls f
 
Gang Wu
2017/07/10 19:53:24
Done.
 | 
| + continue; | 
| + } | 
| + | 
| + VisitVector visits_vector; | 
| + FixupURLAndGetVisits(&url_row, &visits_vector); | 
| + batch->Put(key, CreateEntityData(url_row, visits_vector)); | 
| + } | 
| + | 
| + callback.Run(std::move(batch)); | 
| } | 
| void TypedURLSyncBridge::GetAllData(DataCallback callback) { | 
| @@ -245,14 +322,37 @@ void TypedURLSyncBridge::OnURLVisited(history::HistoryBackend* history_backend, | 
| const history::RedirectList& redirects, | 
| base::Time visit_time) { | 
| DCHECK(sequence_checker_.CalledOnValidSequence()); | 
| - NOTIMPLEMENTED(); | 
| + | 
| + if (!change_processor()) | 
| + return; // Sync processor not yet initialized, don't sync. | 
| 
pavely
2017/07/06 19:28:29
change_processor() always returns valid pointer. Y
 
Gang Wu
2017/07/10 19:53:25
Done.
 | 
| + if (!ShouldSyncVisit(row.typed_count(), transition)) | 
| + return; | 
| + | 
| + std::unique_ptr<MetadataChangeList> metadata_change_list = | 
| + CreateMetadataChangeList(); | 
| + | 
| + UpdateSyncFromLocal(row, metadata_change_list.get()); | 
| } | 
| void TypedURLSyncBridge::OnURLsModified( | 
| history::HistoryBackend* history_backend, | 
| const history::URLRows& changed_urls) { | 
| DCHECK(sequence_checker_.CalledOnValidSequence()); | 
| - NOTIMPLEMENTED(); | 
| + | 
| + if (!change_processor()) | 
| + return; // Sync processor not yet initialized, don't sync. | 
| + | 
| + std::unique_ptr<MetadataChangeList> metadata_change_list = | 
| + CreateMetadataChangeList(); | 
| + | 
| + for (const auto& row : changed_urls) { | 
| + // Only care if the modified URL is typed. | 
| + if (row.typed_count() >= 0) { | 
| + // If there were any errors updating the sync node, just ignore them and | 
| + // continue on to process the next URL. | 
| + UpdateSyncFromLocal(row, metadata_change_list.get()); | 
| + } | 
| + } | 
| } | 
| void TypedURLSyncBridge::OnURLsDeleted(history::HistoryBackend* history_backend, | 
| @@ -261,7 +361,40 @@ void TypedURLSyncBridge::OnURLsDeleted(history::HistoryBackend* history_backend, | 
| const history::URLRows& deleted_rows, | 
| const std::set<GURL>& favicon_urls) { | 
| DCHECK(sequence_checker_.CalledOnValidSequence()); | 
| - NOTIMPLEMENTED(); | 
| + if (!change_processor()) | 
| + return; // Sync processor not yet initialized, don't sync. | 
| + | 
| + // Ignore URLs expired due to old age (we don't want to sync them as deletions | 
| + // to avoid extra traffic up to the server, and also to make sure that a | 
| + // client with a bad clock setting won't go on an expiration rampage and | 
| + // delete all history from every client). The server will gracefully age out | 
| + // the sync DB entries when they've been idle for long enough. | 
| + if (expired) | 
| + return; | 
| + | 
| + std::unique_ptr<MetadataChangeList> metadata_change_list = | 
| + CreateMetadataChangeList(); | 
| + | 
| + if (all_history) { | 
| + auto batch = base::MakeUnique<syncer::MetadataBatch>(); | 
| + if (!sync_metadata_database_->GetAllSyncMetadata(batch.get())) { | 
| + change_processor()->ReportError(FROM_HERE, | 
| + "Failed reading typed url metadata from " | 
| + "TypedURLSyncMetadataDatabase."); | 
| + return; | 
| + } | 
| + | 
| + syncer::EntityMetadataMap metadata_map(batch->TakeAllMetadata()); | 
| + for (const auto& kv : metadata_map) { | 
| + change_processor()->Delete(kv.first, metadata_change_list.get()); | 
| + } | 
| + } else { | 
| + // Delete rows. | 
| + for (const auto& row : deleted_rows) { | 
| + std::string storage_key = GetStorageKeyFromURLRow(row); | 
| + change_processor()->Delete(storage_key, metadata_change_list.get()); | 
| + } | 
| + } | 
| } | 
| void TypedURLSyncBridge::Init() { | 
| @@ -484,6 +617,54 @@ TypedURLSyncBridge::MergeResult TypedURLSyncBridge::MergeUrls( | 
| return different; | 
| } | 
| +// static | 
| +void TypedURLSyncBridge::DiffVisits( | 
| + const history::VisitVector& history_visits, | 
| + const sync_pb::TypedUrlSpecifics& sync_specifics, | 
| + std::vector<history::VisitInfo>* new_visits, | 
| + history::VisitVector* removed_visits) { | 
| + DCHECK(new_visits); | 
| + size_t old_visit_count = history_visits.size(); | 
| + size_t new_visit_count = sync_specifics.visits_size(); | 
| + size_t old_index = 0; | 
| + size_t new_index = 0; | 
| + while (old_index < old_visit_count && new_index < new_visit_count) { | 
| + base::Time new_visit_time = | 
| + base::Time::FromInternalValue(sync_specifics.visits(new_index)); | 
| + if (history_visits[old_index].visit_time < new_visit_time) { | 
| + if (new_index > 0 && removed_visits) { | 
| + // If there are visits missing from the start of the node, that | 
| + // means that they were probably clipped off due to our code that | 
| + // limits the size of the sync nodes - don't delete them from our | 
| + // local history. | 
| + removed_visits->push_back(history_visits[old_index]); | 
| + } | 
| + ++old_index; | 
| + } else if (history_visits[old_index].visit_time > new_visit_time) { | 
| + new_visits->push_back(history::VisitInfo( | 
| + new_visit_time, ui::PageTransitionFromInt( | 
| + sync_specifics.visit_transitions(new_index)))); | 
| + ++new_index; | 
| + } else { | 
| + ++old_index; | 
| + ++new_index; | 
| + } | 
| + } | 
| + | 
| + if (removed_visits) { | 
| + for (; old_index < old_visit_count; ++old_index) { | 
| + removed_visits->push_back(history_visits[old_index]); | 
| + } | 
| + } | 
| + | 
| + for (; new_index < new_visit_count; ++new_index) { | 
| + new_visits->push_back(history::VisitInfo( | 
| + base::Time::FromInternalValue(sync_specifics.visits(new_index)), | 
| + ui::PageTransitionFromInt( | 
| + sync_specifics.visit_transitions(new_index)))); | 
| + } | 
| +} | 
| + | 
| // static | 
| void TypedURLSyncBridge::UpdateURLRowFromTypedUrlSpecifics( | 
| const TypedUrlSpecifics& typed_url, | 
| @@ -525,7 +706,7 @@ void TypedURLSyncBridge::ClearErrorStats() { | 
| num_db_errors_ = 0; | 
| } | 
| -void TypedURLSyncBridge::UpdateUrlFromServer( | 
| +void TypedURLSyncBridge::MergeURLWithSync( | 
| const sync_pb::TypedUrlSpecifics& server_typed_url, | 
| TypedURLMap* local_typed_urls, | 
| URLVisitVectorMap* local_visit_vectors, | 
| @@ -661,6 +842,69 @@ void TypedURLSyncBridge::UpdateUrlFromServer( | 
| } | 
| } | 
| +void TypedURLSyncBridge::UpdateFromSync( | 
| + const sync_pb::TypedUrlSpecifics& typed_url, | 
| + TypedURLVisitVector* visits_to_add, | 
| + history::VisitVector* visits_to_remove, | 
| + history::URLRows* updated_urls, | 
| + history::URLRows* new_urls) { | 
| + history::URLRow new_url(GURL(typed_url.url())); | 
| + history::VisitVector existing_visits; | 
| + bool existing_url = history_backend_->GetURL(new_url.url(), &new_url); | 
| + if (existing_url) { | 
| + // This URL already exists locally - fetch the visits so we can | 
| + // merge them below. | 
| + if (!FixupURLAndGetVisits(&new_url, &existing_visits)) { | 
| + return; | 
| + } | 
| + } | 
| + visits_to_add->push_back(std::pair<GURL, std::vector<history::VisitInfo>>( | 
| + new_url.url(), std::vector<history::VisitInfo>())); | 
| + | 
| + // Update the URL with information from the typed URL. | 
| + UpdateURLRowFromTypedUrlSpecifics(typed_url, &new_url); | 
| + | 
| + // Figure out which visits we need to add. | 
| + DiffVisits(existing_visits, typed_url, &visits_to_add->back().second, | 
| + visits_to_remove); | 
| + | 
| + if (existing_url) { | 
| + updated_urls->push_back(new_url); | 
| + } else { | 
| + new_urls->push_back(new_url); | 
| + } | 
| +} | 
| + | 
| +bool TypedURLSyncBridge::UpdateSyncFromLocal( | 
| + URLRow row, | 
| + MetadataChangeList* metadata_change_list) { | 
| + DCHECK_GE(row.typed_count(), 0); | 
| + | 
| + if (ShouldIgnoreUrl(row.url())) | 
| + return false; | 
| + | 
| + // Get the visits for this node. | 
| + VisitVector visit_vector; | 
| + if (!FixupURLAndGetVisits(&row, &visit_vector)) { | 
| + return false; | 
| + } | 
| + | 
| + DCHECK(!visit_vector.empty()); | 
| + | 
| + std::unique_ptr<syncer::EntityData> entity_data = | 
| + CreateEntityData(row, visit_vector); | 
| + if (!entity_data->specifics.has_typed_url()) { | 
| + // Cannot create EntityData, ex. no TYPED visits. | 
| + return false; | 
| + } | 
| + | 
| + std::string storage_key = GetStorageKeyFromURLRow(row); | 
| + change_processor()->Put(storage_key, std::move(entity_data), | 
| + metadata_change_list); | 
| + | 
| + return true; | 
| +} | 
| + | 
| base::Optional<ModelError> TypedURLSyncBridge::WriteToHistoryBackend( | 
| const history::URLRows* new_urls, | 
| const history::URLRows* updated_urls, | 
| @@ -777,6 +1021,21 @@ bool TypedURLSyncBridge::ShouldIgnoreVisits( | 
| return true; | 
| } | 
| +bool TypedURLSyncBridge::ShouldSyncVisit(int typed_count, | 
| + ui::PageTransition transition) { | 
| + // Just use an ad-hoc criteria to determine whether to ignore this | 
| + // notification. For most users, the distribution of visits is roughly a bell | 
| + // curve with a long tail - there are lots of URLs with < 5 visits so we want | 
| + // to make sure we sync up every visit to ensure the proper ordering of | 
| + // suggestions. But there are relatively few URLs with > 10 visits, and those | 
| + // tend to be more broadly distributed such that there's no need to sync up | 
| + // every visit to preserve their relative ordering. | 
| + return (ui::PageTransitionCoreTypeIs(transition, ui::PAGE_TRANSITION_TYPED) && | 
| + typed_count >= 0 && | 
| + (typed_count < kTypedUrlVisitThrottleThreshold || | 
| + (typed_count % kTypedUrlVisitThrottleMultiple) == 0)); | 
| +} | 
| + | 
| bool TypedURLSyncBridge::FixupURLAndGetVisits(URLRow* url, | 
| VisitVector* visits) { | 
| ++num_db_accesses_; | 
| @@ -855,7 +1114,6 @@ std::unique_ptr<EntityData> TypedURLSyncBridge::CreateEntityData( | 
| return base::MakeUnique<EntityData>(); | 
| 
pavely
2017/07/06 19:28:29
CreateEntityData came from TypedUrlSyncableService
 
Gang Wu
2017/07/10 19:53:24
Done.
 | 
| } | 
| entity_data->non_unique_name = row.url().spec(); | 
| - | 
| return entity_data; | 
| } |