Chromium Code Reviews| Index: chrome/browser/history/typed_url_syncable_service.cc |
| diff --git a/chrome/browser/history/typed_url_syncable_service.cc b/chrome/browser/history/typed_url_syncable_service.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..7ce913f4f9d302e573a9c8ee5e8c53b85d11c6b3 |
| --- /dev/null |
| +++ b/chrome/browser/history/typed_url_syncable_service.cc |
| @@ -0,0 +1,439 @@ |
| +// Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include "chrome/browser/history/typed_url_syncable_service.h" |
| + |
| +#include "base/auto_reset.h" |
| +#include "base/logging.h" |
| +#include "base/metrics/histogram.h" |
| +#include "base/utf_string_conversions.h" |
| +#include "chrome/browser/history/history_backend.h" |
| +#include "net/base/net_util.h" |
| +#include "sync/protocol/sync.pb.h" |
| +#include "sync/protocol/typed_url_specifics.pb.h" |
| + |
namespace {

// The server backend can't handle arbitrarily large node sizes, so to keep
// the size under control we limit the visit array.
static const int kMaxTypedUrlVisits = 100;

// There's no limit on how many visits the history DB could have for a given
// typed URL, so we limit how many we fetch from the DB to avoid crashes due to
// running out of memory (http://crbug.com/89793). This value is different
// from kMaxTypedUrlVisits, as some of the visits fetched from the DB may be
// RELOAD visits, which will be stripped.
static const int kMaxVisitsToFetch = 1000;

// This is the threshold at which we start throttling sync updates for typed
// URLs - any URLs with a typed_count >= this threshold will be throttled.
static const int kTypedUrlVisitThrottleThreshold = 10;

// This is the multiple we use when throttling sync updates. If the multiple is
// N, we sync up every Nth update (i.e. when typed_count % N == 0).
static const int kTypedUrlVisitThrottleMultiple = 10;

}  // namespace
| + |
// Tag used to identify typed-URL sync nodes created by this service (passed
// to syncer::SyncData::CreateLocalData below).
const char kTypedUrlTag[] = "google_chrome_typed_urls";
| + |
| +static bool CheckVisitOrdering(const history::VisitVector& visits) { |
| + int64 previous_visit_time = 0; |
| + for (history::VisitVector::const_iterator visit = visits.begin(); |
| + visit != visits.end(); ++visit) { |
| + if (visit != visits.begin()) { |
| + // We allow duplicate visits here - they shouldn't really be allowed, but |
| + // they still seem to show up sometimes and we haven't figured out the |
| + // source, so we just log an error instead of failing an assertion. |
| + // (http://crbug.com/91473). |
| + if (previous_visit_time == visit->visit_time.ToInternalValue()) |
| + DVLOG(1) << "Duplicate visit time encountered"; |
| + else if (previous_visit_time > visit->visit_time.ToInternalValue()) |
| + return false; |
| + } |
| + |
| + previous_visit_time = visit->visit_time.ToInternalValue(); |
| + } |
| + return true; |
| +} |
| + |
| +TypedUrlSyncableService::TypedUrlSyncableService( |
| + history::HistoryBackend* history_backend) |
| + : history_backend_(history_backend), |
| + processing_syncer_changes_(false), |
| + expected_loop_(MessageLoop::current()) { |
| + DCHECK(history_backend_); |
| + DCHECK(expected_loop_ == MessageLoop::current()); |
| +} |
| + |
// Destroyed on the same loop it was created on.
TypedUrlSyncableService::~TypedUrlSyncableService() {
  DCHECK(expected_loop_ == MessageLoop::current());
}
| + |
// Takes ownership of |sync_processor| and |error_handler|, which marks sync
// as active (the On*() observers below early-out while |sync_processor_| is
// NULL). Merging of |initial_sync_data| with local history is not implemented
// yet, so the returned merge result carries no counts.
syncer::SyncMergeResult TypedUrlSyncableService::MergeDataAndStartSyncing(
    syncer::ModelType type,
    const syncer::SyncDataList& initial_sync_data,
    scoped_ptr<syncer::SyncChangeProcessor> sync_processor,
    scoped_ptr<syncer::SyncErrorFactory> error_handler) {
  DCHECK(expected_loop_ == MessageLoop::current());
  // Starting sync twice without StopSyncing() in between is a bug.
  DCHECK(!sync_processor_.get());
  DCHECK(sync_processor.get());
  DCHECK(error_handler.get());
  DCHECK_EQ(type, syncer::TYPED_URLS);

  syncer::SyncMergeResult merge_result(type);
  sync_processor_ = sync_processor.Pass();
  sync_error_handler_ = error_handler.Pass();

  // TODO(mgist): Add implementation

  return merge_result;
}
| + |
// Releases the sync processor and error handler; a NULL |sync_processor_|
// disables the On*() observers below until sync is started again.
void TypedUrlSyncableService::StopSyncing(syncer::ModelType type) {
  DCHECK(expected_loop_ == MessageLoop::current());
  DCHECK_EQ(type, syncer::TYPED_URLS);

  sync_processor_.reset();
  sync_error_handler_.reset();
}
| + |
// Not implemented yet: always returns an empty list.
syncer::SyncDataList TypedUrlSyncableService::GetAllSyncData(
    syncer::ModelType type) const {
  DCHECK(expected_loop_ == MessageLoop::current());
  syncer::SyncDataList list;

  // TODO(mgist): Add implementation

  return list;
}
| + |
// Not implemented yet: returns an error so the sync machinery knows that the
// incoming |change_list| was not applied.
syncer::SyncError TypedUrlSyncableService::ProcessSyncChanges(
    const tracked_objects::Location& from_here,
    const syncer::SyncChangeList& change_list) {
  DCHECK(expected_loop_ == MessageLoop::current());

  // TODO(mgist): Add implementation

  return syncer::SyncError(FROM_HERE,
                           "Typed url syncable service is not implemented.",
                           syncer::TYPED_URLS);
}
| + |
| +void TypedUrlSyncableService::OnUrlsModified( |
| + history::URLsModifiedDetails* details) { |
| + DCHECK(expected_loop_ == MessageLoop::current()); |
| + DCHECK(details); |
| + |
| + if (processing_syncer_changes_) |
| + return; // These are changes originating from us, ignore. |
| + if (!sync_processor_.get()) |
| + return; // Sync processor not yet initialized, don't sync. |
| + |
| + // Create SyncChangeList |
| + syncer::SyncChangeList changes; |
| + |
| + for (history::URLRows::iterator url = details->changed_urls.begin(); |
| + url != details->changed_urls.end(); ++url) { |
| + // Only care if the modified URL is typed |
| + if (url->typed_count() > 0) { |
| + // If there were any errors updating the sync node, just ignore them and |
| + // continue on to process the next URL. |
| + CreateOrUpdateSyncNode(*url, &changes); |
| + } |
| + } |
| + |
| + // Send SyncChangeList to server if there are any changes |
| + if (changes.size() > 0) |
| + sync_processor_->ProcessSyncChanges(FROM_HERE, changes); |
| +} |
| + |
| +void TypedUrlSyncableService::OnUrlVisited( |
| + history::URLVisitedDetails* details) { |
| + DCHECK(expected_loop_ == MessageLoop::current()); |
| + DCHECK(details); |
| + |
| + if (processing_syncer_changes_) |
| + return; // These are changes originating from us, ignore. |
| + if (!sync_processor_.get()) |
| + return; // Sync processor not yet initialized, don't sync. |
| + |
| + if (!ShouldSyncVisit(details)) |
| + return; |
| + |
| + // Create SyncChangeList |
| + syncer::SyncChangeList changes; |
| + |
| + CreateOrUpdateSyncNode(details->row, &changes); |
| + |
| + // Send SyncChangeList to server if there are any changes |
| + if (changes.size() > 0) |
| + sync_processor_->ProcessSyncChanges(FROM_HERE, changes); |
| +} |
| + |
| +void TypedUrlSyncableService::OnUrlsDeleted( |
| + history::URLsDeletedDetails* details) { |
| + DCHECK(expected_loop_ == MessageLoop::current()); |
| + DCHECK(details); |
| + |
| + if (processing_syncer_changes_) |
| + return; // These are changes originating from us, ignore. |
| + if (!sync_processor_.get()) |
| + return; // Sync processor not yet initialized, don't sync. |
| + |
| + // Ignore archivals (we don't want to sync them as deletions, to avoid |
| + // extra traffic up to the server, and also to make sure that a client with |
| + // a bad clock setting won't go on an archival rampage and delete all |
| + // history from every client). The server will gracefully age out the sync DB |
| + // entries when they've been idle for long enough. |
| + if (details->archived) |
| + return; |
| + |
| + // Create SyncChangeList |
| + syncer::SyncChangeList changes; |
| + |
| + if (details->all_history) { |
| + // Delete all synced typed urls |
| + for (std::set<GURL>::const_iterator url = synced_typed_urls_.begin(); |
| + url != synced_typed_urls_.end(); ++url) { |
| + history::VisitVector visits; |
| + history::URLRow row(*url); |
| + AddTypedUrlToChangeList(syncer::SyncChange::ACTION_DELETE, |
| + row, visits, url->spec(), &changes); |
| + } |
| + // Clear cache of server state |
| + synced_typed_urls_.clear(); |
| + } else { |
| + DCHECK(!details->rows.empty()); |
| + // Delete rows in details |
| + for (history::URLRows::const_iterator row = details->rows.begin(); |
| + row != details->rows.end(); ++row) { |
| + // Add specifics to change list for all synced urls that were deleted |
| + if (synced_typed_urls_.find(row->url()) != synced_typed_urls_.end()) { |
| + history::VisitVector visits; |
| + AddTypedUrlToChangeList(syncer::SyncChange::ACTION_DELETE, |
| + *row, visits, row->url().spec(), &changes); |
| + // Delete typed url from cache |
| + synced_typed_urls_.erase(row->url()); |
| + } |
| + } |
| + } |
| + |
| + // Send SyncChangeList to server if there are any changes |
| + if (changes.size() > 0) |
| + sync_processor_->ProcessSyncChanges(FROM_HERE, changes); |
| +} |
| + |
| +bool TypedUrlSyncableService::ShouldIgnoreUrl(const GURL& url) { |
| + // Ignore empty URLs. Not sure how this can happen (maybe import from other |
| + // busted browsers, or misuse of the history API, or just plain bugs) but we |
| + // can't deal with them. |
| + if (url.spec().empty()) |
| + return true; |
| + |
| + // Ignore local file URLs. |
| + if (url.SchemeIsFile()) |
| + return true; |
| + |
| + // Ignore localhost URLs. |
| + if (net::IsLocalhost(url.host())) |
| + return true; |
| + |
| + return false; |
| +} |
| + |
| +bool TypedUrlSyncableService::ShouldSyncVisit( |
| + history::URLVisitedDetails* details) { |
| + int typed_count = details->row.typed_count(); |
| + content::PageTransition transition = static_cast<content::PageTransition>( |
| + details->transition & content::PAGE_TRANSITION_CORE_MASK); |
| + |
| + // Just use an ad-hoc criteria to determine whether to ignore this |
| + // notification. For most users, the distribution of visits is roughly a bell |
| + // curve with a long tail - there are lots of URLs with < 5 visits so we want |
| + // to make sure we sync up every visit to ensure the proper ordering of |
| + // suggestions. But there are relatively few URLs with > 10 visits, and those |
| + // tend to be more broadly distributed such that there's no need to sync up |
| + // every visit to preserve their relative ordering. |
| + return (transition == content::PAGE_TRANSITION_TYPED && |
| + typed_count > 0 && |
| + (typed_count < kTypedUrlVisitThrottleThreshold || |
| + (typed_count % kTypedUrlVisitThrottleMultiple) == 0)); |
| +} |
| + |
// Appends an ADD or UPDATE change for |url| to |changes|. Returns false only
// if the visits could not be loaded from the history DB; URLs filtered out by
// ShouldIgnoreUrl() are silently skipped (returning true).
bool TypedUrlSyncableService::CreateOrUpdateSyncNode(
    history::URLRow url,
    syncer::SyncChangeList* changes) {
  // Callers only pass URLs with at least one typed visit.
  DCHECK_GT(url.typed_count(), 0);

  if (ShouldIgnoreUrl(url.url()))
    return true;

  // Get the visits for this node.
  history::VisitVector visit_vector;
  if (!FixupURLAndGetVisits(&url, &visit_vector)) {
    DLOG(ERROR) << "Could not load visits for url: " << url.url();
    return false;
  }
  DCHECK(!visit_vector.empty());

  std::string title = url.url().spec();
  syncer::SyncChange::SyncChangeType change_type;

  // If server already has URL, then send a sync update, else add it.
  // NOTE(review): is there a better way to differentiate between adds and
  // updates than this local cache of synced URLs?
  change_type =
      (synced_typed_urls_.find(url.url()) != synced_typed_urls_.end()) ?
      syncer::SyncChange::ACTION_UPDATE :
      syncer::SyncChange::ACTION_ADD;

  // Ensure cache of server state is up to date.
  synced_typed_urls_.insert(url.url());

  AddTypedUrlToChangeList(change_type, url, visit_vector, title, changes);

  return true;
}
| + |
// Serializes (|row|, |visits|) into a TypedUrlSpecifics and appends a
// SyncChange of |change_type| to |change_list|. |title| is used as the sync
// node title.
void TypedUrlSyncableService::AddTypedUrlToChangeList(
    syncer::SyncChange::SyncChangeType change_type,
    const history::URLRow& row,
    const history::VisitVector& visits,
    std::string title,
    syncer::SyncChangeList* change_list) {
  sync_pb::EntitySpecifics entity_specifics;
  sync_pb::TypedUrlSpecifics* typed_url = entity_specifics.mutable_typed_url();

  if (change_type == syncer::SyncChange::ACTION_DELETE) {
    // For deletions only the URL field is populated; callers in this file
    // pass an empty |visits| vector in that case.
    // NOTE(review): why do we special-case ACTION_DELETE here?
    typed_url->set_url(row.url().spec());
  } else {
    WriteToTypedUrlSpecifics(row, visits, typed_url);
  }

  change_list->push_back(
      syncer::SyncChange(FROM_HERE, change_type,
                         syncer::SyncData::CreateLocalData(
                             kTypedUrlTag, title, entity_specifics)));
}
| + |
// Fills |typed_url| from |url| and |visits|, trimming the visit list to at
// most kMaxTypedUrlVisits entries. RELOAD visits are always stripped; when
// trimming is needed, the oldest non-typed visits are dropped first, and
// typed visits are only dropped if typed visits alone exceed the limit.
void TypedUrlSyncableService::WriteToTypedUrlSpecifics(
    const history::URLRow& url,
    const history::VisitVector& visits,
    sync_pb::TypedUrlSpecifics* typed_url) {

  DCHECK(!url.last_visit().is_null());
  DCHECK(!visits.empty());
  // FixupURLAndGetVisits() has already forced last_visit to match the final
  // visit in the vector.
  DCHECK_EQ(url.last_visit().ToInternalValue(),
            visits.back().visit_time.ToInternalValue());

  typed_url->set_url(url.url().spec());
  typed_url->set_title(UTF16ToUTF8(url.title()));
  typed_url->set_hidden(url.hidden());

  DCHECK(CheckVisitOrdering(visits));

  // |only_typed| means only typed visits fit; |skip_count| is how many of the
  // oldest eligible visits must be dropped to get under the limit.
  bool only_typed = false;
  int skip_count = 0;

  if (visits.size() > static_cast<size_t>(kMaxTypedUrlVisits)) {
    int typed_count = 0;
    int total = 0;
    // Walk the passed-in visit vector and count the # of typed visits.
    for (history::VisitVector::const_iterator visit = visits.begin();
         visit != visits.end(); ++visit) {
      content::PageTransition transition = content::PageTransitionFromInt(
          visit->transition & content::PAGE_TRANSITION_CORE_MASK);
      // We ignore reload visits.
      if (transition == content::PAGE_TRANSITION_RELOAD)
        continue;
      ++total;
      if (transition == content::PAGE_TRANSITION_TYPED)
        ++typed_count;
    }
    // We should have at least one typed visit. This can sometimes happen if
    // the history DB has an inaccurate count for some reason (there's been
    // bugs in the history code in the past which has left users in the wild
    // with incorrect counts - http://crbug.com/84258).
    DCHECK(typed_count > 0);

    if (typed_count > kMaxTypedUrlVisits) {
      only_typed = true;
      skip_count = typed_count - kMaxTypedUrlVisits;
    } else if (total > kMaxTypedUrlVisits) {
      skip_count = total - kMaxTypedUrlVisits;
    }
  }

  // Second pass: emit the visits that survive the filtering above. Visits
  // are oldest-first, so skipping from the front drops the oldest ones.
  for (history::VisitVector::const_iterator visit = visits.begin();
       visit != visits.end(); ++visit) {
    content::PageTransition transition = content::PageTransitionFromInt(
        visit->transition & content::PAGE_TRANSITION_CORE_MASK);
    // Skip reload visits.
    if (transition == content::PAGE_TRANSITION_RELOAD)
      continue;

    // If we only have room for typed visits, then only add typed visits.
    if (only_typed && transition != content::PAGE_TRANSITION_TYPED)
      continue;

    if (skip_count > 0) {
      // We have too many entries to fit, so we need to skip the oldest ones.
      // Only skip typed URLs if there are too many typed URLs to fit.
      if (only_typed || transition != content::PAGE_TRANSITION_TYPED) {
        --skip_count;
        continue;
      }
    }
    typed_url->add_visits(visit->visit_time.ToInternalValue());
    typed_url->add_visit_transitions(visit->transition);
  }
  DCHECK_EQ(skip_count, 0);

  if (typed_url->visits_size() == 0) {
    // If we get here, it's because we don't actually have any TYPED visits
    // even though the visit's typed_count > 0 (corrupted typed_count). So
    // let's go ahead and add a RELOAD visit at the most recent visit since
    // it's not legal to have an empty visit array (yet another workaround
    // for http://crbug.com/84258).
    typed_url->add_visits(url.last_visit().ToInternalValue());
    typed_url->add_visit_transitions(content::PAGE_TRANSITION_RELOAD);
  }
  // Invariants the server relies on: non-empty, bounded, parallel arrays.
  CHECK_GT(typed_url->visits_size(), 0);
  CHECK_LE(typed_url->visits_size(), kMaxTypedUrlVisits);
  CHECK_EQ(typed_url->visits_size(), typed_url->visit_transitions_size());
}
| + |
// Loads up to kMaxVisitsToFetch visits for |url| into |visits| (oldest
// first), patching up known history-DB inconsistencies: an empty visit list
// gets a synthesized TYPED visit, and |url|'s last_visit is forced to match
// the newest fetched visit. Returns false on a DB read error. Also updates
// the |num_db_accesses_|/|num_db_errors_| counters.
bool TypedUrlSyncableService::FixupURLAndGetVisits(
    history::URLRow* url,
    history::VisitVector* visits) {
  ++num_db_accesses_;
  CHECK(history_backend_);
  if (!history_backend_->GetMostRecentVisitsForURL(
          url->id(), kMaxVisitsToFetch, visits)) {
    ++num_db_errors_;
    return false;
  }

  // Sometimes (due to a bug elsewhere in the history or sync code, or due to
  // a crash between adding a URL to the history database and updating the
  // visit DB) the visit vector for a URL can be empty. If this happens, just
  // create a new visit whose timestamp is the same as the last_visit time.
  // This is a workaround for http://crbug.com/84258.
  if (visits->empty()) {
    DVLOG(1) << "Found empty visits for URL: " << url->url();
    history::VisitRow visit(
        url->id(), url->last_visit(), 0, content::PAGE_TRANSITION_TYPED, 0);
    visits->push_back(visit);
  }

  // GetMostRecentVisitsForURL() returns the data in the opposite order that
  // we need it, so reverse it.
  std::reverse(visits->begin(), visits->end());

  // Sometimes, the last_visit field in the URL doesn't match the timestamp of
  // the last visit in our visit array (they come from different tables, so
  // crashes/bugs can cause them to mismatch), so just set it here.
  url->set_last_visit(visits->back().visit_time);
  DCHECK(CheckVisitOrdering(*visits));
  return true;
}