Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(40)

Unified Diff: chrome/browser/predictors/resource_prefetch_predictor.cc

Issue 2321343002: Redirect handling in resource prefetch predictor (Closed)
Patch Set: Redirects database schema changed Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/predictors/resource_prefetch_predictor.cc
diff --git a/chrome/browser/predictors/resource_prefetch_predictor.cc b/chrome/browser/predictors/resource_prefetch_predictor.cc
index 110810f90de73cfedf34f8df57c6f6c38887df39..bc9450aec741110d0b81c3961c4f0c60bc56391d 100644
--- a/chrome/browser/predictors/resource_prefetch_predictor.cc
+++ b/chrome/browser/predictors/resource_prefetch_predictor.cc
@@ -72,20 +72,20 @@ namespace predictors {
class GetUrlVisitCountTask : public history::HistoryDBTask {
public:
typedef ResourcePrefetchPredictor::URLRequestSummary URLRequestSummary;
- typedef base::Callback<void(
- size_t, // Visit count.
- const NavigationID&,
- const std::vector<URLRequestSummary>&)> VisitInfoCallback;
-
- GetUrlVisitCountTask(
- const NavigationID& navigation_id,
- std::vector<URLRequestSummary>* requests,
- VisitInfoCallback callback)
+ typedef ResourcePrefetchPredictor::Navigation Navigation;
+ typedef base::Callback<void(size_t, // Visit count.
+ const NavigationID&,
+ const Navigation&)>
+ VisitInfoCallback;
+
+ GetUrlVisitCountTask(const NavigationID& navigation_id,
+ std::unique_ptr<Navigation> navigation,
+ VisitInfoCallback callback)
: visit_count_(0),
navigation_id_(navigation_id),
- requests_(requests),
+ navigation_(std::move(navigation)),
callback_(callback) {
- DCHECK(requests_.get());
+ DCHECK(navigation_.get());
}
bool RunOnDBThread(history::HistoryBackend* backend,
@@ -97,7 +97,7 @@ class GetUrlVisitCountTask : public history::HistoryDBTask {
}
void DoneRunOnMainThread() override {
- callback_.Run(visit_count_, navigation_id_, *requests_);
+ callback_.Run(visit_count_, navigation_id_, *navigation_);
}
private:
@@ -105,7 +105,7 @@ class GetUrlVisitCountTask : public history::HistoryDBTask {
int visit_count_;
NavigationID navigation_id_;
- std::unique_ptr<std::vector<URLRequestSummary>> requests_;
+ std::unique_ptr<Navigation> navigation_;
VisitInfoCallback callback_;
DISALLOW_COPY_AND_ASSIGN(GetUrlVisitCountTask);
@@ -281,6 +281,11 @@ ResourcePrefetchPredictor::URLRequestSummary::URLRequestSummary(
ResourcePrefetchPredictor::URLRequestSummary::~URLRequestSummary() {
}
+ResourcePrefetchPredictor::Navigation::Navigation(const GURL& i_initial_url)
+ : initial_url(i_initial_url) {}
+
+ResourcePrefetchPredictor::Navigation::~Navigation() {}
+
// static
bool ResourcePrefetchPredictor::URLRequestSummary::SummarizeResponse(
const net::URLRequest& request,
@@ -437,9 +442,10 @@ void ResourcePrefetchPredictor::OnMainFrameRequest(
CleanupAbandonedNavigations(request.navigation_id);
// New empty navigation entry.
- inflight_navigations_.insert(std::make_pair(
- request.navigation_id,
- make_linked_ptr(new std::vector<URLRequestSummary>())));
+ const GURL& initial_url = request.navigation_id.main_frame_url;
+ inflight_navigations_.insert(
+ std::make_pair(request.navigation_id,
+ std::unique_ptr<Navigation>(new Navigation(initial_url))));
Benoit L 2016/09/19 13:53:47 nit: MakeUnique is preferred to this. (it's like C
}
void ResourcePrefetchPredictor::OnMainFrameResponse(
@@ -462,7 +468,14 @@ void ResourcePrefetchPredictor::OnMainFrameRedirect(
// Stop any inflight prefetching. Remove the older navigation.
StopPrefetching(response.navigation_id);
- inflight_navigations_.erase(response.navigation_id);
+
+ std::unique_ptr<Navigation> navigation;
+ NavigationMap::iterator nav_it =
+ inflight_navigations_.find(response.navigation_id);
+ if (nav_it != inflight_navigations_.end()) {
+ navigation.reset(nav_it->second.release());
+ inflight_navigations_.erase(nav_it);
+ }
// A redirect will not lead to another OnMainFrameRequest call, so record the
// redirect url as a new navigation.
@@ -471,11 +484,14 @@ void ResourcePrefetchPredictor::OnMainFrameRedirect(
if (response.redirect_url.is_empty())
return;
+ if (!navigation) {
+ navigation.reset(new Navigation(response.navigation_id.main_frame_url));
Benoit L 2016/09/19 13:53:47 nit: no braces when it fits on a single line (per
+ }
+
NavigationID navigation_id(response.navigation_id);
navigation_id.main_frame_url = response.redirect_url;
- inflight_navigations_.insert(std::make_pair(
- navigation_id,
- make_linked_ptr(new std::vector<URLRequestSummary>())));
+ inflight_navigations_.insert(
+ std::make_pair(navigation_id, std::move(navigation)));
}
void ResourcePrefetchPredictor::OnSubresourceResponse(
@@ -488,7 +504,7 @@ void ResourcePrefetchPredictor::OnSubresourceResponse(
return;
}
- nav_it->second->push_back(response);
+ nav_it->second->requests.push_back(response);
}
void ResourcePrefetchPredictor::OnNavigationComplete(
@@ -503,7 +519,7 @@ void ResourcePrefetchPredictor::OnNavigationComplete(
const NavigationID navigation_id(nav_it->first);
// Remove the navigation from the inflight navigations.
- std::vector<URLRequestSummary>* requests = (nav_it->second).release();
+ std::unique_ptr<Navigation> navigation = std::move(nav_it->second);
inflight_navigations_.erase(nav_it);
// Kick off history lookup to determine if we should record the URL.
@@ -513,7 +529,7 @@ void ResourcePrefetchPredictor::OnNavigationComplete(
DCHECK(history_service);
history_service->ScheduleDBTask(
std::unique_ptr<history::HistoryDBTask>(new GetUrlVisitCountTask(
- navigation_id, requests,
+ navigation_id, std::move(navigation),
base::Bind(&ResourcePrefetchPredictor::OnVisitCountLookup,
AsWeakPtr()))),
&history_lookup_consumer_);
@@ -616,29 +632,43 @@ void ResourcePrefetchPredictor::StartInitialization() {
// Create local caches using the database as loaded.
std::unique_ptr<PrefetchDataMap> url_data_map(new PrefetchDataMap());
std::unique_ptr<PrefetchDataMap> host_data_map(new PrefetchDataMap());
+ std::unique_ptr<RedirectDataMap> url_redirect_data_map(new RedirectDataMap());
+ std::unique_ptr<RedirectDataMap> host_redirect_data_map(
+ new RedirectDataMap());
PrefetchDataMap* url_data_ptr = url_data_map.get();
PrefetchDataMap* host_data_ptr = host_data_map.get();
+ RedirectDataMap* url_redirect_data_ptr = url_redirect_data_map.get();
+ RedirectDataMap* host_redirect_data_ptr = host_redirect_data_map.get();
BrowserThread::PostTaskAndReply(
BrowserThread::DB, FROM_HERE,
- base::Bind(&ResourcePrefetchPredictorTables::GetAllData,
- tables_, url_data_ptr, host_data_ptr),
+ base::Bind(&ResourcePrefetchPredictorTables::GetAllData, tables_,
Benoit L 2016/09/19 13:53:47 nit: Why not just url_data.get() here (and so on f
alexilin 2016/09/19 16:49:25 I'm still not very confident with this "passing po
+ url_data_ptr, host_data_ptr, url_redirect_data_ptr,
+ host_redirect_data_ptr),
base::Bind(&ResourcePrefetchPredictor::CreateCaches, AsWeakPtr(),
- base::Passed(&url_data_map), base::Passed(&host_data_map)));
+ base::Passed(&url_data_map), base::Passed(&host_data_map),
+ base::Passed(&url_redirect_data_map),
+ base::Passed(&host_redirect_data_map)));
}
void ResourcePrefetchPredictor::CreateCaches(
std::unique_ptr<PrefetchDataMap> url_data_map,
- std::unique_ptr<PrefetchDataMap> host_data_map) {
+ std::unique_ptr<PrefetchDataMap> host_data_map,
+ std::unique_ptr<RedirectDataMap> url_redirect_data_map,
+ std::unique_ptr<RedirectDataMap> host_redirect_data_map) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
DCHECK_EQ(INITIALIZING, initialization_state_);
DCHECK(!url_table_cache_);
DCHECK(!host_table_cache_);
+ DCHECK(!url_redirect_table_cache_);
+ DCHECK(!host_redirect_table_cache_);
DCHECK(inflight_navigations_.empty());
url_table_cache_.reset(url_data_map.release());
host_table_cache_.reset(host_data_map.release());
+ url_redirect_table_cache_.reset(url_redirect_data_map.release());
+ host_redirect_table_cache_.reset(host_redirect_data_map.release());
UMA_HISTOGRAM_COUNTS("ResourcePrefetchPredictor.UrlTableMainFrameUrlCount",
url_table_cache_->size());
@@ -715,11 +745,10 @@ void ResourcePrefetchPredictor::DeleteUrls(const history::URLRows& urls) {
}
if (!urls_to_delete.empty() || !hosts_to_delete.empty()) {
- BrowserThread::PostTask(BrowserThread::DB, FROM_HERE,
- base::Bind(&ResourcePrefetchPredictorTables::DeleteData,
- tables_,
- urls_to_delete,
- hosts_to_delete));
+ BrowserThread::PostTask(
+ BrowserThread::DB, FROM_HERE,
+ base::Bind(&ResourcePrefetchPredictorTables::DeleteResourceData,
+ tables_, urls_to_delete, hosts_to_delete));
}
}
@@ -740,22 +769,48 @@ void ResourcePrefetchPredictor::RemoveOldestEntryInPrefetchDataMap(
}
data_map->erase(key_to_delete);
- BrowserThread::PostTask(BrowserThread::DB, FROM_HERE,
- base::Bind(&ResourcePrefetchPredictorTables::DeleteSingleDataPoint,
- tables_,
- key_to_delete,
- key_type));
+ BrowserThread::PostTask(
+ BrowserThread::DB, FROM_HERE,
+ base::Bind(
+ &ResourcePrefetchPredictorTables::DeleteSingleResourceDataPoint,
+ tables_, key_to_delete, key_type));
+}
+
+void ResourcePrefetchPredictor::RemoveOldestEntryInRedirectDataMap(
+ PrefetchKeyType key_type,
+ RedirectDataMap* data_map) {
+ if (data_map->empty())
+ return;
+
+ base::Time oldest_time;
+ std::string key_to_delete;
+ for (RedirectDataMap::iterator it = data_map->begin(); it != data_map->end();
Benoit L 2016/09/19 13:53:47 nit: Can you use a range for loop here?
alexilin 2016/09/19 16:49:25 Of course!
+ ++it) {
+ if (key_to_delete.empty() || it->second.last_visit < oldest_time) {
+ key_to_delete = it->first;
+ oldest_time = it->second.last_visit;
+ }
+ }
+
+ data_map->erase(key_to_delete);
+ BrowserThread::PostTask(
+ BrowserThread::DB, FROM_HERE,
+ base::Bind(
+ &ResourcePrefetchPredictorTables::DeleteSingleRedirectDataPoint,
+ tables_, key_to_delete, key_type));
}
void ResourcePrefetchPredictor::OnVisitCountLookup(
size_t visit_count,
const NavigationID& navigation_id,
- const std::vector<URLRequestSummary>& requests) {
+ const Navigation& navigation) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
UMA_HISTOGRAM_COUNTS("ResourcePrefetchPredictor.HistoryVisitCountForUrl",
visit_count);
+ // TODO(alexilin) make only one request to DB thread
+
// URL level data - merge only if we are already saving the data, or we it
// meets the cutoff requirement.
const std::string url_spec = navigation_id.main_frame_url.spec();
@@ -765,17 +820,28 @@ void ResourcePrefetchPredictor::OnVisitCountLookup(
(visit_count >= config_.min_url_visit_count);
if (should_track_url && config_.IsURLLearningEnabled()) {
- LearnNavigation(url_spec, PREFETCH_KEY_TYPE_URL, requests,
+ LearnNavigation(url_spec, PREFETCH_KEY_TYPE_URL, navigation.requests,
config_.max_urls_to_track, url_table_cache_.get());
+
+ const std::string initial_url_spec = navigation.initial_url.spec();
+ if (url_spec != initial_url_spec) {
+ LearnRedirect(initial_url_spec, PREFETCH_KEY_TYPE_URL, url_spec,
+ config_.max_urls_to_track, url_redirect_table_cache_.get());
+ }
}
// Host level data - no cutoff, always learn the navigation if enabled.
if (config_.IsHostLearningEnabled()) {
- LearnNavigation(navigation_id.main_frame_url.host(),
- PREFETCH_KEY_TYPE_HOST,
- requests,
+ const std::string host = navigation_id.main_frame_url.host();
+ LearnNavigation(host, PREFETCH_KEY_TYPE_HOST, navigation.requests,
+ config_.max_hosts_to_track, host_table_cache_.get());
+
+ const std::string initial_host = navigation.initial_url.host();
+ if (host != initial_host) {
+ LearnRedirect(initial_host, PREFETCH_KEY_TYPE_HOST, host,
config_.max_hosts_to_track,
- host_table_cache_.get());
+ host_redirect_table_cache_.get());
+ }
}
// Remove the navigation from the results map.
@@ -823,7 +889,7 @@ void ResourcePrefetchPredictor::LearnNavigation(
resources_seen.insert(new_resources[i].resource_url);
}
} else {
- ResourceRows& old_resources = cache_entry->second.resources;
+ std::vector<ResourceRow>& old_resources = cache_entry->second.resources;
cache_entry->second.last_visit = base::Time::Now();
// Build indices over the data.
@@ -890,8 +956,8 @@ void ResourcePrefetchPredictor::LearnNavigation(
}
// Trim and sort the resources after the update.
- ResourceRows& resources = cache_entry->second.resources;
- for (ResourceRows::iterator it = resources.begin();
+ std::vector<ResourceRow>& resources = cache_entry->second.resources;
+ for (std::vector<ResourceRow>::iterator it = resources.begin();
Benoit L 2016/09/19 13:53:47 tiny nit: this is one of the trivial cases where a
it != resources.end();) {
it->UpdateScore();
if (it->consecutive_misses >= config_.max_consecutive_misses)
@@ -909,23 +975,110 @@ void ResourcePrefetchPredictor::LearnNavigation(
data_map->erase(key);
BrowserThread::PostTask(
BrowserThread::DB, FROM_HERE,
- base::Bind(&ResourcePrefetchPredictorTables::DeleteSingleDataPoint,
- tables_,
- key,
- key_type));
+ base::Bind(
+ &ResourcePrefetchPredictorTables::DeleteSingleResourceDataPoint,
+ tables_, key, key_type));
} else {
bool is_host = key_type == PREFETCH_KEY_TYPE_HOST;
PrefetchData empty_data(
!is_host ? PREFETCH_KEY_TYPE_HOST : PREFETCH_KEY_TYPE_URL,
std::string());
+ RedirectData empty_url_redirect_data(PREFETCH_KEY_TYPE_URL, std::string());
+ RedirectData empty_host_redirect_data(PREFETCH_KEY_TYPE_HOST,
+ std::string());
const PrefetchData& host_data = is_host ? cache_entry->second : empty_data;
const PrefetchData& url_data = is_host ? empty_data : cache_entry->second;
BrowserThread::PostTask(
BrowserThread::DB, FROM_HERE,
- base::Bind(&ResourcePrefetchPredictorTables::UpdateData,
- tables_,
- url_data,
- host_data));
+ base::Bind(&ResourcePrefetchPredictorTables::UpdateData, tables_,
+ url_data, host_data, empty_url_redirect_data,
+ empty_host_redirect_data));
+ }
+}
+
+void ResourcePrefetchPredictor::LearnRedirect(const std::string& key,
+ PrefetchKeyType key_type,
+ const std::string& final_redirect,
+ size_t max_redirect_map_size,
+ RedirectDataMap* redirect_map) {
+ DCHECK_CURRENTLY_ON(BrowserThread::UI);
+
+ RedirectDataMap::iterator cache_entry = redirect_map->find(key);
+ if (cache_entry == redirect_map->end()) {
+ if (redirect_map->size() >= max_redirect_map_size) {
+ RemoveOldestEntryInRedirectDataMap(key_type, redirect_map);
Benoit L 2016/09/19 13:53:47 nit: no braces.
+ }
+
+ cache_entry =
+ redirect_map->insert(std::make_pair(key, RedirectData(key_type, key)))
+ .first;
+ cache_entry->second.last_visit = base::Time::Now();
+ RedirectRow row_to_add;
+ row_to_add.url = final_redirect;
+ row_to_add.number_of_hits = 1;
+ cache_entry->second.redirects.push_back(row_to_add);
+ } else {
+ std::vector<RedirectRow>& redirects = cache_entry->second.redirects;
+ bool need_to_add = true;
+
+ cache_entry->second.last_visit = base::Time::Now();
+ for (RedirectRow& redirect : redirects) {
+ if (redirect.url == final_redirect) {
+ need_to_add = false;
+ ++redirect.number_of_hits;
+ redirect.consecutive_misses = 0;
+ } else {
+ ++redirect.number_of_misses;
+ ++redirect.consecutive_misses;
+ }
+ }
+
+ if (need_to_add) {
+ RedirectRow row_to_add;
+ row_to_add.url = final_redirect;
Benoit L 2016/09/19 13:53:47 The key is the final redirect url?
alexilin 2016/09/19 16:49:25 No. RedirectRow doesn't have a key at all, it is s
+ row_to_add.number_of_hits = 1;
+ redirects.push_back(row_to_add);
+ }
+ }
+
+ std::vector<RedirectRow>& redirects = cache_entry->second.redirects;
+ // Trim and sort redirects after update.
+ for (std::vector<RedirectRow>::iterator it = redirects.begin();
+ it != redirects.end();) {
+ it->UpdateScore();
+ if (it->consecutive_misses >= config_.max_consecutive_misses)
+ it = redirects.erase(it);
+ else
+ ++it;
+ }
+ ResourcePrefetchPredictorTables::SortRedirectRows(&redirects);
+
+ if (redirects.size() > config_.max_redirects_per_entry)
+ redirects.resize(config_.max_redirects_per_entry);
+
+ if (redirects.empty()) {
+ redirect_map->erase(cache_entry);
+ BrowserThread::PostTask(
+ BrowserThread::DB, FROM_HERE,
+ base::Bind(
+ &ResourcePrefetchPredictorTables::DeleteSingleRedirectDataPoint,
+ tables_, key, key_type));
+ } else {
+ bool is_host = key_type == PREFETCH_KEY_TYPE_HOST;
+ RedirectData empty_redirect_data(
+ !is_host ? PREFETCH_KEY_TYPE_HOST : PREFETCH_KEY_TYPE_URL,
+ std::string());
+ PrefetchData empty_url_data(PREFETCH_KEY_TYPE_URL, std::string());
+ PrefetchData empty_host_data(PREFETCH_KEY_TYPE_HOST, std::string());
+ const RedirectData& host_redirect_data =
+ is_host ? cache_entry->second : empty_redirect_data;
+ const RedirectData& url_redirect_data =
+ is_host ? empty_redirect_data : cache_entry->second;
+ BrowserThread::PostTask(
+ BrowserThread::DB, FROM_HERE,
+ base::Bind(&ResourcePrefetchPredictorTables::UpdateData, tables_,
+ empty_url_data, empty_host_data, url_redirect_data,
+ host_redirect_data));
}
}

Powered by Google App Engine
This is Rietveld 408576698