Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(102)

Unified Diff: chrome/browser/predictors/resource_prefetch_predictor.cc

Issue 2321343002: Redirect handling in resource prefetch predictor (Closed)
Patch Set: test Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/predictors/resource_prefetch_predictor.cc
diff --git a/chrome/browser/predictors/resource_prefetch_predictor.cc b/chrome/browser/predictors/resource_prefetch_predictor.cc
index 110810f90de73cfedf34f8df57c6f6c38887df39..3ceecc269a6620ee2ea7c3b4384fcb1f974a6506 100644
--- a/chrome/browser/predictors/resource_prefetch_predictor.cc
+++ b/chrome/browser/predictors/resource_prefetch_predictor.cc
@@ -75,15 +75,18 @@ class GetUrlVisitCountTask : public history::HistoryDBTask {
typedef base::Callback<void(
size_t, // Visit count.
const NavigationID&,
- const std::vector<URLRequestSummary>&)> VisitInfoCallback;
+ const std::vector<URLRequestSummary>&,
+ std::vector<GURL>*)> VisitInfoCallback;
GetUrlVisitCountTask(
const NavigationID& navigation_id,
- std::vector<URLRequestSummary>* requests,
+ std::unique_ptr<std::vector<URLRequestSummary>> requests,
+ std::unique_ptr<std::vector<GURL>> redirects,
VisitInfoCallback callback)
: visit_count_(0),
navigation_id_(navigation_id),
- requests_(requests),
+ requests_(std::move(requests)),
+ redirects_(std::move(redirects)),
callback_(callback) {
DCHECK(requests_.get());
}
@@ -97,7 +100,7 @@ class GetUrlVisitCountTask : public history::HistoryDBTask {
}
void DoneRunOnMainThread() override {
- callback_.Run(visit_count_, navigation_id_, *requests_);
+ callback_.Run(visit_count_, navigation_id_, *requests_, redirects_.get());
}
private:
@@ -106,6 +109,7 @@ class GetUrlVisitCountTask : public history::HistoryDBTask {
int visit_count_;
NavigationID navigation_id_;
std::unique_ptr<std::vector<URLRequestSummary>> requests_;
+ std::unique_ptr<std::vector<GURL>> redirects_;
VisitInfoCallback callback_;
DISALLOW_COPY_AND_ASSIGN(GetUrlVisitCountTask);
@@ -130,6 +134,8 @@ bool ResourcePrefetchPredictor::ShouldRecordRequest(
IsHandledMainPage(request);
}
+// comment to delete
+
// static
bool ResourcePrefetchPredictor::ShouldRecordResponse(
net::URLRequest* response) {
@@ -431,6 +437,8 @@ void ResourcePrefetchPredictor::OnMainFrameRequest(
DCHECK_CURRENTLY_ON(BrowserThread::UI);
DCHECK_EQ(INITIALIZED, initialization_state_);
+ VLOG(1) << "Requested URL: " << request.navigation_id.main_frame_url;
mattcary 2016/09/09 09:16:11 These logging messages are probably too verbose fo
alexilin 2016/09/09 13:59:19 Sure, I added them just to find out what's what.
+
StartPrefetching(request.navigation_id);
// Cleanup older navigations.
@@ -448,6 +456,9 @@ void ResourcePrefetchPredictor::OnMainFrameResponse(
if (initialization_state_ != INITIALIZED)
return;
+ VLOG(1) << "Main frame got response (" <<
+ response.navigation_id.main_frame_url << ")";
+
StopPrefetching(response.navigation_id);
}
@@ -455,6 +466,9 @@ void ResourcePrefetchPredictor::OnMainFrameRedirect(
const URLRequestSummary& response) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
+ VLOG(1) << "Redirect from " << response.navigation_id.main_frame_url
+ << " to " << response.redirect_url;
+
// TODO(shishir): There are significant gains to be had here if we can use the
// start URL in a redirect chain as the key to start prefetching. We can save
// of redirect times considerably assuming that the redirect chains do not
@@ -464,6 +478,14 @@ void ResourcePrefetchPredictor::OnMainFrameRedirect(
StopPrefetching(response.navigation_id);
inflight_navigations_.erase(response.navigation_id);
+ RedirectsMap::iterator redir_it = redirects_map_.find(response.navigation_id);
+ std::vector<GURL>* redirects_chain = nullptr;
+ if (redir_it != redirects_map_.end())
+ {
+ redirects_chain = (redir_it->second).release();
+ redirects_map_.erase(redir_it);
+ }
+
// A redirect will not lead to another OnMainFrameRequest call, so record the
// redirect url as a new navigation.
@@ -471,11 +493,18 @@ void ResourcePrefetchPredictor::OnMainFrameRedirect(
if (response.redirect_url.is_empty())
return;
+ if (redirects_chain == nullptr)
+ redirects_chain = new std::vector<GURL>();
+ redirects_chain->push_back(response.navigation_id.main_frame_url);
+
NavigationID navigation_id(response.navigation_id);
navigation_id.main_frame_url = response.redirect_url;
inflight_navigations_.insert(std::make_pair(
navigation_id,
make_linked_ptr(new std::vector<URLRequestSummary>())));
+ redirects_map_.insert(std::make_pair(
+ navigation_id,
+ make_linked_ptr(redirects_chain)));
}
void ResourcePrefetchPredictor::OnSubresourceResponse(
@@ -495,6 +524,9 @@ void ResourcePrefetchPredictor::OnNavigationComplete(
const NavigationID& nav_id_without_timing_info) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
+ VLOG(1) << "Navigation completed (" <<
+ nav_id_without_timing_info.main_frame_url << ")";
+
NavigationMap::iterator nav_it =
inflight_navigations_.find(nav_id_without_timing_info);
if (nav_it == inflight_navigations_.end())
@@ -506,6 +538,13 @@ void ResourcePrefetchPredictor::OnNavigationComplete(
std::vector<URLRequestSummary>* requests = (nav_it->second).release();
inflight_navigations_.erase(nav_it);
+ std::vector<GURL>* redirects = nullptr;
+ RedirectsMap::iterator redir_it = redirects_map_.find(navigation_id);
+ if (redir_it != redirects_map_.end()) {
+ redirects = (redir_it->second).release();
+ redirects_map_.erase(redir_it);
+ }
+
// Kick off history lookup to determine if we should record the URL.
history::HistoryService* history_service =
HistoryServiceFactory::GetForProfile(profile_,
@@ -513,7 +552,9 @@ void ResourcePrefetchPredictor::OnNavigationComplete(
DCHECK(history_service);
history_service->ScheduleDBTask(
std::unique_ptr<history::HistoryDBTask>(new GetUrlVisitCountTask(
- navigation_id, requests,
+ navigation_id,
+ std::unique_ptr<std::vector<URLRequestSummary>>(requests),
+ std::unique_ptr<std::vector<GURL>>(redirects),
base::Bind(&ResourcePrefetchPredictor::OnVisitCountLookup,
AsWeakPtr()))),
&history_lookup_consumer_);
@@ -616,29 +657,47 @@ void ResourcePrefetchPredictor::StartInitialization() {
// Create local caches using the database as loaded.
std::unique_ptr<PrefetchDataMap> url_data_map(new PrefetchDataMap());
std::unique_ptr<PrefetchDataMap> host_data_map(new PrefetchDataMap());
+ std::unique_ptr<RedirectDataMap> url_redirect_data_map(
+ new RedirectDataMap());
+ std::unique_ptr<RedirectDataMap> host_redirect_data_map(
+ new RedirectDataMap());
PrefetchDataMap* url_data_ptr = url_data_map.get();
PrefetchDataMap* host_data_ptr = host_data_map.get();
+ RedirectDataMap* url_redirect_data_ptr = url_redirect_data_map.get();
+ RedirectDataMap* host_redirect_data_ptr = host_redirect_data_map.get();
BrowserThread::PostTaskAndReply(
BrowserThread::DB, FROM_HERE,
- base::Bind(&ResourcePrefetchPredictorTables::GetAllData,
- tables_, url_data_ptr, host_data_ptr),
+ base::Bind(&ResourcePrefetchPredictorTables::GetAllData, tables_,
+ url_data_ptr,
+ host_data_ptr,
+ url_redirect_data_ptr,
+ host_redirect_data_ptr),
base::Bind(&ResourcePrefetchPredictor::CreateCaches, AsWeakPtr(),
- base::Passed(&url_data_map), base::Passed(&host_data_map)));
+ base::Passed(&url_data_map),
+ base::Passed(&host_data_map),
+ base::Passed(&url_redirect_data_map),
+ base::Passed(&host_redirect_data_map)));
}
void ResourcePrefetchPredictor::CreateCaches(
std::unique_ptr<PrefetchDataMap> url_data_map,
- std::unique_ptr<PrefetchDataMap> host_data_map) {
+ std::unique_ptr<PrefetchDataMap> host_data_map,
+ std::unique_ptr<RedirectDataMap> url_redirect_data_map,
+ std::unique_ptr<RedirectDataMap> host_redirect_data_map) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
DCHECK_EQ(INITIALIZING, initialization_state_);
DCHECK(!url_table_cache_);
DCHECK(!host_table_cache_);
+ DCHECK(!url_redirect_table_cache_);
+ DCHECK(!host_redirect_table_cache_);
DCHECK(inflight_navigations_.empty());
url_table_cache_.reset(url_data_map.release());
host_table_cache_.reset(host_data_map.release());
+ url_redirect_table_cache_.reset(url_redirect_data_map.release());
+ host_redirect_table_cache_.reset(host_redirect_data_map.release());
UMA_HISTOGRAM_COUNTS("ResourcePrefetchPredictor.UrlTableMainFrameUrlCount",
url_table_cache_->size());
@@ -684,6 +743,15 @@ void ResourcePrefetchPredictor::CleanupAbandonedNavigations(
++it;
}
}
+ for (RedirectsMap::iterator it = redirects_map_.begin();
+ it != redirects_map_.end();) {
+ if (it->first.IsSameRenderer(navigation_id) ||
+ (time_now - it->first.creation_time > max_navigation_age)) {
+ redirects_map_.erase(it++);
+ } else {
+ ++it;
+ }
+ }
}
void ResourcePrefetchPredictor::DeleteAllUrls() {
@@ -750,12 +818,28 @@ void ResourcePrefetchPredictor::RemoveOldestEntryInPrefetchDataMap(
void ResourcePrefetchPredictor::OnVisitCountLookup(
size_t visit_count,
const NavigationID& navigation_id,
- const std::vector<URLRequestSummary>& requests) {
+ const std::vector<URLRequestSummary>& requests,
+ std::vector<GURL>* redirects) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
UMA_HISTOGRAM_COUNTS("ResourcePrefetchPredictor.HistoryVisitCountForUrl",
visit_count);
+ // (@alexilin) FOR DEBUG
+ if (redirects != nullptr) {
+ std::string chain_string;
+ for (GURL& link : *redirects)
+ {
+ chain_string += link.spec();
+ chain_string += "->";
+ }
+ chain_string += navigation_id.main_frame_url.spec();
+ VLOG(1) << "Redirects chain: " << chain_string;
+ } else {
+ VLOG(1) << "No redirects";
+ }
+ // DELETE IT
+
// URL level data - merge only if we are already saving the data, or we it
// meets the cutoff requirement.
const std::string url_spec = navigation_id.main_frame_url.spec();
@@ -765,17 +849,37 @@ void ResourcePrefetchPredictor::OnVisitCountLookup(
(visit_count >= config_.min_url_visit_count);
if (should_track_url && config_.IsURLLearningEnabled()) {
+ VLOG(1) << "Learn by URL";
LearnNavigation(url_spec, PREFETCH_KEY_TYPE_URL, requests,
config_.max_urls_to_track, url_table_cache_.get());
+
+ if (redirects != nullptr) {
+ std::vector<std::string> redirect_urls;
+ for (GURL& redirect: *redirects) {
+ redirect_urls.push_back(redirect.spec());
+ }
+ LearnRedirects(redirect_urls, PREFETCH_KEY_TYPE_URL, url_spec,
+ config_.max_urls_to_track,
+ url_redirect_table_cache_.get());
+ }
}
// Host level data - no cutoff, always learn the navigation if enabled.
if (config_.IsHostLearningEnabled()) {
- LearnNavigation(navigation_id.main_frame_url.host(),
- PREFETCH_KEY_TYPE_HOST,
- requests,
- config_.max_hosts_to_track,
- host_table_cache_.get());
+ VLOG(1) << "Learn by host";
+ const std::string host = navigation_id.main_frame_url.host();
+ LearnNavigation(host, PREFETCH_KEY_TYPE_HOST, requests,
+ config_.max_hosts_to_track, host_table_cache_.get());
+
+ if (redirects != nullptr) {
+ std::vector<std::string> redirect_hosts;
+ for (GURL& redirect : *redirects) {
+ redirect_hosts.push_back(redirect.host());
+ }
+ LearnRedirects(redirect_hosts, PREFETCH_KEY_TYPE_HOST, host,
+ config_.max_hosts_to_track,
+ host_redirect_table_cache_.get());
+ }
}
// Remove the navigation from the results map.
@@ -929,6 +1033,71 @@ void ResourcePrefetchPredictor::LearnNavigation(
}
}
+void ResourcePrefetchPredictor::LearnRedirects(
+ const std::vector<std::string>& keys,
mattcary 2016/09/09 13:19:21 I wonder if it's necessary to store data on all th
alexilin 2016/09/09 13:59:19 Yes, you understand the code correctly. But in the
mattcary 2016/09/09 14:14:26 Yeah, I can't disagree with anything you said. +1
+ PrefetchKeyType key_type,
+ const std::string& final_redirect,
+ size_t max_redirect_map_size,
+ RedirectDataMap* redirect_map) {
+ DCHECK_CURRENTLY_ON(BrowserThread::UI);
+
+ std::set<std::string> visited_keys;
+ for (const std::string& key : keys) {
+
+ if (visited_keys.find(key) != visited_keys.end())
+ continue;
+
+ visited_keys.insert(key);
+
+ RedirectDataMap::iterator cache_entry = redirect_map->find(key);
+ if (cache_entry == redirect_map->end()) {
+ if (redirect_map->size() >= max_redirect_map_size) {
+ // TODO: remove oldest entry
+ }
+
+ cache_entry = redirect_map->insert(std::make_pair(
+ key, RedirectData(key_type, key))).first;
+ cache_entry->second.last_visit = base::Time::Now();
+ RedirectStatRow row_to_add;
+ row_to_add.final_redirect = final_redirect;
+ row_to_add.number_of_hits = 1;
+ cache_entry->second.redirect_stats.push_back(row_to_add);
+ } else {
+ cache_entry->second.last_visit = base::Time::Now();
+ RedirectStatRows& redirect_stats = cache_entry->second.redirect_stats;
+ bool need_to_add = true;
+
+ for (RedirectStatRow& redirect_stat : redirect_stats) {
+ if (redirect_stat.final_redirect == final_redirect) {
+ need_to_add = false;
+ ++redirect_stat.number_of_hits;
+ redirect_stat.consecutive_misses = 0;
+ } else {
+ ++redirect_stat.number_of_misses;
+ ++redirect_stat.consecutive_misses;
+ }
+ }
+
+ if (need_to_add) {
+ RedirectStatRow row_to_add;
+ row_to_add.final_redirect = final_redirect;
+ row_to_add.number_of_hits = 1;
+ redirect_stats.push_back(row_to_add);
+ }
+ }
+
+ RedirectStatRows& redirect_stats = cache_entry->second.redirect_stats;
+ // TODO: sort and trim redirect stats
+
+ // TODO: aggregate all changes in one DB access
+ if (redirect_stats.empty()) {
+ // TODO: remove database entry
+ } else {
+ // TODO: update database entry
+ }
+ }
+}
+
void ResourcePrefetchPredictor::OnURLsDeleted(
history::HistoryService* history_service,
bool all_history,

Powered by Google App Engine
This is Rietveld 408576698