Chromium Code Reviews| Index: chrome/browser/predictors/resource_prefetch_predictor.cc |
| diff --git a/chrome/browser/predictors/resource_prefetch_predictor.cc b/chrome/browser/predictors/resource_prefetch_predictor.cc |
| index 110810f90de73cfedf34f8df57c6f6c38887df39..3ceecc269a6620ee2ea7c3b4384fcb1f974a6506 100644 |
| --- a/chrome/browser/predictors/resource_prefetch_predictor.cc |
| +++ b/chrome/browser/predictors/resource_prefetch_predictor.cc |
| @@ -75,15 +75,18 @@ class GetUrlVisitCountTask : public history::HistoryDBTask { |
| typedef base::Callback<void( |
| size_t, // Visit count. |
| const NavigationID&, |
| - const std::vector<URLRequestSummary>&)> VisitInfoCallback; |
| + const std::vector<URLRequestSummary>&, |
| + std::vector<GURL>*)> VisitInfoCallback; |
| GetUrlVisitCountTask( |
| const NavigationID& navigation_id, |
| - std::vector<URLRequestSummary>* requests, |
| + std::unique_ptr<std::vector<URLRequestSummary>> requests, |
| + std::unique_ptr<std::vector<GURL>> redirects, |
| VisitInfoCallback callback) |
| : visit_count_(0), |
| navigation_id_(navigation_id), |
| - requests_(requests), |
| + requests_(std::move(requests)), |
| + redirects_(std::move(redirects)), |
| callback_(callback) { |
| DCHECK(requests_.get()); |
| } |
| @@ -97,7 +100,7 @@ class GetUrlVisitCountTask : public history::HistoryDBTask { |
| } |
| void DoneRunOnMainThread() override { |
| - callback_.Run(visit_count_, navigation_id_, *requests_); |
| + callback_.Run(visit_count_, navigation_id_, *requests_, redirects_.get()); |
| } |
| private: |
| @@ -106,6 +109,7 @@ class GetUrlVisitCountTask : public history::HistoryDBTask { |
| int visit_count_; |
| NavigationID navigation_id_; |
| std::unique_ptr<std::vector<URLRequestSummary>> requests_; |
| + std::unique_ptr<std::vector<GURL>> redirects_; |
| VisitInfoCallback callback_; |
| DISALLOW_COPY_AND_ASSIGN(GetUrlVisitCountTask); |
| @@ -130,6 +134,8 @@ bool ResourcePrefetchPredictor::ShouldRecordRequest( |
| IsHandledMainPage(request); |
| } |
| +// comment to delete |
| + |
| // static |
| bool ResourcePrefetchPredictor::ShouldRecordResponse( |
| net::URLRequest* response) { |
| @@ -431,6 +437,8 @@ void ResourcePrefetchPredictor::OnMainFrameRequest( |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| DCHECK_EQ(INITIALIZED, initialization_state_); |
| + VLOG(1) << "Requested URL: " << request.navigation_id.main_frame_url; |
|
mattcary
2016/09/09 09:16:11
These logging messages are probably too verbose fo
alexilin
2016/09/09 13:59:19
Sure, I added them just to find out what's what.
|
| + |
| StartPrefetching(request.navigation_id); |
| // Cleanup older navigations. |
| @@ -448,6 +456,9 @@ void ResourcePrefetchPredictor::OnMainFrameResponse( |
| if (initialization_state_ != INITIALIZED) |
| return; |
| + VLOG(1) << "Main frame got response (" << |
| + response.navigation_id.main_frame_url << ")"; |
| + |
| StopPrefetching(response.navigation_id); |
| } |
| @@ -455,6 +466,9 @@ void ResourcePrefetchPredictor::OnMainFrameRedirect( |
| const URLRequestSummary& response) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| + VLOG(1) << "Redirect from " << response.navigation_id.main_frame_url |
| + << " to " << response.redirect_url; |
| + |
| // TODO(shishir): There are significant gains to be had here if we can use the |
| // start URL in a redirect chain as the key to start prefetching. We can save |
| // of redirect times considerably assuming that the redirect chains do not |
| @@ -464,6 +478,14 @@ void ResourcePrefetchPredictor::OnMainFrameRedirect( |
| StopPrefetching(response.navigation_id); |
| inflight_navigations_.erase(response.navigation_id); |
| + RedirectsMap::iterator redir_it = redirects_map_.find(response.navigation_id); |
| + std::vector<GURL>* redirects_chain = nullptr; |
| + if (redir_it != redirects_map_.end()) |
| + { |
| + redirects_chain = (redir_it->second).release(); |
| + redirects_map_.erase(redir_it); |
| + } |
| + |
| // A redirect will not lead to another OnMainFrameRequest call, so record the |
| // redirect url as a new navigation. |
| @@ -471,11 +493,18 @@ void ResourcePrefetchPredictor::OnMainFrameRedirect( |
| if (response.redirect_url.is_empty()) |
| return; |
| + if (redirects_chain == nullptr) |
| + redirects_chain = new std::vector<GURL>(); |
| + redirects_chain->push_back(response.navigation_id.main_frame_url); |
| + |
| NavigationID navigation_id(response.navigation_id); |
| navigation_id.main_frame_url = response.redirect_url; |
| inflight_navigations_.insert(std::make_pair( |
| navigation_id, |
| make_linked_ptr(new std::vector<URLRequestSummary>()))); |
| + redirects_map_.insert(std::make_pair( |
| + navigation_id, |
| + make_linked_ptr(redirects_chain))); |
| } |
| void ResourcePrefetchPredictor::OnSubresourceResponse( |
| @@ -495,6 +524,9 @@ void ResourcePrefetchPredictor::OnNavigationComplete( |
| const NavigationID& nav_id_without_timing_info) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| + VLOG(1) << "Navigation completed (" << |
| + nav_id_without_timing_info.main_frame_url << ")"; |
| + |
| NavigationMap::iterator nav_it = |
| inflight_navigations_.find(nav_id_without_timing_info); |
| if (nav_it == inflight_navigations_.end()) |
| @@ -506,6 +538,13 @@ void ResourcePrefetchPredictor::OnNavigationComplete( |
| std::vector<URLRequestSummary>* requests = (nav_it->second).release(); |
| inflight_navigations_.erase(nav_it); |
| + std::vector<GURL>* redirects = nullptr; |
| + RedirectsMap::iterator redir_it = redirects_map_.find(navigation_id); |
| + if (redir_it != redirects_map_.end()) { |
| + redirects = (redir_it->second).release(); |
| + redirects_map_.erase(redir_it); |
| + } |
| + |
| // Kick off history lookup to determine if we should record the URL. |
| history::HistoryService* history_service = |
| HistoryServiceFactory::GetForProfile(profile_, |
| @@ -513,7 +552,9 @@ void ResourcePrefetchPredictor::OnNavigationComplete( |
| DCHECK(history_service); |
| history_service->ScheduleDBTask( |
| std::unique_ptr<history::HistoryDBTask>(new GetUrlVisitCountTask( |
| - navigation_id, requests, |
| + navigation_id, |
| + std::unique_ptr<std::vector<URLRequestSummary>>(requests), |
| + std::unique_ptr<std::vector<GURL>>(redirects), |
| base::Bind(&ResourcePrefetchPredictor::OnVisitCountLookup, |
| AsWeakPtr()))), |
| &history_lookup_consumer_); |
| @@ -616,29 +657,47 @@ void ResourcePrefetchPredictor::StartInitialization() { |
| // Create local caches using the database as loaded. |
| std::unique_ptr<PrefetchDataMap> url_data_map(new PrefetchDataMap()); |
| std::unique_ptr<PrefetchDataMap> host_data_map(new PrefetchDataMap()); |
| + std::unique_ptr<RedirectDataMap> url_redirect_data_map( |
| + new RedirectDataMap()); |
| + std::unique_ptr<RedirectDataMap> host_redirect_data_map( |
| + new RedirectDataMap()); |
| PrefetchDataMap* url_data_ptr = url_data_map.get(); |
| PrefetchDataMap* host_data_ptr = host_data_map.get(); |
| + RedirectDataMap* url_redirect_data_ptr = url_redirect_data_map.get(); |
| + RedirectDataMap* host_redirect_data_ptr = host_redirect_data_map.get(); |
| BrowserThread::PostTaskAndReply( |
| BrowserThread::DB, FROM_HERE, |
| - base::Bind(&ResourcePrefetchPredictorTables::GetAllData, |
| - tables_, url_data_ptr, host_data_ptr), |
| + base::Bind(&ResourcePrefetchPredictorTables::GetAllData, tables_, |
| + url_data_ptr, |
| + host_data_ptr, |
| + url_redirect_data_ptr, |
| + host_redirect_data_ptr), |
| base::Bind(&ResourcePrefetchPredictor::CreateCaches, AsWeakPtr(), |
| - base::Passed(&url_data_map), base::Passed(&host_data_map))); |
| + base::Passed(&url_data_map), |
| + base::Passed(&host_data_map), |
| + base::Passed(&url_redirect_data_map), |
| + base::Passed(&host_redirect_data_map))); |
| } |
| void ResourcePrefetchPredictor::CreateCaches( |
| std::unique_ptr<PrefetchDataMap> url_data_map, |
| - std::unique_ptr<PrefetchDataMap> host_data_map) { |
| + std::unique_ptr<PrefetchDataMap> host_data_map, |
| + std::unique_ptr<RedirectDataMap> url_redirect_data_map, |
| + std::unique_ptr<RedirectDataMap> host_redirect_data_map) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| DCHECK_EQ(INITIALIZING, initialization_state_); |
| DCHECK(!url_table_cache_); |
| DCHECK(!host_table_cache_); |
| + DCHECK(!url_redirect_table_cache_); |
| + DCHECK(!host_redirect_table_cache_); |
| DCHECK(inflight_navigations_.empty()); |
| url_table_cache_.reset(url_data_map.release()); |
| host_table_cache_.reset(host_data_map.release()); |
| + url_redirect_table_cache_.reset(url_redirect_data_map.release()); |
| + host_redirect_table_cache_.reset(host_redirect_data_map.release()); |
| UMA_HISTOGRAM_COUNTS("ResourcePrefetchPredictor.UrlTableMainFrameUrlCount", |
| url_table_cache_->size()); |
| @@ -684,6 +743,15 @@ void ResourcePrefetchPredictor::CleanupAbandonedNavigations( |
| ++it; |
| } |
| } |
| + for (RedirectsMap::iterator it = redirects_map_.begin(); |
| + it != redirects_map_.end();) { |
| + if (it->first.IsSameRenderer(navigation_id) || |
| + (time_now - it->first.creation_time > max_navigation_age)) { |
| + redirects_map_.erase(it++); |
| + } else { |
| + ++it; |
| + } |
| + } |
| } |
| void ResourcePrefetchPredictor::DeleteAllUrls() { |
| @@ -750,12 +818,28 @@ void ResourcePrefetchPredictor::RemoveOldestEntryInPrefetchDataMap( |
| void ResourcePrefetchPredictor::OnVisitCountLookup( |
| size_t visit_count, |
| const NavigationID& navigation_id, |
| - const std::vector<URLRequestSummary>& requests) { |
| + const std::vector<URLRequestSummary>& requests, |
| + std::vector<GURL>* redirects) { |
| DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| UMA_HISTOGRAM_COUNTS("ResourcePrefetchPredictor.HistoryVisitCountForUrl", |
| visit_count); |
| + // (@alexilin) FOR DEBUG |
| + if (redirects != nullptr) { |
| + std::string chain_string; |
| + for (GURL& link : *redirects) |
| + { |
| + chain_string += link.spec(); |
| + chain_string += "->"; |
| + } |
| + chain_string += navigation_id.main_frame_url.spec(); |
| + VLOG(1) << "Redirects chain: " << chain_string; |
| + } else { |
| + VLOG(1) << "No redirects"; |
| + } |
| + // DELETE IT |
| + |
| // URL level data - merge only if we are already saving the data, or we it |
| // meets the cutoff requirement. |
| const std::string url_spec = navigation_id.main_frame_url.spec(); |
| @@ -765,17 +849,37 @@ void ResourcePrefetchPredictor::OnVisitCountLookup( |
| (visit_count >= config_.min_url_visit_count); |
| if (should_track_url && config_.IsURLLearningEnabled()) { |
| + VLOG(1) << "Learn by URL"; |
| LearnNavigation(url_spec, PREFETCH_KEY_TYPE_URL, requests, |
| config_.max_urls_to_track, url_table_cache_.get()); |
| + |
| + if (redirects != nullptr) { |
| + std::vector<std::string> redirect_urls; |
| + for (GURL& redirect: *redirects) { |
| + redirect_urls.push_back(redirect.spec()); |
| + } |
| + LearnRedirects(redirect_urls, PREFETCH_KEY_TYPE_URL, url_spec, |
| + config_.max_urls_to_track, |
| + url_redirect_table_cache_.get()); |
| + } |
| } |
| // Host level data - no cutoff, always learn the navigation if enabled. |
| if (config_.IsHostLearningEnabled()) { |
| - LearnNavigation(navigation_id.main_frame_url.host(), |
| - PREFETCH_KEY_TYPE_HOST, |
| - requests, |
| - config_.max_hosts_to_track, |
| - host_table_cache_.get()); |
| + VLOG(1) << "Learn by host"; |
| + const std::string host = navigation_id.main_frame_url.host(); |
| + LearnNavigation(host, PREFETCH_KEY_TYPE_HOST, requests, |
| + config_.max_hosts_to_track, host_table_cache_.get()); |
| + |
| + if (redirects != nullptr) { |
| + std::vector<std::string> redirect_hosts; |
| + for (GURL& redirect : *redirects) { |
| + redirect_hosts.push_back(redirect.host()); |
| + } |
| + LearnRedirects(redirect_hosts, PREFETCH_KEY_TYPE_HOST, host, |
| + config_.max_hosts_to_track, |
| + host_redirect_table_cache_.get()); |
| + } |
| } |
| // Remove the navigation from the results map. |
| @@ -929,6 +1033,71 @@ void ResourcePrefetchPredictor::LearnNavigation( |
| } |
| } |
| +void ResourcePrefetchPredictor::LearnRedirects( |
| + const std::vector<std::string>& keys, |
|
mattcary
2016/09/09 13:19:21
I wonder if it's necessary to store data on all th
alexilin
2016/09/09 13:59:19
Yes, you understand the code correctly.
But in the
mattcary
2016/09/09 14:14:26
Yeah, I can't disagree with anything you said. +1
|
| + PrefetchKeyType key_type, |
| + const std::string& final_redirect, |
| + size_t max_redirect_map_size, |
| + RedirectDataMap* redirect_map) { |
| + DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| + |
| + std::set<std::string> visited_keys; |
| + for (const std::string& key : keys) { |
| + |
| + if (visited_keys.find(key) != visited_keys.end()) |
| + continue; |
| + |
| + visited_keys.insert(key); |
| + |
| + RedirectDataMap::iterator cache_entry = redirect_map->find(key); |
| + if (cache_entry == redirect_map->end()) { |
| + if (redirect_map->size() >= max_redirect_map_size) { |
| + // TODO: remove oldest entry |
| + } |
| + |
| + cache_entry = redirect_map->insert(std::make_pair( |
| + key, RedirectData(key_type, key))).first; |
| + cache_entry->second.last_visit = base::Time::Now(); |
| + RedirectStatRow row_to_add; |
| + row_to_add.final_redirect = final_redirect; |
| + row_to_add.number_of_hits = 1; |
| + cache_entry->second.redirect_stats.push_back(row_to_add); |
| + } else { |
| + cache_entry->second.last_visit = base::Time::Now(); |
| + RedirectStatRows& redirect_stats = cache_entry->second.redirect_stats; |
| + bool need_to_add = true; |
| + |
| + for (RedirectStatRow& redirect_stat : redirect_stats) { |
| + if (redirect_stat.final_redirect == final_redirect) { |
| + need_to_add = false; |
| + ++redirect_stat.number_of_hits; |
| + redirect_stat.consecutive_misses = 0; |
| + } else { |
| + ++redirect_stat.number_of_misses; |
| + ++redirect_stat.consecutive_misses; |
| + } |
| + } |
| + |
| + if (need_to_add) { |
| + RedirectStatRow row_to_add; |
| + row_to_add.final_redirect = final_redirect; |
| + row_to_add.number_of_hits = 1; |
| + redirect_stats.push_back(row_to_add); |
| + } |
| + } |
| + |
| + RedirectStatRows& redirect_stats = cache_entry->second.redirect_stats; |
| + // TODO: sort and trim redirect stats |
| + |
| + // TODO: aggregate all changes in one DB access |
| + if (redirect_stats.empty()) { |
| + // TODO: remove database entry |
| + } else { |
| + // TODO: update database entry |
| + } |
| + } |
| +} |
| + |
| void ResourcePrefetchPredictor::OnURLsDeleted( |
| history::HistoryService* history_service, |
| bool all_history, |