Index: components/precache/core/precache_fetcher.cc |
diff --git a/components/precache/core/precache_fetcher.cc b/components/precache/core/precache_fetcher.cc |
index 8f45bff3136821d26dcf92c8a563c76179d90e90..77766e1588e61f4af7fcaaf351b457915a93311c 100644 |
--- a/components/precache/core/precache_fetcher.cc |
+++ b/components/precache/core/precache_fetcher.cc |
@@ -6,6 +6,7 @@ |
#include <algorithm> |
#include <limits> |
+#include <set> |
#include <utility> |
#include <vector> |
@@ -53,11 +54,14 @@ const int kNoTracking = |
net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES | |
net::LOAD_DO_NOT_SEND_AUTH_DATA; |
-namespace { |
- |
-// The maximum number of URLFetcher requests that can be on flight in parallel. |
+// The maximum number of URLFetcher requests that can be in flight in parallel. |
+// Note that OnManifestFetchComplete and OnResourceFetchComplete perform |
+// remove_if operations which are O(kMaxParallelFetches). Those should be |
+// optimized before increasing this value significantly. |
const int kMaxParallelFetches = 10; |
+namespace { |
+ |
// The maximum for the Precache.Fetch.ResponseBytes.* histograms. We set this to |
// a number we expect to be in the 99th percentile for the histogram, give or |
// take. |
@@ -183,25 +187,25 @@ std::string GetResourceURLBase64Hash(const std::vector<GURL>& urls) { |
// hosts in |hosts_to_fetch|, is added to |hosts_info|. |
std::deque<ManifestHostInfo> RetrieveManifestInfo( |
const base::WeakPtr<PrecacheDatabase>& precache_database, |
- std::vector<std::string> hosts_to_fetch) { |
+ std::vector<std::pair<std::string, int64_t>> hosts_to_fetch) { |
std::deque<ManifestHostInfo> hosts_info; |
if (!precache_database) |
return hosts_info; |
for (const auto& host : hosts_to_fetch) { |
- auto referrer_host_info = precache_database->GetReferrerHost(host); |
+ auto referrer_host_info = precache_database->GetReferrerHost(host.first); |
if (referrer_host_info.id != PrecacheReferrerHostEntry::kInvalidId) { |
std::vector<GURL> used_urls, unused_urls; |
precache_database->GetURLListForReferrerHost(referrer_host_info.id, |
&used_urls, &unused_urls); |
hosts_info.push_back( |
- ManifestHostInfo(referrer_host_info.manifest_id, host, |
- GetResourceURLBase64Hash(used_urls), |
+ ManifestHostInfo(referrer_host_info.manifest_id, host.first, |
+ host.second, GetResourceURLBase64Hash(used_urls), |
GetResourceURLBase64Hash(unused_urls))); |
} else { |
hosts_info.push_back( |
- ManifestHostInfo(PrecacheReferrerHostEntry::kInvalidId, host, |
- std::string(), std::string())); |
+ ManifestHostInfo(PrecacheReferrerHostEntry::kInvalidId, host.first, |
+ host.second, std::string(), std::string())); |
} |
} |
return hosts_info; |
@@ -225,6 +229,10 @@ bool IsQuotaTimeExpired(const PrecacheQuota& quota, |
start_time + base::TimeDelta::FromDays(1) < time_now; |
} |
+// Returns the weight used to rank |resource|: its weight_ratio() multiplied by |
+// |host_visits|, the visit count of the host whose manifest listed it. |
+double ResourceWeight(const PrecacheResource& resource, int64_t host_visits) { |
+ return resource.weight_ratio() * host_visits; |
+} |
+ |
} // namespace |
PrecacheFetcher::Fetcher::Fetcher( |
@@ -301,9 +309,6 @@ void PrecacheFetcher::Fetcher::OnURLFetchDownloadProgress( |
// |current_network_bytes| is guaranteed to be non-negative, so this cast |
// is safe. |
static_cast<size_t>(current_network_bytes) > max_bytes_) { |
- VLOG(1) << "Cancelling " << url_ << ": (" << current << "/" << total |
- << ") is over " << max_bytes_; |
- |
// Call the completion callback, to attempt the next download, or to trigger |
// cleanup in precache_delegate_->OnDone(). |
response_bytes_ = current; |
@@ -363,30 +368,22 @@ void PrecacheFetcher::RecordCompletionStatistics( |
base::TimeDelta::FromSeconds(1), |
base::TimeDelta::FromHours(4), 50); |
- // Number of manifests for which we have downloaded all resources. |
- int manifests_completed = |
- unfinished_work.num_manifest_urls() - remaining_manifest_urls_to_fetch; |
- |
- // If there are resource URLs left to fetch, the last manifest is not yet |
- // completed. |
- if (remaining_resource_urls_to_fetch > 0) |
- --manifests_completed; |
- |
- DCHECK_GE(manifests_completed, 0); |
- int percent_completed = unfinished_work.num_manifest_urls() == 0 |
- ? 0 |
- : (static_cast<double>(manifests_completed) / |
- unfinished_work.num_manifest_urls() * 100); |
+ int num_total_resources = unfinished_work.num_resource_urls(); |
+ int percent_completed = |
+ num_total_resources == 0 |
+ ? 101 // Overflow bucket. |
+ : (100 * (static_cast<double>(num_total_resources - |
+ remaining_resource_urls_to_fetch) / |
+ num_total_resources)); |
UMA_HISTOGRAM_PERCENTAGE("Precache.Fetch.PercentCompleted", |
percent_completed); |
- UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total", |
- unfinished_work.total_bytes(), |
- 1, kMaxResponseBytes, 100); |
+ UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total", |
+ unfinished_work.total_bytes(), 1, |
+ kMaxResponseBytes, 100); |
UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Network", |
- unfinished_work.network_bytes(), |
- 1, kMaxResponseBytes, |
- 100); |
+ unfinished_work.network_bytes(), 1, |
+ kMaxResponseBytes, 100); |
} |
// static |
@@ -426,8 +423,10 @@ PrecacheFetcher::PrecacheFetcher( |
// keeping track of the current resource index. |
for (const auto& resource : unfinished_work->resource()) { |
if (resource.has_url() && resource.has_top_host_name()) { |
+ // Weight doesn't matter, as the resources have already been sorted by |
+ // this point. |
resources_to_fetch_.emplace_back(GURL(resource.url()), |
- resource.top_host_name()); |
+ resource.top_host_name(), 0); |
} |
} |
unfinished_work_ = std::move(unfinished_work); |
@@ -446,28 +445,24 @@ std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() { |
// If config fetch is incomplete, |top_hosts_to_fetch_| will be empty and |
// top hosts should be left as is in |unfinished_work_|. |
unfinished_work_->clear_top_host(); |
- for (const auto& top_host : top_hosts_to_fetch_) { |
+ for (const auto& top_host : top_hosts_fetching_) |
unfinished_work_->add_top_host()->set_hostname(top_host.hostname); |
- } |
+ for (const auto& top_host : top_hosts_to_fetch_) |
+ unfinished_work_->add_top_host()->set_hostname(top_host.hostname); |
+ } |
+ for (const auto& resource : resources_fetching_) { |
+ auto new_resource = unfinished_work_->add_resource(); |
+ new_resource->set_url(resource.url.spec()); |
+ new_resource->set_top_host_name(resource.referrer); |
} |
for (const auto& resource : resources_to_fetch_) { |
auto new_resource = unfinished_work_->add_resource(); |
- new_resource->set_url(resource.first.spec()); |
- new_resource->set_top_host_name(resource.second); |
- } |
- for (const auto& it : pool_.elements()) { |
- const Fetcher* fetcher = it.first; |
- GURL config_url = |
- config_url_.is_empty() ? GetDefaultConfigURL() : config_url_; |
- if (fetcher->is_resource_request()) { |
- auto resource = unfinished_work_->add_resource(); |
- resource->set_url(fetcher->url().spec()); |
- resource->set_top_host_name(fetcher->referrer()); |
- } else if (fetcher->url() != config_url) { |
- unfinished_work_->add_top_host()->set_hostname(fetcher->referrer()); |
- } |
+ new_resource->set_url(resource.url.spec()); |
+ new_resource->set_top_host_name(resource.referrer); |
} |
+ top_hosts_fetching_.clear(); |
top_hosts_to_fetch_.clear(); |
+ resources_fetching_.clear(); |
resources_to_fetch_.clear(); |
pool_.DeleteAll(); |
return std::move(unfinished_work_); |
@@ -488,7 +483,6 @@ void PrecacheFetcher::Start() { |
// Fetch the precache configuration settings from the server. |
DCHECK(pool_.IsEmpty()) << "All parallel requests should be available"; |
- VLOG(3) << "Fetching " << config_url; |
pool_.Add(base::MakeUnique<Fetcher>( |
request_context_.get(), config_url, std::string(), |
base::Bind(&PrecacheFetcher::OnConfigFetchComplete, AsWeakPtr()), |
@@ -498,35 +492,35 @@ void PrecacheFetcher::Start() { |
+// Starts fetches for entries at the front of |resources_to_fetch_| for as long |
+// as that queue is non-empty and |pool_| reports that it is available. |
void PrecacheFetcher::StartNextResourceFetch() { |
DCHECK(unfinished_work_->has_config_settings()); |
while (!resources_to_fetch_.empty() && pool_.IsAvailable()) { |
- const auto& resource = resources_to_fetch_.front(); |
+ ResourceInfo& resource = resources_to_fetch_.front(); |
+ // The per-fetch byte cap is the smallest of: the remaining quota, the |
+ // configured per-resource limit, and what is left of max_bytes_total. |
+ // NOTE(review): the subtraction presumably relies on StartNextFetch having |
+ // already bailed out when total_bytes() exceeds max_bytes_total() - confirm. |
const size_t max_bytes = std::min( |
quota_.remaining(), |
std::min(unfinished_work_->config_settings().max_bytes_per_resource(), |
unfinished_work_->config_settings().max_bytes_total() - |
unfinished_work_->total_bytes())); |
- VLOG(3) << "Fetching " << resource.first << " " << resource.second; |
pool_.Add(base::MakeUnique<Fetcher>( |
- request_context_.get(), resource.first, resource.second, |
+ request_context_.get(), resource.url, resource.referrer, |
base::Bind(&PrecacheFetcher::OnResourceFetchComplete, AsWeakPtr()), |
true /* is_resource_request */, max_bytes)); |
+ // |resource| is moved into |resources_fetching_| only after the Fetcher has |
+ // been built from its fields; the moved-from front element is then popped. |
+ resources_fetching_.push_back(std::move(resource)); |
resources_to_fetch_.pop_front(); |
} |
} |
-void PrecacheFetcher::StartNextManifestFetch() { |
- if (top_hosts_to_fetch_.empty() || !pool_.IsAvailable()) |
- return; |
- |
- // We only fetch one manifest at a time to keep the size of |
- // resources_to_fetch_ as small as possible. |
- VLOG(3) << "Fetching " << top_hosts_to_fetch_.front().manifest_url; |
- pool_.Add(base::MakeUnique<Fetcher>( |
- request_context_.get(), top_hosts_to_fetch_.front().manifest_url, |
- top_hosts_to_fetch_.front().hostname, |
- base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr()), |
- false /* is_resource_request */, std::numeric_limits<int32_t>::max())); |
- top_hosts_to_fetch_.pop_front(); |
+// Starts manifest fetches for hosts at the front of |top_hosts_to_fetch_| for |
+// as long as that queue is non-empty and |pool_| reports that it is available. |
+void PrecacheFetcher::StartNextManifestFetches() { |
+ // We fetch as many manifests at a time as possible, as we need all resource |
+ // URLs in memory in order to rank them. |
+ while (!top_hosts_to_fetch_.empty() && pool_.IsAvailable()) { |
+ ManifestHostInfo& top_host = top_hosts_to_fetch_.front(); |
+ // The host's visit count is bound into the completion callback, so |
+ // OnManifestFetchComplete receives it as its |host_visits| argument. |
+ pool_.Add(base::MakeUnique<Fetcher>( |
+ request_context_.get(), top_host.manifest_url, top_host.hostname, |
+ base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr(), |
+ top_host.visits), |
+ false /* is_resource_request */, std::numeric_limits<int32_t>::max())); |
+ // |top_host| is moved into |top_hosts_fetching_| only after the Fetcher has |
+ // been built from its fields; the moved-from front element is then popped. |
+ top_hosts_fetching_.push_back(std::move(top_host)); |
+ top_hosts_to_fetch_.pop_front(); |
+ } |
+} |
void PrecacheFetcher::NotifyDone( |
@@ -545,23 +539,14 @@ void PrecacheFetcher::StartNextFetch() { |
if ((unfinished_work_->total_bytes() > |
unfinished_work_->config_settings().max_bytes_total()) || |
quota_.remaining() == 0) { |
- size_t pending_manifests_in_pool = 0; |
- size_t pending_resources_in_pool = 0; |
- for (const auto& element_pair : pool_.elements()) { |
- const Fetcher* fetcher = element_pair.first; |
- if (fetcher->is_resource_request()) |
- pending_resources_in_pool++; |
- else if (fetcher->url() != config_url_) |
- pending_manifests_in_pool++; |
- } |
pool_.DeleteAll(); |
- NotifyDone(top_hosts_to_fetch_.size() + pending_manifests_in_pool, |
- resources_to_fetch_.size() + pending_resources_in_pool); |
+ NotifyDone(top_hosts_to_fetch_.size() + top_hosts_fetching_.size(), |
+ resources_to_fetch_.size() + resources_fetching_.size()); |
return; |
} |
StartNextResourceFetch(); |
- StartNextManifestFetch(); |
+ StartNextManifestFetches(); |
if (top_hosts_to_fetch_.empty() && resources_to_fetch_.empty() && |
pool_.IsEmpty()) { |
// There are no more URLs to fetch, so end the precache cycle. |
@@ -589,9 +574,7 @@ void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) { |
void PrecacheFetcher::DetermineManifests() { |
DCHECK(unfinished_work_->has_config_settings()); |
- std::vector<std::string> top_hosts_to_fetch; |
- std::unique_ptr<std::deque<ManifestHostInfo>> top_hosts_info( |
- new std::deque<ManifestHostInfo>); |
+ std::vector<std::pair<std::string, int64_t>> top_hosts_to_fetch; |
// Keep track of manifest URLs that are being fetched, in order to elide |
// duplicates. |
std::set<base::StringPiece> seen_top_hosts; |
@@ -602,7 +585,7 @@ void PrecacheFetcher::DetermineManifests() { |
if (rank > unfinished_work_->config_settings().top_sites_count()) |
break; |
if (seen_top_hosts.insert(host.hostname()).second) |
- top_hosts_to_fetch.push_back(host.hostname()); |
+ top_hosts_to_fetch.emplace_back(host.hostname(), host.visits()); |
} |
// Attempt to fetch manifests for starting hosts up to the maximum top sites |
@@ -613,12 +596,15 @@ void PrecacheFetcher::DetermineManifests() { |
if (resources_to_fetch_.empty()) { |
for (const std::string& host : |
unfinished_work_->config_settings().forced_site()) { |
+ // We add a forced site with visits == 0, which means its resources will |
+ // be downloaded last. TODO(twifkak): Consider removing support for |
+ // forced_site. |
if (seen_top_hosts.insert(host).second) |
- top_hosts_to_fetch.push_back(host); |
+ top_hosts_to_fetch.emplace_back(host, 0); |
} |
} |
- // We only fetch one manifest at a time to keep the size of |
- // resources_to_fetch_ as small as possible. |
+ // We retrieve manifest usage and quota info from the local database before |
+ // fetching the manifests. |
PostTaskAndReplyWithResult( |
db_task_runner_.get(), FROM_HERE, |
base::Bind(&RetrieveManifestInfo, precache_database_, |
@@ -636,7 +622,7 @@ void PrecacheFetcher::OnManifestInfoRetrieved( |
// is invalid. |
top_hosts_to_fetch_.clear(); |
unfinished_work_->set_num_manifest_urls(manifests_info.size()); |
- NotifyDone(manifests_info.size(), resources_to_fetch_.size()); |
+ NotifyDone(manifests_info.size(), resources_to_rank_.size()); |
return; |
} |
@@ -683,10 +669,12 @@ void PrecacheFetcher::OnQuotaInfoRetrieved(const PrecacheQuota& quota) { |
+// Stores each argument in the member of the same name. |visits| is the visit |
+// count recorded for |hostname|. |
ManifestHostInfo::ManifestHostInfo(int64_t manifest_id, |
const std::string& hostname, |
+ int64_t visits, |
const std::string& used_url_hash, |
const std::string& unused_url_hash) |
: manifest_id(manifest_id), |
hostname(hostname), |
+ visits(visits), |
used_url_hash(used_url_hash), |
unused_url_hash(unused_url_hash) {} |
@@ -696,7 +684,19 @@ ManifestHostInfo::ManifestHostInfo(ManifestHostInfo&&) = default; |
ManifestHostInfo& ManifestHostInfo::operator=(ManifestHostInfo&&) = default; |
-void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) { |
+// Stores |url|, |referrer| and |weight| in the members of the same name. |
+ResourceInfo::ResourceInfo(const GURL& url, |
+ const std::string& referrer, |
+ double weight) |
+ : url(url), referrer(referrer), weight(weight) {} |
+ |
+ResourceInfo::~ResourceInfo() {} |
+ |
+// Move construction and move assignment use the compiler-generated versions. |
+ResourceInfo::ResourceInfo(ResourceInfo&&) = default; |
+ |
+ResourceInfo& ResourceInfo::operator=(ResourceInfo&&) = default; |
+ |
+void PrecacheFetcher::OnManifestFetchComplete(int64_t host_visits, |
+ const Fetcher& source) { |
DCHECK(unfinished_work_->has_config_settings()); |
UpdateStats(source.response_bytes(), source.network_response_bytes()); |
if (source.network_url_fetcher() == nullptr) { |
@@ -715,7 +715,9 @@ void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) { |
manifest.resource(i).has_url()) { |
GURL url(manifest.resource(i).url()); |
if (url.is_valid()) { |
- resources_to_fetch_.emplace_back(url, source.referrer()); |
+ double weight = ResourceWeight(manifest.resource(i), host_visits); |
+ if (weight >= unfinished_work_->config_settings().min_weight()) |
+ resources_to_rank_.emplace_back(url, source.referrer(), weight); |
} |
} |
} |
@@ -726,10 +728,43 @@ void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) { |
} |
} |
+ top_hosts_fetching_.remove_if([&source](const ManifestHostInfo& top_host) { |
+ return top_host.manifest_url == source.url(); |
+ }); |
+ |
pool_.Delete(source); |
+ |
+ if (top_hosts_to_fetch_.empty() && top_hosts_fetching_.empty()) |
+ QueueResourcesForFetch(); |
+ |
StartNextFetch(); |
} |
+// Turns the resources collected in |resources_to_rank_| into the fetch queue: |
+// moves them into |resources_to_fetch_|, optionally sorts them by weight, |
+// truncates the queue to the configured count, and records the final size. |
+void PrecacheFetcher::QueueResourcesForFetch() { |
+ // Done fetching manifests. Now move resources_to_rank_ into |
+ // resources_to_fetch_, so that StartNextFetch will begin fetching resources. |
+ resources_to_fetch_ = std::move(resources_to_rank_); |
+ |
+ if (unfinished_work_->config_settings().global_ranking()) { |
+ // Sort resources_to_fetch_ by descending weight. |
+ // stable_sort keeps equal-weight resources in their existing relative order. |
+ std::stable_sort(resources_to_fetch_.begin(), resources_to_fetch_.end(), |
+ [](const ResourceInfo& first, const ResourceInfo& second) { |
+ return first.weight > second.weight; |
+ }); |
+ } |
+ |
+ // Truncate to size |total_resources_count|. |
+ // This happens whether or not the global_ranking sort above ran. |
+ const size_t num_resources = std::min( |
+ resources_to_fetch_.size(), |
+ static_cast<size_t>( |
+ unfinished_work_->config_settings().total_resources_count())); |
+ resources_to_fetch_.erase(resources_to_fetch_.begin() + num_resources, |
+ resources_to_fetch_.end()); |
+ |
+ // Save denominator for PercentCompleted UMA. |
+ unfinished_work_->set_num_resource_urls(resources_to_fetch_.size()); |
+} |
+ |
void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) { |
UpdateStats(source.response_bytes(), source.network_response_bytes()); |
@@ -739,6 +774,10 @@ void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) { |
source.url(), source.referrer(), base::Time::Now(), |
source.was_cached(), source.response_bytes())); |
+ resources_fetching_.remove_if([&source](const ResourceInfo& resource) { |
+ return resource.url == source.url(); |
+ }); |
+ |
pool_.Delete(source); |
// The resource has already been put in the cache during the fetch process, so |