Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(8623)

Unified Diff: components/precache/core/precache_fetcher.cc

Issue 2403193002: Precache: Optionally rank resources-to-precache globally. (Closed)
Patch Set: Code readability improvements per bengr. Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « components/precache/core/precache_fetcher.h ('k') | components/precache/core/precache_fetcher_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: components/precache/core/precache_fetcher.cc
diff --git a/components/precache/core/precache_fetcher.cc b/components/precache/core/precache_fetcher.cc
index 8f45bff3136821d26dcf92c8a563c76179d90e90..9a8fa6de738bddfb9b97cb12d6c5760f57d417df 100644
--- a/components/precache/core/precache_fetcher.cc
+++ b/components/precache/core/precache_fetcher.cc
@@ -6,6 +6,7 @@
#include <algorithm>
#include <limits>
+#include <set>
#include <utility>
#include <vector>
@@ -53,11 +54,14 @@ const int kNoTracking =
net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES |
net::LOAD_DO_NOT_SEND_AUTH_DATA;
-namespace {
-
// The maximum number of URLFetcher requests that can be on flight in parallel.
bengr 2016/10/18 18:57:20 nit: on -> in
twifkak 2016/10/18 19:52:27 Done.
+// Note that OnManifestFetchComplete and OnResourceFetchComplete perform
+// remove_if operations which are O(kMaxParallelFetches). Those should be
+// optimized before increasing this value significantly.
const int kMaxParallelFetches = 10;
+namespace {
+
// The maximum for the Precache.Fetch.ResponseBytes.* histograms. We set this to
// a number we expect to be in the 99th percentile for the histogram, give or
// take.
@@ -183,25 +187,26 @@ std::string GetResourceURLBase64Hash(const std::vector<GURL>& urls) {
// hosts in |hosts_to_fetch|, is added to |hosts_info|.
std::deque<ManifestHostInfo> RetrieveManifestInfo(
const base::WeakPtr<PrecacheDatabase>& precache_database,
- std::vector<std::string> hosts_to_fetch) {
+ std::vector<std::pair<std::string, int64_t>> hosts_to_fetch) {
+ VLOG(9) << "RetrieveManifestInfo";
std::deque<ManifestHostInfo> hosts_info;
if (!precache_database)
return hosts_info;
for (const auto& host : hosts_to_fetch) {
- auto referrer_host_info = precache_database->GetReferrerHost(host);
+ auto referrer_host_info = precache_database->GetReferrerHost(host.first);
if (referrer_host_info.id != PrecacheReferrerHostEntry::kInvalidId) {
std::vector<GURL> used_urls, unused_urls;
precache_database->GetURLListForReferrerHost(referrer_host_info.id,
&used_urls, &unused_urls);
hosts_info.push_back(
- ManifestHostInfo(referrer_host_info.manifest_id, host,
- GetResourceURLBase64Hash(used_urls),
+ ManifestHostInfo(referrer_host_info.manifest_id, host.first,
+ host.second, GetResourceURLBase64Hash(used_urls),
GetResourceURLBase64Hash(unused_urls)));
} else {
hosts_info.push_back(
- ManifestHostInfo(PrecacheReferrerHostEntry::kInvalidId, host,
- std::string(), std::string()));
+ ManifestHostInfo(PrecacheReferrerHostEntry::kInvalidId, host.first,
+ host.second, std::string(), std::string()));
}
}
return hosts_info;
@@ -209,6 +214,7 @@ std::deque<ManifestHostInfo> RetrieveManifestInfo(
PrecacheQuota RetrieveQuotaInfo(
const base::WeakPtr<PrecacheDatabase>& precache_database) {
+ VLOG(9) << "RetrieveQuotaInfo";
PrecacheQuota quota;
if (precache_database) {
quota = precache_database->GetQuota();
@@ -363,30 +369,24 @@ void PrecacheFetcher::RecordCompletionStatistics(
base::TimeDelta::FromSeconds(1),
base::TimeDelta::FromHours(4), 50);
- // Number of manifests for which we have downloaded all resources.
- int manifests_completed =
- unfinished_work.num_manifest_urls() - remaining_manifest_urls_to_fetch;
+ int num_total_resources = unfinished_work.num_resource_urls();
+ int percent_completed =
+ num_total_resources == 0
bengr 2016/10/18 18:57:20 I wonder if we should be reporting num_total_resou
twifkak 2016/10/18 19:52:27 I'm amenable to that. My intuition is that this is
bengr 2016/10/18 21:50:31 sgtm
twifkak 2016/10/18 22:55:47 Done.
+ ? 0
+ : (100 * (static_cast<double>(num_total_resources -
+ remaining_resource_urls_to_fetch) /
+ num_total_resources));
- // If there are resource URLs left to fetch, the last manifest is not yet
- // completed.
- if (remaining_resource_urls_to_fetch > 0)
- --manifests_completed;
-
- DCHECK_GE(manifests_completed, 0);
- int percent_completed = unfinished_work.num_manifest_urls() == 0
- ? 0
- : (static_cast<double>(manifests_completed) /
- unfinished_work.num_manifest_urls() * 100);
+ VLOG(6) << "Percent completed: " << percent_completed;
UMA_HISTOGRAM_PERCENTAGE("Precache.Fetch.PercentCompleted",
percent_completed);
- UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total",
- unfinished_work.total_bytes(),
- 1, kMaxResponseBytes, 100);
+ UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total",
+ unfinished_work.total_bytes(), 1,
+ kMaxResponseBytes, 100);
UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Network",
- unfinished_work.network_bytes(),
- 1, kMaxResponseBytes,
- 100);
+ unfinished_work.network_bytes(), 1,
+ kMaxResponseBytes, 100);
}
// static
@@ -399,6 +399,7 @@ PrecacheFetcher::PrecacheFetcher(
net::URLRequestContextGetter* request_context,
const GURL& config_url,
const std::string& manifest_url_prefix,
+ bool global_ranking,
std::unique_ptr<PrecacheUnfinishedWork> unfinished_work,
uint32_t experiment_id,
const base::WeakPtr<PrecacheDatabase>& precache_database,
@@ -407,6 +408,7 @@ PrecacheFetcher::PrecacheFetcher(
: request_context_(request_context),
config_url_(config_url),
manifest_url_prefix_(manifest_url_prefix),
+ global_ranking_(global_ranking),
precache_database_(precache_database),
db_task_runner_(std::move(db_task_runner)),
precache_delegate_(precache_delegate),
@@ -426,8 +428,10 @@ PrecacheFetcher::PrecacheFetcher(
// keeping track of the current resource index.
for (const auto& resource : unfinished_work->resource()) {
if (resource.has_url() && resource.has_top_host_name()) {
+ // Weight doesn't matter, as the resources have already been sorted by
+ // this point.
resources_to_fetch_.emplace_back(GURL(resource.url()),
- resource.top_host_name());
+ resource.top_host_name(), 0);
}
}
unfinished_work_ = std::move(unfinished_work);
@@ -446,28 +450,24 @@ std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() {
// If config fetch is incomplete, |top_hosts_to_fetch_| will be empty and
// top hosts should be left as is in |unfinished_work_|.
unfinished_work_->clear_top_host();
- for (const auto& top_host : top_hosts_to_fetch_) {
+ for (const auto& top_host : top_hosts_fetching_)
+ unfinished_work_->add_top_host()->set_hostname(top_host.hostname);
+ for (const auto& top_host : top_hosts_to_fetch_)
unfinished_work_->add_top_host()->set_hostname(top_host.hostname);
- }
+ }
+ for (const auto& resource : resources_fetching_) {
+ auto new_resource = unfinished_work_->add_resource();
+ new_resource->set_url(resource.url.spec());
+ new_resource->set_top_host_name(resource.referrer);
}
for (const auto& resource : resources_to_fetch_) {
auto new_resource = unfinished_work_->add_resource();
- new_resource->set_url(resource.first.spec());
- new_resource->set_top_host_name(resource.second);
- }
- for (const auto& it : pool_.elements()) {
- const Fetcher* fetcher = it.first;
- GURL config_url =
- config_url_.is_empty() ? GetDefaultConfigURL() : config_url_;
- if (fetcher->is_resource_request()) {
- auto resource = unfinished_work_->add_resource();
- resource->set_url(fetcher->url().spec());
- resource->set_top_host_name(fetcher->referrer());
- } else if (fetcher->url() != config_url) {
- unfinished_work_->add_top_host()->set_hostname(fetcher->referrer());
- }
+ new_resource->set_url(resource.url.spec());
+ new_resource->set_top_host_name(resource.referrer);
}
+ top_hosts_fetching_.clear();
top_hosts_to_fetch_.clear();
+ resources_fetching_.clear();
resources_to_fetch_.clear();
pool_.DeleteAll();
return std::move(unfinished_work_);
@@ -498,40 +498,43 @@ void PrecacheFetcher::Start() {
void PrecacheFetcher::StartNextResourceFetch() {
DCHECK(unfinished_work_->has_config_settings());
while (!resources_to_fetch_.empty() && pool_.IsAvailable()) {
- const auto& resource = resources_to_fetch_.front();
+ ResourceInfo& resource = resources_to_fetch_.front();
const size_t max_bytes = std::min(
quota_.remaining(),
std::min(unfinished_work_->config_settings().max_bytes_per_resource(),
unfinished_work_->config_settings().max_bytes_total() -
unfinished_work_->total_bytes()));
- VLOG(3) << "Fetching " << resource.first << " " << resource.second;
+ VLOG(3) << "Fetching " << resource.url << " " << resource.referrer;
pool_.Add(base::MakeUnique<Fetcher>(
- request_context_.get(), resource.first, resource.second,
+ request_context_.get(), resource.url, resource.referrer,
base::Bind(&PrecacheFetcher::OnResourceFetchComplete, AsWeakPtr()),
true /* is_resource_request */, max_bytes));
+ resources_fetching_.push_back(std::move(resource));
resources_to_fetch_.pop_front();
}
}
-void PrecacheFetcher::StartNextManifestFetch() {
- if (top_hosts_to_fetch_.empty() || !pool_.IsAvailable())
- return;
-
- // We only fetch one manifest at a time to keep the size of
- // resources_to_fetch_ as small as possible.
- VLOG(3) << "Fetching " << top_hosts_to_fetch_.front().manifest_url;
- pool_.Add(base::MakeUnique<Fetcher>(
- request_context_.get(), top_hosts_to_fetch_.front().manifest_url,
- top_hosts_to_fetch_.front().hostname,
- base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr()),
- false /* is_resource_request */, std::numeric_limits<int32_t>::max()));
- top_hosts_to_fetch_.pop_front();
+void PrecacheFetcher::StartNextManifestFetches() {
+ // We fetch as many manifests at a time as possible, as we need all resource
+ // URLs in memory in order to rank them.
+ while (!top_hosts_to_fetch_.empty() && pool_.IsAvailable()) {
+ ManifestHostInfo& top_host = top_hosts_to_fetch_.front();
+ VLOG(3) << "Fetching " << top_host.manifest_url;
bengr 2016/10/18 18:57:20 Please remove all VLOG statements.
twifkak 2016/10/18 19:52:27 Done.
+ pool_.Add(base::MakeUnique<Fetcher>(
+ request_context_.get(), top_host.manifest_url, top_host.hostname,
+ base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr(),
+ top_host.visits),
+ false /* is_resource_request */, std::numeric_limits<int32_t>::max()));
+ top_hosts_fetching_.push_back(std::move(top_host));
+ top_hosts_to_fetch_.pop_front();
+ }
}
void PrecacheFetcher::NotifyDone(
size_t remaining_manifest_urls_to_fetch,
size_t remaining_resource_urls_to_fetch) {
+ VLOG(9) << "NotifyDone";
RecordCompletionStatistics(*unfinished_work_,
remaining_manifest_urls_to_fetch,
remaining_resource_urls_to_fetch);
@@ -539,29 +542,21 @@ void PrecacheFetcher::NotifyDone(
}
void PrecacheFetcher::StartNextFetch() {
+ VLOG(9) << "StartNextFetch";
DCHECK(unfinished_work_->has_config_settings());
// If over the precache total size cap or daily quota, then stop prefetching.
if ((unfinished_work_->total_bytes() >
unfinished_work_->config_settings().max_bytes_total()) ||
quota_.remaining() == 0) {
- size_t pending_manifests_in_pool = 0;
- size_t pending_resources_in_pool = 0;
- for (const auto& element_pair : pool_.elements()) {
- const Fetcher* fetcher = element_pair.first;
- if (fetcher->is_resource_request())
- pending_resources_in_pool++;
- else if (fetcher->url() != config_url_)
- pending_manifests_in_pool++;
- }
pool_.DeleteAll();
- NotifyDone(top_hosts_to_fetch_.size() + pending_manifests_in_pool,
- resources_to_fetch_.size() + pending_resources_in_pool);
+ NotifyDone(top_hosts_to_fetch_.size() + top_hosts_fetching_.size(),
+ resources_to_fetch_.size() + resources_fetching_.size());
return;
}
StartNextResourceFetch();
- StartNextManifestFetch();
+ StartNextManifestFetches();
if (top_hosts_to_fetch_.empty() && resources_to_fetch_.empty() &&
pool_.IsEmpty()) {
// There are no more URLs to fetch, so end the precache cycle.
@@ -572,6 +567,7 @@ void PrecacheFetcher::StartNextFetch() {
}
void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) {
+ VLOG(9) << "OnConfigFetchComplete";
UpdateStats(source.response_bytes(), source.network_response_bytes());
if (source.network_url_fetcher() == nullptr) {
pool_.DeleteAll(); // Cancel any other ongoing request.
@@ -589,9 +585,7 @@ void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) {
void PrecacheFetcher::DetermineManifests() {
DCHECK(unfinished_work_->has_config_settings());
- std::vector<std::string> top_hosts_to_fetch;
- std::unique_ptr<std::deque<ManifestHostInfo>> top_hosts_info(
- new std::deque<ManifestHostInfo>);
+ std::vector<std::pair<std::string, int64_t>> top_hosts_to_fetch;
// Keep track of manifest URLs that are being fetched, in order to elide
// duplicates.
std::set<base::StringPiece> seen_top_hosts;
@@ -602,7 +596,7 @@ void PrecacheFetcher::DetermineManifests() {
if (rank > unfinished_work_->config_settings().top_sites_count())
break;
if (seen_top_hosts.insert(host.hostname()).second)
- top_hosts_to_fetch.push_back(host.hostname());
+ top_hosts_to_fetch.emplace_back(host.hostname(), host.visits());
}
// Attempt to fetch manifests for starting hosts up to the maximum top sites
@@ -613,12 +607,15 @@ void PrecacheFetcher::DetermineManifests() {
if (resources_to_fetch_.empty()) {
for (const std::string& host :
unfinished_work_->config_settings().forced_site()) {
+ // We add a forced site with visits == 0, which means its resources will
+ // be downloaded last. TODO(twifkak): Consider removing support for
+ // forced_site.
if (seen_top_hosts.insert(host).second)
- top_hosts_to_fetch.push_back(host);
+ top_hosts_to_fetch.emplace_back(host, 0);
}
}
- // We only fetch one manifest at a time to keep the size of
- // resources_to_fetch_ as small as possible.
+ // We retrieve manifest usage and quota info from the local database before
+ // fetching the manifests.
PostTaskAndReplyWithResult(
db_task_runner_.get(), FROM_HERE,
base::Bind(&RetrieveManifestInfo, precache_database_,
@@ -628,6 +625,7 @@ void PrecacheFetcher::DetermineManifests() {
void PrecacheFetcher::OnManifestInfoRetrieved(
std::deque<ManifestHostInfo> manifests_info) {
+ VLOG(9) << "OnManifestInfoRetrieved";
const std::string prefix = manifest_url_prefix_.empty()
? GetDefaultManifestURLPrefix()
: manifest_url_prefix_;
@@ -636,7 +634,7 @@ void PrecacheFetcher::OnManifestInfoRetrieved(
// is invalid.
top_hosts_to_fetch_.clear();
unfinished_work_->set_num_manifest_urls(manifests_info.size());
- NotifyDone(manifests_info.size(), resources_to_fetch_.size());
+ NotifyDone(manifests_info.size(), resources_to_rank_.size());
return;
}
@@ -666,6 +664,7 @@ void PrecacheFetcher::OnManifestInfoRetrieved(
}
void PrecacheFetcher::OnQuotaInfoRetrieved(const PrecacheQuota& quota) {
+ VLOG(9) << "OnQuotaInfoRetrieved";
quota_ = quota;
base::Time time_now = base::Time::Now();
if (IsQuotaTimeExpired(quota_, time_now)) {
@@ -683,10 +682,12 @@ void PrecacheFetcher::OnQuotaInfoRetrieved(const PrecacheQuota& quota) {
ManifestHostInfo::ManifestHostInfo(int64_t manifest_id,
const std::string& hostname,
+ int64_t visits,
const std::string& used_url_hash,
const std::string& unused_url_hash)
: manifest_id(manifest_id),
hostname(hostname),
+ visits(visits),
used_url_hash(used_url_hash),
unused_url_hash(unused_url_hash) {}
@@ -696,7 +697,20 @@ ManifestHostInfo::ManifestHostInfo(ManifestHostInfo&&) = default;
ManifestHostInfo& ManifestHostInfo::operator=(ManifestHostInfo&&) = default;
-void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) {
+ResourceInfo::ResourceInfo(const GURL& url,
+ const std::string& referrer,
+ double weight)
+ : url(url), referrer(referrer), weight(weight) {}
+
+ResourceInfo::~ResourceInfo() {}
+
+ResourceInfo::ResourceInfo(ResourceInfo&&) = default;
+
+ResourceInfo& ResourceInfo::operator=(ResourceInfo&&) = default;
+
+void PrecacheFetcher::OnManifestFetchComplete(int64_t host_visits,
+ const Fetcher& source) {
+ VLOG(9) << "OnManifestFetchComplete " << source.referrer();
DCHECK(unfinished_work_->has_config_settings());
UpdateStats(source.response_bytes(), source.network_response_bytes());
if (source.network_url_fetcher() == nullptr) {
@@ -705,9 +719,11 @@ void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) {
PrecacheManifest manifest;
if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) {
- const int32_t len =
- std::min(manifest.resource_size(),
- unfinished_work_->config_settings().top_resources_count());
+ int32_t len = manifest.resource_size();
+ if (!global_ranking_) {
+ len = std::min(
+ len, unfinished_work_->config_settings().top_resources_count());
+ }
const uint64_t resource_bitset =
GetResourceBitset(manifest, experiment_id_);
for (int i = 0; i < len; ++i) {
@@ -715,7 +731,10 @@ void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) {
manifest.resource(i).has_url()) {
GURL url(manifest.resource(i).url());
if (url.is_valid()) {
- resources_to_fetch_.emplace_back(url, source.referrer());
+ VLOG(9) << "Adding resource " << url.spec();
+ double weight = manifest.resource(i).weight_ratio() * host_visits;
+ if (weight >= unfinished_work_->config_settings().min_weight())
+ resources_to_rank_.emplace_back(url, source.referrer(), weight);
}
}
}
@@ -726,7 +745,36 @@ void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) {
}
}
+ top_hosts_fetching_.remove_if([&source](const ManifestHostInfo& top_host) {
+ return top_host.manifest_url == source.url();
+ });
+
pool_.Delete(source);
+
+ if (top_hosts_to_fetch_.empty() && top_hosts_fetching_.empty()) {
+ VLOG(9) << "Ranking resources.";
+ // Done fetching manifests. Now sort resources_to_rank_ into
bengr 2016/10/18 18:57:20 Not a requirement, but this seems like it's gettin
twifkak 2016/10/18 19:52:27 Okay, I extracted the inside of this if statement
+ // resources_to_fetch_, by descending weight. When StartNextFetch runs, it
+ // will begin fetching resources.
+ resources_to_fetch_ = std::move(resources_to_rank_);
+ if (global_ranking_) {
+ std::stable_sort(
+ resources_to_fetch_.begin(), resources_to_fetch_.end(),
+ [](const ResourceInfo& first, const ResourceInfo& second) {
+ return first.weight > second.weight;
+ });
+ }
+ // Truncate to size |total_resources_count|.
+ const size_t num_resources = std::min(
+ resources_to_fetch_.size(),
+ static_cast<size_t>(
+ unfinished_work_->config_settings().total_resources_count()));
+ resources_to_fetch_.erase(resources_to_fetch_.begin() + num_resources,
+ resources_to_fetch_.end());
+ // Save denominator for PercentCompleted UMA.
+ unfinished_work_->set_num_resource_urls(resources_to_fetch_.size());
+ }
+
StartNextFetch();
}
@@ -739,6 +787,10 @@ void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) {
source.url(), source.referrer(), base::Time::Now(),
source.was_cached(), source.response_bytes()));
+ resources_fetching_.remove_if([&source](const ResourceInfo& resource) {
+ return resource.url == source.url();
+ });
+
pool_.Delete(source);
// The resource has already been put in the cache during the fetch process, so
« no previous file with comments | « components/precache/core/precache_fetcher.h ('k') | components/precache/core/precache_fetcher_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698