Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(309)

Unified Diff: components/precache/core/precache_fetcher.cc

Issue 2229983002: Send the list of used and unused resources for precache (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/precache/core/precache_fetcher.cc
diff --git a/components/precache/core/precache_fetcher.cc b/components/precache/core/precache_fetcher.cc
index a5cf65086d2f665ced1263529f2e433788d25c9e..95dca302c0dd1579c3ce0d01a21a3781c7058989 100644
--- a/components/precache/core/precache_fetcher.cc
+++ b/components/precache/core/precache_fetcher.cc
@@ -4,12 +4,10 @@
#include "components/precache/core/precache_fetcher.h"
-#include <algorithm>
sclittle 2016/08/11 22:52:35 You still need <algorithm> for std::min.
Raj 2016/08/12 19:04:20 Done.
#include <limits>
-#include <string>
#include <utility>
-#include <vector>
+#include "base/base64.h"
#include "base/bind.h"
#include "base/bind_helpers.h"
#include "base/callback.h"
@@ -21,6 +19,9 @@
#include "base/memory/ptr_util.h"
#include "base/memory/ref_counted.h"
#include "base/metrics/histogram_macros.h"
+#include "base/sha1.h"
+#include "base/task_runner_util.h"
+#include "components/precache/core/precache_database.h"
#include "components/precache/core/precache_switches.h"
#include "components/precache/core/proto/precache.pb.h"
#include "components/precache/core/proto/unfinished_work.pb.h"
@@ -29,6 +30,7 @@
#include "net/base/io_buffer.h"
#include "net/base/load_flags.h"
#include "net/base/net_errors.h"
+#include "net/base/url_util.h"
#include "net/http/http_response_headers.h"
#include "net/url_request/url_fetcher_response_writer.h"
#include "net/url_request/url_request_context_getter.h"
@@ -90,15 +92,6 @@ std::string GetDefaultManifestURLPrefix() {
#endif
}
-// Construct the URL of the precache manifest for the given name (either host or
-// URL). The server is expecting a request for a URL consisting of the manifest
-// URL prefix followed by the doubly escaped name.
-std::string ConstructManifestURL(const std::string& prefix,
- const std::string& name) {
- return prefix + net::EscapeQueryParamValue(
- net::EscapeQueryParamValue(name, false), false);
-}
-
// Attempts to parse a protobuf message from the response string of a
// URLFetcher. If parsing is successful, the message parameter will contain the
// parsed protobuf and this function will return true. Otherwise, returns false.
@@ -159,18 +152,22 @@ class URLFetcherNullWriter : public net::URLFetcherResponseWriter {
}
};
-void AppendManifestURLIfValidAndNew(
- const std::string& prefix,
- const std::string& name,
- base::hash_set<std::string>* seen_manifest_urls,
- std::list<GURL>* unique_manifest_urls) {
- const std::string manifest_url = ConstructManifestURL(prefix, name);
- bool first_seen = seen_manifest_urls->insert(manifest_url).second;
- if (first_seen) {
- GURL url(manifest_url);
- if (url.is_valid())
- unique_manifest_urls->push_back(url);
+// Returns the base64 encoded resource URL hashes. The resource URLs are hashed
+// individually, and 8 bytes of each hash is appended together, which is then
+// encoded to base64.
+std::string GetResourceURLBase64Hash(const std::deque<GURL>& urls) {
+ std::string hashes;
sclittle 2016/08/11 22:52:35 nit: to avoid a bunch of reallocations, you could
Raj 2016/08/12 19:04:20 Done.
+ for (const auto& url : urls) {
+ std::string url_spec = url.spec();
sclittle 2016/08/11 22:52:35 nit: change to const std::string& to avoid string
Raj 2016/08/12 19:04:20 Done.
+ unsigned char sha1_hash[base::kSHA1Length];
+ base::SHA1HashBytes(
bengr 2016/08/11 18:49:14 test that this works on an empty string.
+ reinterpret_cast<const unsigned char*>(url_spec.c_str()),
+ url_spec.size(), sha1_hash);
+ // Each resource hash uses 8 bytes.
+ hashes.append(reinterpret_cast<const char*>(sha1_hash), 8);
sclittle 2016/08/11 22:52:35 Replace "8" with "arraysize(sha1_hash)".
Raj 2016/08/12 19:04:21 hmm. sha1 is actually 20 bytes. But we are using o
sclittle 2016/08/15 20:13:09 Oh, OK, that makes sense. Could you explain that h
}
+ base::Base64Encode(hashes, &hashes);
+ return hashes;
}
} // namespace
@@ -178,16 +175,20 @@ void AppendManifestURLIfValidAndNew(
PrecacheFetcher::Fetcher::Fetcher(
net::URLRequestContextGetter* request_context,
const GURL& url,
+ const std::string& referrer,
const base::Callback<void(const Fetcher&)>& callback,
bool is_resource_request,
size_t max_bytes)
: request_context_(request_context),
url_(url),
+ referrer_(referrer),
callback_(callback),
is_resource_request_(is_resource_request),
max_bytes_(max_bytes),
response_bytes_(0),
- network_response_bytes_(0) {
+ network_response_bytes_(0),
+ was_cached_(false) {
+ DCHECK(url.is_valid());
if (is_resource_request_)
LoadFromCache();
else
@@ -240,13 +241,13 @@ void PrecacheFetcher::Fetcher::OnURLFetchDownloadProgress(
VLOG(1) << "Cancelling " << url_ << ": (" << current << "/" << total
<< ") is over " << max_bytes_;
- // Cancel the download.
- network_url_fetcher_.reset();
-
// Call the completion callback, to attempt the next download, or to trigger
// cleanup in precache_delegate_->OnDone().
response_bytes_ = network_response_bytes_ = current;
+ was_cached_ = source->WasCached();
+ // Cancel the download.
+ network_url_fetcher_.reset();
callback_.Run(*this);
}
}
@@ -276,6 +277,7 @@ void PrecacheFetcher::Fetcher::OnURLFetchComplete(
// Then Fetcher is done with this URL and can return control to the caller.
response_bytes_ = source->GetReceivedResponseContentLength();
network_response_bytes_ = source->GetTotalReceivedBytes();
+ was_cached_ = source->WasCached();
callback_.Run(*this);
}
@@ -326,10 +328,14 @@ PrecacheFetcher::PrecacheFetcher(
const std::string& manifest_url_prefix,
std::unique_ptr<PrecacheUnfinishedWork> unfinished_work,
uint32_t experiment_id,
+ base::WeakPtr<PrecacheDatabase> precache_database,
sclittle 2016/08/11 22:52:35 nit: pass by const ref to avoid extra Add/Remove r
Raj 2016/08/12 19:04:21 Done.
+ const scoped_refptr<base::SingleThreadTaskRunner>& db_task_runner,
PrecacheFetcher::PrecacheDelegate* precache_delegate)
: request_context_(request_context),
config_url_(config_url),
manifest_url_prefix_(manifest_url_prefix),
+ precache_database_(precache_database),
+ db_task_runner_(std::move(db_task_runner)),
precache_delegate_(precache_delegate),
pool_(kMaxParallelFetches),
experiment_id_(experiment_id) {
@@ -342,15 +348,12 @@ PrecacheFetcher::PrecacheFetcher(
<< "Could not determine the default precache manifest URL prefix.";
DCHECK(unfinished_work);
- // Copy manifests and resources to member variables as a convenience.
- // TODO(bengr): Consider accessing these directly from the proto.
Raj 2016/08/09 22:56:48 Since the proto datastructure does not support rem
- for (const auto& manifest : unfinished_work->manifest()) {
- if (manifest.has_url())
- manifest_urls_to_fetch_.push_back(GURL(manifest.url()));
- }
+ // Copy resources to member variable as a convenience.
sclittle 2016/08/11 22:52:35 You're already changing the member variables so mu
Raj 2016/08/12 19:04:20 hmm. That is a good suggestion. Added a TODO for n
for (const auto& resource : unfinished_work->resource()) {
- if (resource.has_url())
- resource_urls_to_fetch_.push_back(GURL(resource.url()));
+ if (resource.has_url() && resource.has_tophostname()) {
+ resources_to_fetch_.emplace_back(
+ std::make_pair(GURL(resource.url()), resource.tophostname()));
+ }
}
unfinished_work_ = std::move(unfinished_work);
}
@@ -363,21 +366,32 @@ std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() {
if (!unfinished_work_)
return nullptr;
- unfinished_work_->clear_manifest();
unfinished_work_->clear_resource();
- for (const auto& manifest : manifest_urls_to_fetch_)
- unfinished_work_->add_manifest()->set_url(manifest.spec());
- for (const auto& resource : resource_urls_to_fetch_)
- unfinished_work_->add_resource()->set_url(resource.spec());
+ if (top_hosts_to_fetch_) {
+ unfinished_work_->clear_top_host();
+ for (const auto& top_host : *top_hosts_to_fetch_) {
+ unfinished_work_->add_top_host()->set_hostname(top_host.hostname);
+ }
+ }
+ for (const auto& resource : resources_to_fetch_) {
+ auto new_resource = unfinished_work_->add_resource();
+ new_resource->set_url(resource.first.spec());
+ new_resource->set_tophostname(resource.second);
+ }
for (const auto& it : pool_.elements()) {
const Fetcher* fetcher = it.first;
- if (fetcher->is_resource_request())
- unfinished_work_->add_resource()->set_url(fetcher->url().spec());
- else if (fetcher->url() != config_url_)
- unfinished_work_->add_manifest()->set_url(fetcher->url().spec());
+ GURL config_url =
+ config_url_.is_empty() ? GetDefaultConfigURL() : config_url_;
+ if (fetcher->is_resource_request()) {
+ auto resource = unfinished_work_->add_resource();
+ resource->set_url(fetcher->url().spec());
+ resource->set_tophostname(fetcher->referrer());
+ } else if (fetcher->url() != config_url) {
+ unfinished_work_->add_top_host()->set_hostname(fetcher->referrer());
+ }
}
- manifest_urls_to_fetch_.clear();
- resource_urls_to_fetch_.clear();
+ top_hosts_to_fetch_.reset();
+ resources_to_fetch_.clear();
pool_.DeleteAll();
return std::move(unfinished_work_);
}
@@ -399,44 +413,43 @@ void PrecacheFetcher::Start() {
DCHECK(pool_.IsEmpty()) << "All parallel requests should be available";
VLOG(3) << "Fetching " << config_url;
pool_.Add(base::WrapUnique(new Fetcher(
- request_context_.get(), config_url,
- base::Bind(&PrecacheFetcher::OnConfigFetchComplete,
- base::Unretained(this)),
+ request_context_.get(), config_url, std::string(),
+ base::Bind(&PrecacheFetcher::OnConfigFetchComplete, AsWeakPtr()),
false /* is_resource_request */, std::numeric_limits<int32_t>::max())));
}
void PrecacheFetcher::StartNextResourceFetch() {
DCHECK(unfinished_work_->has_config_settings());
- while (!resource_urls_to_fetch_.empty() && pool_.IsAvailable()) {
+ while (!resources_to_fetch_.empty() && pool_.IsAvailable()) {
+ const auto& resource = resources_to_fetch_.front();
const size_t max_bytes =
std::min(unfinished_work_->config_settings().max_bytes_per_resource(),
unfinished_work_->config_settings().max_bytes_total() -
unfinished_work_->total_bytes());
- VLOG(3) << "Fetching " << resource_urls_to_fetch_.front();
- pool_.Add(base::WrapUnique(
- new Fetcher(request_context_.get(), resource_urls_to_fetch_.front(),
- base::Bind(&PrecacheFetcher::OnResourceFetchComplete,
- base::Unretained(this)),
- true /* is_resource_request */, max_bytes)));
-
- resource_urls_to_fetch_.pop_front();
+ VLOG(3) << "Fetching " << resource.first << " " << resource.second;
+ pool_.Add(base::WrapUnique(new Fetcher(
+ request_context_.get(), resource.first, resource.second,
+ base::Bind(&PrecacheFetcher::OnResourceFetchComplete, AsWeakPtr()),
+ true /* is_resource_request */, max_bytes)));
+
+ resources_to_fetch_.pop_front();
}
}
void PrecacheFetcher::StartNextManifestFetch() {
- if (manifest_urls_to_fetch_.empty() || !pool_.IsAvailable())
+ if (!top_hosts_to_fetch_ || top_hosts_to_fetch_->empty() ||
+ !pool_.IsAvailable())
return;
// We only fetch one manifest at a time to keep the size of
- // resource_urls_to_fetch_ as small as possible.
- VLOG(3) << "Fetching " << manifest_urls_to_fetch_.front();
+ // resources_to_fetch_ as small as possible.
+ VLOG(3) << "Fetching " << top_hosts_to_fetch_->front().manifest_url;
pool_.Add(base::WrapUnique(new Fetcher(
- request_context_.get(), manifest_urls_to_fetch_.front(),
- base::Bind(&PrecacheFetcher::OnManifestFetchComplete,
- base::Unretained(this)),
+ request_context_.get(), top_hosts_to_fetch_->front().manifest_url,
+ top_hosts_to_fetch_->front().hostname,
+ base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr()),
false /* is_resource_request */, std::numeric_limits<int32_t>::max())));
-
- manifest_urls_to_fetch_.pop_front();
+ top_hosts_to_fetch_->pop_front();
}
void PrecacheFetcher::NotifyDone(
@@ -463,14 +476,17 @@ void PrecacheFetcher::StartNextFetch() {
pending_manifests_in_pool++;
}
pool_.DeleteAll();
- NotifyDone(manifest_urls_to_fetch_.size() + pending_manifests_in_pool,
- resource_urls_to_fetch_.size() + pending_resources_in_pool);
+ int pending_top_hosts =
+ top_hosts_to_fetch_ ? top_hosts_to_fetch_->size() : 0;
+ NotifyDone(pending_top_hosts + pending_manifests_in_pool,
+ resources_to_fetch_.size() + pending_resources_in_pool);
return;
}
StartNextResourceFetch();
StartNextManifestFetch();
- if (pool_.IsEmpty()) {
+ if ((!top_hosts_to_fetch_ || top_hosts_to_fetch_->empty()) &&
+ resources_to_fetch_.empty() && pool_.IsEmpty()) {
// There are no more URLs to fetch, so end the precache cycle.
NotifyDone(0, 0);
// OnDone may have deleted this PrecacheFetcher, so don't do anything after
@@ -495,43 +511,110 @@ void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) {
void PrecacheFetcher::DetermineManifests() {
DCHECK(unfinished_work_->has_config_settings());
- std::string prefix = manifest_url_prefix_.empty()
- ? GetDefaultManifestURLPrefix()
- : manifest_url_prefix_;
- DCHECK_NE(std::string(), prefix)
- << "Could not determine the precache manifest URL prefix.";
-
- // Keep track of manifest URLs that are being fetched, in order to elide
- // duplicates.
- base::hash_set<std::string> seen_manifest_urls;
-
- // Attempt to fetch manifests for starting hosts up to the maximum top sites
- // count. If a manifest does not exist for a particular starting host, then
- // the fetch will fail, and that starting host will be ignored. Starting
- // hosts are not added if this is a continuation from a previous precache
- // session.
- if (manifest_urls_to_fetch_.empty() &&
- resource_urls_to_fetch_.empty()) {
- int64_t rank = 0;
- for (const auto& host : unfinished_work_->top_host()) {
- ++rank;
- if (rank > unfinished_work_->config_settings().top_sites_count())
- break;
- AppendManifestURLIfValidAndNew(prefix, host.hostname(),
- &seen_manifest_urls,
- &manifest_urls_to_fetch_);
- }
- for (const std::string& host
- : unfinished_work_->config_settings().forced_site()) {
- AppendManifestURLIfValidAndNew(prefix, host, &seen_manifest_urls,
- &manifest_urls_to_fetch_);
- }
+ // Keep track of manifest URLs that are being fetched, in order to elide
+ // duplicates.
+ std::set<std::string> seen_top_hosts;
sclittle 2016/08/11 22:52:35 You should move this into the below if-statement s
Raj 2016/08/12 19:04:20 Done.
+ std::unique_ptr<std::deque<std::string>> top_hosts_to_fetch(
sclittle 2016/08/11 22:52:35 You can remove the std::unique_ptr around this if
Raj 2016/08/12 19:04:20 Nice. It works. But top_hosts_to_fetch is a local
+ new std::deque<std::string>);
+ std::unique_ptr<std::deque<ManifestHostInfo>> top_hosts_info(
+ new std::deque<ManifestHostInfo>);
+
+ // Attempt to fetch manifests for starting hosts up to the maximum top sites
+ // count. If a manifest does not exist for a particular starting host, then
+ // the fetch will fail, and that starting host will be ignored. Starting
+ // hosts are not added if this is a continuation from a previous precache
+ // session.
+ if (top_hosts_to_fetch->empty() && resources_to_fetch_.empty()) {
sclittle 2016/08/11 22:52:35 |top_hosts_to_fetch| will always be empty at this
Raj 2016/08/12 19:04:20 Yep.
+ int64_t rank = 0;
+ for (const auto& host : unfinished_work_->top_host()) {
+ ++rank;
+ if (rank > unfinished_work_->config_settings().top_sites_count())
+ break;
+ if (seen_top_hosts.insert(host.hostname()).second)
+ top_hosts_to_fetch->emplace_back(host.hostname());
sclittle 2016/08/11 22:52:35 nit: I think you can just use push_back here, it'l
Raj 2016/08/12 19:04:20 Done.
+ }
+
+ for (const std::string& host :
+ unfinished_work_->config_settings().forced_site()) {
+ if (seen_top_hosts.insert(host).second)
+ top_hosts_to_fetch->emplace_back(host);
sclittle 2016/08/11 22:52:35 nit: I think you can replace emplace_back with pus
Raj 2016/08/12 19:04:21 Done.
+ }
+ }
+ // We only fetch one manifest at a time to keep the size of
+ // resources_to_fetch_ as small as possible.
+ auto retrieve_manifest_callback =
+ base::Bind(&PrecacheFetcher::RetrieveManifestInfo, AsWeakPtr(),
+ base::Passed(&top_hosts_to_fetch), top_hosts_info.get());
+ db_task_runner_->PostTaskAndReply(
+ FROM_HERE, retrieve_manifest_callback,
+ base::Bind(&PrecacheFetcher::OnManifestInfoRetrieved, AsWeakPtr(),
+ base::Passed(&top_hosts_info)));
+}
+
+void PrecacheFetcher::RetrieveManifestInfo(
+ std::unique_ptr<std::deque<std::string>> hosts_to_fetch,
+ std::deque<ManifestHostInfo>* hosts_info) {
+ for (const auto& host : *hosts_to_fetch) {
+ auto referrer_host_info = precache_database_->GetReferrerHost(host);
+ if (referrer_host_info.id != PrecacheReferrerHostEntry::INVALID_ID) {
+ std::deque<GURL> used_urls, unused_urls;
sclittle 2016/08/11 22:52:35 nit: Can this just be an std::vector? Typically an
Raj 2016/08/12 19:04:20 Done.
+ precache_database_->GetURLListForReferrerHost(referrer_host_info.id,
+ used_urls, unused_urls);
+ hosts_info->emplace_back(ManifestHostInfo(
+ referrer_host_info.id, host, GetResourceURLBase64Hash(used_urls),
+ GetResourceURLBase64Hash(unused_urls)));
+ } else {
+ hosts_info->emplace_back(
+ ManifestHostInfo(PrecacheReferrerHostEntry::INVALID_ID, host,
+ std::string(), std::string()));
+ }
+ }
+}
+
+void PrecacheFetcher::OnManifestInfoRetrieved(
+ std::unique_ptr<std::deque<ManifestHostInfo>> manifests_info) {
+ DCHECK(manifests_info);
+ const std::string prefix = manifest_url_prefix_.empty()
+ ? GetDefaultManifestURLPrefix()
+ : manifest_url_prefix_;
+ top_hosts_to_fetch_.reset(new std::deque<ManifestHostInfo>());
+ for (auto& manifest : *manifests_info) {
+ GURL manifest_url(
+ prefix +
+ net::EscapeQueryParamValue(
+ net::EscapeQueryParamValue(manifest.hostname, false), false));
sclittle 2016/08/11 22:52:35 Can't you just add all the query params when you c
Raj 2016/08/12 19:04:20 I do not see any GURL constructor that takes query
sclittle 2016/08/15 20:13:09 Ok, this is probably fine then. I don't see a nice
+ if (manifest_url.is_valid() &&
+ manifest.manifest_id != PrecacheReferrerHostEntry::INVALID_ID) {
+ manifest_url = net::AppendOrReplaceQueryParameter(
+ manifest_url, "manifest", std::to_string(manifest.manifest_id));
+ manifest_url = net::AppendOrReplaceQueryParameter(
+ manifest_url, "used_resources", manifest.used_url_hash);
+ manifest_url = net::AppendOrReplaceQueryParameter(
+ manifest_url, "unused_resources", manifest.unused_url_hash);
+ DCHECK(manifest_url.is_valid());
}
- unfinished_work_->set_num_manifest_urls(manifest_urls_to_fetch_.size());
- StartNextFetch();
+ manifest.manifest_url = manifest_url;
+ if (manifest_url.is_valid())
+ top_hosts_to_fetch_->emplace_back(manifest);
+ }
+ unfinished_work_->set_num_manifest_urls(top_hosts_to_fetch_->size());
+ StartNextFetch();
}
+ManifestHostInfo::ManifestHostInfo(int64_t manifest_id,
+ const std::string& hostname,
+ const std::string& used_url_hash,
+ const std::string& unused_url_hash)
+ : manifest_id(manifest_id),
+ hostname(hostname),
+ used_url_hash(used_url_hash),
+ unused_url_hash(unused_url_hash) {}
+
+ManifestHostInfo::~ManifestHostInfo() {}
+
+ManifestHostInfo::ManifestHostInfo(const ManifestHostInfo& other) = default;
+
void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) {
DCHECK(unfinished_work_->has_config_settings());
UpdateStats(source.response_bytes(), source.network_response_bytes());
@@ -550,10 +633,17 @@ void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) {
if (((0x1ULL << i) & resource_bitset) &&
manifest.resource(i).has_url()) {
GURL url(manifest.resource(i).url());
- if (url.is_valid())
- resource_urls_to_fetch_.push_back(url);
+ if (url.is_valid()) {
+ resources_to_fetch_.emplace_back(
+ std::make_pair(url, source.referrer()));
+ }
}
}
+ db_task_runner_->PostTask(
+ FROM_HERE,
+ base::Bind(&PrecacheDatabase::UpdatePrecacheReferrerHost,
+ precache_database_, source.referrer(),
+ manifest.id().timestamp().seconds(), base::Time::Now()));
}
}
@@ -563,7 +653,15 @@ void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) {
void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) {
UpdateStats(source.response_bytes(), source.network_response_bytes());
+
+ db_task_runner_->PostTask(
+ FROM_HERE,
+ base::Bind(&PrecacheDatabase::RecordURLPrefetch, precache_database_,
+ source.url(), source.referrer(), base::Time::Now(),
+ source.was_cached(), source.response_bytes()));
+
pool_.Delete(source);
+
// The resource has already been put in the cache during the fetch process, so
// nothing more needs to be done for the resource.
StartNextFetch();

Powered by Google App Engine
This is Rietveld 408576698