Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/precache/core/precache_fetcher.h" | 5 #include "components/precache/core/precache_fetcher.h" |
| 6 | 6 |
| 7 #include <algorithm> | |
|
sclittle
2016/08/11 22:52:35
You still need <algorithm> for std::min.
Raj
2016/08/12 19:04:20
Done.
| |
| 8 #include <limits> | 7 #include <limits> |
| 9 #include <string> | |
| 10 #include <utility> | 8 #include <utility> |
| 11 #include <vector> | |
| 12 | 9 |
| 10 #include "base/base64.h" | |
| 13 #include "base/bind.h" | 11 #include "base/bind.h" |
| 14 #include "base/bind_helpers.h" | 12 #include "base/bind_helpers.h" |
| 15 #include "base/callback.h" | 13 #include "base/callback.h" |
| 16 #include "base/command_line.h" | 14 #include "base/command_line.h" |
| 17 #include "base/compiler_specific.h" | 15 #include "base/compiler_specific.h" |
| 18 #include "base/containers/hash_tables.h" | 16 #include "base/containers/hash_tables.h" |
| 19 #include "base/location.h" | 17 #include "base/location.h" |
| 20 #include "base/logging.h" | 18 #include "base/logging.h" |
| 21 #include "base/memory/ptr_util.h" | 19 #include "base/memory/ptr_util.h" |
| 22 #include "base/memory/ref_counted.h" | 20 #include "base/memory/ref_counted.h" |
| 23 #include "base/metrics/histogram_macros.h" | 21 #include "base/metrics/histogram_macros.h" |
| 22 #include "base/sha1.h" | |
| 23 #include "base/task_runner_util.h" | |
| 24 #include "components/precache/core/precache_database.h" | |
| 24 #include "components/precache/core/precache_switches.h" | 25 #include "components/precache/core/precache_switches.h" |
| 25 #include "components/precache/core/proto/precache.pb.h" | 26 #include "components/precache/core/proto/precache.pb.h" |
| 26 #include "components/precache/core/proto/unfinished_work.pb.h" | 27 #include "components/precache/core/proto/unfinished_work.pb.h" |
| 27 #include "net/base/completion_callback.h" | 28 #include "net/base/completion_callback.h" |
| 28 #include "net/base/escape.h" | 29 #include "net/base/escape.h" |
| 29 #include "net/base/io_buffer.h" | 30 #include "net/base/io_buffer.h" |
| 30 #include "net/base/load_flags.h" | 31 #include "net/base/load_flags.h" |
| 31 #include "net/base/net_errors.h" | 32 #include "net/base/net_errors.h" |
| 33 #include "net/base/url_util.h" | |
| 32 #include "net/http/http_response_headers.h" | 34 #include "net/http/http_response_headers.h" |
| 33 #include "net/url_request/url_fetcher_response_writer.h" | 35 #include "net/url_request/url_fetcher_response_writer.h" |
| 34 #include "net/url_request/url_request_context_getter.h" | 36 #include "net/url_request/url_request_context_getter.h" |
| 35 #include "net/url_request/url_request_status.h" | 37 #include "net/url_request/url_request_status.h" |
| 36 | 38 |
| 37 namespace precache { | 39 namespace precache { |
| 38 | 40 |
| 39 // The following flags are for privacy reasons. For example, if a user clears | 41 // The following flags are for privacy reasons. For example, if a user clears |
| 40 // their cookies, but a tracking beacon is prefetched and the beacon specifies | 42 // their cookies, but a tracking beacon is prefetched and the beacon specifies |
| 41 // its source URL in a URL param, the beacon site would be able to rebuild a | 43 // its source URL in a URL param, the beacon site would be able to rebuild a |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 83 | 85 |
| 84 #if defined(PRECACHE_MANIFEST_URL_PREFIX) | 86 #if defined(PRECACHE_MANIFEST_URL_PREFIX) |
| 85 return PRECACHE_MANIFEST_URL_PREFIX; | 87 return PRECACHE_MANIFEST_URL_PREFIX; |
| 86 #else | 88 #else |
| 87 // The precache manifest URL prefix could not be determined, so return an | 89 // The precache manifest URL prefix could not be determined, so return an |
| 88 // empty string. | 90 // empty string. |
| 89 return std::string(); | 91 return std::string(); |
| 90 #endif | 92 #endif |
| 91 } | 93 } |
| 92 | 94 |
| 93 // Construct the URL of the precache manifest for the given name (either host or | |
| 94 // URL). The server is expecting a request for a URL consisting of the manifest | |
| 95 // URL prefix followed by the doubly escaped name. | |
| 96 std::string ConstructManifestURL(const std::string& prefix, | |
| 97 const std::string& name) { | |
| 98 return prefix + net::EscapeQueryParamValue( | |
| 99 net::EscapeQueryParamValue(name, false), false); | |
| 100 } | |
| 101 | |
| 102 // Attempts to parse a protobuf message from the response string of a | 95 // Attempts to parse a protobuf message from the response string of a |
| 103 // URLFetcher. If parsing is successful, the message parameter will contain the | 96 // URLFetcher. If parsing is successful, the message parameter will contain the |
| 104 // parsed protobuf and this function will return true. Otherwise, returns false. | 97 // parsed protobuf and this function will return true. Otherwise, returns false. |
| 105 bool ParseProtoFromFetchResponse(const net::URLFetcher& source, | 98 bool ParseProtoFromFetchResponse(const net::URLFetcher& source, |
| 106 ::google::protobuf::MessageLite* message) { | 99 ::google::protobuf::MessageLite* message) { |
| 107 std::string response_string; | 100 std::string response_string; |
| 108 | 101 |
| 109 if (!source.GetStatus().is_success()) { | 102 if (!source.GetStatus().is_success()) { |
| 110 DLOG(WARNING) << "Fetch failed: " << source.GetOriginalURL().spec(); | 103 DLOG(WARNING) << "Fetch failed: " << source.GetOriginalURL().spec(); |
| 111 return false; | 104 return false; |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 152 int num_bytes, | 145 int num_bytes, |
| 153 const net::CompletionCallback& callback) override { | 146 const net::CompletionCallback& callback) override { |
| 154 return num_bytes; | 147 return num_bytes; |
| 155 } | 148 } |
| 156 | 149 |
| 157 int Finish(const net::CompletionCallback& callback) override { | 150 int Finish(const net::CompletionCallback& callback) override { |
| 158 return net::OK; | 151 return net::OK; |
| 159 } | 152 } |
| 160 }; | 153 }; |
| 161 | 154 |
| 162 void AppendManifestURLIfValidAndNew( | 155 // Returns the base64 encoded resource URL hashes. The resource URLs are hashed |
| 163 const std::string& prefix, | 156 // individually, and 8 bytes of each hash is appended together, which is then |
| 164 const std::string& name, | 157 // encoded to base64. |
| 165 base::hash_set<std::string>* seen_manifest_urls, | 158 std::string GetResourceURLBase64Hash(const std::deque<GURL>& urls) { |
| 166 std::list<GURL>* unique_manifest_urls) { | 159 std::string hashes; |
|
sclittle
2016/08/11 22:52:35
nit: to avoid a bunch of reallocations, you could
Raj
2016/08/12 19:04:20
Done.
| |
| 167 const std::string manifest_url = ConstructManifestURL(prefix, name); | 160 for (const auto& url : urls) { |
| 168 bool first_seen = seen_manifest_urls->insert(manifest_url).second; | 161 std::string url_spec = url.spec(); |
|
sclittle
2016/08/11 22:52:35
nit: change to const std::string& to avoid a string copy.
Raj
2016/08/12 19:04:20
Done.
| |
| 169 if (first_seen) { | 162 unsigned char sha1_hash[base::kSHA1Length]; |
| 170 GURL url(manifest_url); | 163 base::SHA1HashBytes( |
|
bengr
2016/08/11 18:49:14
test that this works on an empty string.
| |
| 171 if (url.is_valid()) | 164 reinterpret_cast<const unsigned char*>(url_spec.c_str()), |
| 172 unique_manifest_urls->push_back(url); | 165 url_spec.size(), sha1_hash); |
| 166 // Each resource hash uses 8 bytes. | |
| 167 hashes.append(reinterpret_cast<const char*>(sha1_hash), 8); | |
|
sclittle
2016/08/11 22:52:35
Replace "8" with "arraysize(sha1_hash)".
Raj
2016/08/12 19:04:21
hmm. sha1 is actually 20 bytes. But we are using only 8 bytes of each hash.
sclittle
2016/08/15 20:13:09
Oh, OK, that makes sense. Could you explain that h
| |
| 173 } | 168 } |
| 169 base::Base64Encode(hashes, &hashes); | |
| 170 return hashes; | |
| 174 } | 171 } |
| 175 | 172 |
| 176 } // namespace | 173 } // namespace |
| 177 | 174 |
| 178 PrecacheFetcher::Fetcher::Fetcher( | 175 PrecacheFetcher::Fetcher::Fetcher( |
| 179 net::URLRequestContextGetter* request_context, | 176 net::URLRequestContextGetter* request_context, |
| 180 const GURL& url, | 177 const GURL& url, |
| 178 const std::string& referrer, | |
| 181 const base::Callback<void(const Fetcher&)>& callback, | 179 const base::Callback<void(const Fetcher&)>& callback, |
| 182 bool is_resource_request, | 180 bool is_resource_request, |
| 183 size_t max_bytes) | 181 size_t max_bytes) |
| 184 : request_context_(request_context), | 182 : request_context_(request_context), |
| 185 url_(url), | 183 url_(url), |
| 184 referrer_(referrer), | |
| 186 callback_(callback), | 185 callback_(callback), |
| 187 is_resource_request_(is_resource_request), | 186 is_resource_request_(is_resource_request), |
| 188 max_bytes_(max_bytes), | 187 max_bytes_(max_bytes), |
| 189 response_bytes_(0), | 188 response_bytes_(0), |
| 190 network_response_bytes_(0) { | 189 network_response_bytes_(0), |
| 190 was_cached_(false) { | |
| 191 DCHECK(url.is_valid()); | |
| 191 if (is_resource_request_) | 192 if (is_resource_request_) |
| 192 LoadFromCache(); | 193 LoadFromCache(); |
| 193 else | 194 else |
| 194 LoadFromNetwork(); | 195 LoadFromNetwork(); |
| 195 } | 196 } |
| 196 | 197 |
| 197 PrecacheFetcher::Fetcher::~Fetcher() {} | 198 PrecacheFetcher::Fetcher::~Fetcher() {} |
| 198 | 199 |
| 199 void PrecacheFetcher::Fetcher::LoadFromCache() { | 200 void PrecacheFetcher::Fetcher::LoadFromCache() { |
| 200 fetch_stage_ = FetchStage::CACHE; | 201 fetch_stage_ = FetchStage::CACHE; |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 233 const net::URLFetcher* source, | 234 const net::URLFetcher* source, |
| 234 int64_t current, | 235 int64_t current, |
| 235 int64_t total) { | 236 int64_t total) { |
| 236 // If going over the per-resource download cap. | 237 // If going over the per-resource download cap. |
| 237 if (fetch_stage_ == FetchStage::NETWORK && | 238 if (fetch_stage_ == FetchStage::NETWORK && |
| 238 // |current| is guaranteed to be non-negative, so this cast is safe. | 239 // |current| is guaranteed to be non-negative, so this cast is safe. |
| 239 static_cast<size_t>(std::max(current, total)) > max_bytes_) { | 240 static_cast<size_t>(std::max(current, total)) > max_bytes_) { |
| 240 VLOG(1) << "Cancelling " << url_ << ": (" << current << "/" << total | 241 VLOG(1) << "Cancelling " << url_ << ": (" << current << "/" << total |
| 241 << ") is over " << max_bytes_; | 242 << ") is over " << max_bytes_; |
| 242 | 243 |
| 243 // Cancel the download. | |
| 244 network_url_fetcher_.reset(); | |
| 245 | |
| 246 // Call the completion callback, to attempt the next download, or to trigger | 244 // Call the completion callback, to attempt the next download, or to trigger |
| 247 // cleanup in precache_delegate_->OnDone(). | 245 // cleanup in precache_delegate_->OnDone(). |
| 248 response_bytes_ = network_response_bytes_ = current; | 246 response_bytes_ = network_response_bytes_ = current; |
| 247 was_cached_ = source->WasCached(); | |
| 249 | 248 |
| 249 // Cancel the download. | |
| 250 network_url_fetcher_.reset(); | |
| 250 callback_.Run(*this); | 251 callback_.Run(*this); |
| 251 } | 252 } |
| 252 } | 253 } |
| 253 | 254 |
| 254 void PrecacheFetcher::Fetcher::OnURLFetchComplete( | 255 void PrecacheFetcher::Fetcher::OnURLFetchComplete( |
| 255 const net::URLFetcher* source) { | 256 const net::URLFetcher* source) { |
| 256 CHECK(source); | 257 CHECK(source); |
| 257 if (fetch_stage_ == FetchStage::CACHE && | 258 if (fetch_stage_ == FetchStage::CACHE && |
| 258 (source->GetStatus().error() == net::ERR_CACHE_MISS || | 259 (source->GetStatus().error() == net::ERR_CACHE_MISS || |
| 259 (source->GetResponseHeaders() && | 260 (source->GetResponseHeaders() && |
| 260 source->GetResponseHeaders()->HasValidators()))) { | 261 source->GetResponseHeaders()->HasValidators()))) { |
| 261 // If the resource was not found in the cache, request it from the | 262 // If the resource was not found in the cache, request it from the |
| 262 // network. | 263 // network. |
| 263 // | 264 // |
| 264 // If the resource was found in the cache, but contains validators, | 265 // If the resource was found in the cache, but contains validators, |
| 265 // request a refresh. The presence of validators increases the chance that | 266 // request a refresh. The presence of validators increases the chance that |
| 266 // we get a 304 response rather than a full one, thus allowing us to | 267 // we get a 304 response rather than a full one, thus allowing us to |
| 267 // refresh the cache with minimal network load. | 268 // refresh the cache with minimal network load. |
| 268 LoadFromNetwork(); | 269 LoadFromNetwork(); |
| 269 return; | 270 return; |
| 270 } | 271 } |
| 271 | 272 |
| 272 // If any of: | 273 // If any of: |
| 273 // - The request was for a config or manifest. | 274 // - The request was for a config or manifest. |
| 274 // - The resource was a cache hit without validators. | 275 // - The resource was a cache hit without validators. |
| 275 // - The response came from the network. | 276 // - The response came from the network. |
| 276 // Then Fetcher is done with this URL and can return control to the caller. | 277 // Then Fetcher is done with this URL and can return control to the caller. |
| 277 response_bytes_ = source->GetReceivedResponseContentLength(); | 278 response_bytes_ = source->GetReceivedResponseContentLength(); |
| 278 network_response_bytes_ = source->GetTotalReceivedBytes(); | 279 network_response_bytes_ = source->GetTotalReceivedBytes(); |
| 280 was_cached_ = source->WasCached(); | |
| 279 callback_.Run(*this); | 281 callback_.Run(*this); |
| 280 } | 282 } |
| 281 | 283 |
| 282 // static | 284 // static |
| 283 void PrecacheFetcher::RecordCompletionStatistics( | 285 void PrecacheFetcher::RecordCompletionStatistics( |
| 284 const PrecacheUnfinishedWork& unfinished_work, | 286 const PrecacheUnfinishedWork& unfinished_work, |
| 285 size_t remaining_manifest_urls_to_fetch, | 287 size_t remaining_manifest_urls_to_fetch, |
| 286 size_t remaining_resource_urls_to_fetch) { | 288 size_t remaining_resource_urls_to_fetch) { |
| 287 // These may be unset in tests. | 289 // These may be unset in tests. |
| 288 if (!unfinished_work.has_start_time()) | 290 if (!unfinished_work.has_start_time()) |
| (...skipping 30 matching lines...) Expand all Loading... | |
| 319 1, kMaxResponseBytes, | 321 1, kMaxResponseBytes, |
| 320 100); | 322 100); |
| 321 } | 323 } |
| 322 | 324 |
| 323 PrecacheFetcher::PrecacheFetcher( | 325 PrecacheFetcher::PrecacheFetcher( |
| 324 net::URLRequestContextGetter* request_context, | 326 net::URLRequestContextGetter* request_context, |
| 325 const GURL& config_url, | 327 const GURL& config_url, |
| 326 const std::string& manifest_url_prefix, | 328 const std::string& manifest_url_prefix, |
| 327 std::unique_ptr<PrecacheUnfinishedWork> unfinished_work, | 329 std::unique_ptr<PrecacheUnfinishedWork> unfinished_work, |
| 328 uint32_t experiment_id, | 330 uint32_t experiment_id, |
| 331 base::WeakPtr<PrecacheDatabase> precache_database, | |
|
sclittle
2016/08/11 22:52:35
nit: pass by const ref to avoid extra Add/Remove r
Raj
2016/08/12 19:04:21
Done.
| |
| 332 const scoped_refptr<base::SingleThreadTaskRunner>& db_task_runner, | |
| 329 PrecacheFetcher::PrecacheDelegate* precache_delegate) | 333 PrecacheFetcher::PrecacheDelegate* precache_delegate) |
| 330 : request_context_(request_context), | 334 : request_context_(request_context), |
| 331 config_url_(config_url), | 335 config_url_(config_url), |
| 332 manifest_url_prefix_(manifest_url_prefix), | 336 manifest_url_prefix_(manifest_url_prefix), |
| 337 precache_database_(precache_database), | |
| 338 db_task_runner_(std::move(db_task_runner)), | |
| 333 precache_delegate_(precache_delegate), | 339 precache_delegate_(precache_delegate), |
| 334 pool_(kMaxParallelFetches), | 340 pool_(kMaxParallelFetches), |
| 335 experiment_id_(experiment_id) { | 341 experiment_id_(experiment_id) { |
| 336 DCHECK(request_context_.get()); // Request context must be non-NULL. | 342 DCHECK(request_context_.get()); // Request context must be non-NULL. |
| 337 DCHECK(precache_delegate_); // Precache delegate must be non-NULL. | 343 DCHECK(precache_delegate_); // Precache delegate must be non-NULL. |
| 338 | 344 |
| 339 DCHECK_NE(GURL(), GetDefaultConfigURL()) | 345 DCHECK_NE(GURL(), GetDefaultConfigURL()) |
| 340 << "Could not determine the precache config settings URL."; | 346 << "Could not determine the precache config settings URL."; |
| 341 DCHECK_NE(std::string(), GetDefaultManifestURLPrefix()) | 347 DCHECK_NE(std::string(), GetDefaultManifestURLPrefix()) |
| 342 << "Could not determine the default precache manifest URL prefix."; | 348 << "Could not determine the default precache manifest URL prefix."; |
| 343 DCHECK(unfinished_work); | 349 DCHECK(unfinished_work); |
| 344 | 350 |
| 345 // Copy manifests and resources to member variables as a convenience. | 351 // Copy resources to member variable as a convenience. |
|
sclittle
2016/08/11 22:52:35
You're already changing the member variables so mu
Raj
2016/08/12 19:04:20
hmm. That is a good suggestion.
Added a TODO for n
| |
| 346 // TODO(bengr): Consider accessing these directly from the proto. | |
|
Raj
2016/08/09 22:56:48
Since the proto datastructure does not support rem
| |
| 347 for (const auto& manifest : unfinished_work->manifest()) { | |
| 348 if (manifest.has_url()) | |
| 349 manifest_urls_to_fetch_.push_back(GURL(manifest.url())); | |
| 350 } | |
| 351 for (const auto& resource : unfinished_work->resource()) { | 352 for (const auto& resource : unfinished_work->resource()) { |
| 352 if (resource.has_url()) | 353 if (resource.has_url() && resource.has_tophostname()) { |
| 353 resource_urls_to_fetch_.push_back(GURL(resource.url())); | 354 resources_to_fetch_.emplace_back( |
| 355 std::make_pair(GURL(resource.url()), resource.tophostname())); | |
| 356 } | |
| 354 } | 357 } |
| 355 unfinished_work_ = std::move(unfinished_work); | 358 unfinished_work_ = std::move(unfinished_work); |
| 356 } | 359 } |
| 357 | 360 |
| 358 PrecacheFetcher::~PrecacheFetcher() { | 361 PrecacheFetcher::~PrecacheFetcher() { |
| 359 } | 362 } |
| 360 | 363 |
| 361 std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() { | 364 std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() { |
| 362 // This could get called multiple times, and it should be handled gracefully. | 365 // This could get called multiple times, and it should be handled gracefully. |
| 363 if (!unfinished_work_) | 366 if (!unfinished_work_) |
| 364 return nullptr; | 367 return nullptr; |
| 365 | 368 |
| 366 unfinished_work_->clear_manifest(); | |
| 367 unfinished_work_->clear_resource(); | 369 unfinished_work_->clear_resource(); |
| 368 for (const auto& manifest : manifest_urls_to_fetch_) | 370 if (top_hosts_to_fetch_) { |
| 369 unfinished_work_->add_manifest()->set_url(manifest.spec()); | 371 unfinished_work_->clear_top_host(); |
| 370 for (const auto& resource : resource_urls_to_fetch_) | 372 for (const auto& top_host : *top_hosts_to_fetch_) { |
| 371 unfinished_work_->add_resource()->set_url(resource.spec()); | 373 unfinished_work_->add_top_host()->set_hostname(top_host.hostname); |
| 374 } | |
| 375 } | |
| 376 for (const auto& resource : resources_to_fetch_) { | |
| 377 auto new_resource = unfinished_work_->add_resource(); | |
| 378 new_resource->set_url(resource.first.spec()); | |
| 379 new_resource->set_tophostname(resource.second); | |
| 380 } | |
| 372 for (const auto& it : pool_.elements()) { | 381 for (const auto& it : pool_.elements()) { |
| 373 const Fetcher* fetcher = it.first; | 382 const Fetcher* fetcher = it.first; |
| 374 if (fetcher->is_resource_request()) | 383 GURL config_url = |
| 375 unfinished_work_->add_resource()->set_url(fetcher->url().spec()); | 384 config_url_.is_empty() ? GetDefaultConfigURL() : config_url_; |
| 376 else if (fetcher->url() != config_url_) | 385 if (fetcher->is_resource_request()) { |
| 377 unfinished_work_->add_manifest()->set_url(fetcher->url().spec()); | 386 auto resource = unfinished_work_->add_resource(); |
| 387 resource->set_url(fetcher->url().spec()); | |
| 388 resource->set_tophostname(fetcher->referrer()); | |
| 389 } else if (fetcher->url() != config_url) { | |
| 390 unfinished_work_->add_top_host()->set_hostname(fetcher->referrer()); | |
| 391 } | |
| 378 } | 392 } |
| 379 manifest_urls_to_fetch_.clear(); | 393 top_hosts_to_fetch_.reset(); |
| 380 resource_urls_to_fetch_.clear(); | 394 resources_to_fetch_.clear(); |
| 381 pool_.DeleteAll(); | 395 pool_.DeleteAll(); |
| 382 return std::move(unfinished_work_); | 396 return std::move(unfinished_work_); |
| 383 } | 397 } |
| 384 | 398 |
| 385 void PrecacheFetcher::Start() { | 399 void PrecacheFetcher::Start() { |
| 386 if (unfinished_work_->has_config_settings()) { | 400 if (unfinished_work_->has_config_settings()) { |
| 387 DCHECK(unfinished_work_->has_start_time()); | 401 DCHECK(unfinished_work_->has_start_time()); |
| 388 DetermineManifests(); | 402 DetermineManifests(); |
| 389 return; | 403 return; |
| 390 } | 404 } |
| 391 | 405 |
| 392 GURL config_url = | 406 GURL config_url = |
| 393 config_url_.is_empty() ? GetDefaultConfigURL() : config_url_; | 407 config_url_.is_empty() ? GetDefaultConfigURL() : config_url_; |
| 394 | 408 |
| 395 DCHECK(config_url.is_valid()) << "Config URL not valid: " | 409 DCHECK(config_url.is_valid()) << "Config URL not valid: " |
| 396 << config_url.possibly_invalid_spec(); | 410 << config_url.possibly_invalid_spec(); |
| 397 | 411 |
| 398 // Fetch the precache configuration settings from the server. | 412 // Fetch the precache configuration settings from the server. |
| 399 DCHECK(pool_.IsEmpty()) << "All parallel requests should be available"; | 413 DCHECK(pool_.IsEmpty()) << "All parallel requests should be available"; |
| 400 VLOG(3) << "Fetching " << config_url; | 414 VLOG(3) << "Fetching " << config_url; |
| 401 pool_.Add(base::WrapUnique(new Fetcher( | 415 pool_.Add(base::WrapUnique(new Fetcher( |
| 402 request_context_.get(), config_url, | 416 request_context_.get(), config_url, std::string(), |
| 403 base::Bind(&PrecacheFetcher::OnConfigFetchComplete, | 417 base::Bind(&PrecacheFetcher::OnConfigFetchComplete, AsWeakPtr()), |
| 404 base::Unretained(this)), | |
| 405 false /* is_resource_request */, std::numeric_limits<int32_t>::max()))); | 418 false /* is_resource_request */, std::numeric_limits<int32_t>::max()))); |
| 406 } | 419 } |
| 407 | 420 |
| 408 void PrecacheFetcher::StartNextResourceFetch() { | 421 void PrecacheFetcher::StartNextResourceFetch() { |
| 409 DCHECK(unfinished_work_->has_config_settings()); | 422 DCHECK(unfinished_work_->has_config_settings()); |
| 410 while (!resource_urls_to_fetch_.empty() && pool_.IsAvailable()) { | 423 while (!resources_to_fetch_.empty() && pool_.IsAvailable()) { |
| 424 const auto& resource = resources_to_fetch_.front(); | |
| 411 const size_t max_bytes = | 425 const size_t max_bytes = |
| 412 std::min(unfinished_work_->config_settings().max_bytes_per_resource(), | 426 std::min(unfinished_work_->config_settings().max_bytes_per_resource(), |
| 413 unfinished_work_->config_settings().max_bytes_total() - | 427 unfinished_work_->config_settings().max_bytes_total() - |
| 414 unfinished_work_->total_bytes()); | 428 unfinished_work_->total_bytes()); |
| 415 VLOG(3) << "Fetching " << resource_urls_to_fetch_.front(); | 429 VLOG(3) << "Fetching " << resource.first << " " << resource.second; |
| 416 pool_.Add(base::WrapUnique( | 430 pool_.Add(base::WrapUnique(new Fetcher( |
| 417 new Fetcher(request_context_.get(), resource_urls_to_fetch_.front(), | 431 request_context_.get(), resource.first, resource.second, |
| 418 base::Bind(&PrecacheFetcher::OnResourceFetchComplete, | 432 base::Bind(&PrecacheFetcher::OnResourceFetchComplete, AsWeakPtr()), |
| 419 base::Unretained(this)), | 433 true /* is_resource_request */, max_bytes))); |
| 420 true /* is_resource_request */, max_bytes))); | |
| 421 | 434 |
| 422 resource_urls_to_fetch_.pop_front(); | 435 resources_to_fetch_.pop_front(); |
| 423 } | 436 } |
| 424 } | 437 } |
| 425 | 438 |
| 426 void PrecacheFetcher::StartNextManifestFetch() { | 439 void PrecacheFetcher::StartNextManifestFetch() { |
| 427 if (manifest_urls_to_fetch_.empty() || !pool_.IsAvailable()) | 440 if (!top_hosts_to_fetch_ || top_hosts_to_fetch_->empty() || |
| 441 !pool_.IsAvailable()) | |
| 428 return; | 442 return; |
| 429 | 443 |
| 430 // We only fetch one manifest at a time to keep the size of | 444 // We only fetch one manifest at a time to keep the size of |
| 431 // resource_urls_to_fetch_ as small as possible. | 445 // resources_to_fetch_ as small as possible. |
| 432 VLOG(3) << "Fetching " << manifest_urls_to_fetch_.front(); | 446 VLOG(3) << "Fetching " << top_hosts_to_fetch_->front().manifest_url; |
| 433 pool_.Add(base::WrapUnique(new Fetcher( | 447 pool_.Add(base::WrapUnique(new Fetcher( |
| 434 request_context_.get(), manifest_urls_to_fetch_.front(), | 448 request_context_.get(), top_hosts_to_fetch_->front().manifest_url, |
| 435 base::Bind(&PrecacheFetcher::OnManifestFetchComplete, | 449 top_hosts_to_fetch_->front().hostname, |
| 436 base::Unretained(this)), | 450 base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr()), |
| 437 false /* is_resource_request */, std::numeric_limits<int32_t>::max()))); | 451 false /* is_resource_request */, std::numeric_limits<int32_t>::max()))); |
| 438 | 452 top_hosts_to_fetch_->pop_front(); |
| 439 manifest_urls_to_fetch_.pop_front(); | |
| 440 } | 453 } |
| 441 | 454 |
| 442 void PrecacheFetcher::NotifyDone( | 455 void PrecacheFetcher::NotifyDone( |
| 443 size_t remaining_manifest_urls_to_fetch, | 456 size_t remaining_manifest_urls_to_fetch, |
| 444 size_t remaining_resource_urls_to_fetch) { | 457 size_t remaining_resource_urls_to_fetch) { |
| 445 RecordCompletionStatistics(*unfinished_work_, | 458 RecordCompletionStatistics(*unfinished_work_, |
| 446 remaining_manifest_urls_to_fetch, | 459 remaining_manifest_urls_to_fetch, |
| 447 remaining_resource_urls_to_fetch); | 460 remaining_resource_urls_to_fetch); |
| 448 precache_delegate_->OnDone(); | 461 precache_delegate_->OnDone(); |
| 449 } | 462 } |
| 450 | 463 |
| 451 void PrecacheFetcher::StartNextFetch() { | 464 void PrecacheFetcher::StartNextFetch() { |
| 452 DCHECK(unfinished_work_->has_config_settings()); | 465 DCHECK(unfinished_work_->has_config_settings()); |
| 453 // If over the precache total size cap, then stop prefetching. | 466 // If over the precache total size cap, then stop prefetching. |
| 454 if (unfinished_work_->total_bytes() > | 467 if (unfinished_work_->total_bytes() > |
| 455 unfinished_work_->config_settings().max_bytes_total()) { | 468 unfinished_work_->config_settings().max_bytes_total()) { |
| 456 size_t pending_manifests_in_pool = 0; | 469 size_t pending_manifests_in_pool = 0; |
| 457 size_t pending_resources_in_pool = 0; | 470 size_t pending_resources_in_pool = 0; |
| 458 for (const auto& element_pair : pool_.elements()) { | 471 for (const auto& element_pair : pool_.elements()) { |
| 459 const Fetcher* fetcher = element_pair.first; | 472 const Fetcher* fetcher = element_pair.first; |
| 460 if (fetcher->is_resource_request()) | 473 if (fetcher->is_resource_request()) |
| 461 pending_resources_in_pool++; | 474 pending_resources_in_pool++; |
| 462 else if (fetcher->url() != config_url_) | 475 else if (fetcher->url() != config_url_) |
| 463 pending_manifests_in_pool++; | 476 pending_manifests_in_pool++; |
| 464 } | 477 } |
| 465 pool_.DeleteAll(); | 478 pool_.DeleteAll(); |
| 466 NotifyDone(manifest_urls_to_fetch_.size() + pending_manifests_in_pool, | 479 int pending_top_hosts = |
| 467 resource_urls_to_fetch_.size() + pending_resources_in_pool); | 480 top_hosts_to_fetch_ ? top_hosts_to_fetch_->size() : 0; |
| 481 NotifyDone(pending_top_hosts + pending_manifests_in_pool, | |
| 482 resources_to_fetch_.size() + pending_resources_in_pool); | |
| 468 return; | 483 return; |
| 469 } | 484 } |
| 470 | 485 |
| 471 StartNextResourceFetch(); | 486 StartNextResourceFetch(); |
| 472 StartNextManifestFetch(); | 487 StartNextManifestFetch(); |
| 473 if (pool_.IsEmpty()) { | 488 if ((!top_hosts_to_fetch_ || top_hosts_to_fetch_->empty()) && |
| 489 resources_to_fetch_.empty() && pool_.IsEmpty()) { | |
| 474 // There are no more URLs to fetch, so end the precache cycle. | 490 // There are no more URLs to fetch, so end the precache cycle. |
| 475 NotifyDone(0, 0); | 491 NotifyDone(0, 0); |
| 476 // OnDone may have deleted this PrecacheFetcher, so don't do anything after | 492 // OnDone may have deleted this PrecacheFetcher, so don't do anything after |
| 477 // it is called. | 493 // it is called. |
| 478 } | 494 } |
| 479 } | 495 } |
| 480 | 496 |
| 481 void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) { | 497 void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) { |
| 482 UpdateStats(source.response_bytes(), source.network_response_bytes()); | 498 UpdateStats(source.response_bytes(), source.network_response_bytes()); |
| 483 if (source.network_url_fetcher() == nullptr) { | 499 if (source.network_url_fetcher() == nullptr) { |
| 484 pool_.DeleteAll(); // Cancel any other ongoing request. | 500 pool_.DeleteAll(); // Cancel any other ongoing request. |
| 485 } else { | 501 } else { |
| 486 // Attempt to parse the config proto. On failure, continue on with the | 502 // Attempt to parse the config proto. On failure, continue on with the |
| 487 // default configuration. | 503 // default configuration. |
| 488 ParseProtoFromFetchResponse( | 504 ParseProtoFromFetchResponse( |
| 489 *source.network_url_fetcher(), | 505 *source.network_url_fetcher(), |
| 490 unfinished_work_->mutable_config_settings()); | 506 unfinished_work_->mutable_config_settings()); |
| 491 pool_.Delete(source); | 507 pool_.Delete(source); |
| 492 DetermineManifests(); | 508 DetermineManifests(); |
| 493 } | 509 } |
| 494 } | 510 } |
| 495 | 511 |
| 496 void PrecacheFetcher::DetermineManifests() { | 512 void PrecacheFetcher::DetermineManifests() { |
| 497 DCHECK(unfinished_work_->has_config_settings()); | 513 DCHECK(unfinished_work_->has_config_settings()); |
| 498 std::string prefix = manifest_url_prefix_.empty() | |
| 499 ? GetDefaultManifestURLPrefix() | |
| 500 : manifest_url_prefix_; | |
| 501 DCHECK_NE(std::string(), prefix) | |
| 502 << "Could not determine the precache manifest URL prefix."; | |
| 503 | 514 |
| 504 // Keep track of manifest URLs that are being fetched, in order to elide | 515 // Keep track of manifest URLs that are being fetched, in order to elide |
| 505 // duplicates. | 516 // duplicates. |
| 506 base::hash_set<std::string> seen_manifest_urls; | 517 std::set<std::string> seen_top_hosts; |
|
sclittle
2016/08/11 22:52:35
You should move this into the below if-statement s
Raj
2016/08/12 19:04:20
Done.
| |
| 518 std::unique_ptr<std::deque<std::string>> top_hosts_to_fetch( | |
|
sclittle
2016/08/11 22:52:35
You can remove the std::unique_ptr around this if
Raj
2016/08/12 19:04:20
Nice. It works. But top_hosts_to_fetch is a local
| |
| 519 new std::deque<std::string>); | |
| 520 std::unique_ptr<std::deque<ManifestHostInfo>> top_hosts_info( | |
| 521 new std::deque<ManifestHostInfo>); | |
| 507 | 522 |
| 508 // Attempt to fetch manifests for starting hosts up to the maximum top sites | 523 // Attempt to fetch manifests for starting hosts up to the maximum top sites |
| 509 // count. If a manifest does not exist for a particular starting host, then | 524 // count. If a manifest does not exist for a particular starting host, then |
| 510 // the fetch will fail, and that starting host will be ignored. Starting | 525 // the fetch will fail, and that starting host will be ignored. Starting |
| 511 // hosts are not added if this is a continuation from a previous precache | 526 // hosts are not added if this is a continuation from a previous precache |
| 512 // session. | 527 // session. |
| 513 if (manifest_urls_to_fetch_.empty() && | 528 if (top_hosts_to_fetch->empty() && resources_to_fetch_.empty()) { |
|
sclittle
2016/08/11 22:52:35
|top_hosts_to_fetch| will always be empty at this
Raj
2016/08/12 19:04:20
Yep.
| |
| 514 resource_urls_to_fetch_.empty()) { | 529 int64_t rank = 0; |
| 515 int64_t rank = 0; | 530 for (const auto& host : unfinished_work_->top_host()) { |
| 516 for (const auto& host : unfinished_work_->top_host()) { | 531 ++rank; |
| 517 ++rank; | 532 if (rank > unfinished_work_->config_settings().top_sites_count()) |
| 518 if (rank > unfinished_work_->config_settings().top_sites_count()) | 533 break; |
| 519 break; | 534 if (seen_top_hosts.insert(host.hostname()).second) |
| 520 AppendManifestURLIfValidAndNew(prefix, host.hostname(), | 535 top_hosts_to_fetch->emplace_back(host.hostname()); |
|
sclittle
2016/08/11 22:52:35
nit: I think you can just use push_back here, it'l
Raj
2016/08/12 19:04:20
Done.
| |
| 521 &seen_manifest_urls, | 536 } |
| 522 &manifest_urls_to_fetch_); | |
| 523 } | |
| 524 | 537 |
| 525 for (const std::string& host | 538 for (const std::string& host : |
| 526 : unfinished_work_->config_settings().forced_site()) { | 539 unfinished_work_->config_settings().forced_site()) { |
| 527 AppendManifestURLIfValidAndNew(prefix, host, &seen_manifest_urls, | 540 if (seen_top_hosts.insert(host).second) |
| 528 &manifest_urls_to_fetch_); | 541 top_hosts_to_fetch->emplace_back(host); |
|
sclittle
2016/08/11 22:52:35
nit: I think you can replace emplace_back with pus
Raj
2016/08/12 19:04:21
Done.
| |
| 529 } | |
| 530 } | 542 } |
| 531 unfinished_work_->set_num_manifest_urls(manifest_urls_to_fetch_.size()); | 543 } |
| 532 StartNextFetch(); | 544 // We only fetch one manifest at a time to keep the size of |
| 545 // resources_to_fetch_ as small as possible. | |
| 546 auto retrieve_manifest_callback = | |
| 547 base::Bind(&PrecacheFetcher::RetrieveManifestInfo, AsWeakPtr(), | |
| 548 base::Passed(&top_hosts_to_fetch), top_hosts_info.get()); | |
| 549 db_task_runner_->PostTaskAndReply( | |
| 550 FROM_HERE, retrieve_manifest_callback, | |
| 551 base::Bind(&PrecacheFetcher::OnManifestInfoRetrieved, AsWeakPtr(), | |
| 552 base::Passed(&top_hosts_info))); | |
| 533 } | 553 } |
| 534 | 554 |
| 555 void PrecacheFetcher::RetrieveManifestInfo( | |
| 556 std::unique_ptr<std::deque<std::string>> hosts_to_fetch, | |
| 557 std::deque<ManifestHostInfo>* hosts_info) { | |
| 558 for (const auto& host : *hosts_to_fetch) { | |
| 559 auto referrer_host_info = precache_database_->GetReferrerHost(host); | |
| 560 if (referrer_host_info.id != PrecacheReferrerHostEntry::INVALID_ID) { | |
| 561 std::deque<GURL> used_urls, unused_urls; | |
|
sclittle
2016/08/11 22:52:35
nit: Can this just be an std::vector? Typically an
Raj
2016/08/12 19:04:20
Done.
| |
| 562 precache_database_->GetURLListForReferrerHost(referrer_host_info.id, | |
| 563 used_urls, unused_urls); | |
| 564 hosts_info->emplace_back(ManifestHostInfo( | |
| 565 referrer_host_info.id, host, GetResourceURLBase64Hash(used_urls), | |
| 566 GetResourceURLBase64Hash(unused_urls))); | |
| 567 } else { | |
| 568 hosts_info->emplace_back( | |
| 569 ManifestHostInfo(PrecacheReferrerHostEntry::INVALID_ID, host, | |
| 570 std::string(), std::string())); | |
| 571 } | |
| 572 } | |
| 573 } | |
| 574 | |
| 575 void PrecacheFetcher::OnManifestInfoRetrieved( | |
| 576 std::unique_ptr<std::deque<ManifestHostInfo>> manifests_info) { | |
| 577 DCHECK(manifests_info); | |
| 578 const std::string prefix = manifest_url_prefix_.empty() | |
| 579 ? GetDefaultManifestURLPrefix() | |
| 580 : manifest_url_prefix_; | |
| 581 top_hosts_to_fetch_.reset(new std::deque<ManifestHostInfo>()); | |
| 582 for (auto& manifest : *manifests_info) { | |
| 583 GURL manifest_url( | |
| 584 prefix + | |
| 585 net::EscapeQueryParamValue( | |
| 586 net::EscapeQueryParamValue(manifest.hostname, false), false)); | |
|
sclittle
2016/08/11 22:52:35
Can't you just add all the query params when you c
Raj
2016/08/12 19:04:20
I do not see any GURL constructor that takes query
sclittle
2016/08/15 20:13:09
Ok, this is probably fine then. I don't see a nice
| |
| 587 if (manifest_url.is_valid() && | |
| 588 manifest.manifest_id != PrecacheReferrerHostEntry::INVALID_ID) { | |
| 589 manifest_url = net::AppendOrReplaceQueryParameter( | |
| 590 manifest_url, "manifest", std::to_string(manifest.manifest_id)); | |
| 591 manifest_url = net::AppendOrReplaceQueryParameter( | |
| 592 manifest_url, "used_resources", manifest.used_url_hash); | |
| 593 manifest_url = net::AppendOrReplaceQueryParameter( | |
| 594 manifest_url, "unused_resources", manifest.unused_url_hash); | |
| 595 DCHECK(manifest_url.is_valid()); | |
| 596 } | |
| 597 manifest.manifest_url = manifest_url; | |
| 598 if (manifest_url.is_valid()) | |
| 599 top_hosts_to_fetch_->emplace_back(manifest); | |
| 600 } | |
| 601 unfinished_work_->set_num_manifest_urls(top_hosts_to_fetch_->size()); | |
| 602 StartNextFetch(); | |
| 603 } | |
| 604 | |
| 605 ManifestHostInfo::ManifestHostInfo(int64_t manifest_id, | |
| 606 const std::string& hostname, | |
| 607 const std::string& used_url_hash, | |
| 608 const std::string& unused_url_hash) | |
| 609 : manifest_id(manifest_id), | |
| 610 hostname(hostname), | |
| 611 used_url_hash(used_url_hash), | |
| 612 unused_url_hash(unused_url_hash) {} | |
| 613 | |
| 614 ManifestHostInfo::~ManifestHostInfo() {} | |
| 615 | |
| 616 ManifestHostInfo::ManifestHostInfo(const ManifestHostInfo& other) = default; | |
| 617 | |
| 535 void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) { | 618 void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) { |
| 536 DCHECK(unfinished_work_->has_config_settings()); | 619 DCHECK(unfinished_work_->has_config_settings()); |
| 537 UpdateStats(source.response_bytes(), source.network_response_bytes()); | 620 UpdateStats(source.response_bytes(), source.network_response_bytes()); |
| 538 if (source.network_url_fetcher() == nullptr) { | 621 if (source.network_url_fetcher() == nullptr) { |
| 539 pool_.DeleteAll(); // Cancel any other ongoing request. | 622 pool_.DeleteAll(); // Cancel any other ongoing request. |
| 540 } else { | 623 } else { |
| 541 PrecacheManifest manifest; | 624 PrecacheManifest manifest; |
| 542 | 625 |
| 543 if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) { | 626 if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) { |
| 544 const int32_t len = | 627 const int32_t len = |
| 545 std::min(manifest.resource_size(), | 628 std::min(manifest.resource_size(), |
| 546 unfinished_work_->config_settings().top_resources_count()); | 629 unfinished_work_->config_settings().top_resources_count()); |
| 547 const uint64_t resource_bitset = | 630 const uint64_t resource_bitset = |
| 548 GetResourceBitset(manifest, experiment_id_); | 631 GetResourceBitset(manifest, experiment_id_); |
| 549 for (int i = 0; i < len; ++i) { | 632 for (int i = 0; i < len; ++i) { |
| 550 if (((0x1ULL << i) & resource_bitset) && | 633 if (((0x1ULL << i) & resource_bitset) && |
| 551 manifest.resource(i).has_url()) { | 634 manifest.resource(i).has_url()) { |
| 552 GURL url(manifest.resource(i).url()); | 635 GURL url(manifest.resource(i).url()); |
| 553 if (url.is_valid()) | 636 if (url.is_valid()) { |
| 554 resource_urls_to_fetch_.push_back(url); | 637 resources_to_fetch_.emplace_back( |
| 638 std::make_pair(url, source.referrer())); | |
| 639 } | |
| 555 } | 640 } |
| 556 } | 641 } |
| 642 db_task_runner_->PostTask( | |
| 643 FROM_HERE, | |
| 644 base::Bind(&PrecacheDatabase::UpdatePrecacheReferrerHost, | |
| 645 precache_database_, source.referrer(), | |
| 646 manifest.id().timestamp().seconds(), base::Time::Now())); | |
| 557 } | 647 } |
| 558 } | 648 } |
| 559 | 649 |
| 560 pool_.Delete(source); | 650 pool_.Delete(source); |
| 561 StartNextFetch(); | 651 StartNextFetch(); |
| 562 } | 652 } |
| 563 | 653 |
| 564 void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) { | 654 void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) { |
| 565 UpdateStats(source.response_bytes(), source.network_response_bytes()); | 655 UpdateStats(source.response_bytes(), source.network_response_bytes()); |
| 656 | |
| 657 db_task_runner_->PostTask( | |
| 658 FROM_HERE, | |
| 659 base::Bind(&PrecacheDatabase::RecordURLPrefetch, precache_database_, | |
| 660 source.url(), source.referrer(), base::Time::Now(), | |
| 661 source.was_cached(), source.response_bytes())); | |
| 662 | |
| 566 pool_.Delete(source); | 663 pool_.Delete(source); |
| 664 | |
| 567 // The resource has already been put in the cache during the fetch process, so | 665 // The resource has already been put in the cache during the fetch process, so |
| 568 // nothing more needs to be done for the resource. | 666 // nothing more needs to be done for the resource. |
| 569 StartNextFetch(); | 667 StartNextFetch(); |
| 570 } | 668 } |
| 571 | 669 |
| 572 void PrecacheFetcher::UpdateStats(int64_t response_bytes, | 670 void PrecacheFetcher::UpdateStats(int64_t response_bytes, |
| 573 int64_t network_response_bytes) { | 671 int64_t network_response_bytes) { |
| 574 unfinished_work_->set_total_bytes( | 672 unfinished_work_->set_total_bytes( |
| 575 unfinished_work_->total_bytes() + response_bytes); | 673 unfinished_work_->total_bytes() + response_bytes); |
| 576 unfinished_work_->set_network_bytes( | 674 unfinished_work_->set_network_bytes( |
| 577 unfinished_work_->network_bytes() + network_response_bytes); | 675 unfinished_work_->network_bytes() + network_response_bytes); |
| 578 } | 676 } |
| 579 | 677 |
| 580 } // namespace precache | 678 } // namespace precache |
| OLD | NEW |