Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(63)

Side by Side Diff: components/precache/core/precache_fetcher.cc

Issue 2229983002: Send the list of used and unused resources for precache (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/precache/core/precache_fetcher.h" 5 #include "components/precache/core/precache_fetcher.h"
6 6
7 #include <algorithm>
sclittle 2016/08/11 22:52:35 You still need <algorithm> for std::min.
Raj 2016/08/12 19:04:20 Done.
8 #include <limits> 7 #include <limits>
9 #include <string>
10 #include <utility> 8 #include <utility>
11 #include <vector>
12 9
10 #include "base/base64.h"
13 #include "base/bind.h" 11 #include "base/bind.h"
14 #include "base/bind_helpers.h" 12 #include "base/bind_helpers.h"
15 #include "base/callback.h" 13 #include "base/callback.h"
16 #include "base/command_line.h" 14 #include "base/command_line.h"
17 #include "base/compiler_specific.h" 15 #include "base/compiler_specific.h"
18 #include "base/containers/hash_tables.h" 16 #include "base/containers/hash_tables.h"
19 #include "base/location.h" 17 #include "base/location.h"
20 #include "base/logging.h" 18 #include "base/logging.h"
21 #include "base/memory/ptr_util.h" 19 #include "base/memory/ptr_util.h"
22 #include "base/memory/ref_counted.h" 20 #include "base/memory/ref_counted.h"
23 #include "base/metrics/histogram_macros.h" 21 #include "base/metrics/histogram_macros.h"
22 #include "base/sha1.h"
23 #include "base/task_runner_util.h"
24 #include "components/precache/core/precache_database.h"
24 #include "components/precache/core/precache_switches.h" 25 #include "components/precache/core/precache_switches.h"
25 #include "components/precache/core/proto/precache.pb.h" 26 #include "components/precache/core/proto/precache.pb.h"
26 #include "components/precache/core/proto/unfinished_work.pb.h" 27 #include "components/precache/core/proto/unfinished_work.pb.h"
27 #include "net/base/completion_callback.h" 28 #include "net/base/completion_callback.h"
28 #include "net/base/escape.h" 29 #include "net/base/escape.h"
29 #include "net/base/io_buffer.h" 30 #include "net/base/io_buffer.h"
30 #include "net/base/load_flags.h" 31 #include "net/base/load_flags.h"
31 #include "net/base/net_errors.h" 32 #include "net/base/net_errors.h"
33 #include "net/base/url_util.h"
32 #include "net/http/http_response_headers.h" 34 #include "net/http/http_response_headers.h"
33 #include "net/url_request/url_fetcher_response_writer.h" 35 #include "net/url_request/url_fetcher_response_writer.h"
34 #include "net/url_request/url_request_context_getter.h" 36 #include "net/url_request/url_request_context_getter.h"
35 #include "net/url_request/url_request_status.h" 37 #include "net/url_request/url_request_status.h"
36 38
37 namespace precache { 39 namespace precache {
38 40
39 // The following flags are for privacy reasons. For example, if a user clears 41 // The following flags are for privacy reasons. For example, if a user clears
40 // their cookies, but a tracking beacon is prefetched and the beacon specifies 42 // their cookies, but a tracking beacon is prefetched and the beacon specifies
41 // its source URL in a URL param, the beacon site would be able to rebuild a 43 // its source URL in a URL param, the beacon site would be able to rebuild a
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
83 85
84 #if defined(PRECACHE_MANIFEST_URL_PREFIX) 86 #if defined(PRECACHE_MANIFEST_URL_PREFIX)
85 return PRECACHE_MANIFEST_URL_PREFIX; 87 return PRECACHE_MANIFEST_URL_PREFIX;
86 #else 88 #else
87 // The precache manifest URL prefix could not be determined, so return an 89 // The precache manifest URL prefix could not be determined, so return an
88 // empty string. 90 // empty string.
89 return std::string(); 91 return std::string();
90 #endif 92 #endif
91 } 93 }
92 94
93 // Construct the URL of the precache manifest for the given name (either host or
94 // URL). The server is expecting a request for a URL consisting of the manifest
95 // URL prefix followed by the doubly escaped name.
96 std::string ConstructManifestURL(const std::string& prefix,
97 const std::string& name) {
98 return prefix + net::EscapeQueryParamValue(
99 net::EscapeQueryParamValue(name, false), false);
100 }
101
102 // Attempts to parse a protobuf message from the response string of a 95 // Attempts to parse a protobuf message from the response string of a
103 // URLFetcher. If parsing is successful, the message parameter will contain the 96 // URLFetcher. If parsing is successful, the message parameter will contain the
104 // parsed protobuf and this function will return true. Otherwise, returns false. 97 // parsed protobuf and this function will return true. Otherwise, returns false.
105 bool ParseProtoFromFetchResponse(const net::URLFetcher& source, 98 bool ParseProtoFromFetchResponse(const net::URLFetcher& source,
106 ::google::protobuf::MessageLite* message) { 99 ::google::protobuf::MessageLite* message) {
107 std::string response_string; 100 std::string response_string;
108 101
109 if (!source.GetStatus().is_success()) { 102 if (!source.GetStatus().is_success()) {
110 DLOG(WARNING) << "Fetch failed: " << source.GetOriginalURL().spec(); 103 DLOG(WARNING) << "Fetch failed: " << source.GetOriginalURL().spec();
111 return false; 104 return false;
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
152 int num_bytes, 145 int num_bytes,
153 const net::CompletionCallback& callback) override { 146 const net::CompletionCallback& callback) override {
154 return num_bytes; 147 return num_bytes;
155 } 148 }
156 149
157 int Finish(const net::CompletionCallback& callback) override { 150 int Finish(const net::CompletionCallback& callback) override {
158 return net::OK; 151 return net::OK;
159 } 152 }
160 }; 153 };
161 154
162 void AppendManifestURLIfValidAndNew( 155 // Returns the base64 encoded resource URL hashes. The resource URLs are hashed
163 const std::string& prefix, 156 // individually, and 8 bytes of each hash is appended together, which is then
164 const std::string& name, 157 // encoded to base64.
165 base::hash_set<std::string>* seen_manifest_urls, 158 std::string GetResourceURLBase64Hash(const std::deque<GURL>& urls) {
166 std::list<GURL>* unique_manifest_urls) { 159 std::string hashes;
sclittle 2016/08/11 22:52:35 nit: to avoid a bunch of reallocations, you could
Raj 2016/08/12 19:04:20 Done.
167 const std::string manifest_url = ConstructManifestURL(prefix, name); 160 for (const auto& url : urls) {
168 bool first_seen = seen_manifest_urls->insert(manifest_url).second; 161 std::string url_spec = url.spec();
sclittle 2016/08/11 22:52:35 nit: change to const std::string& to avoid string
Raj 2016/08/12 19:04:20 Done.
169 if (first_seen) { 162 unsigned char sha1_hash[base::kSHA1Length];
170 GURL url(manifest_url); 163 base::SHA1HashBytes(
bengr 2016/08/11 18:49:14 test that this works on an empty string.
171 if (url.is_valid()) 164 reinterpret_cast<const unsigned char*>(url_spec.c_str()),
172 unique_manifest_urls->push_back(url); 165 url_spec.size(), sha1_hash);
166 // Each resource hash uses 8 bytes.
167 hashes.append(reinterpret_cast<const char*>(sha1_hash), 8);
sclittle 2016/08/11 22:52:35 Replace "8" with "arraysize(sha1_hash)".
Raj 2016/08/12 19:04:21 hmm. sha1 is actually 20 bytes. But we are using o
sclittle 2016/08/15 20:13:09 Oh, OK, that makes sense. Could you explain that h
173 } 168 }
169 base::Base64Encode(hashes, &hashes);
170 return hashes;
174 } 171 }
175 172
176 } // namespace 173 } // namespace
177 174
178 PrecacheFetcher::Fetcher::Fetcher( 175 PrecacheFetcher::Fetcher::Fetcher(
179 net::URLRequestContextGetter* request_context, 176 net::URLRequestContextGetter* request_context,
180 const GURL& url, 177 const GURL& url,
178 const std::string& referrer,
181 const base::Callback<void(const Fetcher&)>& callback, 179 const base::Callback<void(const Fetcher&)>& callback,
182 bool is_resource_request, 180 bool is_resource_request,
183 size_t max_bytes) 181 size_t max_bytes)
184 : request_context_(request_context), 182 : request_context_(request_context),
185 url_(url), 183 url_(url),
184 referrer_(referrer),
186 callback_(callback), 185 callback_(callback),
187 is_resource_request_(is_resource_request), 186 is_resource_request_(is_resource_request),
188 max_bytes_(max_bytes), 187 max_bytes_(max_bytes),
189 response_bytes_(0), 188 response_bytes_(0),
190 network_response_bytes_(0) { 189 network_response_bytes_(0),
190 was_cached_(false) {
191 DCHECK(url.is_valid());
191 if (is_resource_request_) 192 if (is_resource_request_)
192 LoadFromCache(); 193 LoadFromCache();
193 else 194 else
194 LoadFromNetwork(); 195 LoadFromNetwork();
195 } 196 }
196 197
197 PrecacheFetcher::Fetcher::~Fetcher() {} 198 PrecacheFetcher::Fetcher::~Fetcher() {}
198 199
199 void PrecacheFetcher::Fetcher::LoadFromCache() { 200 void PrecacheFetcher::Fetcher::LoadFromCache() {
200 fetch_stage_ = FetchStage::CACHE; 201 fetch_stage_ = FetchStage::CACHE;
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
233 const net::URLFetcher* source, 234 const net::URLFetcher* source,
234 int64_t current, 235 int64_t current,
235 int64_t total) { 236 int64_t total) {
236 // If going over the per-resource download cap. 237 // If going over the per-resource download cap.
237 if (fetch_stage_ == FetchStage::NETWORK && 238 if (fetch_stage_ == FetchStage::NETWORK &&
238 // |current| is guaranteed to be non-negative, so this cast is safe. 239 // |current| is guaranteed to be non-negative, so this cast is safe.
239 static_cast<size_t>(std::max(current, total)) > max_bytes_) { 240 static_cast<size_t>(std::max(current, total)) > max_bytes_) {
240 VLOG(1) << "Cancelling " << url_ << ": (" << current << "/" << total 241 VLOG(1) << "Cancelling " << url_ << ": (" << current << "/" << total
241 << ") is over " << max_bytes_; 242 << ") is over " << max_bytes_;
242 243
243 // Cancel the download.
244 network_url_fetcher_.reset();
245
246 // Call the completion callback, to attempt the next download, or to trigger 244 // Call the completion callback, to attempt the next download, or to trigger
247 // cleanup in precache_delegate_->OnDone(). 245 // cleanup in precache_delegate_->OnDone().
248 response_bytes_ = network_response_bytes_ = current; 246 response_bytes_ = network_response_bytes_ = current;
247 was_cached_ = source->WasCached();
249 248
249 // Cancel the download.
250 network_url_fetcher_.reset();
250 callback_.Run(*this); 251 callback_.Run(*this);
251 } 252 }
252 } 253 }
253 254
254 void PrecacheFetcher::Fetcher::OnURLFetchComplete( 255 void PrecacheFetcher::Fetcher::OnURLFetchComplete(
255 const net::URLFetcher* source) { 256 const net::URLFetcher* source) {
256 CHECK(source); 257 CHECK(source);
257 if (fetch_stage_ == FetchStage::CACHE && 258 if (fetch_stage_ == FetchStage::CACHE &&
258 (source->GetStatus().error() == net::ERR_CACHE_MISS || 259 (source->GetStatus().error() == net::ERR_CACHE_MISS ||
259 (source->GetResponseHeaders() && 260 (source->GetResponseHeaders() &&
260 source->GetResponseHeaders()->HasValidators()))) { 261 source->GetResponseHeaders()->HasValidators()))) {
261 // If the resource was not found in the cache, request it from the 262 // If the resource was not found in the cache, request it from the
262 // network. 263 // network.
263 // 264 //
264 // If the resource was found in the cache, but contains validators, 265 // If the resource was found in the cache, but contains validators,
265 // request a refresh. The presence of validators increases the chance that 266 // request a refresh. The presence of validators increases the chance that
266 // we get a 304 response rather than a full one, thus allowing us to 267 // we get a 304 response rather than a full one, thus allowing us to
267 // refresh the cache with minimal network load. 268 // refresh the cache with minimal network load.
268 LoadFromNetwork(); 269 LoadFromNetwork();
269 return; 270 return;
270 } 271 }
271 272
272 // If any of: 273 // If any of:
273 // - The request was for a config or manifest. 274 // - The request was for a config or manifest.
274 // - The resource was a cache hit without validators. 275 // - The resource was a cache hit without validators.
275 // - The response came from the network. 276 // - The response came from the network.
276 // Then Fetcher is done with this URL and can return control to the caller. 277 // Then Fetcher is done with this URL and can return control to the caller.
277 response_bytes_ = source->GetReceivedResponseContentLength(); 278 response_bytes_ = source->GetReceivedResponseContentLength();
278 network_response_bytes_ = source->GetTotalReceivedBytes(); 279 network_response_bytes_ = source->GetTotalReceivedBytes();
280 was_cached_ = source->WasCached();
279 callback_.Run(*this); 281 callback_.Run(*this);
280 } 282 }
281 283
282 // static 284 // static
283 void PrecacheFetcher::RecordCompletionStatistics( 285 void PrecacheFetcher::RecordCompletionStatistics(
284 const PrecacheUnfinishedWork& unfinished_work, 286 const PrecacheUnfinishedWork& unfinished_work,
285 size_t remaining_manifest_urls_to_fetch, 287 size_t remaining_manifest_urls_to_fetch,
286 size_t remaining_resource_urls_to_fetch) { 288 size_t remaining_resource_urls_to_fetch) {
287 // These may be unset in tests. 289 // These may be unset in tests.
288 if (!unfinished_work.has_start_time()) 290 if (!unfinished_work.has_start_time())
(...skipping 30 matching lines...) Expand all
319 1, kMaxResponseBytes, 321 1, kMaxResponseBytes,
320 100); 322 100);
321 } 323 }
322 324
323 PrecacheFetcher::PrecacheFetcher( 325 PrecacheFetcher::PrecacheFetcher(
324 net::URLRequestContextGetter* request_context, 326 net::URLRequestContextGetter* request_context,
325 const GURL& config_url, 327 const GURL& config_url,
326 const std::string& manifest_url_prefix, 328 const std::string& manifest_url_prefix,
327 std::unique_ptr<PrecacheUnfinishedWork> unfinished_work, 329 std::unique_ptr<PrecacheUnfinishedWork> unfinished_work,
328 uint32_t experiment_id, 330 uint32_t experiment_id,
331 base::WeakPtr<PrecacheDatabase> precache_database,
sclittle 2016/08/11 22:52:35 nit: pass by const ref to avoid extra Add/Remove r
Raj 2016/08/12 19:04:21 Done.
332 const scoped_refptr<base::SingleThreadTaskRunner>& db_task_runner,
329 PrecacheFetcher::PrecacheDelegate* precache_delegate) 333 PrecacheFetcher::PrecacheDelegate* precache_delegate)
330 : request_context_(request_context), 334 : request_context_(request_context),
331 config_url_(config_url), 335 config_url_(config_url),
332 manifest_url_prefix_(manifest_url_prefix), 336 manifest_url_prefix_(manifest_url_prefix),
337 precache_database_(precache_database),
338 db_task_runner_(std::move(db_task_runner)),
333 precache_delegate_(precache_delegate), 339 precache_delegate_(precache_delegate),
334 pool_(kMaxParallelFetches), 340 pool_(kMaxParallelFetches),
335 experiment_id_(experiment_id) { 341 experiment_id_(experiment_id) {
336 DCHECK(request_context_.get()); // Request context must be non-NULL. 342 DCHECK(request_context_.get()); // Request context must be non-NULL.
337 DCHECK(precache_delegate_); // Precache delegate must be non-NULL. 343 DCHECK(precache_delegate_); // Precache delegate must be non-NULL.
338 344
339 DCHECK_NE(GURL(), GetDefaultConfigURL()) 345 DCHECK_NE(GURL(), GetDefaultConfigURL())
340 << "Could not determine the precache config settings URL."; 346 << "Could not determine the precache config settings URL.";
341 DCHECK_NE(std::string(), GetDefaultManifestURLPrefix()) 347 DCHECK_NE(std::string(), GetDefaultManifestURLPrefix())
342 << "Could not determine the default precache manifest URL prefix."; 348 << "Could not determine the default precache manifest URL prefix.";
343 DCHECK(unfinished_work); 349 DCHECK(unfinished_work);
344 350
345 // Copy manifests and resources to member variables as a convenience. 351 // Copy resources to member variable as a convenience.
sclittle 2016/08/11 22:52:35 You're already changing the member variables so mu
Raj 2016/08/12 19:04:20 hmm. That is a good suggestion. Added a TODO for n
346 // TODO(bengr): Consider accessing these directly from the proto.
Raj 2016/08/09 22:56:48 Since the proto datastructure does not support rem
347 for (const auto& manifest : unfinished_work->manifest()) {
348 if (manifest.has_url())
349 manifest_urls_to_fetch_.push_back(GURL(manifest.url()));
350 }
351 for (const auto& resource : unfinished_work->resource()) { 352 for (const auto& resource : unfinished_work->resource()) {
352 if (resource.has_url()) 353 if (resource.has_url() && resource.has_tophostname()) {
353 resource_urls_to_fetch_.push_back(GURL(resource.url())); 354 resources_to_fetch_.emplace_back(
355 std::make_pair(GURL(resource.url()), resource.tophostname()));
356 }
354 } 357 }
355 unfinished_work_ = std::move(unfinished_work); 358 unfinished_work_ = std::move(unfinished_work);
356 } 359 }
357 360
358 PrecacheFetcher::~PrecacheFetcher() { 361 PrecacheFetcher::~PrecacheFetcher() {
359 } 362 }
360 363
361 std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() { 364 std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() {
362 // This could get called multiple times, and it should be handled gracefully. 365 // This could get called multiple times, and it should be handled gracefully.
363 if (!unfinished_work_) 366 if (!unfinished_work_)
364 return nullptr; 367 return nullptr;
365 368
366 unfinished_work_->clear_manifest();
367 unfinished_work_->clear_resource(); 369 unfinished_work_->clear_resource();
368 for (const auto& manifest : manifest_urls_to_fetch_) 370 if (top_hosts_to_fetch_) {
369 unfinished_work_->add_manifest()->set_url(manifest.spec()); 371 unfinished_work_->clear_top_host();
370 for (const auto& resource : resource_urls_to_fetch_) 372 for (const auto& top_host : *top_hosts_to_fetch_) {
371 unfinished_work_->add_resource()->set_url(resource.spec()); 373 unfinished_work_->add_top_host()->set_hostname(top_host.hostname);
374 }
375 }
376 for (const auto& resource : resources_to_fetch_) {
377 auto new_resource = unfinished_work_->add_resource();
378 new_resource->set_url(resource.first.spec());
379 new_resource->set_tophostname(resource.second);
380 }
372 for (const auto& it : pool_.elements()) { 381 for (const auto& it : pool_.elements()) {
373 const Fetcher* fetcher = it.first; 382 const Fetcher* fetcher = it.first;
374 if (fetcher->is_resource_request()) 383 GURL config_url =
375 unfinished_work_->add_resource()->set_url(fetcher->url().spec()); 384 config_url_.is_empty() ? GetDefaultConfigURL() : config_url_;
376 else if (fetcher->url() != config_url_) 385 if (fetcher->is_resource_request()) {
377 unfinished_work_->add_manifest()->set_url(fetcher->url().spec()); 386 auto resource = unfinished_work_->add_resource();
387 resource->set_url(fetcher->url().spec());
388 resource->set_tophostname(fetcher->referrer());
389 } else if (fetcher->url() != config_url) {
390 unfinished_work_->add_top_host()->set_hostname(fetcher->referrer());
391 }
378 } 392 }
379 manifest_urls_to_fetch_.clear(); 393 top_hosts_to_fetch_.reset();
380 resource_urls_to_fetch_.clear(); 394 resources_to_fetch_.clear();
381 pool_.DeleteAll(); 395 pool_.DeleteAll();
382 return std::move(unfinished_work_); 396 return std::move(unfinished_work_);
383 } 397 }
384 398
385 void PrecacheFetcher::Start() { 399 void PrecacheFetcher::Start() {
386 if (unfinished_work_->has_config_settings()) { 400 if (unfinished_work_->has_config_settings()) {
387 DCHECK(unfinished_work_->has_start_time()); 401 DCHECK(unfinished_work_->has_start_time());
388 DetermineManifests(); 402 DetermineManifests();
389 return; 403 return;
390 } 404 }
391 405
392 GURL config_url = 406 GURL config_url =
393 config_url_.is_empty() ? GetDefaultConfigURL() : config_url_; 407 config_url_.is_empty() ? GetDefaultConfigURL() : config_url_;
394 408
395 DCHECK(config_url.is_valid()) << "Config URL not valid: " 409 DCHECK(config_url.is_valid()) << "Config URL not valid: "
396 << config_url.possibly_invalid_spec(); 410 << config_url.possibly_invalid_spec();
397 411
398 // Fetch the precache configuration settings from the server. 412 // Fetch the precache configuration settings from the server.
399 DCHECK(pool_.IsEmpty()) << "All parallel requests should be available"; 413 DCHECK(pool_.IsEmpty()) << "All parallel requests should be available";
400 VLOG(3) << "Fetching " << config_url; 414 VLOG(3) << "Fetching " << config_url;
401 pool_.Add(base::WrapUnique(new Fetcher( 415 pool_.Add(base::WrapUnique(new Fetcher(
402 request_context_.get(), config_url, 416 request_context_.get(), config_url, std::string(),
403 base::Bind(&PrecacheFetcher::OnConfigFetchComplete, 417 base::Bind(&PrecacheFetcher::OnConfigFetchComplete, AsWeakPtr()),
404 base::Unretained(this)),
405 false /* is_resource_request */, std::numeric_limits<int32_t>::max()))); 418 false /* is_resource_request */, std::numeric_limits<int32_t>::max())));
406 } 419 }
407 420
408 void PrecacheFetcher::StartNextResourceFetch() { 421 void PrecacheFetcher::StartNextResourceFetch() {
409 DCHECK(unfinished_work_->has_config_settings()); 422 DCHECK(unfinished_work_->has_config_settings());
410 while (!resource_urls_to_fetch_.empty() && pool_.IsAvailable()) { 423 while (!resources_to_fetch_.empty() && pool_.IsAvailable()) {
424 const auto& resource = resources_to_fetch_.front();
411 const size_t max_bytes = 425 const size_t max_bytes =
412 std::min(unfinished_work_->config_settings().max_bytes_per_resource(), 426 std::min(unfinished_work_->config_settings().max_bytes_per_resource(),
413 unfinished_work_->config_settings().max_bytes_total() - 427 unfinished_work_->config_settings().max_bytes_total() -
414 unfinished_work_->total_bytes()); 428 unfinished_work_->total_bytes());
415 VLOG(3) << "Fetching " << resource_urls_to_fetch_.front(); 429 VLOG(3) << "Fetching " << resource.first << " " << resource.second;
416 pool_.Add(base::WrapUnique( 430 pool_.Add(base::WrapUnique(new Fetcher(
417 new Fetcher(request_context_.get(), resource_urls_to_fetch_.front(), 431 request_context_.get(), resource.first, resource.second,
418 base::Bind(&PrecacheFetcher::OnResourceFetchComplete, 432 base::Bind(&PrecacheFetcher::OnResourceFetchComplete, AsWeakPtr()),
419 base::Unretained(this)), 433 true /* is_resource_request */, max_bytes)));
420 true /* is_resource_request */, max_bytes)));
421 434
422 resource_urls_to_fetch_.pop_front(); 435 resources_to_fetch_.pop_front();
423 } 436 }
424 } 437 }
425 438
426 void PrecacheFetcher::StartNextManifestFetch() { 439 void PrecacheFetcher::StartNextManifestFetch() {
427 if (manifest_urls_to_fetch_.empty() || !pool_.IsAvailable()) 440 if (!top_hosts_to_fetch_ || top_hosts_to_fetch_->empty() ||
441 !pool_.IsAvailable())
428 return; 442 return;
429 443
430 // We only fetch one manifest at a time to keep the size of 444 // We only fetch one manifest at a time to keep the size of
431 // resource_urls_to_fetch_ as small as possible. 445 // resources_to_fetch_ as small as possible.
432 VLOG(3) << "Fetching " << manifest_urls_to_fetch_.front(); 446 VLOG(3) << "Fetching " << top_hosts_to_fetch_->front().manifest_url;
433 pool_.Add(base::WrapUnique(new Fetcher( 447 pool_.Add(base::WrapUnique(new Fetcher(
434 request_context_.get(), manifest_urls_to_fetch_.front(), 448 request_context_.get(), top_hosts_to_fetch_->front().manifest_url,
435 base::Bind(&PrecacheFetcher::OnManifestFetchComplete, 449 top_hosts_to_fetch_->front().hostname,
436 base::Unretained(this)), 450 base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr()),
437 false /* is_resource_request */, std::numeric_limits<int32_t>::max()))); 451 false /* is_resource_request */, std::numeric_limits<int32_t>::max())));
438 452 top_hosts_to_fetch_->pop_front();
439 manifest_urls_to_fetch_.pop_front();
440 } 453 }
441 454
442 void PrecacheFetcher::NotifyDone( 455 void PrecacheFetcher::NotifyDone(
443 size_t remaining_manifest_urls_to_fetch, 456 size_t remaining_manifest_urls_to_fetch,
444 size_t remaining_resource_urls_to_fetch) { 457 size_t remaining_resource_urls_to_fetch) {
445 RecordCompletionStatistics(*unfinished_work_, 458 RecordCompletionStatistics(*unfinished_work_,
446 remaining_manifest_urls_to_fetch, 459 remaining_manifest_urls_to_fetch,
447 remaining_resource_urls_to_fetch); 460 remaining_resource_urls_to_fetch);
448 precache_delegate_->OnDone(); 461 precache_delegate_->OnDone();
449 } 462 }
450 463
451 void PrecacheFetcher::StartNextFetch() { 464 void PrecacheFetcher::StartNextFetch() {
452 DCHECK(unfinished_work_->has_config_settings()); 465 DCHECK(unfinished_work_->has_config_settings());
453 // If over the precache total size cap, then stop prefetching. 466 // If over the precache total size cap, then stop prefetching.
454 if (unfinished_work_->total_bytes() > 467 if (unfinished_work_->total_bytes() >
455 unfinished_work_->config_settings().max_bytes_total()) { 468 unfinished_work_->config_settings().max_bytes_total()) {
456 size_t pending_manifests_in_pool = 0; 469 size_t pending_manifests_in_pool = 0;
457 size_t pending_resources_in_pool = 0; 470 size_t pending_resources_in_pool = 0;
458 for (const auto& element_pair : pool_.elements()) { 471 for (const auto& element_pair : pool_.elements()) {
459 const Fetcher* fetcher = element_pair.first; 472 const Fetcher* fetcher = element_pair.first;
460 if (fetcher->is_resource_request()) 473 if (fetcher->is_resource_request())
461 pending_resources_in_pool++; 474 pending_resources_in_pool++;
462 else if (fetcher->url() != config_url_) 475 else if (fetcher->url() != config_url_)
463 pending_manifests_in_pool++; 476 pending_manifests_in_pool++;
464 } 477 }
465 pool_.DeleteAll(); 478 pool_.DeleteAll();
466 NotifyDone(manifest_urls_to_fetch_.size() + pending_manifests_in_pool, 479 int pending_top_hosts =
467 resource_urls_to_fetch_.size() + pending_resources_in_pool); 480 top_hosts_to_fetch_ ? top_hosts_to_fetch_->size() : 0;
481 NotifyDone(pending_top_hosts + pending_manifests_in_pool,
482 resources_to_fetch_.size() + pending_resources_in_pool);
468 return; 483 return;
469 } 484 }
470 485
471 StartNextResourceFetch(); 486 StartNextResourceFetch();
472 StartNextManifestFetch(); 487 StartNextManifestFetch();
473 if (pool_.IsEmpty()) { 488 if ((!top_hosts_to_fetch_ || top_hosts_to_fetch_->empty()) &&
489 resources_to_fetch_.empty() && pool_.IsEmpty()) {
474 // There are no more URLs to fetch, so end the precache cycle. 490 // There are no more URLs to fetch, so end the precache cycle.
475 NotifyDone(0, 0); 491 NotifyDone(0, 0);
476 // OnDone may have deleted this PrecacheFetcher, so don't do anything after 492 // OnDone may have deleted this PrecacheFetcher, so don't do anything after
477 // it is called. 493 // it is called.
478 } 494 }
479 } 495 }
480 496
481 void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) { 497 void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) {
482 UpdateStats(source.response_bytes(), source.network_response_bytes()); 498 UpdateStats(source.response_bytes(), source.network_response_bytes());
483 if (source.network_url_fetcher() == nullptr) { 499 if (source.network_url_fetcher() == nullptr) {
484 pool_.DeleteAll(); // Cancel any other ongoing request. 500 pool_.DeleteAll(); // Cancel any other ongoing request.
485 } else { 501 } else {
486 // Attempt to parse the config proto. On failure, continue on with the 502 // Attempt to parse the config proto. On failure, continue on with the
487 // default configuration. 503 // default configuration.
488 ParseProtoFromFetchResponse( 504 ParseProtoFromFetchResponse(
489 *source.network_url_fetcher(), 505 *source.network_url_fetcher(),
490 unfinished_work_->mutable_config_settings()); 506 unfinished_work_->mutable_config_settings());
491 pool_.Delete(source); 507 pool_.Delete(source);
492 DetermineManifests(); 508 DetermineManifests();
493 } 509 }
494 } 510 }
495 511
496 void PrecacheFetcher::DetermineManifests() { 512 void PrecacheFetcher::DetermineManifests() {
497 DCHECK(unfinished_work_->has_config_settings()); 513 DCHECK(unfinished_work_->has_config_settings());
498 std::string prefix = manifest_url_prefix_.empty()
499 ? GetDefaultManifestURLPrefix()
500 : manifest_url_prefix_;
501 DCHECK_NE(std::string(), prefix)
502 << "Could not determine the precache manifest URL prefix.";
503 514
504 // Keep track of manifest URLs that are being fetched, in order to elide 515 // Keep track of manifest URLs that are being fetched, in order to elide
505 // duplicates. 516 // duplicates.
506 base::hash_set<std::string> seen_manifest_urls; 517 std::set<std::string> seen_top_hosts;
sclittle 2016/08/11 22:52:35 You should move this into the below if-statement s
Raj 2016/08/12 19:04:20 Done.
518 std::unique_ptr<std::deque<std::string>> top_hosts_to_fetch(
sclittle 2016/08/11 22:52:35 You can remove the std::unique_ptr around this if
Raj 2016/08/12 19:04:20 Nice. It works. But top_hosts_to_fetch is a local
519 new std::deque<std::string>);
520 std::unique_ptr<std::deque<ManifestHostInfo>> top_hosts_info(
521 new std::deque<ManifestHostInfo>);
507 522
508 // Attempt to fetch manifests for starting hosts up to the maximum top sites 523 // Attempt to fetch manifests for starting hosts up to the maximum top sites
509 // count. If a manifest does not exist for a particular starting host, then 524 // count. If a manifest does not exist for a particular starting host, then
510 // the fetch will fail, and that starting host will be ignored. Starting 525 // the fetch will fail, and that starting host will be ignored. Starting
511 // hosts are not added if this is a continuation from a previous precache 526 // hosts are not added if this is a continuation from a previous precache
512 // session. 527 // session.
513 if (manifest_urls_to_fetch_.empty() && 528 if (top_hosts_to_fetch->empty() && resources_to_fetch_.empty()) {
sclittle 2016/08/11 22:52:35 |top_hosts_to_fetch| will always be empty at this
Raj 2016/08/12 19:04:20 Yep.
514 resource_urls_to_fetch_.empty()) { 529 int64_t rank = 0;
515 int64_t rank = 0; 530 for (const auto& host : unfinished_work_->top_host()) {
516 for (const auto& host : unfinished_work_->top_host()) { 531 ++rank;
517 ++rank; 532 if (rank > unfinished_work_->config_settings().top_sites_count())
518 if (rank > unfinished_work_->config_settings().top_sites_count()) 533 break;
519 break; 534 if (seen_top_hosts.insert(host.hostname()).second)
520 AppendManifestURLIfValidAndNew(prefix, host.hostname(), 535 top_hosts_to_fetch->emplace_back(host.hostname());
sclittle 2016/08/11 22:52:35 nit: I think you can just use push_back here, it'l
Raj 2016/08/12 19:04:20 Done.
521 &seen_manifest_urls, 536 }
522 &manifest_urls_to_fetch_);
523 }
524 537
525 for (const std::string& host 538 for (const std::string& host :
526 : unfinished_work_->config_settings().forced_site()) { 539 unfinished_work_->config_settings().forced_site()) {
527 AppendManifestURLIfValidAndNew(prefix, host, &seen_manifest_urls, 540 if (seen_top_hosts.insert(host).second)
528 &manifest_urls_to_fetch_); 541 top_hosts_to_fetch->emplace_back(host);
sclittle 2016/08/11 22:52:35 nit: I think you can replace emplace_back with pus
Raj 2016/08/12 19:04:21 Done.
529 }
530 } 542 }
531 unfinished_work_->set_num_manifest_urls(manifest_urls_to_fetch_.size()); 543 }
532 StartNextFetch(); 544 // We only fetch one manifest at a time to keep the size of
545 // resources_to_fetch_ as small as possible.
546 auto retrieve_manifest_callback =
547 base::Bind(&PrecacheFetcher::RetrieveManifestInfo, AsWeakPtr(),
548 base::Passed(&top_hosts_to_fetch), top_hosts_info.get());
549 db_task_runner_->PostTaskAndReply(
550 FROM_HERE, retrieve_manifest_callback,
551 base::Bind(&PrecacheFetcher::OnManifestInfoRetrieved, AsWeakPtr(),
552 base::Passed(&top_hosts_info)));
533 } 553 }
534 554
555 void PrecacheFetcher::RetrieveManifestInfo(
556 std::unique_ptr<std::deque<std::string>> hosts_to_fetch,
557 std::deque<ManifestHostInfo>* hosts_info) {
558 for (const auto& host : *hosts_to_fetch) {
559 auto referrer_host_info = precache_database_->GetReferrerHost(host);
560 if (referrer_host_info.id != PrecacheReferrerHostEntry::INVALID_ID) {
561 std::deque<GURL> used_urls, unused_urls;
sclittle 2016/08/11 22:52:35 nit: Can this just be an std::vector? Typically an…
Raj 2016/08/12 19:04:20 Done.
562 precache_database_->GetURLListForReferrerHost(referrer_host_info.id,
563 used_urls, unused_urls);
564 hosts_info->emplace_back(ManifestHostInfo(
565 referrer_host_info.id, host, GetResourceURLBase64Hash(used_urls),
566 GetResourceURLBase64Hash(unused_urls)));
567 } else {
568 hosts_info->emplace_back(
569 ManifestHostInfo(PrecacheReferrerHostEntry::INVALID_ID, host,
570 std::string(), std::string()));
571 }
572 }
573 }
574
575 void PrecacheFetcher::OnManifestInfoRetrieved(
576 std::unique_ptr<std::deque<ManifestHostInfo>> manifests_info) {
577 DCHECK(manifests_info);
578 const std::string prefix = manifest_url_prefix_.empty()
579 ? GetDefaultManifestURLPrefix()
580 : manifest_url_prefix_;
581 top_hosts_to_fetch_.reset(new std::deque<ManifestHostInfo>());
582 for (auto& manifest : *manifests_info) {
583 GURL manifest_url(
584 prefix +
585 net::EscapeQueryParamValue(
586 net::EscapeQueryParamValue(manifest.hostname, false), false));
sclittle 2016/08/11 22:52:35 Can't you just add all the query params when you c…
Raj 2016/08/12 19:04:20 I do not see any GURL constructor that takes query…
sclittle 2016/08/15 20:13:09 Ok, this is probably fine then. I don't see a nice…
587 if (manifest_url.is_valid() &&
588 manifest.manifest_id != PrecacheReferrerHostEntry::INVALID_ID) {
589 manifest_url = net::AppendOrReplaceQueryParameter(
590 manifest_url, "manifest", std::to_string(manifest.manifest_id));
591 manifest_url = net::AppendOrReplaceQueryParameter(
592 manifest_url, "used_resources", manifest.used_url_hash);
593 manifest_url = net::AppendOrReplaceQueryParameter(
594 manifest_url, "unused_resources", manifest.unused_url_hash);
595 DCHECK(manifest_url.is_valid());
596 }
597 manifest.manifest_url = manifest_url;
598 if (manifest_url.is_valid())
599 top_hosts_to_fetch_->emplace_back(manifest);
600 }
601 unfinished_work_->set_num_manifest_urls(top_hosts_to_fetch_->size());
602 StartNextFetch();
603 }
604
605 ManifestHostInfo::ManifestHostInfo(int64_t manifest_id,
606 const std::string& hostname,
607 const std::string& used_url_hash,
608 const std::string& unused_url_hash)
609 : manifest_id(manifest_id),
610 hostname(hostname),
611 used_url_hash(used_url_hash),
612 unused_url_hash(unused_url_hash) {}
613
614 ManifestHostInfo::~ManifestHostInfo() {}
615
616 ManifestHostInfo::ManifestHostInfo(const ManifestHostInfo& other) = default;
617
535 void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) { 618 void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) {
536 DCHECK(unfinished_work_->has_config_settings()); 619 DCHECK(unfinished_work_->has_config_settings());
537 UpdateStats(source.response_bytes(), source.network_response_bytes()); 620 UpdateStats(source.response_bytes(), source.network_response_bytes());
538 if (source.network_url_fetcher() == nullptr) { 621 if (source.network_url_fetcher() == nullptr) {
539 pool_.DeleteAll(); // Cancel any other ongoing request. 622 pool_.DeleteAll(); // Cancel any other ongoing request.
540 } else { 623 } else {
541 PrecacheManifest manifest; 624 PrecacheManifest manifest;
542 625
543 if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) { 626 if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) {
544 const int32_t len = 627 const int32_t len =
545 std::min(manifest.resource_size(), 628 std::min(manifest.resource_size(),
546 unfinished_work_->config_settings().top_resources_count()); 629 unfinished_work_->config_settings().top_resources_count());
547 const uint64_t resource_bitset = 630 const uint64_t resource_bitset =
548 GetResourceBitset(manifest, experiment_id_); 631 GetResourceBitset(manifest, experiment_id_);
549 for (int i = 0; i < len; ++i) { 632 for (int i = 0; i < len; ++i) {
550 if (((0x1ULL << i) & resource_bitset) && 633 if (((0x1ULL << i) & resource_bitset) &&
551 manifest.resource(i).has_url()) { 634 manifest.resource(i).has_url()) {
552 GURL url(manifest.resource(i).url()); 635 GURL url(manifest.resource(i).url());
553 if (url.is_valid()) 636 if (url.is_valid()) {
554 resource_urls_to_fetch_.push_back(url); 637 resources_to_fetch_.emplace_back(
638 std::make_pair(url, source.referrer()));
639 }
555 } 640 }
556 } 641 }
642 db_task_runner_->PostTask(
643 FROM_HERE,
644 base::Bind(&PrecacheDatabase::UpdatePrecacheReferrerHost,
645 precache_database_, source.referrer(),
646 manifest.id().timestamp().seconds(), base::Time::Now()));
557 } 647 }
558 } 648 }
559 649
560 pool_.Delete(source); 650 pool_.Delete(source);
561 StartNextFetch(); 651 StartNextFetch();
562 } 652 }
563 653
564 void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) { 654 void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) {
565 UpdateStats(source.response_bytes(), source.network_response_bytes()); 655 UpdateStats(source.response_bytes(), source.network_response_bytes());
656
657 db_task_runner_->PostTask(
658 FROM_HERE,
659 base::Bind(&PrecacheDatabase::RecordURLPrefetch, precache_database_,
660 source.url(), source.referrer(), base::Time::Now(),
661 source.was_cached(), source.response_bytes()));
662
566 pool_.Delete(source); 663 pool_.Delete(source);
664
567 // The resource has already been put in the cache during the fetch process, so 665 // The resource has already been put in the cache during the fetch process, so
568 // nothing more needs to be done for the resource. 666 // nothing more needs to be done for the resource.
569 StartNextFetch(); 667 StartNextFetch();
570 } 668 }
571 669
572 void PrecacheFetcher::UpdateStats(int64_t response_bytes, 670 void PrecacheFetcher::UpdateStats(int64_t response_bytes,
573 int64_t network_response_bytes) { 671 int64_t network_response_bytes) {
574 unfinished_work_->set_total_bytes( 672 unfinished_work_->set_total_bytes(
575 unfinished_work_->total_bytes() + response_bytes); 673 unfinished_work_->total_bytes() + response_bytes);
576 unfinished_work_->set_network_bytes( 674 unfinished_work_->set_network_bytes(
577 unfinished_work_->network_bytes() + network_response_bytes); 675 unfinished_work_->network_bytes() + network_response_bytes);
578 } 676 }
579 677
580 } // namespace precache 678 } // namespace precache
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698