OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/precache/core/precache_fetcher.h" | 5 #include "components/precache/core/precache_fetcher.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <limits> | 8 #include <limits> |
9 #include <set> | |
9 #include <utility> | 10 #include <utility> |
10 #include <vector> | 11 #include <vector> |
11 | 12 |
12 #include "base/base64.h" | 13 #include "base/base64.h" |
13 #include "base/bind.h" | 14 #include "base/bind.h" |
14 #include "base/bind_helpers.h" | 15 #include "base/bind_helpers.h" |
15 #include "base/callback.h" | 16 #include "base/callback.h" |
16 #include "base/command_line.h" | 17 #include "base/command_line.h" |
17 #include "base/compiler_specific.h" | 18 #include "base/compiler_specific.h" |
18 #include "base/containers/hash_tables.h" | 19 #include "base/containers/hash_tables.h" |
(...skipping 27 matching lines...) Expand all Loading... | |
46 // The following flags are for privacy reasons. For example, if a user clears | 47 // The following flags are for privacy reasons. For example, if a user clears |
47 // their cookies, but a tracking beacon is prefetched and the beacon specifies | 48 // their cookies, but a tracking beacon is prefetched and the beacon specifies |
48 // its source URL in a URL param, the beacon site would be able to rebuild a | 49 // its source URL in a URL param, the beacon site would be able to rebuild a |
49 // profile of the user. All three flags should occur together, or not at all, | 50 // profile of the user. All three flags should occur together, or not at all, |
50 // per | 51 // per |
51 // https://groups.google.com/a/chromium.org/d/topic/net-dev/vvcodRV6SdM/discussion. | 52 // https://groups.google.com/a/chromium.org/d/topic/net-dev/vvcodRV6SdM/discussion. |
52 const int kNoTracking = | 53 const int kNoTracking = |
53 net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES | | 54 net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES | |
54 net::LOAD_DO_NOT_SEND_AUTH_DATA; | 55 net::LOAD_DO_NOT_SEND_AUTH_DATA; |
55 | 56 |
57 // The maximum number of URLFetcher requests that can be in flight in parallel. | |
58 // Note that OnManifestFetchComplete and OnResourceFetchComplete perform | |
59 // remove_if operations which are O(kMaxParallelFetches). Those should be | |
60 // optimized before increasing this value significantly. | |
61 const int kMaxParallelFetches = 10; | |
62 | |
56 namespace { | 63 namespace { |
57 | 64 |
58 // The maximum number of URLFetcher requests that can be in flight in parallel. | |
59 const int kMaxParallelFetches = 10; | |
60 | |
61 // The maximum for the Precache.Fetch.ResponseBytes.* histograms. We set this to | 65 // The maximum for the Precache.Fetch.ResponseBytes.* histograms. We set this to |
62 // a number we expect to be in the 99th percentile for the histogram, give or | 66 // a number we expect to be in the 99th percentile for the histogram, give or |
63 // take. | 67 // take. |
64 const int kMaxResponseBytes = 500 * 1024 * 1024; | 68 const int kMaxResponseBytes = 500 * 1024 * 1024; |
65 | 69 |
66 GURL GetDefaultConfigURL() { | 70 GURL GetDefaultConfigURL() { |
67 const base::CommandLine& command_line = | 71 const base::CommandLine& command_line = |
68 *base::CommandLine::ForCurrentProcess(); | 72 *base::CommandLine::ForCurrentProcess(); |
69 if (command_line.HasSwitch(switches::kPrecacheConfigSettingsURL)) { | 73 if (command_line.HasSwitch(switches::kPrecacheConfigSettingsURL)) { |
70 return GURL( | 74 return GURL( |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
176 hashes.append(reinterpret_cast<const char*>(sha1_hash), kHashBytesSize); | 180 hashes.append(reinterpret_cast<const char*>(sha1_hash), kHashBytesSize); |
177 } | 181 } |
178 base::Base64Encode(hashes, &hashes); | 182 base::Base64Encode(hashes, &hashes); |
179 return hashes; | 183 return hashes; |
180 } | 184 } |
181 | 185 |
182 // Retrieves the manifest info on the DB thread. Manifest info for each of the | 186 // Retrieves the manifest info on the DB thread. Manifest info for each of the |
183 // hosts in |hosts_to_fetch| is added to |hosts_info|. | 187 // hosts in |hosts_to_fetch| is added to |hosts_info|. |
184 std::deque<ManifestHostInfo> RetrieveManifestInfo( | 188 std::deque<ManifestHostInfo> RetrieveManifestInfo( |
185 const base::WeakPtr<PrecacheDatabase>& precache_database, | 189 const base::WeakPtr<PrecacheDatabase>& precache_database, |
186 std::vector<std::string> hosts_to_fetch) { | 190 std::vector<std::pair<std::string, int64_t>> hosts_to_fetch) { |
191 VLOG(9) << "RetrieveManifestInfo"; | |
187 std::deque<ManifestHostInfo> hosts_info; | 192 std::deque<ManifestHostInfo> hosts_info; |
188 if (!precache_database) | 193 if (!precache_database) |
189 return hosts_info; | 194 return hosts_info; |
190 | 195 |
191 for (const auto& host : hosts_to_fetch) { | 196 for (const auto& host : hosts_to_fetch) { |
192 auto referrer_host_info = precache_database->GetReferrerHost(host); | 197 auto referrer_host_info = precache_database->GetReferrerHost(host.first); |
193 if (referrer_host_info.id != PrecacheReferrerHostEntry::kInvalidId) { | 198 if (referrer_host_info.id != PrecacheReferrerHostEntry::kInvalidId) { |
194 std::vector<GURL> used_urls, unused_urls; | 199 std::vector<GURL> used_urls, unused_urls; |
195 precache_database->GetURLListForReferrerHost(referrer_host_info.id, | 200 precache_database->GetURLListForReferrerHost(referrer_host_info.id, |
196 &used_urls, &unused_urls); | 201 &used_urls, &unused_urls); |
197 hosts_info.push_back( | 202 hosts_info.push_back( |
198 ManifestHostInfo(referrer_host_info.manifest_id, host, | 203 ManifestHostInfo(referrer_host_info.manifest_id, host.first, |
199 GetResourceURLBase64Hash(used_urls), | 204 host.second, GetResourceURLBase64Hash(used_urls), |
200 GetResourceURLBase64Hash(unused_urls))); | 205 GetResourceURLBase64Hash(unused_urls))); |
201 } else { | 206 } else { |
202 hosts_info.push_back( | 207 hosts_info.push_back( |
203 ManifestHostInfo(PrecacheReferrerHostEntry::kInvalidId, host, | 208 ManifestHostInfo(PrecacheReferrerHostEntry::kInvalidId, host.first, |
204 std::string(), std::string())); | 209 host.second, std::string(), std::string())); |
205 } | 210 } |
206 } | 211 } |
207 return hosts_info; | 212 return hosts_info; |
208 } | 213 } |
209 | 214 |
210 PrecacheQuota RetrieveQuotaInfo( | 215 PrecacheQuota RetrieveQuotaInfo( |
211 const base::WeakPtr<PrecacheDatabase>& precache_database) { | 216 const base::WeakPtr<PrecacheDatabase>& precache_database) { |
217 VLOG(9) << "RetrieveQuotaInfo"; | |
212 PrecacheQuota quota; | 218 PrecacheQuota quota; |
213 if (precache_database) { | 219 if (precache_database) { |
214 quota = precache_database->GetQuota(); | 220 quota = precache_database->GetQuota(); |
215 } | 221 } |
216 return quota; | 222 return quota; |
217 } | 223 } |
218 | 224 |
219 // Returns true if the |quota| time has expired. | 225 // Returns true if the |quota| time has expired. |
220 bool IsQuotaTimeExpired(const PrecacheQuota& quota, | 226 bool IsQuotaTimeExpired(const PrecacheQuota& quota, |
221 const base::Time& time_now) { | 227 const base::Time& time_now) { |
(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
356 // These may be unset in tests. | 362 // These may be unset in tests. |
357 if (!unfinished_work.has_start_time()) | 363 if (!unfinished_work.has_start_time()) |
358 return; | 364 return; |
359 base::TimeDelta time_to_fetch = | 365 base::TimeDelta time_to_fetch = |
360 base::Time::Now() - | 366 base::Time::Now() - |
361 base::Time::FromInternalValue(unfinished_work.start_time()); | 367 base::Time::FromInternalValue(unfinished_work.start_time()); |
362 UMA_HISTOGRAM_CUSTOM_TIMES("Precache.Fetch.TimeToComplete", time_to_fetch, | 368 UMA_HISTOGRAM_CUSTOM_TIMES("Precache.Fetch.TimeToComplete", time_to_fetch, |
363 base::TimeDelta::FromSeconds(1), | 369 base::TimeDelta::FromSeconds(1), |
364 base::TimeDelta::FromHours(4), 50); | 370 base::TimeDelta::FromHours(4), 50); |
365 | 371 |
366 // Number of manifests for which we have downloaded all resources. | 372 int num_total_resources = unfinished_work.num_resource_urls(); |
367 int manifests_completed = | 373 int percent_completed = |
368 unfinished_work.num_manifest_urls() - remaining_manifest_urls_to_fetch; | 374 num_total_resources == 0 |
375 ? 0 | |
376 : (100 * (static_cast<double>(num_total_resources - | |
377 remaining_resource_urls_to_fetch) / | |
378 num_total_resources)); | |
369 | 379 |
370 // If there are resource URLs left to fetch, the last manifest is not yet | 380 VLOG(6) << "Percent completed: " << percent_completed; |
bengr
2016/10/14 21:52:19
Do you need these VLOG statements?
twifkak
2016/10/14 22:41:45
They were very helpful during debugging, but I can…
bengr
2016/10/18 18:57:20
Chromium style is to remove logging statements, so…
twifkak
2016/10/18 19:52:27
Done.
| |
371 // completed. | |
372 if (remaining_resource_urls_to_fetch > 0) | |
373 --manifests_completed; | |
374 | |
375 DCHECK_GE(manifests_completed, 0); | |
376 int percent_completed = unfinished_work.num_manifest_urls() == 0 | |
377 ? 0 | |
378 : (static_cast<double>(manifests_completed) / | |
379 unfinished_work.num_manifest_urls() * 100); | |
380 | 381 |
381 UMA_HISTOGRAM_PERCENTAGE("Precache.Fetch.PercentCompleted", | 382 UMA_HISTOGRAM_PERCENTAGE("Precache.Fetch.PercentCompleted", |
382 percent_completed); | 383 percent_completed); |
383 UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total", | 384 UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total", |
384 unfinished_work.total_bytes(), | 385 unfinished_work.total_bytes(), 1, |
385 1, kMaxResponseBytes, 100); | 386 kMaxResponseBytes, 100); |
386 UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Network", | 387 UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Network", |
387 unfinished_work.network_bytes(), | 388 unfinished_work.network_bytes(), 1, |
388 1, kMaxResponseBytes, | 389 kMaxResponseBytes, 100); |
389 100); | |
390 } | 390 } |
391 | 391 |
392 // static | 392 // static |
393 std::string PrecacheFetcher::GetResourceURLBase64HashForTesting( | 393 std::string PrecacheFetcher::GetResourceURLBase64HashForTesting( |
394 const std::vector<GURL>& urls) { | 394 const std::vector<GURL>& urls) { |
395 return GetResourceURLBase64Hash(urls); | 395 return GetResourceURLBase64Hash(urls); |
396 } | 396 } |
397 | 397 |
398 PrecacheFetcher::PrecacheFetcher( | 398 PrecacheFetcher::PrecacheFetcher( |
399 net::URLRequestContextGetter* request_context, | 399 net::URLRequestContextGetter* request_context, |
400 const GURL& config_url, | 400 const GURL& config_url, |
401 const std::string& manifest_url_prefix, | 401 const std::string& manifest_url_prefix, |
402 bool global_ranking, | |
402 std::unique_ptr<PrecacheUnfinishedWork> unfinished_work, | 403 std::unique_ptr<PrecacheUnfinishedWork> unfinished_work, |
403 uint32_t experiment_id, | 404 uint32_t experiment_id, |
404 const base::WeakPtr<PrecacheDatabase>& precache_database, | 405 const base::WeakPtr<PrecacheDatabase>& precache_database, |
405 const scoped_refptr<base::SingleThreadTaskRunner>& db_task_runner, | 406 const scoped_refptr<base::SingleThreadTaskRunner>& db_task_runner, |
406 PrecacheFetcher::PrecacheDelegate* precache_delegate) | 407 PrecacheFetcher::PrecacheDelegate* precache_delegate) |
407 : request_context_(request_context), | 408 : request_context_(request_context), |
408 config_url_(config_url), | 409 config_url_(config_url), |
409 manifest_url_prefix_(manifest_url_prefix), | 410 manifest_url_prefix_(manifest_url_prefix), |
411 global_ranking_(global_ranking), | |
410 precache_database_(precache_database), | 412 precache_database_(precache_database), |
411 db_task_runner_(std::move(db_task_runner)), | 413 db_task_runner_(std::move(db_task_runner)), |
412 precache_delegate_(precache_delegate), | 414 precache_delegate_(precache_delegate), |
413 pool_(kMaxParallelFetches), | 415 pool_(kMaxParallelFetches), |
414 experiment_id_(experiment_id) { | 416 experiment_id_(experiment_id) { |
415 DCHECK(request_context_.get()); // Request context must be non-NULL. | 417 DCHECK(request_context_.get()); // Request context must be non-NULL. |
416 DCHECK(precache_delegate_); // Precache delegate must be non-NULL. | 418 DCHECK(precache_delegate_); // Precache delegate must be non-NULL. |
417 | 419 |
418 DCHECK_NE(GURL(), GetDefaultConfigURL()) | 420 DCHECK_NE(GURL(), GetDefaultConfigURL()) |
419 << "Could not determine the precache config settings URL."; | 421 << "Could not determine the precache config settings URL."; |
420 DCHECK_NE(std::string(), GetDefaultManifestURLPrefix()) | 422 DCHECK_NE(std::string(), GetDefaultManifestURLPrefix()) |
421 << "Could not determine the default precache manifest URL prefix."; | 423 << "Could not determine the default precache manifest URL prefix."; |
422 DCHECK(unfinished_work); | 424 DCHECK(unfinished_work); |
423 | 425 |
424 // Copy resources to member variable as a convenience. | 426 // Copy resources to member variable as a convenience. |
425 // TODO(rajendrant): Consider accessing these directly from the proto, by | 427 // TODO(rajendrant): Consider accessing these directly from the proto, by |
426 // keeping track of the current resource index. | 428 // keeping track of the current resource index. |
427 for (const auto& resource : unfinished_work->resource()) { | 429 for (const auto& resource : unfinished_work->resource()) { |
428 if (resource.has_url() && resource.has_top_host_name()) { | 430 if (resource.has_url() && resource.has_top_host_name()) { |
431 // Weight doesn't matter, as the resources have already been sorted by | |
432 // this point. | |
429 resources_to_fetch_.emplace_back(GURL(resource.url()), | 433 resources_to_fetch_.emplace_back(GURL(resource.url()), |
430 resource.top_host_name()); | 434 resource.top_host_name(), 0); |
431 } | 435 } |
432 } | 436 } |
433 unfinished_work_ = std::move(unfinished_work); | 437 unfinished_work_ = std::move(unfinished_work); |
434 } | 438 } |
435 | 439 |
436 PrecacheFetcher::~PrecacheFetcher() { | 440 PrecacheFetcher::~PrecacheFetcher() { |
437 } | 441 } |
438 | 442 |
439 std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() { | 443 std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() { |
440 // This could get called multiple times, and it should be handled gracefully. | 444 // This could get called multiple times, and it should be handled gracefully. |
441 if (!unfinished_work_) | 445 if (!unfinished_work_) |
442 return nullptr; | 446 return nullptr; |
443 | 447 |
444 unfinished_work_->clear_resource(); | 448 unfinished_work_->clear_resource(); |
445 if (unfinished_work_->has_config_settings()) { | 449 if (unfinished_work_->has_config_settings()) { |
446 // If config fetch is incomplete, |top_hosts_to_fetch_| will be empty and | 450 // If config fetch is incomplete, |top_hosts_to_fetch_| will be empty and |
447 // top hosts should be left as is in |unfinished_work_|. | 451 // top hosts should be left as is in |unfinished_work_|. |
448 unfinished_work_->clear_top_host(); | 452 unfinished_work_->clear_top_host(); |
453 for (const auto& top_host : top_hosts_fetching_) { | |
bengr
2016/10/14 21:52:19
Remove curly braces.
twifkak
2016/10/14 22:41:45
Done.
| |
454 unfinished_work_->add_top_host()->set_hostname(top_host.hostname); | |
455 } | |
449 for (const auto& top_host : top_hosts_to_fetch_) { | 456 for (const auto& top_host : top_hosts_to_fetch_) { |
bengr
2016/10/14 21:52:19
Remove curly braces.
twifkak
2016/10/14 22:41:45
Done.
| |
450 unfinished_work_->add_top_host()->set_hostname(top_host.hostname); | 457 unfinished_work_->add_top_host()->set_hostname(top_host.hostname); |
451 } | 458 } |
452 } | 459 } |
460 for (const auto& resource : resources_fetching_) { | |
461 auto new_resource = unfinished_work_->add_resource(); | |
462 new_resource->set_url(resource.url.spec()); | |
463 new_resource->set_top_host_name(resource.referrer); | |
464 } | |
453 for (const auto& resource : resources_to_fetch_) { | 465 for (const auto& resource : resources_to_fetch_) { |
454 auto new_resource = unfinished_work_->add_resource(); | 466 auto new_resource = unfinished_work_->add_resource(); |
455 new_resource->set_url(resource.first.spec()); | 467 new_resource->set_url(resource.url.spec()); |
456 new_resource->set_top_host_name(resource.second); | 468 new_resource->set_top_host_name(resource.referrer); |
457 } | 469 } |
458 for (const auto& it : pool_.elements()) { | 470 top_hosts_fetching_.clear(); |
459 const Fetcher* fetcher = it.first; | |
460 GURL config_url = | |
461 config_url_.is_empty() ? GetDefaultConfigURL() : config_url_; | |
462 if (fetcher->is_resource_request()) { | |
463 auto resource = unfinished_work_->add_resource(); | |
464 resource->set_url(fetcher->url().spec()); | |
465 resource->set_top_host_name(fetcher->referrer()); | |
466 } else if (fetcher->url() != config_url) { | |
467 unfinished_work_->add_top_host()->set_hostname(fetcher->referrer()); | |
468 } | |
469 } | |
470 top_hosts_to_fetch_.clear(); | 471 top_hosts_to_fetch_.clear(); |
472 resources_fetching_.clear(); | |
471 resources_to_fetch_.clear(); | 473 resources_to_fetch_.clear(); |
472 pool_.DeleteAll(); | 474 pool_.DeleteAll(); |
473 return std::move(unfinished_work_); | 475 return std::move(unfinished_work_); |
474 } | 476 } |
475 | 477 |
476 void PrecacheFetcher::Start() { | 478 void PrecacheFetcher::Start() { |
477 if (unfinished_work_->has_config_settings()) { | 479 if (unfinished_work_->has_config_settings()) { |
478 DCHECK(unfinished_work_->has_start_time()); | 480 DCHECK(unfinished_work_->has_start_time()); |
479 DetermineManifests(); | 481 DetermineManifests(); |
480 return; | 482 return; |
(...skipping 10 matching lines...) Expand all Loading... | |
491 VLOG(3) << "Fetching " << config_url; | 493 VLOG(3) << "Fetching " << config_url; |
492 pool_.Add(base::MakeUnique<Fetcher>( | 494 pool_.Add(base::MakeUnique<Fetcher>( |
493 request_context_.get(), config_url, std::string(), | 495 request_context_.get(), config_url, std::string(), |
494 base::Bind(&PrecacheFetcher::OnConfigFetchComplete, AsWeakPtr()), | 496 base::Bind(&PrecacheFetcher::OnConfigFetchComplete, AsWeakPtr()), |
495 false /* is_resource_request */, std::numeric_limits<int32_t>::max())); | 497 false /* is_resource_request */, std::numeric_limits<int32_t>::max())); |
496 } | 498 } |
497 | 499 |
498 void PrecacheFetcher::StartNextResourceFetch() { | 500 void PrecacheFetcher::StartNextResourceFetch() { |
499 DCHECK(unfinished_work_->has_config_settings()); | 501 DCHECK(unfinished_work_->has_config_settings()); |
500 while (!resources_to_fetch_.empty() && pool_.IsAvailable()) { | 502 while (!resources_to_fetch_.empty() && pool_.IsAvailable()) { |
501 const auto& resource = resources_to_fetch_.front(); | 503 ResourceInfo& resource = resources_to_fetch_.front(); |
502 const size_t max_bytes = std::min( | 504 const size_t max_bytes = std::min( |
503 quota_.remaining(), | 505 quota_.remaining(), |
504 std::min(unfinished_work_->config_settings().max_bytes_per_resource(), | 506 std::min(unfinished_work_->config_settings().max_bytes_per_resource(), |
505 unfinished_work_->config_settings().max_bytes_total() - | 507 unfinished_work_->config_settings().max_bytes_total() - |
506 unfinished_work_->total_bytes())); | 508 unfinished_work_->total_bytes())); |
507 VLOG(3) << "Fetching " << resource.first << " " << resource.second; | 509 VLOG(3) << "Fetching " << resource.url << " " << resource.referrer; |
508 pool_.Add(base::MakeUnique<Fetcher>( | 510 pool_.Add(base::MakeUnique<Fetcher>( |
509 request_context_.get(), resource.first, resource.second, | 511 request_context_.get(), resource.url, resource.referrer, |
510 base::Bind(&PrecacheFetcher::OnResourceFetchComplete, AsWeakPtr()), | 512 base::Bind(&PrecacheFetcher::OnResourceFetchComplete, AsWeakPtr()), |
511 true /* is_resource_request */, max_bytes)); | 513 true /* is_resource_request */, max_bytes)); |
512 | 514 |
515 resources_fetching_.push_back(std::move(resource)); | |
513 resources_to_fetch_.pop_front(); | 516 resources_to_fetch_.pop_front(); |
514 } | 517 } |
515 } | 518 } |
516 | 519 |
517 void PrecacheFetcher::StartNextManifestFetch() { | 520 void PrecacheFetcher::StartNextManifestFetches() { |
518 if (top_hosts_to_fetch_.empty() || !pool_.IsAvailable()) | 521 // We fetch as many manifests at a time as possible, as we need all resource |
519 return; | 522 // URLs in memory in order to rank them. |
520 | 523 while (!top_hosts_to_fetch_.empty() && pool_.IsAvailable()) { |
521 // We only fetch one manifest at a time to keep the size of | 524 ManifestHostInfo& top_host = top_hosts_to_fetch_.front(); |
522 // resources_to_fetch_ as small as possible. | 525 VLOG(3) << "Fetching " << top_host.manifest_url; |
bengr
2016/10/14 21:52:19
Do we need the VLOG (here and below)?
twifkak
2016/10/14 22:41:45
This VLOG is not new. The ones I added are all --v…
| |
523 VLOG(3) << "Fetching " << top_hosts_to_fetch_.front().manifest_url; | 526 pool_.Add(base::MakeUnique<Fetcher>( |
524 pool_.Add(base::MakeUnique<Fetcher>( | 527 request_context_.get(), top_host.manifest_url, top_host.hostname, |
525 request_context_.get(), top_hosts_to_fetch_.front().manifest_url, | 528 base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr(), |
526 top_hosts_to_fetch_.front().hostname, | 529 top_host.visits), |
527 base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr()), | 530 false /* is_resource_request */, std::numeric_limits<int32_t>::max())); |
528 false /* is_resource_request */, std::numeric_limits<int32_t>::max())); | 531 top_hosts_fetching_.push_back(std::move(top_host)); |
529 top_hosts_to_fetch_.pop_front(); | 532 top_hosts_to_fetch_.pop_front(); |
533 } | |
530 } | 534 } |
531 | 535 |
532 void PrecacheFetcher::NotifyDone( | 536 void PrecacheFetcher::NotifyDone( |
533 size_t remaining_manifest_urls_to_fetch, | 537 size_t remaining_manifest_urls_to_fetch, |
534 size_t remaining_resource_urls_to_fetch) { | 538 size_t remaining_resource_urls_to_fetch) { |
539 VLOG(9) << "NotifyDone"; | |
535 RecordCompletionStatistics(*unfinished_work_, | 540 RecordCompletionStatistics(*unfinished_work_, |
536 remaining_manifest_urls_to_fetch, | 541 remaining_manifest_urls_to_fetch, |
537 remaining_resource_urls_to_fetch); | 542 remaining_resource_urls_to_fetch); |
538 precache_delegate_->OnDone(); | 543 precache_delegate_->OnDone(); |
539 } | 544 } |
540 | 545 |
541 void PrecacheFetcher::StartNextFetch() { | 546 void PrecacheFetcher::StartNextFetch() { |
547 VLOG(9) << "StartNextFetch"; | |
542 DCHECK(unfinished_work_->has_config_settings()); | 548 DCHECK(unfinished_work_->has_config_settings()); |
543 | 549 |
544 // If over the precache total size cap or daily quota, then stop prefetching. | 550 // If over the precache total size cap or daily quota, then stop prefetching. |
545 if ((unfinished_work_->total_bytes() > | 551 if ((unfinished_work_->total_bytes() > |
546 unfinished_work_->config_settings().max_bytes_total()) || | 552 unfinished_work_->config_settings().max_bytes_total()) || |
547 quota_.remaining() == 0) { | 553 quota_.remaining() == 0) { |
548 size_t pending_manifests_in_pool = 0; | |
549 size_t pending_resources_in_pool = 0; | |
550 for (const auto& element_pair : pool_.elements()) { | |
551 const Fetcher* fetcher = element_pair.first; | |
552 if (fetcher->is_resource_request()) | |
553 pending_resources_in_pool++; | |
554 else if (fetcher->url() != config_url_) | |
555 pending_manifests_in_pool++; | |
556 } | |
557 pool_.DeleteAll(); | 554 pool_.DeleteAll(); |
558 NotifyDone(top_hosts_to_fetch_.size() + pending_manifests_in_pool, | 555 NotifyDone(top_hosts_to_fetch_.size() + top_hosts_fetching_.size(), |
559 resources_to_fetch_.size() + pending_resources_in_pool); | 556 resources_to_fetch_.size() + resources_fetching_.size()); |
560 return; | 557 return; |
561 } | 558 } |
562 | 559 |
563 StartNextResourceFetch(); | 560 StartNextResourceFetch(); |
564 StartNextManifestFetch(); | 561 StartNextManifestFetches(); |
565 if (top_hosts_to_fetch_.empty() && resources_to_fetch_.empty() && | 562 if (top_hosts_to_fetch_.empty() && resources_to_fetch_.empty() && |
566 pool_.IsEmpty()) { | 563 pool_.IsEmpty()) { |
567 // There are no more URLs to fetch, so end the precache cycle. | 564 // There are no more URLs to fetch, so end the precache cycle. |
568 NotifyDone(0, 0); | 565 NotifyDone(0, 0); |
569 // OnDone may have deleted this PrecacheFetcher, so don't do anything after | 566 // OnDone may have deleted this PrecacheFetcher, so don't do anything after |
570 // it is called. | 567 // it is called. |
571 } | 568 } |
572 } | 569 } |
573 | 570 |
574 void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) { | 571 void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) { |
572 VLOG(9) << "OnConfigFetchComplete"; | |
575 UpdateStats(source.response_bytes(), source.network_response_bytes()); | 573 UpdateStats(source.response_bytes(), source.network_response_bytes()); |
576 if (source.network_url_fetcher() == nullptr) { | 574 if (source.network_url_fetcher() == nullptr) { |
577 pool_.DeleteAll(); // Cancel any other ongoing request. | 575 pool_.DeleteAll(); // Cancel any other ongoing request. |
578 } else { | 576 } else { |
579 // Attempt to parse the config proto. On failure, continue on with the | 577 // Attempt to parse the config proto. On failure, continue on with the |
580 // default configuration. | 578 // default configuration. |
581 ParseProtoFromFetchResponse( | 579 ParseProtoFromFetchResponse( |
582 *source.network_url_fetcher(), | 580 *source.network_url_fetcher(), |
583 unfinished_work_->mutable_config_settings()); | 581 unfinished_work_->mutable_config_settings()); |
584 pool_.Delete(source); | 582 pool_.Delete(source); |
585 DetermineManifests(); | 583 DetermineManifests(); |
586 } | 584 } |
587 } | 585 } |
588 | 586 |
589 void PrecacheFetcher::DetermineManifests() { | 587 void PrecacheFetcher::DetermineManifests() { |
590 DCHECK(unfinished_work_->has_config_settings()); | 588 DCHECK(unfinished_work_->has_config_settings()); |
591 | 589 |
592 std::vector<std::string> top_hosts_to_fetch; | 590 std::vector<std::pair<std::string, int64_t>> top_hosts_to_fetch; |
593 std::unique_ptr<std::deque<ManifestHostInfo>> top_hosts_info( | |
594 new std::deque<ManifestHostInfo>); | |
595 // Keep track of manifest URLs that are being fetched, in order to elide | 591 // Keep track of manifest URLs that are being fetched, in order to elide |
596 // duplicates. | 592 // duplicates. |
597 std::set<base::StringPiece> seen_top_hosts; | 593 std::set<base::StringPiece> seen_top_hosts; |
598 int64_t rank = 0; | 594 int64_t rank = 0; |
599 | 595 |
600 for (const auto& host : unfinished_work_->top_host()) { | 596 for (const auto& host : unfinished_work_->top_host()) { |
601 ++rank; | 597 ++rank; |
602 if (rank > unfinished_work_->config_settings().top_sites_count()) | 598 if (rank > unfinished_work_->config_settings().top_sites_count()) |
603 break; | 599 break; |
604 if (seen_top_hosts.insert(host.hostname()).second) | 600 if (seen_top_hosts.insert(host.hostname()).second) |
605 top_hosts_to_fetch.push_back(host.hostname()); | 601 top_hosts_to_fetch.emplace_back(host.hostname(), host.visits()); |
606 } | 602 } |
607 | 603 |
608 // Attempt to fetch manifests for starting hosts up to the maximum top sites | 604 // Attempt to fetch manifests for starting hosts up to the maximum top sites |
609 // count. If a manifest does not exist for a particular starting host, then | 605 // count. If a manifest does not exist for a particular starting host, then |
610 // the fetch will fail, and that starting host will be ignored. Starting | 606 // the fetch will fail, and that starting host will be ignored. Starting |
611 // hosts are not added if this is a continuation from a previous precache | 607 // hosts are not added if this is a continuation from a previous precache |
612 // session. | 608 // session. |
613 if (resources_to_fetch_.empty()) { | 609 if (resources_to_fetch_.empty()) { |
614 for (const std::string& host : | 610 for (const std::string& host : |
615 unfinished_work_->config_settings().forced_site()) { | 611 unfinished_work_->config_settings().forced_site()) { |
612 // We add a forced site with visits == 0, which means its resources will | |
613 // be downloaded last. TODO(twifkak): Consider removing support for | |
614 // forced_site. | |
616 if (seen_top_hosts.insert(host).second) | 615 if (seen_top_hosts.insert(host).second) |
617 top_hosts_to_fetch.push_back(host); | 616 top_hosts_to_fetch.emplace_back(host, 0); |
618 } | 617 } |
619 } | 618 } |
620 // We only fetch one manifest at a time to keep the size of | 619 // We retrieve manifest usage and quota info from the local database before |
621 // resources_to_fetch_ as small as possible. | 620 // fetching the manifests. |
622 PostTaskAndReplyWithResult( | 621 PostTaskAndReplyWithResult( |
623 db_task_runner_.get(), FROM_HERE, | 622 db_task_runner_.get(), FROM_HERE, |
624 base::Bind(&RetrieveManifestInfo, precache_database_, | 623 base::Bind(&RetrieveManifestInfo, precache_database_, |
625 std::move(top_hosts_to_fetch)), | 624 std::move(top_hosts_to_fetch)), |
626 base::Bind(&PrecacheFetcher::OnManifestInfoRetrieved, AsWeakPtr())); | 625 base::Bind(&PrecacheFetcher::OnManifestInfoRetrieved, AsWeakPtr())); |
627 } | 626 } |
628 | 627 |
629 void PrecacheFetcher::OnManifestInfoRetrieved( | 628 void PrecacheFetcher::OnManifestInfoRetrieved( |
630 std::deque<ManifestHostInfo> manifests_info) { | 629 std::deque<ManifestHostInfo> manifests_info) { |
630 VLOG(9) << "OnManifestInfoRetrieved"; | |
631 const std::string prefix = manifest_url_prefix_.empty() | 631 const std::string prefix = manifest_url_prefix_.empty() |
632 ? GetDefaultManifestURLPrefix() | 632 ? GetDefaultManifestURLPrefix() |
633 : manifest_url_prefix_; | 633 : manifest_url_prefix_; |
634 if (!GURL(prefix).is_valid()) { | 634 if (!GURL(prefix).is_valid()) { |
635 // Don't attempt to fetch any manifests if the manifest URL prefix | 635 // Don't attempt to fetch any manifests if the manifest URL prefix |
636 // is invalid. | 636 // is invalid. |
637 top_hosts_to_fetch_.clear(); | 637 top_hosts_to_fetch_.clear(); |
638 unfinished_work_->set_num_manifest_urls(manifests_info.size()); | 638 unfinished_work_->set_num_manifest_urls(manifests_info.size()); |
639 NotifyDone(manifests_info.size(), resources_to_fetch_.size()); | 639 NotifyDone(manifests_info.size(), resources_to_rank_.size()); |
640 return; | 640 return; |
641 } | 641 } |
642 | 642 |
643 top_hosts_to_fetch_ = std::move(manifests_info); | 643 top_hosts_to_fetch_ = std::move(manifests_info); |
644 for (auto& manifest : top_hosts_to_fetch_) { | 644 for (auto& manifest : top_hosts_to_fetch_) { |
645 manifest.manifest_url = | 645 manifest.manifest_url = |
646 GURL(prefix + | 646 GURL(prefix + |
647 net::EscapeQueryParamValue( | 647 net::EscapeQueryParamValue( |
648 net::EscapeQueryParamValue(manifest.hostname, false), false)); | 648 net::EscapeQueryParamValue(manifest.hostname, false), false)); |
649 if (manifest.manifest_id != PrecacheReferrerHostEntry::kInvalidId) { | 649 if (manifest.manifest_id != PrecacheReferrerHostEntry::kInvalidId) { |
650 manifest.manifest_url = net::AppendOrReplaceQueryParameter( | 650 manifest.manifest_url = net::AppendOrReplaceQueryParameter( |
651 manifest.manifest_url, "manifest", | 651 manifest.manifest_url, "manifest", |
652 std::to_string(manifest.manifest_id)); | 652 std::to_string(manifest.manifest_id)); |
653 manifest.manifest_url = net::AppendOrReplaceQueryParameter( | 653 manifest.manifest_url = net::AppendOrReplaceQueryParameter( |
654 manifest.manifest_url, "used_resources", manifest.used_url_hash); | 654 manifest.manifest_url, "used_resources", manifest.used_url_hash); |
655 manifest.manifest_url = net::AppendOrReplaceQueryParameter( | 655 manifest.manifest_url = net::AppendOrReplaceQueryParameter( |
656 manifest.manifest_url, "unused_resources", manifest.unused_url_hash); | 656 manifest.manifest_url, "unused_resources", manifest.unused_url_hash); |
657 DCHECK(manifest.manifest_url.is_valid()); | 657 DCHECK(manifest.manifest_url.is_valid()); |
658 } | 658 } |
659 } | 659 } |
660 unfinished_work_->set_num_manifest_urls(top_hosts_to_fetch_.size()); | 660 unfinished_work_->set_num_manifest_urls(top_hosts_to_fetch_.size()); |
661 | 661 |
662 PostTaskAndReplyWithResult( | 662 PostTaskAndReplyWithResult( |
663 db_task_runner_.get(), FROM_HERE, | 663 db_task_runner_.get(), FROM_HERE, |
664 base::Bind(&RetrieveQuotaInfo, precache_database_), | 664 base::Bind(&RetrieveQuotaInfo, precache_database_), |
665 base::Bind(&PrecacheFetcher::OnQuotaInfoRetrieved, AsWeakPtr())); | 665 base::Bind(&PrecacheFetcher::OnQuotaInfoRetrieved, AsWeakPtr())); |
666 } | 666 } |
667 | 667 |
668 void PrecacheFetcher::OnQuotaInfoRetrieved(const PrecacheQuota& quota) { | 668 void PrecacheFetcher::OnQuotaInfoRetrieved(const PrecacheQuota& quota) { |
669 VLOG(9) << "OnQuotaInfoRetrieved"; | |
669 quota_ = quota; | 670 quota_ = quota; |
670 base::Time time_now = base::Time::Now(); | 671 base::Time time_now = base::Time::Now(); |
671 if (IsQuotaTimeExpired(quota_, time_now)) { | 672 if (IsQuotaTimeExpired(quota_, time_now)) { |
672 // This is a new day. Update daily quota, that starts today and expires by | 673 // This is a new day. Update daily quota, that starts today and expires by |
673 // end of today. | 674 // end of today. |
674 quota_.set_start_time(time_now.LocalMidnight().ToInternalValue()); | 675 quota_.set_start_time(time_now.LocalMidnight().ToInternalValue()); |
675 quota_.set_remaining( | 676 quota_.set_remaining( |
676 unfinished_work_->config_settings().daily_quota_total()); | 677 unfinished_work_->config_settings().daily_quota_total()); |
677 db_task_runner_->PostTask( | 678 db_task_runner_->PostTask( |
678 FROM_HERE, | 679 FROM_HERE, |
679 base::Bind(&PrecacheDatabase::SaveQuota, precache_database_, quota_)); | 680 base::Bind(&PrecacheDatabase::SaveQuota, precache_database_, quota_)); |
680 } | 681 } |
681 StartNextFetch(); | 682 StartNextFetch(); |
682 } | 683 } |
683 | 684 |
684 ManifestHostInfo::ManifestHostInfo(int64_t manifest_id, | 685 ManifestHostInfo::ManifestHostInfo(int64_t manifest_id, |
685 const std::string& hostname, | 686 const std::string& hostname, |
687 int64_t visits, | |
686 const std::string& used_url_hash, | 688 const std::string& used_url_hash, |
687 const std::string& unused_url_hash) | 689 const std::string& unused_url_hash) |
688 : manifest_id(manifest_id), | 690 : manifest_id(manifest_id), |
689 hostname(hostname), | 691 hostname(hostname), |
692 visits(visits), | |
690 used_url_hash(used_url_hash), | 693 used_url_hash(used_url_hash), |
691 unused_url_hash(unused_url_hash) {} | 694 unused_url_hash(unused_url_hash) {} |
692 | 695 |
693 ManifestHostInfo::~ManifestHostInfo() {} | 696 ManifestHostInfo::~ManifestHostInfo() {} |
694 | 697 |
695 ManifestHostInfo::ManifestHostInfo(ManifestHostInfo&&) = default; | 698 ManifestHostInfo::ManifestHostInfo(ManifestHostInfo&&) = default; |
696 | 699 |
697 ManifestHostInfo& ManifestHostInfo::operator=(ManifestHostInfo&&) = default; | 700 ManifestHostInfo& ManifestHostInfo::operator=(ManifestHostInfo&&) = default; |
698 | 701 |
699 void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) { | 702 ResourceInfo::ResourceInfo(const GURL& url, |
703 const std::string& referrer, | |
704 double weight) | |
705 : url(url), referrer(referrer), weight(weight) {} | |
706 | |
707 ResourceInfo::~ResourceInfo() {} | |
708 | |
709 ResourceInfo::ResourceInfo(ResourceInfo&&) = default; | |
710 | |
711 ResourceInfo& ResourceInfo::operator=(ResourceInfo&&) = default; | |
712 | |
713 void PrecacheFetcher::OnManifestFetchComplete(int64_t host_visits, | |
714 const Fetcher& source) { | |
715 VLOG(9) << "OnManifestFetchComplete " << source.referrer(); | |
700 DCHECK(unfinished_work_->has_config_settings()); | 716 DCHECK(unfinished_work_->has_config_settings()); |
701 UpdateStats(source.response_bytes(), source.network_response_bytes()); | 717 UpdateStats(source.response_bytes(), source.network_response_bytes()); |
702 if (source.network_url_fetcher() == nullptr) { | 718 if (source.network_url_fetcher() == nullptr) { |
703 pool_.DeleteAll(); // Cancel any other ongoing request. | 719 pool_.DeleteAll(); // Cancel any other ongoing request. |
704 } else { | 720 } else { |
705 PrecacheManifest manifest; | 721 PrecacheManifest manifest; |
706 | 722 |
707 if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) { | 723 if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) { |
708 const int32_t len = | 724 int32_t len = manifest.resource_size(); |
709 std::min(manifest.resource_size(), | 725 if (!global_ranking_) |
bengr
2016/10/14 21:52:19
Add curly braces.
twifkak
2016/10/14 22:41:45
Done.
| |
710 unfinished_work_->config_settings().top_resources_count()); | 726 len = std::min( |
727 len, unfinished_work_->config_settings().top_resources_count()); | |
711 const uint64_t resource_bitset = | 728 const uint64_t resource_bitset = |
712 GetResourceBitset(manifest, experiment_id_); | 729 GetResourceBitset(manifest, experiment_id_); |
713 for (int i = 0; i < len; ++i) { | 730 for (int i = 0; i < len; ++i) { |
714 if (((0x1ULL << i) & resource_bitset) && | 731 if (((0x1ULL << i) & resource_bitset) && |
715 manifest.resource(i).has_url()) { | 732 manifest.resource(i).has_url()) { |
716 GURL url(manifest.resource(i).url()); | 733 GURL url(manifest.resource(i).url()); |
717 if (url.is_valid()) { | 734 if (url.is_valid()) { |
718 resources_to_fetch_.emplace_back(url, source.referrer()); | 735 VLOG(9) << "Adding resource " << url.spec(); |
736 double weight = manifest.resource(i).weight_ratio() * host_visits; | |
737 if (weight >= unfinished_work_->config_settings().min_weight()) | |
738 resources_to_rank_.emplace_back(url, source.referrer(), weight); | |
719 } | 739 } |
720 } | 740 } |
721 } | 741 } |
722 db_task_runner_->PostTask( | 742 db_task_runner_->PostTask( |
723 FROM_HERE, base::Bind(&PrecacheDatabase::UpdatePrecacheReferrerHost, | 743 FROM_HERE, base::Bind(&PrecacheDatabase::UpdatePrecacheReferrerHost, |
724 precache_database_, source.referrer(), | 744 precache_database_, source.referrer(), |
725 manifest.id().id(), base::Time::Now())); | 745 manifest.id().id(), base::Time::Now())); |
726 } | 746 } |
727 } | 747 } |
728 | 748 |
749 top_hosts_fetching_.remove_if([&source](const ManifestHostInfo& top_host) { | |
750 return top_host.manifest_url == source.url(); | |
751 }); | |
752 | |
729 pool_.Delete(source); | 753 pool_.Delete(source); |
754 | |
755 if (top_hosts_to_fetch_.empty() && top_hosts_fetching_.empty()) { | |
756 VLOG(9) << "Ranking resources."; | |
757 // Done fetching manifests. Now sort resources_to_rank_ into | |
758 // resources_to_fetch_, by descending weight. When StartNextFetch runs, it | |
759 // will begin fetching resources. | |
760 resources_to_fetch_ = std::move(resources_to_rank_); | |
761 if (global_ranking_) { | |
762 std::stable_sort( | |
763 resources_to_fetch_.begin(), resources_to_fetch_.end(), | |
764 [](const ResourceInfo& first, const ResourceInfo& second) { | |
765 return first.weight > second.weight; | |
766 }); | |
767 } | |
768 // Truncate to size |total_resources_count|. | |
769 const size_t num_resources = std::min( | |
770 resources_to_fetch_.size(), | |
771 static_cast<size_t>( | |
772 unfinished_work_->config_settings().total_resources_count())); | |
773 resources_to_fetch_.erase(resources_to_fetch_.begin() + num_resources, | |
774 resources_to_fetch_.end()); | |
775 // Save denominator for PercentCompleted UMA. | |
776 unfinished_work_->set_num_resource_urls(resources_to_fetch_.size()); | |
777 } | |
778 | |
730 StartNextFetch(); | 779 StartNextFetch(); |
731 } | 780 } |
732 | 781 |
733 void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) { | 782 void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) { |
734 UpdateStats(source.response_bytes(), source.network_response_bytes()); | 783 UpdateStats(source.response_bytes(), source.network_response_bytes()); |
735 | 784 |
736 db_task_runner_->PostTask( | 785 db_task_runner_->PostTask( |
737 FROM_HERE, | 786 FROM_HERE, |
738 base::Bind(&PrecacheDatabase::RecordURLPrefetch, precache_database_, | 787 base::Bind(&PrecacheDatabase::RecordURLPrefetch, precache_database_, |
739 source.url(), source.referrer(), base::Time::Now(), | 788 source.url(), source.referrer(), base::Time::Now(), |
740 source.was_cached(), source.response_bytes())); | 789 source.was_cached(), source.response_bytes())); |
741 | 790 |
791 resources_fetching_.remove_if([&source](const ResourceInfo& resource) { | |
792 return resource.url == source.url(); | |
793 }); | |
794 | |
742 pool_.Delete(source); | 795 pool_.Delete(source); |
743 | 796 |
744 // The resource has already been put in the cache during the fetch process, so | 797 // The resource has already been put in the cache during the fetch process, so |
745 // nothing more needs to be done for the resource. | 798 // nothing more needs to be done for the resource. |
746 StartNextFetch(); | 799 StartNextFetch(); |
747 } | 800 } |
748 | 801 |
749 void PrecacheFetcher::UpdateStats(int64_t response_bytes, | 802 void PrecacheFetcher::UpdateStats(int64_t response_bytes, |
750 int64_t network_response_bytes) { | 803 int64_t network_response_bytes) { |
751 DCHECK_LE(0, response_bytes); | 804 DCHECK_LE(0, response_bytes); |
(...skipping 12 matching lines...) Expand all Loading... | |
764 remaining = 0; | 817 remaining = 0; |
765 quota_.set_remaining( | 818 quota_.set_remaining( |
766 used_bytes > quota_.remaining() ? 0U : quota_.remaining() - used_bytes); | 819 used_bytes > quota_.remaining() ? 0U : quota_.remaining() - used_bytes); |
767 db_task_runner_->PostTask( | 820 db_task_runner_->PostTask( |
768 FROM_HERE, | 821 FROM_HERE, |
769 base::Bind(&PrecacheDatabase::SaveQuota, precache_database_, quota_)); | 822 base::Bind(&PrecacheDatabase::SaveQuota, precache_database_, quota_)); |
770 } | 823 } |
771 } | 824 } |
772 | 825 |
773 } // namespace precache | 826 } // namespace precache |
OLD | NEW |