Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(215)

Side by Side Diff: components/precache/core/precache_fetcher.cc

Issue 2403193002: Precache: Optionally rank resources-to-precache globally. (Closed)
Patch Set: Code readability improvements per bengr. Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/precache/core/precache_fetcher.h" 5 #include "components/precache/core/precache_fetcher.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <limits> 8 #include <limits>
9 #include <set>
9 #include <utility> 10 #include <utility>
10 #include <vector> 11 #include <vector>
11 12
12 #include "base/base64.h" 13 #include "base/base64.h"
13 #include "base/bind.h" 14 #include "base/bind.h"
14 #include "base/bind_helpers.h" 15 #include "base/bind_helpers.h"
15 #include "base/callback.h" 16 #include "base/callback.h"
16 #include "base/command_line.h" 17 #include "base/command_line.h"
17 #include "base/compiler_specific.h" 18 #include "base/compiler_specific.h"
18 #include "base/containers/hash_tables.h" 19 #include "base/containers/hash_tables.h"
(...skipping 27 matching lines...) Expand all
46 // The following flags are for privacy reasons. For example, if a user clears 47 // The following flags are for privacy reasons. For example, if a user clears
47 // their cookies, but a tracking beacon is prefetched and the beacon specifies 48 // their cookies, but a tracking beacon is prefetched and the beacon specifies
48 // its source URL in a URL param, the beacon site would be able to rebuild a 49 // its source URL in a URL param, the beacon site would be able to rebuild a
49 // profile of the user. All three flags should occur together, or not at all, 50 // profile of the user. All three flags should occur together, or not at all,
50 // per 51 // per
51 // https://groups.google.com/a/chromium.org/d/topic/net-dev/vvcodRV6SdM/discussion. 52 // https://groups.google.com/a/chromium.org/d/topic/net-dev/vvcodRV6SdM/discussion.
52 const int kNoTracking = 53 const int kNoTracking =
53 net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES | 54 net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES |
54 net::LOAD_DO_NOT_SEND_AUTH_DATA; 55 net::LOAD_DO_NOT_SEND_AUTH_DATA;
55 56
57 // The maximum number of URLFetcher requests that can be on flight in parallel.
bengr 2016/10/18 18:57:20 nit: on -> in
twifkak 2016/10/18 19:52:27 Done.
58 // Note that OnManifestFetchComplete and OnResourceFetchComplete perform
59 // remove_if operations which are O(kMaxParallelFetches). Those should be
60 // optimized before increasing this value significantly.
61 const int kMaxParallelFetches = 10;
62
56 namespace { 63 namespace {
57 64
58 // The maximum number of URLFetcher requests that can be on flight in parallel.
59 const int kMaxParallelFetches = 10;
60
61 // The maximum for the Precache.Fetch.ResponseBytes.* histograms. We set this to 65 // The maximum for the Precache.Fetch.ResponseBytes.* histograms. We set this to
62 // a number we expect to be in the 99th percentile for the histogram, give or 66 // a number we expect to be in the 99th percentile for the histogram, give or
63 // take. 67 // take.
64 const int kMaxResponseBytes = 500 * 1024 * 1024; 68 const int kMaxResponseBytes = 500 * 1024 * 1024;
65 69
66 GURL GetDefaultConfigURL() { 70 GURL GetDefaultConfigURL() {
67 const base::CommandLine& command_line = 71 const base::CommandLine& command_line =
68 *base::CommandLine::ForCurrentProcess(); 72 *base::CommandLine::ForCurrentProcess();
69 if (command_line.HasSwitch(switches::kPrecacheConfigSettingsURL)) { 73 if (command_line.HasSwitch(switches::kPrecacheConfigSettingsURL)) {
70 return GURL( 74 return GURL(
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after
176 hashes.append(reinterpret_cast<const char*>(sha1_hash), kHashBytesSize); 180 hashes.append(reinterpret_cast<const char*>(sha1_hash), kHashBytesSize);
177 } 181 }
178 base::Base64Encode(hashes, &hashes); 182 base::Base64Encode(hashes, &hashes);
179 return hashes; 183 return hashes;
180 } 184 }
181 185
182 // Retrieves the manifest info on the DB thread. Manifest info for each of the 186 // Retrieves the manifest info on the DB thread. Manifest info for each of the
183 // hosts in |hosts_to_fetch|, is added to |hosts_info|. 187 // hosts in |hosts_to_fetch|, is added to |hosts_info|.
184 std::deque<ManifestHostInfo> RetrieveManifestInfo( 188 std::deque<ManifestHostInfo> RetrieveManifestInfo(
185 const base::WeakPtr<PrecacheDatabase>& precache_database, 189 const base::WeakPtr<PrecacheDatabase>& precache_database,
186 std::vector<std::string> hosts_to_fetch) { 190 std::vector<std::pair<std::string, int64_t>> hosts_to_fetch) {
191 VLOG(9) << "RetrieveManifestInfo";
187 std::deque<ManifestHostInfo> hosts_info; 192 std::deque<ManifestHostInfo> hosts_info;
188 if (!precache_database) 193 if (!precache_database)
189 return hosts_info; 194 return hosts_info;
190 195
191 for (const auto& host : hosts_to_fetch) { 196 for (const auto& host : hosts_to_fetch) {
192 auto referrer_host_info = precache_database->GetReferrerHost(host); 197 auto referrer_host_info = precache_database->GetReferrerHost(host.first);
193 if (referrer_host_info.id != PrecacheReferrerHostEntry::kInvalidId) { 198 if (referrer_host_info.id != PrecacheReferrerHostEntry::kInvalidId) {
194 std::vector<GURL> used_urls, unused_urls; 199 std::vector<GURL> used_urls, unused_urls;
195 precache_database->GetURLListForReferrerHost(referrer_host_info.id, 200 precache_database->GetURLListForReferrerHost(referrer_host_info.id,
196 &used_urls, &unused_urls); 201 &used_urls, &unused_urls);
197 hosts_info.push_back( 202 hosts_info.push_back(
198 ManifestHostInfo(referrer_host_info.manifest_id, host, 203 ManifestHostInfo(referrer_host_info.manifest_id, host.first,
199 GetResourceURLBase64Hash(used_urls), 204 host.second, GetResourceURLBase64Hash(used_urls),
200 GetResourceURLBase64Hash(unused_urls))); 205 GetResourceURLBase64Hash(unused_urls)));
201 } else { 206 } else {
202 hosts_info.push_back( 207 hosts_info.push_back(
203 ManifestHostInfo(PrecacheReferrerHostEntry::kInvalidId, host, 208 ManifestHostInfo(PrecacheReferrerHostEntry::kInvalidId, host.first,
204 std::string(), std::string())); 209 host.second, std::string(), std::string()));
205 } 210 }
206 } 211 }
207 return hosts_info; 212 return hosts_info;
208 } 213 }
209 214
210 PrecacheQuota RetrieveQuotaInfo( 215 PrecacheQuota RetrieveQuotaInfo(
211 const base::WeakPtr<PrecacheDatabase>& precache_database) { 216 const base::WeakPtr<PrecacheDatabase>& precache_database) {
217 VLOG(9) << "RetrieveQuotaInfo";
212 PrecacheQuota quota; 218 PrecacheQuota quota;
213 if (precache_database) { 219 if (precache_database) {
214 quota = precache_database->GetQuota(); 220 quota = precache_database->GetQuota();
215 } 221 }
216 return quota; 222 return quota;
217 } 223 }
218 224
219 // Returns true if the |quota| time has expired. 225 // Returns true if the |quota| time has expired.
220 bool IsQuotaTimeExpired(const PrecacheQuota& quota, 226 bool IsQuotaTimeExpired(const PrecacheQuota& quota,
221 const base::Time& time_now) { 227 const base::Time& time_now) {
(...skipping 134 matching lines...) Expand 10 before | Expand all | Expand 10 after
356 // These may be unset in tests. 362 // These may be unset in tests.
357 if (!unfinished_work.has_start_time()) 363 if (!unfinished_work.has_start_time())
358 return; 364 return;
359 base::TimeDelta time_to_fetch = 365 base::TimeDelta time_to_fetch =
360 base::Time::Now() - 366 base::Time::Now() -
361 base::Time::FromInternalValue(unfinished_work.start_time()); 367 base::Time::FromInternalValue(unfinished_work.start_time());
362 UMA_HISTOGRAM_CUSTOM_TIMES("Precache.Fetch.TimeToComplete", time_to_fetch, 368 UMA_HISTOGRAM_CUSTOM_TIMES("Precache.Fetch.TimeToComplete", time_to_fetch,
363 base::TimeDelta::FromSeconds(1), 369 base::TimeDelta::FromSeconds(1),
364 base::TimeDelta::FromHours(4), 50); 370 base::TimeDelta::FromHours(4), 50);
365 371
366 // Number of manifests for which we have downloaded all resources. 372 int num_total_resources = unfinished_work.num_resource_urls();
367 int manifests_completed = 373 int percent_completed =
368 unfinished_work.num_manifest_urls() - remaining_manifest_urls_to_fetch; 374 num_total_resources == 0
bengr 2016/10/18 18:57:20 I wonder if we should be reporting num_total_resou
twifkak 2016/10/18 19:52:27 I'm amenable to that. My intuition is that this is
bengr 2016/10/18 21:50:31 sgtm
twifkak 2016/10/18 22:55:47 Done.
375 ? 0
376 : (100 * (static_cast<double>(num_total_resources -
377 remaining_resource_urls_to_fetch) /
378 num_total_resources));
369 379
370 // If there are resource URLs left to fetch, the last manifest is not yet 380 VLOG(6) << "Percent completed: " << percent_completed;
371 // completed.
372 if (remaining_resource_urls_to_fetch > 0)
373 --manifests_completed;
374
375 DCHECK_GE(manifests_completed, 0);
376 int percent_completed = unfinished_work.num_manifest_urls() == 0
377 ? 0
378 : (static_cast<double>(manifests_completed) /
379 unfinished_work.num_manifest_urls() * 100);
380 381
381 UMA_HISTOGRAM_PERCENTAGE("Precache.Fetch.PercentCompleted", 382 UMA_HISTOGRAM_PERCENTAGE("Precache.Fetch.PercentCompleted",
382 percent_completed); 383 percent_completed);
383 UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total", 384 UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Total",
384 unfinished_work.total_bytes(), 385 unfinished_work.total_bytes(), 1,
385 1, kMaxResponseBytes, 100); 386 kMaxResponseBytes, 100);
386 UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Network", 387 UMA_HISTOGRAM_CUSTOM_COUNTS("Precache.Fetch.ResponseBytes.Network",
387 unfinished_work.network_bytes(), 388 unfinished_work.network_bytes(), 1,
388 1, kMaxResponseBytes, 389 kMaxResponseBytes, 100);
389 100);
390 } 390 }
391 391
392 // static 392 // static
393 std::string PrecacheFetcher::GetResourceURLBase64HashForTesting( 393 std::string PrecacheFetcher::GetResourceURLBase64HashForTesting(
394 const std::vector<GURL>& urls) { 394 const std::vector<GURL>& urls) {
395 return GetResourceURLBase64Hash(urls); 395 return GetResourceURLBase64Hash(urls);
396 } 396 }
397 397
398 PrecacheFetcher::PrecacheFetcher( 398 PrecacheFetcher::PrecacheFetcher(
399 net::URLRequestContextGetter* request_context, 399 net::URLRequestContextGetter* request_context,
400 const GURL& config_url, 400 const GURL& config_url,
401 const std::string& manifest_url_prefix, 401 const std::string& manifest_url_prefix,
402 bool global_ranking,
402 std::unique_ptr<PrecacheUnfinishedWork> unfinished_work, 403 std::unique_ptr<PrecacheUnfinishedWork> unfinished_work,
403 uint32_t experiment_id, 404 uint32_t experiment_id,
404 const base::WeakPtr<PrecacheDatabase>& precache_database, 405 const base::WeakPtr<PrecacheDatabase>& precache_database,
405 const scoped_refptr<base::SingleThreadTaskRunner>& db_task_runner, 406 const scoped_refptr<base::SingleThreadTaskRunner>& db_task_runner,
406 PrecacheFetcher::PrecacheDelegate* precache_delegate) 407 PrecacheFetcher::PrecacheDelegate* precache_delegate)
407 : request_context_(request_context), 408 : request_context_(request_context),
408 config_url_(config_url), 409 config_url_(config_url),
409 manifest_url_prefix_(manifest_url_prefix), 410 manifest_url_prefix_(manifest_url_prefix),
411 global_ranking_(global_ranking),
410 precache_database_(precache_database), 412 precache_database_(precache_database),
411 db_task_runner_(std::move(db_task_runner)), 413 db_task_runner_(std::move(db_task_runner)),
412 precache_delegate_(precache_delegate), 414 precache_delegate_(precache_delegate),
413 pool_(kMaxParallelFetches), 415 pool_(kMaxParallelFetches),
414 experiment_id_(experiment_id) { 416 experiment_id_(experiment_id) {
415 DCHECK(request_context_.get()); // Request context must be non-NULL. 417 DCHECK(request_context_.get()); // Request context must be non-NULL.
416 DCHECK(precache_delegate_); // Precache delegate must be non-NULL. 418 DCHECK(precache_delegate_); // Precache delegate must be non-NULL.
417 419
418 DCHECK_NE(GURL(), GetDefaultConfigURL()) 420 DCHECK_NE(GURL(), GetDefaultConfigURL())
419 << "Could not determine the precache config settings URL."; 421 << "Could not determine the precache config settings URL.";
420 DCHECK_NE(std::string(), GetDefaultManifestURLPrefix()) 422 DCHECK_NE(std::string(), GetDefaultManifestURLPrefix())
421 << "Could not determine the default precache manifest URL prefix."; 423 << "Could not determine the default precache manifest URL prefix.";
422 DCHECK(unfinished_work); 424 DCHECK(unfinished_work);
423 425
424 // Copy resources to member variable as a convenience. 426 // Copy resources to member variable as a convenience.
425 // TODO(rajendrant): Consider accessing these directly from the proto, by 427 // TODO(rajendrant): Consider accessing these directly from the proto, by
426 // keeping track of the current resource index. 428 // keeping track of the current resource index.
427 for (const auto& resource : unfinished_work->resource()) { 429 for (const auto& resource : unfinished_work->resource()) {
428 if (resource.has_url() && resource.has_top_host_name()) { 430 if (resource.has_url() && resource.has_top_host_name()) {
431 // Weight doesn't matter, as the resources have already been sorted by
432 // this point.
429 resources_to_fetch_.emplace_back(GURL(resource.url()), 433 resources_to_fetch_.emplace_back(GURL(resource.url()),
430 resource.top_host_name()); 434 resource.top_host_name(), 0);
431 } 435 }
432 } 436 }
433 unfinished_work_ = std::move(unfinished_work); 437 unfinished_work_ = std::move(unfinished_work);
434 } 438 }
435 439
436 PrecacheFetcher::~PrecacheFetcher() { 440 PrecacheFetcher::~PrecacheFetcher() {
437 } 441 }
438 442
439 std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() { 443 std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() {
440 // This could get called multiple times, and it should be handled gracefully. 444 // This could get called multiple times, and it should be handled gracefully.
441 if (!unfinished_work_) 445 if (!unfinished_work_)
442 return nullptr; 446 return nullptr;
443 447
444 unfinished_work_->clear_resource(); 448 unfinished_work_->clear_resource();
445 if (unfinished_work_->has_config_settings()) { 449 if (unfinished_work_->has_config_settings()) {
446 // If config fetch is incomplete, |top_hosts_to_fetch_| will be empty and 450 // If config fetch is incomplete, |top_hosts_to_fetch_| will be empty and
447 // top hosts should be left as is in |unfinished_work_|. 451 // top hosts should be left as is in |unfinished_work_|.
448 unfinished_work_->clear_top_host(); 452 unfinished_work_->clear_top_host();
449 for (const auto& top_host : top_hosts_to_fetch_) { 453 for (const auto& top_host : top_hosts_fetching_)
450 unfinished_work_->add_top_host()->set_hostname(top_host.hostname); 454 unfinished_work_->add_top_host()->set_hostname(top_host.hostname);
451 } 455 for (const auto& top_host : top_hosts_to_fetch_)
456 unfinished_work_->add_top_host()->set_hostname(top_host.hostname);
457 }
458 for (const auto& resource : resources_fetching_) {
459 auto new_resource = unfinished_work_->add_resource();
460 new_resource->set_url(resource.url.spec());
461 new_resource->set_top_host_name(resource.referrer);
452 } 462 }
453 for (const auto& resource : resources_to_fetch_) { 463 for (const auto& resource : resources_to_fetch_) {
454 auto new_resource = unfinished_work_->add_resource(); 464 auto new_resource = unfinished_work_->add_resource();
455 new_resource->set_url(resource.first.spec()); 465 new_resource->set_url(resource.url.spec());
456 new_resource->set_top_host_name(resource.second); 466 new_resource->set_top_host_name(resource.referrer);
457 } 467 }
458 for (const auto& it : pool_.elements()) { 468 top_hosts_fetching_.clear();
459 const Fetcher* fetcher = it.first;
460 GURL config_url =
461 config_url_.is_empty() ? GetDefaultConfigURL() : config_url_;
462 if (fetcher->is_resource_request()) {
463 auto resource = unfinished_work_->add_resource();
464 resource->set_url(fetcher->url().spec());
465 resource->set_top_host_name(fetcher->referrer());
466 } else if (fetcher->url() != config_url) {
467 unfinished_work_->add_top_host()->set_hostname(fetcher->referrer());
468 }
469 }
470 top_hosts_to_fetch_.clear(); 469 top_hosts_to_fetch_.clear();
470 resources_fetching_.clear();
471 resources_to_fetch_.clear(); 471 resources_to_fetch_.clear();
472 pool_.DeleteAll(); 472 pool_.DeleteAll();
473 return std::move(unfinished_work_); 473 return std::move(unfinished_work_);
474 } 474 }
475 475
476 void PrecacheFetcher::Start() { 476 void PrecacheFetcher::Start() {
477 if (unfinished_work_->has_config_settings()) { 477 if (unfinished_work_->has_config_settings()) {
478 DCHECK(unfinished_work_->has_start_time()); 478 DCHECK(unfinished_work_->has_start_time());
479 DetermineManifests(); 479 DetermineManifests();
480 return; 480 return;
(...skipping 10 matching lines...) Expand all
491 VLOG(3) << "Fetching " << config_url; 491 VLOG(3) << "Fetching " << config_url;
492 pool_.Add(base::MakeUnique<Fetcher>( 492 pool_.Add(base::MakeUnique<Fetcher>(
493 request_context_.get(), config_url, std::string(), 493 request_context_.get(), config_url, std::string(),
494 base::Bind(&PrecacheFetcher::OnConfigFetchComplete, AsWeakPtr()), 494 base::Bind(&PrecacheFetcher::OnConfigFetchComplete, AsWeakPtr()),
495 false /* is_resource_request */, std::numeric_limits<int32_t>::max())); 495 false /* is_resource_request */, std::numeric_limits<int32_t>::max()));
496 } 496 }
497 497
498 void PrecacheFetcher::StartNextResourceFetch() { 498 void PrecacheFetcher::StartNextResourceFetch() {
499 DCHECK(unfinished_work_->has_config_settings()); 499 DCHECK(unfinished_work_->has_config_settings());
500 while (!resources_to_fetch_.empty() && pool_.IsAvailable()) { 500 while (!resources_to_fetch_.empty() && pool_.IsAvailable()) {
501 const auto& resource = resources_to_fetch_.front(); 501 ResourceInfo& resource = resources_to_fetch_.front();
502 const size_t max_bytes = std::min( 502 const size_t max_bytes = std::min(
503 quota_.remaining(), 503 quota_.remaining(),
504 std::min(unfinished_work_->config_settings().max_bytes_per_resource(), 504 std::min(unfinished_work_->config_settings().max_bytes_per_resource(),
505 unfinished_work_->config_settings().max_bytes_total() - 505 unfinished_work_->config_settings().max_bytes_total() -
506 unfinished_work_->total_bytes())); 506 unfinished_work_->total_bytes()));
507 VLOG(3) << "Fetching " << resource.first << " " << resource.second; 507 VLOG(3) << "Fetching " << resource.url << " " << resource.referrer;
508 pool_.Add(base::MakeUnique<Fetcher>( 508 pool_.Add(base::MakeUnique<Fetcher>(
509 request_context_.get(), resource.first, resource.second, 509 request_context_.get(), resource.url, resource.referrer,
510 base::Bind(&PrecacheFetcher::OnResourceFetchComplete, AsWeakPtr()), 510 base::Bind(&PrecacheFetcher::OnResourceFetchComplete, AsWeakPtr()),
511 true /* is_resource_request */, max_bytes)); 511 true /* is_resource_request */, max_bytes));
512 512
513 resources_fetching_.push_back(std::move(resource));
513 resources_to_fetch_.pop_front(); 514 resources_to_fetch_.pop_front();
514 } 515 }
515 } 516 }
516 517
517 void PrecacheFetcher::StartNextManifestFetch() { 518 void PrecacheFetcher::StartNextManifestFetches() {
518 if (top_hosts_to_fetch_.empty() || !pool_.IsAvailable()) 519 // We fetch as many manifests at a time as possible, as we need all resource
519 return; 520 // URLs in memory in order to rank them.
520 521 while (!top_hosts_to_fetch_.empty() && pool_.IsAvailable()) {
521 // We only fetch one manifest at a time to keep the size of 522 ManifestHostInfo& top_host = top_hosts_to_fetch_.front();
522 // resources_to_fetch_ as small as possible. 523 VLOG(3) << "Fetching " << top_host.manifest_url;
bengr 2016/10/18 18:57:20 Please remove all VLOG statements.
twifkak 2016/10/18 19:52:27 Done.
523 VLOG(3) << "Fetching " << top_hosts_to_fetch_.front().manifest_url; 524 pool_.Add(base::MakeUnique<Fetcher>(
524 pool_.Add(base::MakeUnique<Fetcher>( 525 request_context_.get(), top_host.manifest_url, top_host.hostname,
525 request_context_.get(), top_hosts_to_fetch_.front().manifest_url, 526 base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr(),
526 top_hosts_to_fetch_.front().hostname, 527 top_host.visits),
527 base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr()), 528 false /* is_resource_request */, std::numeric_limits<int32_t>::max()));
528 false /* is_resource_request */, std::numeric_limits<int32_t>::max())); 529 top_hosts_fetching_.push_back(std::move(top_host));
529 top_hosts_to_fetch_.pop_front(); 530 top_hosts_to_fetch_.pop_front();
531 }
530 } 532 }
531 533
532 void PrecacheFetcher::NotifyDone( 534 void PrecacheFetcher::NotifyDone(
533 size_t remaining_manifest_urls_to_fetch, 535 size_t remaining_manifest_urls_to_fetch,
534 size_t remaining_resource_urls_to_fetch) { 536 size_t remaining_resource_urls_to_fetch) {
537 VLOG(9) << "NotifyDone";
535 RecordCompletionStatistics(*unfinished_work_, 538 RecordCompletionStatistics(*unfinished_work_,
536 remaining_manifest_urls_to_fetch, 539 remaining_manifest_urls_to_fetch,
537 remaining_resource_urls_to_fetch); 540 remaining_resource_urls_to_fetch);
538 precache_delegate_->OnDone(); 541 precache_delegate_->OnDone();
539 } 542 }
540 543
541 void PrecacheFetcher::StartNextFetch() { 544 void PrecacheFetcher::StartNextFetch() {
545 VLOG(9) << "StartNextFetch";
542 DCHECK(unfinished_work_->has_config_settings()); 546 DCHECK(unfinished_work_->has_config_settings());
543 547
544 // If over the precache total size cap or daily quota, then stop prefetching. 548 // If over the precache total size cap or daily quota, then stop prefetching.
545 if ((unfinished_work_->total_bytes() > 549 if ((unfinished_work_->total_bytes() >
546 unfinished_work_->config_settings().max_bytes_total()) || 550 unfinished_work_->config_settings().max_bytes_total()) ||
547 quota_.remaining() == 0) { 551 quota_.remaining() == 0) {
548 size_t pending_manifests_in_pool = 0;
549 size_t pending_resources_in_pool = 0;
550 for (const auto& element_pair : pool_.elements()) {
551 const Fetcher* fetcher = element_pair.first;
552 if (fetcher->is_resource_request())
553 pending_resources_in_pool++;
554 else if (fetcher->url() != config_url_)
555 pending_manifests_in_pool++;
556 }
557 pool_.DeleteAll(); 552 pool_.DeleteAll();
558 NotifyDone(top_hosts_to_fetch_.size() + pending_manifests_in_pool, 553 NotifyDone(top_hosts_to_fetch_.size() + top_hosts_fetching_.size(),
559 resources_to_fetch_.size() + pending_resources_in_pool); 554 resources_to_fetch_.size() + resources_fetching_.size());
560 return; 555 return;
561 } 556 }
562 557
563 StartNextResourceFetch(); 558 StartNextResourceFetch();
564 StartNextManifestFetch(); 559 StartNextManifestFetches();
565 if (top_hosts_to_fetch_.empty() && resources_to_fetch_.empty() && 560 if (top_hosts_to_fetch_.empty() && resources_to_fetch_.empty() &&
566 pool_.IsEmpty()) { 561 pool_.IsEmpty()) {
567 // There are no more URLs to fetch, so end the precache cycle. 562 // There are no more URLs to fetch, so end the precache cycle.
568 NotifyDone(0, 0); 563 NotifyDone(0, 0);
569 // OnDone may have deleted this PrecacheFetcher, so don't do anything after 564 // OnDone may have deleted this PrecacheFetcher, so don't do anything after
570 // it is called. 565 // it is called.
571 } 566 }
572 } 567 }
573 568
574 void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) { 569 void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) {
570 VLOG(9) << "OnConfigFetchComplete";
575 UpdateStats(source.response_bytes(), source.network_response_bytes()); 571 UpdateStats(source.response_bytes(), source.network_response_bytes());
576 if (source.network_url_fetcher() == nullptr) { 572 if (source.network_url_fetcher() == nullptr) {
577 pool_.DeleteAll(); // Cancel any other ongoing request. 573 pool_.DeleteAll(); // Cancel any other ongoing request.
578 } else { 574 } else {
579 // Attempt to parse the config proto. On failure, continue on with the 575 // Attempt to parse the config proto. On failure, continue on with the
580 // default configuration. 576 // default configuration.
581 ParseProtoFromFetchResponse( 577 ParseProtoFromFetchResponse(
582 *source.network_url_fetcher(), 578 *source.network_url_fetcher(),
583 unfinished_work_->mutable_config_settings()); 579 unfinished_work_->mutable_config_settings());
584 pool_.Delete(source); 580 pool_.Delete(source);
585 DetermineManifests(); 581 DetermineManifests();
586 } 582 }
587 } 583 }
588 584
589 void PrecacheFetcher::DetermineManifests() { 585 void PrecacheFetcher::DetermineManifests() {
590 DCHECK(unfinished_work_->has_config_settings()); 586 DCHECK(unfinished_work_->has_config_settings());
591 587
592 std::vector<std::string> top_hosts_to_fetch; 588 std::vector<std::pair<std::string, int64_t>> top_hosts_to_fetch;
593 std::unique_ptr<std::deque<ManifestHostInfo>> top_hosts_info(
594 new std::deque<ManifestHostInfo>);
595 // Keep track of manifest URLs that are being fetched, in order to elide 589 // Keep track of manifest URLs that are being fetched, in order to elide
596 // duplicates. 590 // duplicates.
597 std::set<base::StringPiece> seen_top_hosts; 591 std::set<base::StringPiece> seen_top_hosts;
598 int64_t rank = 0; 592 int64_t rank = 0;
599 593
600 for (const auto& host : unfinished_work_->top_host()) { 594 for (const auto& host : unfinished_work_->top_host()) {
601 ++rank; 595 ++rank;
602 if (rank > unfinished_work_->config_settings().top_sites_count()) 596 if (rank > unfinished_work_->config_settings().top_sites_count())
603 break; 597 break;
604 if (seen_top_hosts.insert(host.hostname()).second) 598 if (seen_top_hosts.insert(host.hostname()).second)
605 top_hosts_to_fetch.push_back(host.hostname()); 599 top_hosts_to_fetch.emplace_back(host.hostname(), host.visits());
606 } 600 }
607 601
608 // Attempt to fetch manifests for starting hosts up to the maximum top sites 602 // Attempt to fetch manifests for starting hosts up to the maximum top sites
609 // count. If a manifest does not exist for a particular starting host, then 603 // count. If a manifest does not exist for a particular starting host, then
610 // the fetch will fail, and that starting host will be ignored. Starting 604 // the fetch will fail, and that starting host will be ignored. Starting
611 // hosts are not added if this is a continuation from a previous precache 605 // hosts are not added if this is a continuation from a previous precache
612 // session. 606 // session.
613 if (resources_to_fetch_.empty()) { 607 if (resources_to_fetch_.empty()) {
614 for (const std::string& host : 608 for (const std::string& host :
615 unfinished_work_->config_settings().forced_site()) { 609 unfinished_work_->config_settings().forced_site()) {
610 // We add a forced site with visits == 0, which means its resources will
611 // be downloaded last. TODO(twifkak): Consider removing support for
612 // forced_site.
616 if (seen_top_hosts.insert(host).second) 613 if (seen_top_hosts.insert(host).second)
617 top_hosts_to_fetch.push_back(host); 614 top_hosts_to_fetch.emplace_back(host, 0);
618 } 615 }
619 } 616 }
620 // We only fetch one manifest at a time to keep the size of 617 // We retrieve manifest usage and quota info from the local database before
621 // resources_to_fetch_ as small as possible. 618 // fetching the manifests.
622 PostTaskAndReplyWithResult( 619 PostTaskAndReplyWithResult(
623 db_task_runner_.get(), FROM_HERE, 620 db_task_runner_.get(), FROM_HERE,
624 base::Bind(&RetrieveManifestInfo, precache_database_, 621 base::Bind(&RetrieveManifestInfo, precache_database_,
625 std::move(top_hosts_to_fetch)), 622 std::move(top_hosts_to_fetch)),
626 base::Bind(&PrecacheFetcher::OnManifestInfoRetrieved, AsWeakPtr())); 623 base::Bind(&PrecacheFetcher::OnManifestInfoRetrieved, AsWeakPtr()));
627 } 624 }
628 625
629 void PrecacheFetcher::OnManifestInfoRetrieved( 626 void PrecacheFetcher::OnManifestInfoRetrieved(
630 std::deque<ManifestHostInfo> manifests_info) { 627 std::deque<ManifestHostInfo> manifests_info) {
628 VLOG(9) << "OnManifestInfoRetrieved";
631 const std::string prefix = manifest_url_prefix_.empty() 629 const std::string prefix = manifest_url_prefix_.empty()
632 ? GetDefaultManifestURLPrefix() 630 ? GetDefaultManifestURLPrefix()
633 : manifest_url_prefix_; 631 : manifest_url_prefix_;
634 if (!GURL(prefix).is_valid()) { 632 if (!GURL(prefix).is_valid()) {
635 // Don't attempt to fetch any manifests if the manifest URL prefix 633 // Don't attempt to fetch any manifests if the manifest URL prefix
636 // is invalid. 634 // is invalid.
637 top_hosts_to_fetch_.clear(); 635 top_hosts_to_fetch_.clear();
638 unfinished_work_->set_num_manifest_urls(manifests_info.size()); 636 unfinished_work_->set_num_manifest_urls(manifests_info.size());
639 NotifyDone(manifests_info.size(), resources_to_fetch_.size()); 637 NotifyDone(manifests_info.size(), resources_to_rank_.size());
640 return; 638 return;
641 } 639 }
642 640
643 top_hosts_to_fetch_ = std::move(manifests_info); 641 top_hosts_to_fetch_ = std::move(manifests_info);
644 for (auto& manifest : top_hosts_to_fetch_) { 642 for (auto& manifest : top_hosts_to_fetch_) {
645 manifest.manifest_url = 643 manifest.manifest_url =
646 GURL(prefix + 644 GURL(prefix +
647 net::EscapeQueryParamValue( 645 net::EscapeQueryParamValue(
648 net::EscapeQueryParamValue(manifest.hostname, false), false)); 646 net::EscapeQueryParamValue(manifest.hostname, false), false));
649 if (manifest.manifest_id != PrecacheReferrerHostEntry::kInvalidId) { 647 if (manifest.manifest_id != PrecacheReferrerHostEntry::kInvalidId) {
650 manifest.manifest_url = net::AppendOrReplaceQueryParameter( 648 manifest.manifest_url = net::AppendOrReplaceQueryParameter(
651 manifest.manifest_url, "manifest", 649 manifest.manifest_url, "manifest",
652 std::to_string(manifest.manifest_id)); 650 std::to_string(manifest.manifest_id));
653 manifest.manifest_url = net::AppendOrReplaceQueryParameter( 651 manifest.manifest_url = net::AppendOrReplaceQueryParameter(
654 manifest.manifest_url, "used_resources", manifest.used_url_hash); 652 manifest.manifest_url, "used_resources", manifest.used_url_hash);
655 manifest.manifest_url = net::AppendOrReplaceQueryParameter( 653 manifest.manifest_url = net::AppendOrReplaceQueryParameter(
656 manifest.manifest_url, "unused_resources", manifest.unused_url_hash); 654 manifest.manifest_url, "unused_resources", manifest.unused_url_hash);
657 DCHECK(manifest.manifest_url.is_valid()); 655 DCHECK(manifest.manifest_url.is_valid());
658 } 656 }
659 } 657 }
660 unfinished_work_->set_num_manifest_urls(top_hosts_to_fetch_.size()); 658 unfinished_work_->set_num_manifest_urls(top_hosts_to_fetch_.size());
661 659
662 PostTaskAndReplyWithResult( 660 PostTaskAndReplyWithResult(
663 db_task_runner_.get(), FROM_HERE, 661 db_task_runner_.get(), FROM_HERE,
664 base::Bind(&RetrieveQuotaInfo, precache_database_), 662 base::Bind(&RetrieveQuotaInfo, precache_database_),
665 base::Bind(&PrecacheFetcher::OnQuotaInfoRetrieved, AsWeakPtr())); 663 base::Bind(&PrecacheFetcher::OnQuotaInfoRetrieved, AsWeakPtr()));
666 } 664 }
667 665
668 void PrecacheFetcher::OnQuotaInfoRetrieved(const PrecacheQuota& quota) { 666 void PrecacheFetcher::OnQuotaInfoRetrieved(const PrecacheQuota& quota) {
667 VLOG(9) << "OnQuotaInfoRetrieved";
669 quota_ = quota; 668 quota_ = quota;
670 base::Time time_now = base::Time::Now(); 669 base::Time time_now = base::Time::Now();
671 if (IsQuotaTimeExpired(quota_, time_now)) { 670 if (IsQuotaTimeExpired(quota_, time_now)) {
672 // This is a new day. Update daily quota, that starts today and expires by 671 // This is a new day. Update daily quota, that starts today and expires by
673 // end of today. 672 // end of today.
674 quota_.set_start_time(time_now.LocalMidnight().ToInternalValue()); 673 quota_.set_start_time(time_now.LocalMidnight().ToInternalValue());
675 quota_.set_remaining( 674 quota_.set_remaining(
676 unfinished_work_->config_settings().daily_quota_total()); 675 unfinished_work_->config_settings().daily_quota_total());
677 db_task_runner_->PostTask( 676 db_task_runner_->PostTask(
678 FROM_HERE, 677 FROM_HERE,
679 base::Bind(&PrecacheDatabase::SaveQuota, precache_database_, quota_)); 678 base::Bind(&PrecacheDatabase::SaveQuota, precache_database_, quota_));
680 } 679 }
681 StartNextFetch(); 680 StartNextFetch();
682 } 681 }
683 682
684 ManifestHostInfo::ManifestHostInfo(int64_t manifest_id, 683 ManifestHostInfo::ManifestHostInfo(int64_t manifest_id,
685 const std::string& hostname, 684 const std::string& hostname,
685 int64_t visits,
686 const std::string& used_url_hash, 686 const std::string& used_url_hash,
687 const std::string& unused_url_hash) 687 const std::string& unused_url_hash)
688 : manifest_id(manifest_id), 688 : manifest_id(manifest_id),
689 hostname(hostname), 689 hostname(hostname),
690 visits(visits),
690 used_url_hash(used_url_hash), 691 used_url_hash(used_url_hash),
691 unused_url_hash(unused_url_hash) {} 692 unused_url_hash(unused_url_hash) {}
692 693
693 ManifestHostInfo::~ManifestHostInfo() {} 694 ManifestHostInfo::~ManifestHostInfo() {}
694 695
695 ManifestHostInfo::ManifestHostInfo(ManifestHostInfo&&) = default; 696 ManifestHostInfo::ManifestHostInfo(ManifestHostInfo&&) = default;
696 697
697 ManifestHostInfo& ManifestHostInfo::operator=(ManifestHostInfo&&) = default; 698 ManifestHostInfo& ManifestHostInfo::operator=(ManifestHostInfo&&) = default;
698 699
699 void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) { 700 ResourceInfo::ResourceInfo(const GURL& url,
701 const std::string& referrer,
702 double weight)
703 : url(url), referrer(referrer), weight(weight) {}
704
705 ResourceInfo::~ResourceInfo() {}
706
707 ResourceInfo::ResourceInfo(ResourceInfo&&) = default;
708
709 ResourceInfo& ResourceInfo::operator=(ResourceInfo&&) = default;
710
711 void PrecacheFetcher::OnManifestFetchComplete(int64_t host_visits,
712 const Fetcher& source) {
713 VLOG(9) << "OnManifestFetchComplete " << source.referrer();
700 DCHECK(unfinished_work_->has_config_settings()); 714 DCHECK(unfinished_work_->has_config_settings());
701 UpdateStats(source.response_bytes(), source.network_response_bytes()); 715 UpdateStats(source.response_bytes(), source.network_response_bytes());
702 if (source.network_url_fetcher() == nullptr) { 716 if (source.network_url_fetcher() == nullptr) {
703 pool_.DeleteAll(); // Cancel any other ongoing request. 717 pool_.DeleteAll(); // Cancel any other ongoing request.
704 } else { 718 } else {
705 PrecacheManifest manifest; 719 PrecacheManifest manifest;
706 720
707 if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) { 721 if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) {
708 const int32_t len = 722 int32_t len = manifest.resource_size();
709 std::min(manifest.resource_size(), 723 if (!global_ranking_) {
710 unfinished_work_->config_settings().top_resources_count()); 724 len = std::min(
725 len, unfinished_work_->config_settings().top_resources_count());
726 }
711 const uint64_t resource_bitset = 727 const uint64_t resource_bitset =
712 GetResourceBitset(manifest, experiment_id_); 728 GetResourceBitset(manifest, experiment_id_);
713 for (int i = 0; i < len; ++i) { 729 for (int i = 0; i < len; ++i) {
714 if (((0x1ULL << i) & resource_bitset) && 730 if (((0x1ULL << i) & resource_bitset) &&
715 manifest.resource(i).has_url()) { 731 manifest.resource(i).has_url()) {
716 GURL url(manifest.resource(i).url()); 732 GURL url(manifest.resource(i).url());
717 if (url.is_valid()) { 733 if (url.is_valid()) {
718 resources_to_fetch_.emplace_back(url, source.referrer()); 734 VLOG(9) << "Adding resource " << url.spec();
735 double weight = manifest.resource(i).weight_ratio() * host_visits;
736 if (weight >= unfinished_work_->config_settings().min_weight())
737 resources_to_rank_.emplace_back(url, source.referrer(), weight);
719 } 738 }
720 } 739 }
721 } 740 }
722 db_task_runner_->PostTask( 741 db_task_runner_->PostTask(
723 FROM_HERE, base::Bind(&PrecacheDatabase::UpdatePrecacheReferrerHost, 742 FROM_HERE, base::Bind(&PrecacheDatabase::UpdatePrecacheReferrerHost,
724 precache_database_, source.referrer(), 743 precache_database_, source.referrer(),
725 manifest.id().id(), base::Time::Now())); 744 manifest.id().id(), base::Time::Now()));
726 } 745 }
727 } 746 }
728 747
748 top_hosts_fetching_.remove_if([&source](const ManifestHostInfo& top_host) {
749 return top_host.manifest_url == source.url();
750 });
751
729 pool_.Delete(source); 752 pool_.Delete(source);
753
754 if (top_hosts_to_fetch_.empty() && top_hosts_fetching_.empty()) {
755 VLOG(9) << "Ranking resources.";
756 // Done fetching manifests. Now sort resources_to_rank_ into
bengr 2016/10/18 18:57:20 Not a requirement, but this seems like it's gettin
twifkak 2016/10/18 19:52:27 Okay, I extracted the inside of this if statement
757 // resources_to_fetch_, by descending weight. When StartNextFetch runs, it
758 // will begin fetching resources.
759 resources_to_fetch_ = std::move(resources_to_rank_);
760 if (global_ranking_) {
761 std::stable_sort(
762 resources_to_fetch_.begin(), resources_to_fetch_.end(),
763 [](const ResourceInfo& first, const ResourceInfo& second) {
764 return first.weight > second.weight;
765 });
766 }
767 // Truncate to size |total_resources_count|.
768 const size_t num_resources = std::min(
769 resources_to_fetch_.size(),
770 static_cast<size_t>(
771 unfinished_work_->config_settings().total_resources_count()));
772 resources_to_fetch_.erase(resources_to_fetch_.begin() + num_resources,
773 resources_to_fetch_.end());
774 // Save denominator for PercentCompleted UMA.
775 unfinished_work_->set_num_resource_urls(resources_to_fetch_.size());
776 }
777
730 StartNextFetch(); 778 StartNextFetch();
731 } 779 }
732 780
733 void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) { 781 void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) {
734 UpdateStats(source.response_bytes(), source.network_response_bytes()); 782 UpdateStats(source.response_bytes(), source.network_response_bytes());
735 783
736 db_task_runner_->PostTask( 784 db_task_runner_->PostTask(
737 FROM_HERE, 785 FROM_HERE,
738 base::Bind(&PrecacheDatabase::RecordURLPrefetch, precache_database_, 786 base::Bind(&PrecacheDatabase::RecordURLPrefetch, precache_database_,
739 source.url(), source.referrer(), base::Time::Now(), 787 source.url(), source.referrer(), base::Time::Now(),
740 source.was_cached(), source.response_bytes())); 788 source.was_cached(), source.response_bytes()));
741 789
790 resources_fetching_.remove_if([&source](const ResourceInfo& resource) {
791 return resource.url == source.url();
792 });
793
742 pool_.Delete(source); 794 pool_.Delete(source);
743 795
744 // The resource has already been put in the cache during the fetch process, so 796 // The resource has already been put in the cache during the fetch process, so
745 // nothing more needs to be done for the resource. 797 // nothing more needs to be done for the resource.
746 StartNextFetch(); 798 StartNextFetch();
747 } 799 }
748 800
749 void PrecacheFetcher::UpdateStats(int64_t response_bytes, 801 void PrecacheFetcher::UpdateStats(int64_t response_bytes,
750 int64_t network_response_bytes) { 802 int64_t network_response_bytes) {
751 DCHECK_LE(0, response_bytes); 803 DCHECK_LE(0, response_bytes);
(...skipping 12 matching lines...) Expand all
764 remaining = 0; 816 remaining = 0;
765 quota_.set_remaining( 817 quota_.set_remaining(
766 used_bytes > quota_.remaining() ? 0U : quota_.remaining() - used_bytes); 818 used_bytes > quota_.remaining() ? 0U : quota_.remaining() - used_bytes);
767 db_task_runner_->PostTask( 819 db_task_runner_->PostTask(
768 FROM_HERE, 820 FROM_HERE,
769 base::Bind(&PrecacheDatabase::SaveQuota, precache_database_, quota_)); 821 base::Bind(&PrecacheDatabase::SaveQuota, precache_database_, quota_));
770 } 822 }
771 } 823 }
772 824
773 } // namespace precache 825 } // namespace precache
OLDNEW
« no previous file with comments | « components/precache/core/precache_fetcher.h ('k') | components/precache/core/precache_fetcher_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698