OLD | NEW |
---|---|
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/precache/core/precache_fetcher.h" | 5 #include "components/precache/core/precache_fetcher.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <limits> | 8 #include <limits> |
9 #include <string> | |
10 #include <utility> | 9 #include <utility> |
11 #include <vector> | 10 #include <vector> |
12 | 11 |
12 #include "base/base64.h" | |
13 #include "base/bind.h" | 13 #include "base/bind.h" |
14 #include "base/bind_helpers.h" | 14 #include "base/bind_helpers.h" |
15 #include "base/callback.h" | 15 #include "base/callback.h" |
16 #include "base/command_line.h" | 16 #include "base/command_line.h" |
17 #include "base/compiler_specific.h" | 17 #include "base/compiler_specific.h" |
18 #include "base/containers/hash_tables.h" | 18 #include "base/containers/hash_tables.h" |
19 #include "base/location.h" | 19 #include "base/location.h" |
20 #include "base/logging.h" | 20 #include "base/logging.h" |
21 #include "base/memory/ptr_util.h" | 21 #include "base/memory/ptr_util.h" |
22 #include "base/memory/ref_counted.h" | 22 #include "base/memory/ref_counted.h" |
23 #include "base/metrics/histogram_macros.h" | 23 #include "base/metrics/histogram_macros.h" |
24 #include "base/sha1.h" | |
25 #include "base/strings/string_piece.h" | |
26 #include "base/task_runner_util.h" | |
27 #include "components/precache/core/precache_database.h" | |
24 #include "components/precache/core/precache_switches.h" | 28 #include "components/precache/core/precache_switches.h" |
25 #include "components/precache/core/proto/precache.pb.h" | 29 #include "components/precache/core/proto/precache.pb.h" |
26 #include "components/precache/core/proto/unfinished_work.pb.h" | 30 #include "components/precache/core/proto/unfinished_work.pb.h" |
27 #include "net/base/completion_callback.h" | 31 #include "net/base/completion_callback.h" |
28 #include "net/base/escape.h" | 32 #include "net/base/escape.h" |
29 #include "net/base/io_buffer.h" | 33 #include "net/base/io_buffer.h" |
30 #include "net/base/load_flags.h" | 34 #include "net/base/load_flags.h" |
31 #include "net/base/net_errors.h" | 35 #include "net/base/net_errors.h" |
36 #include "net/base/url_util.h" | |
32 #include "net/http/http_response_headers.h" | 37 #include "net/http/http_response_headers.h" |
33 #include "net/url_request/url_fetcher_response_writer.h" | 38 #include "net/url_request/url_fetcher_response_writer.h" |
34 #include "net/url_request/url_request_context_getter.h" | 39 #include "net/url_request/url_request_context_getter.h" |
35 #include "net/url_request/url_request_status.h" | 40 #include "net/url_request/url_request_status.h" |
36 | 41 |
37 namespace precache { | 42 namespace precache { |
38 | 43 |
39 // The following flags are for privacy reasons. For example, if a user clears | 44 // The following flags are for privacy reasons. For example, if a user clears |
40 // their cookies, but a tracking beacon is prefetched and the beacon specifies | 45 // their cookies, but a tracking beacon is prefetched and the beacon specifies |
41 // its source URL in a URL param, the beacon site would be able to rebuild a | 46 // its source URL in a URL param, the beacon site would be able to rebuild a |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
83 | 88 |
84 #if defined(PRECACHE_MANIFEST_URL_PREFIX) | 89 #if defined(PRECACHE_MANIFEST_URL_PREFIX) |
85 return PRECACHE_MANIFEST_URL_PREFIX; | 90 return PRECACHE_MANIFEST_URL_PREFIX; |
86 #else | 91 #else |
87 // The precache manifest URL prefix could not be determined, so return an | 92 // The precache manifest URL prefix could not be determined, so return an |
88 // empty string. | 93 // empty string. |
89 return std::string(); | 94 return std::string(); |
90 #endif | 95 #endif |
91 } | 96 } |
92 | 97 |
93 // Construct the URL of the precache manifest for the given name (either host or | |
94 // URL). The server is expecting a request for a URL consisting of the manifest | |
95 // URL prefix followed by the doubly escaped name. | |
96 std::string ConstructManifestURL(const std::string& prefix, | |
97 const std::string& name) { | |
98 return prefix + net::EscapeQueryParamValue( | |
99 net::EscapeQueryParamValue(name, false), false); | |
100 } | |
101 | |
102 // Attempts to parse a protobuf message from the response string of a | 98 // Attempts to parse a protobuf message from the response string of a |
103 // URLFetcher. If parsing is successful, the message parameter will contain the | 99 // URLFetcher. If parsing is successful, the message parameter will contain the |
104 // parsed protobuf and this function will return true. Otherwise, returns false. | 100 // parsed protobuf and this function will return true. Otherwise, returns false. |
105 bool ParseProtoFromFetchResponse(const net::URLFetcher& source, | 101 bool ParseProtoFromFetchResponse(const net::URLFetcher& source, |
106 ::google::protobuf::MessageLite* message) { | 102 ::google::protobuf::MessageLite* message) { |
107 std::string response_string; | 103 std::string response_string; |
108 | 104 |
109 if (!source.GetStatus().is_success()) { | 105 if (!source.GetStatus().is_success()) { |
110 DLOG(WARNING) << "Fetch failed: " << source.GetOriginalURL().spec(); | 106 DLOG(WARNING) << "Fetch failed: " << source.GetOriginalURL().spec(); |
111 return false; | 107 return false; |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
152 int num_bytes, | 148 int num_bytes, |
153 const net::CompletionCallback& callback) override { | 149 const net::CompletionCallback& callback) override { |
154 return num_bytes; | 150 return num_bytes; |
155 } | 151 } |
156 | 152 |
157 int Finish(const net::CompletionCallback& callback) override { | 153 int Finish(const net::CompletionCallback& callback) override { |
158 return net::OK; | 154 return net::OK; |
159 } | 155 } |
160 }; | 156 }; |
161 | 157 |
162 void AppendManifestURLIfValidAndNew( | 158 // Returns the base64 encoded resource URL hashes. The resource URLs are hashed |
163 const std::string& prefix, | 159 // individually, and 8 bytes of each hash is appended together, which is then |
164 const std::string& name, | 160 // encoded to base64. |
165 base::hash_set<std::string>* seen_manifest_urls, | 161 std::string GetResourceURLBase64Hash(const std::vector<GURL>& urls) { |
166 std::list<GURL>* unique_manifest_urls) { | 162 // Each resource hash uses 8 bytes. |
167 const std::string manifest_url = ConstructManifestURL(prefix, name); | 163 const int hash_bytes_size = 8; |
sclittle
2016/08/15 20:13:09
nit: Change this to be a size_t instead of an int,
Raj
2016/08/16 17:54:37
Done.
| |
168 bool first_seen = seen_manifest_urls->insert(manifest_url).second; | 164 std::string hashes; |
169 if (first_seen) { | 165 hashes.reserve(urls.size() * hash_bytes_size); |
170 GURL url(manifest_url); | 166 |
171 if (url.is_valid()) | 167 for (const auto& url : urls) { |
172 unique_manifest_urls->push_back(url); | 168 const std::string& url_spec = url.spec(); |
169 unsigned char sha1_hash[base::kSHA1Length]; | |
170 base::SHA1HashBytes( | |
171 reinterpret_cast<const unsigned char*>(url_spec.c_str()), | |
172 url_spec.size(), sha1_hash); | |
173 hashes.append(reinterpret_cast<const char*>(sha1_hash), hash_bytes_size); | |
173 } | 174 } |
175 base::Base64Encode(hashes, &hashes); | |
176 return hashes; | |
174 } | 177 } |
175 | 178 |
176 } // namespace | 179 } // namespace |
177 | 180 |
178 PrecacheFetcher::Fetcher::Fetcher( | 181 PrecacheFetcher::Fetcher::Fetcher( |
179 net::URLRequestContextGetter* request_context, | 182 net::URLRequestContextGetter* request_context, |
180 const GURL& url, | 183 const GURL& url, |
184 const std::string& referrer, | |
181 const base::Callback<void(const Fetcher&)>& callback, | 185 const base::Callback<void(const Fetcher&)>& callback, |
182 bool is_resource_request, | 186 bool is_resource_request, |
183 size_t max_bytes) | 187 size_t max_bytes) |
184 : request_context_(request_context), | 188 : request_context_(request_context), |
185 url_(url), | 189 url_(url), |
190 referrer_(referrer), | |
186 callback_(callback), | 191 callback_(callback), |
187 is_resource_request_(is_resource_request), | 192 is_resource_request_(is_resource_request), |
188 max_bytes_(max_bytes), | 193 max_bytes_(max_bytes), |
189 response_bytes_(0), | 194 response_bytes_(0), |
190 network_response_bytes_(0) { | 195 network_response_bytes_(0), |
196 was_cached_(false) { | |
197 DCHECK(url.is_valid()); | |
191 if (is_resource_request_) | 198 if (is_resource_request_) |
192 LoadFromCache(); | 199 LoadFromCache(); |
193 else | 200 else |
194 LoadFromNetwork(); | 201 LoadFromNetwork(); |
195 } | 202 } |
196 | 203 |
197 PrecacheFetcher::Fetcher::~Fetcher() {} | 204 PrecacheFetcher::Fetcher::~Fetcher() {} |
198 | 205 |
199 void PrecacheFetcher::Fetcher::LoadFromCache() { | 206 void PrecacheFetcher::Fetcher::LoadFromCache() { |
200 fetch_stage_ = FetchStage::CACHE; | 207 fetch_stage_ = FetchStage::CACHE; |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
233 const net::URLFetcher* source, | 240 const net::URLFetcher* source, |
234 int64_t current, | 241 int64_t current, |
235 int64_t total) { | 242 int64_t total) { |
236 // If going over the per-resource download cap. | 243 // If going over the per-resource download cap. |
237 if (fetch_stage_ == FetchStage::NETWORK && | 244 if (fetch_stage_ == FetchStage::NETWORK && |
238 // |current| is guaranteed to be non-negative, so this cast is safe. | 245 // |current| is guaranteed to be non-negative, so this cast is safe. |
239 static_cast<size_t>(std::max(current, total)) > max_bytes_) { | 246 static_cast<size_t>(std::max(current, total)) > max_bytes_) { |
240 VLOG(1) << "Cancelling " << url_ << ": (" << current << "/" << total | 247 VLOG(1) << "Cancelling " << url_ << ": (" << current << "/" << total |
241 << ") is over " << max_bytes_; | 248 << ") is over " << max_bytes_; |
242 | 249 |
243 // Cancel the download. | |
244 network_url_fetcher_.reset(); | |
245 | |
246 // Call the completion callback, to attempt the next download, or to trigger | 250 // Call the completion callback, to attempt the next download, or to trigger |
247 // cleanup in precache_delegate_->OnDone(). | 251 // cleanup in precache_delegate_->OnDone(). |
248 response_bytes_ = network_response_bytes_ = current; | 252 response_bytes_ = network_response_bytes_ = current; |
253 was_cached_ = source->WasCached(); | |
249 | 254 |
255 // Cancel the download. | |
256 network_url_fetcher_.reset(); | |
250 callback_.Run(*this); | 257 callback_.Run(*this); |
251 } | 258 } |
252 } | 259 } |
253 | 260 |
254 void PrecacheFetcher::Fetcher::OnURLFetchComplete( | 261 void PrecacheFetcher::Fetcher::OnURLFetchComplete( |
255 const net::URLFetcher* source) { | 262 const net::URLFetcher* source) { |
256 CHECK(source); | 263 CHECK(source); |
257 if (fetch_stage_ == FetchStage::CACHE && | 264 if (fetch_stage_ == FetchStage::CACHE && |
258 (source->GetStatus().error() == net::ERR_CACHE_MISS || | 265 (source->GetStatus().error() == net::ERR_CACHE_MISS || |
259 (source->GetResponseHeaders() && | 266 (source->GetResponseHeaders() && |
260 source->GetResponseHeaders()->HasValidators()))) { | 267 source->GetResponseHeaders()->HasValidators()))) { |
261 // If the resource was not found in the cache, request it from the | 268 // If the resource was not found in the cache, request it from the |
262 // network. | 269 // network. |
263 // | 270 // |
264 // If the resource was found in the cache, but contains validators, | 271 // If the resource was found in the cache, but contains validators, |
265 // request a refresh. The presence of validators increases the chance that | 272 // request a refresh. The presence of validators increases the chance that |
266 // we get a 304 response rather than a full one, thus allowing us to | 273 // we get a 304 response rather than a full one, thus allowing us to |
267 // refresh the cache with minimal network load. | 274 // refresh the cache with minimal network load. |
268 LoadFromNetwork(); | 275 LoadFromNetwork(); |
269 return; | 276 return; |
270 } | 277 } |
271 | 278 |
272 // If any of: | 279 // If any of: |
273 // - The request was for a config or manifest. | 280 // - The request was for a config or manifest. |
274 // - The resource was a cache hit without validators. | 281 // - The resource was a cache hit without validators. |
275 // - The response came from the network. | 282 // - The response came from the network. |
276 // Then Fetcher is done with this URL and can return control to the caller. | 283 // Then Fetcher is done with this URL and can return control to the caller. |
277 response_bytes_ = source->GetReceivedResponseContentLength(); | 284 response_bytes_ = source->GetReceivedResponseContentLength(); |
278 network_response_bytes_ = source->GetTotalReceivedBytes(); | 285 network_response_bytes_ = source->GetTotalReceivedBytes(); |
286 was_cached_ = source->WasCached(); | |
279 callback_.Run(*this); | 287 callback_.Run(*this); |
280 } | 288 } |
281 | 289 |
282 // static | 290 // static |
283 void PrecacheFetcher::RecordCompletionStatistics( | 291 void PrecacheFetcher::RecordCompletionStatistics( |
284 const PrecacheUnfinishedWork& unfinished_work, | 292 const PrecacheUnfinishedWork& unfinished_work, |
285 size_t remaining_manifest_urls_to_fetch, | 293 size_t remaining_manifest_urls_to_fetch, |
286 size_t remaining_resource_urls_to_fetch) { | 294 size_t remaining_resource_urls_to_fetch) { |
287 // These may be unset in tests. | 295 // These may be unset in tests. |
288 if (!unfinished_work.has_start_time()) | 296 if (!unfinished_work.has_start_time()) |
(...skipping 30 matching lines...) Expand all Loading... | |
319 1, kMaxResponseBytes, | 327 1, kMaxResponseBytes, |
320 100); | 328 100); |
321 } | 329 } |
322 | 330 |
323 PrecacheFetcher::PrecacheFetcher( | 331 PrecacheFetcher::PrecacheFetcher( |
324 net::URLRequestContextGetter* request_context, | 332 net::URLRequestContextGetter* request_context, |
325 const GURL& config_url, | 333 const GURL& config_url, |
326 const std::string& manifest_url_prefix, | 334 const std::string& manifest_url_prefix, |
327 std::unique_ptr<PrecacheUnfinishedWork> unfinished_work, | 335 std::unique_ptr<PrecacheUnfinishedWork> unfinished_work, |
328 uint32_t experiment_id, | 336 uint32_t experiment_id, |
337 const base::WeakPtr<PrecacheDatabase>& precache_database, | |
338 const scoped_refptr<base::SingleThreadTaskRunner>& db_task_runner, | |
329 PrecacheFetcher::PrecacheDelegate* precache_delegate) | 339 PrecacheFetcher::PrecacheDelegate* precache_delegate) |
330 : request_context_(request_context), | 340 : request_context_(request_context), |
331 config_url_(config_url), | 341 config_url_(config_url), |
332 manifest_url_prefix_(manifest_url_prefix), | 342 manifest_url_prefix_(manifest_url_prefix), |
343 precache_database_(precache_database), | |
344 db_task_runner_(std::move(db_task_runner)), | |
333 precache_delegate_(precache_delegate), | 345 precache_delegate_(precache_delegate), |
334 pool_(kMaxParallelFetches), | 346 pool_(kMaxParallelFetches), |
335 experiment_id_(experiment_id) { | 347 experiment_id_(experiment_id) { |
336 DCHECK(request_context_.get()); // Request context must be non-NULL. | 348 DCHECK(request_context_.get()); // Request context must be non-NULL. |
337 DCHECK(precache_delegate_); // Precache delegate must be non-NULL. | 349 DCHECK(precache_delegate_); // Precache delegate must be non-NULL. |
338 | 350 |
339 DCHECK_NE(GURL(), GetDefaultConfigURL()) | 351 DCHECK_NE(GURL(), GetDefaultConfigURL()) |
340 << "Could not determine the precache config settings URL."; | 352 << "Could not determine the precache config settings URL."; |
341 DCHECK_NE(std::string(), GetDefaultManifestURLPrefix()) | 353 DCHECK_NE(std::string(), GetDefaultManifestURLPrefix()) |
342 << "Could not determine the default precache manifest URL prefix."; | 354 << "Could not determine the default precache manifest URL prefix."; |
343 DCHECK(unfinished_work); | 355 DCHECK(unfinished_work); |
344 | 356 |
345 // Copy manifests and resources to member variables as a convenience. | 357 // Copy resources to member variable as a convenience. |
346 // TODO(bengr): Consider accessing these directly from the proto. | 358 // TODO(rajendrant): Consider accessing these directly from the proto, by |
347 for (const auto& manifest : unfinished_work->manifest()) { | 359 // keeping track of the current resource index. |
348 if (manifest.has_url()) | |
349 manifest_urls_to_fetch_.push_back(GURL(manifest.url())); | |
350 } | |
351 for (const auto& resource : unfinished_work->resource()) { | 360 for (const auto& resource : unfinished_work->resource()) { |
352 if (resource.has_url()) | 361 if (resource.has_url() && resource.has_top_host_name()) { |
353 resource_urls_to_fetch_.push_back(GURL(resource.url())); | 362 resources_to_fetch_.emplace_back(GURL(resource.url()), |
363 resource.top_host_name()); | |
364 } | |
354 } | 365 } |
355 unfinished_work_ = std::move(unfinished_work); | 366 unfinished_work_ = std::move(unfinished_work); |
356 } | 367 } |
357 | 368 |
358 PrecacheFetcher::~PrecacheFetcher() { | 369 PrecacheFetcher::~PrecacheFetcher() { |
359 } | 370 } |
360 | 371 |
361 std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() { | 372 std::unique_ptr<PrecacheUnfinishedWork> PrecacheFetcher::CancelPrecaching() { |
362 // This could get called multiple times, and it should be handled gracefully. | 373 // This could get called multiple times, and it should be handled gracefully. |
363 if (!unfinished_work_) | 374 if (!unfinished_work_) |
364 return nullptr; | 375 return nullptr; |
365 | 376 |
366 unfinished_work_->clear_manifest(); | |
367 unfinished_work_->clear_resource(); | 377 unfinished_work_->clear_resource(); |
368 for (const auto& manifest : manifest_urls_to_fetch_) | 378 if (top_hosts_to_fetch_) { |
369 unfinished_work_->add_manifest()->set_url(manifest.spec()); | 379 unfinished_work_->clear_top_host(); |
370 for (const auto& resource : resource_urls_to_fetch_) | 380 for (const auto& top_host : *top_hosts_to_fetch_) { |
371 unfinished_work_->add_resource()->set_url(resource.spec()); | 381 unfinished_work_->add_top_host()->set_hostname(top_host.hostname); |
382 } | |
383 } | |
384 for (const auto& resource : resources_to_fetch_) { | |
385 auto new_resource = unfinished_work_->add_resource(); | |
386 new_resource->set_url(resource.first.spec()); | |
387 new_resource->set_top_host_name(resource.second); | |
388 } | |
372 for (const auto& it : pool_.elements()) { | 389 for (const auto& it : pool_.elements()) { |
373 const Fetcher* fetcher = it.first; | 390 const Fetcher* fetcher = it.first; |
374 if (fetcher->is_resource_request()) | 391 GURL config_url = |
375 unfinished_work_->add_resource()->set_url(fetcher->url().spec()); | 392 config_url_.is_empty() ? GetDefaultConfigURL() : config_url_; |
376 else if (fetcher->url() != config_url_) | 393 if (fetcher->is_resource_request()) { |
377 unfinished_work_->add_manifest()->set_url(fetcher->url().spec()); | 394 auto resource = unfinished_work_->add_resource(); |
395 resource->set_url(fetcher->url().spec()); | |
396 resource->set_top_host_name(fetcher->referrer()); | |
397 } else if (fetcher->url() != config_url) { | |
398 unfinished_work_->add_top_host()->set_hostname(fetcher->referrer()); | |
399 } | |
378 } | 400 } |
379 manifest_urls_to_fetch_.clear(); | 401 top_hosts_to_fetch_.reset(); |
380 resource_urls_to_fetch_.clear(); | 402 resources_to_fetch_.clear(); |
381 pool_.DeleteAll(); | 403 pool_.DeleteAll(); |
382 return std::move(unfinished_work_); | 404 return std::move(unfinished_work_); |
383 } | 405 } |
384 | 406 |
385 void PrecacheFetcher::Start() { | 407 void PrecacheFetcher::Start() { |
386 if (unfinished_work_->has_config_settings()) { | 408 if (unfinished_work_->has_config_settings()) { |
387 DCHECK(unfinished_work_->has_start_time()); | 409 DCHECK(unfinished_work_->has_start_time()); |
388 DetermineManifests(); | 410 DetermineManifests(); |
389 return; | 411 return; |
390 } | 412 } |
391 | 413 |
392 GURL config_url = | 414 GURL config_url = |
393 config_url_.is_empty() ? GetDefaultConfigURL() : config_url_; | 415 config_url_.is_empty() ? GetDefaultConfigURL() : config_url_; |
394 | 416 |
395 DCHECK(config_url.is_valid()) << "Config URL not valid: " | 417 DCHECK(config_url.is_valid()) << "Config URL not valid: " |
396 << config_url.possibly_invalid_spec(); | 418 << config_url.possibly_invalid_spec(); |
397 | 419 |
398 // Fetch the precache configuration settings from the server. | 420 // Fetch the precache configuration settings from the server. |
399 DCHECK(pool_.IsEmpty()) << "All parallel requests should be available"; | 421 DCHECK(pool_.IsEmpty()) << "All parallel requests should be available"; |
400 VLOG(3) << "Fetching " << config_url; | 422 VLOG(3) << "Fetching " << config_url; |
401 pool_.Add(base::WrapUnique(new Fetcher( | 423 pool_.Add(base::WrapUnique(new Fetcher( |
402 request_context_.get(), config_url, | 424 request_context_.get(), config_url, std::string(), |
403 base::Bind(&PrecacheFetcher::OnConfigFetchComplete, | 425 base::Bind(&PrecacheFetcher::OnConfigFetchComplete, AsWeakPtr()), |
404 base::Unretained(this)), | |
405 false /* is_resource_request */, std::numeric_limits<int32_t>::max()))); | 426 false /* is_resource_request */, std::numeric_limits<int32_t>::max()))); |
406 } | 427 } |
407 | 428 |
408 void PrecacheFetcher::StartNextResourceFetch() { | 429 void PrecacheFetcher::StartNextResourceFetch() { |
409 DCHECK(unfinished_work_->has_config_settings()); | 430 DCHECK(unfinished_work_->has_config_settings()); |
410 while (!resource_urls_to_fetch_.empty() && pool_.IsAvailable()) { | 431 while (!resources_to_fetch_.empty() && pool_.IsAvailable()) { |
432 const auto& resource = resources_to_fetch_.front(); | |
411 const size_t max_bytes = | 433 const size_t max_bytes = |
412 std::min(unfinished_work_->config_settings().max_bytes_per_resource(), | 434 std::min(unfinished_work_->config_settings().max_bytes_per_resource(), |
413 unfinished_work_->config_settings().max_bytes_total() - | 435 unfinished_work_->config_settings().max_bytes_total() - |
414 unfinished_work_->total_bytes()); | 436 unfinished_work_->total_bytes()); |
415 VLOG(3) << "Fetching " << resource_urls_to_fetch_.front(); | 437 VLOG(3) << "Fetching " << resource.first << " " << resource.second; |
416 pool_.Add(base::WrapUnique( | 438 pool_.Add(base::WrapUnique(new Fetcher( |
417 new Fetcher(request_context_.get(), resource_urls_to_fetch_.front(), | 439 request_context_.get(), resource.first, resource.second, |
418 base::Bind(&PrecacheFetcher::OnResourceFetchComplete, | 440 base::Bind(&PrecacheFetcher::OnResourceFetchComplete, AsWeakPtr()), |
419 base::Unretained(this)), | 441 true /* is_resource_request */, max_bytes))); |
420 true /* is_resource_request */, max_bytes))); | |
421 | 442 |
422 resource_urls_to_fetch_.pop_front(); | 443 resources_to_fetch_.pop_front(); |
423 } | 444 } |
424 } | 445 } |
425 | 446 |
426 void PrecacheFetcher::StartNextManifestFetch() { | 447 void PrecacheFetcher::StartNextManifestFetch() { |
427 if (manifest_urls_to_fetch_.empty() || !pool_.IsAvailable()) | 448 if (!top_hosts_to_fetch_ || top_hosts_to_fetch_->empty() || |
449 !pool_.IsAvailable()) | |
428 return; | 450 return; |
429 | 451 |
430 // We only fetch one manifest at a time to keep the size of | 452 // We only fetch one manifest at a time to keep the size of |
431 // resource_urls_to_fetch_ as small as possible. | 453 // resources_to_fetch_ as small as possible. |
432 VLOG(3) << "Fetching " << manifest_urls_to_fetch_.front(); | 454 VLOG(3) << "Fetching " << top_hosts_to_fetch_->front().manifest_url; |
433 pool_.Add(base::WrapUnique(new Fetcher( | 455 pool_.Add(base::WrapUnique(new Fetcher( |
434 request_context_.get(), manifest_urls_to_fetch_.front(), | 456 request_context_.get(), top_hosts_to_fetch_->front().manifest_url, |
435 base::Bind(&PrecacheFetcher::OnManifestFetchComplete, | 457 top_hosts_to_fetch_->front().hostname, |
436 base::Unretained(this)), | 458 base::Bind(&PrecacheFetcher::OnManifestFetchComplete, AsWeakPtr()), |
437 false /* is_resource_request */, std::numeric_limits<int32_t>::max()))); | 459 false /* is_resource_request */, std::numeric_limits<int32_t>::max()))); |
438 | 460 top_hosts_to_fetch_->pop_front(); |
439 manifest_urls_to_fetch_.pop_front(); | |
440 } | 461 } |
441 | 462 |
442 void PrecacheFetcher::NotifyDone( | 463 void PrecacheFetcher::NotifyDone( |
443 size_t remaining_manifest_urls_to_fetch, | 464 size_t remaining_manifest_urls_to_fetch, |
444 size_t remaining_resource_urls_to_fetch) { | 465 size_t remaining_resource_urls_to_fetch) { |
445 RecordCompletionStatistics(*unfinished_work_, | 466 RecordCompletionStatistics(*unfinished_work_, |
446 remaining_manifest_urls_to_fetch, | 467 remaining_manifest_urls_to_fetch, |
447 remaining_resource_urls_to_fetch); | 468 remaining_resource_urls_to_fetch); |
448 precache_delegate_->OnDone(); | 469 precache_delegate_->OnDone(); |
449 } | 470 } |
450 | 471 |
451 void PrecacheFetcher::StartNextFetch() { | 472 void PrecacheFetcher::StartNextFetch() { |
452 DCHECK(unfinished_work_->has_config_settings()); | 473 DCHECK(unfinished_work_->has_config_settings()); |
453 // If over the precache total size cap, then stop prefetching. | 474 // If over the precache total size cap, then stop prefetching. |
454 if (unfinished_work_->total_bytes() > | 475 if (unfinished_work_->total_bytes() > |
455 unfinished_work_->config_settings().max_bytes_total()) { | 476 unfinished_work_->config_settings().max_bytes_total()) { |
456 size_t pending_manifests_in_pool = 0; | 477 size_t pending_manifests_in_pool = 0; |
457 size_t pending_resources_in_pool = 0; | 478 size_t pending_resources_in_pool = 0; |
458 for (const auto& element_pair : pool_.elements()) { | 479 for (const auto& element_pair : pool_.elements()) { |
459 const Fetcher* fetcher = element_pair.first; | 480 const Fetcher* fetcher = element_pair.first; |
460 if (fetcher->is_resource_request()) | 481 if (fetcher->is_resource_request()) |
461 pending_resources_in_pool++; | 482 pending_resources_in_pool++; |
462 else if (fetcher->url() != config_url_) | 483 else if (fetcher->url() != config_url_) |
463 pending_manifests_in_pool++; | 484 pending_manifests_in_pool++; |
464 } | 485 } |
465 pool_.DeleteAll(); | 486 pool_.DeleteAll(); |
466 NotifyDone(manifest_urls_to_fetch_.size() + pending_manifests_in_pool, | 487 int pending_top_hosts = |
467 resource_urls_to_fetch_.size() + pending_resources_in_pool); | 488 top_hosts_to_fetch_ ? top_hosts_to_fetch_->size() : 0; |
489 NotifyDone(pending_top_hosts + pending_manifests_in_pool, | |
490 resources_to_fetch_.size() + pending_resources_in_pool); | |
468 return; | 491 return; |
469 } | 492 } |
470 | 493 |
471 StartNextResourceFetch(); | 494 StartNextResourceFetch(); |
472 StartNextManifestFetch(); | 495 StartNextManifestFetch(); |
473 if (pool_.IsEmpty()) { | 496 if ((!top_hosts_to_fetch_ || top_hosts_to_fetch_->empty()) && |
497 resources_to_fetch_.empty() && pool_.IsEmpty()) { | |
474 // There are no more URLs to fetch, so end the precache cycle. | 498 // There are no more URLs to fetch, so end the precache cycle. |
475 NotifyDone(0, 0); | 499 NotifyDone(0, 0); |
476 // OnDone may have deleted this PrecacheFetcher, so don't do anything after | 500 // OnDone may have deleted this PrecacheFetcher, so don't do anything after |
477 // it is called. | 501 // it is called. |
478 } | 502 } |
479 } | 503 } |
480 | 504 |
481 void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) { | 505 void PrecacheFetcher::OnConfigFetchComplete(const Fetcher& source) { |
482 UpdateStats(source.response_bytes(), source.network_response_bytes()); | 506 UpdateStats(source.response_bytes(), source.network_response_bytes()); |
483 if (source.network_url_fetcher() == nullptr) { | 507 if (source.network_url_fetcher() == nullptr) { |
484 pool_.DeleteAll(); // Cancel any other ongoing request. | 508 pool_.DeleteAll(); // Cancel any other ongoing request. |
485 } else { | 509 } else { |
486 // Attempt to parse the config proto. On failure, continue on with the | 510 // Attempt to parse the config proto. On failure, continue on with the |
487 // default configuration. | 511 // default configuration. |
488 ParseProtoFromFetchResponse( | 512 ParseProtoFromFetchResponse( |
489 *source.network_url_fetcher(), | 513 *source.network_url_fetcher(), |
490 unfinished_work_->mutable_config_settings()); | 514 unfinished_work_->mutable_config_settings()); |
491 pool_.Delete(source); | 515 pool_.Delete(source); |
492 DetermineManifests(); | 516 DetermineManifests(); |
493 } | 517 } |
494 } | 518 } |
495 | 519 |
496 void PrecacheFetcher::DetermineManifests() { | 520 void PrecacheFetcher::DetermineManifests() { |
497 DCHECK(unfinished_work_->has_config_settings()); | 521 DCHECK(unfinished_work_->has_config_settings()); |
498 std::string prefix = manifest_url_prefix_.empty() | |
499 ? GetDefaultManifestURLPrefix() | |
500 : manifest_url_prefix_; | |
501 DCHECK_NE(std::string(), prefix) | |
502 << "Could not determine the precache manifest URL prefix."; | |
503 | 522 |
523 std::deque<std::string> top_hosts_to_fetch; | |
sclittle
2016/08/16 21:05:05
nit: Why is this a std::deque? Could this just be
Raj
2016/08/18 00:23:23
Done.
| |
524 std::unique_ptr<std::deque<ManifestHostInfo>> top_hosts_info( | |
sclittle
2016/08/15 20:13:09
nit: I think you can still remove this std::unique_ptr...
Raj
2016/08/16 17:54:37
Tried this earlier. PostTaskAndReplyWithResult() d
sclittle
2016/08/16 21:05:05
Darn, good catch though.
| |
525 new std::deque<ManifestHostInfo>); | |
526 | |
527 // Attempt to fetch manifests for starting hosts up to the maximum top sites | |
528 // count. If a manifest does not exist for a particular starting host, then | |
529 // the fetch will fail, and that starting host will be ignored. Starting | |
530 // hosts are not added if this is a continuation from a previous precache | |
531 // session. | |
532 if (resources_to_fetch_.empty()) { | |
504 // Keep track of manifest URLs that are being fetched, in order to elide | 533 // Keep track of manifest URLs that are being fetched, in order to elide |
505 // duplicates. | 534 // duplicates. |
506 base::hash_set<std::string> seen_manifest_urls; | 535 std::set<base::StringPiece> seen_top_hosts; |
536 int64_t rank = 0; | |
537 for (const auto& host : unfinished_work_->top_host()) { | |
538 ++rank; | |
539 if (rank > unfinished_work_->config_settings().top_sites_count()) | |
540 break; | |
541 if (seen_top_hosts.insert(host.hostname()).second) | |
542 top_hosts_to_fetch.push_back(host.hostname()); | |
543 } | |
507 | 544 |
508 // Attempt to fetch manifests for starting hosts up to the maximum top sites | 545 for (const std::string& host : |
509 // count. If a manifest does not exist for a particular starting host, then | 546 unfinished_work_->config_settings().forced_site()) { |
510 // the fetch will fail, and that starting host will be ignored. Starting | 547 if (seen_top_hosts.insert(host).second) |
511 // hosts are not added if this is a continuation from a previous precache | 548 top_hosts_to_fetch.push_back(host); |
512 // session. | 549 } |
513 if (manifest_urls_to_fetch_.empty() && | 550 } |
514 resource_urls_to_fetch_.empty()) { | 551 // We only fetch one manifest at a time to keep the size of |
515 int64_t rank = 0; | 552 // resources_to_fetch_ as small as possible. |
516 for (const auto& host : unfinished_work_->top_host()) { | 553 auto retrieve_manifest_callback = |
517 ++rank; | 554 base::Bind(&PrecacheFetcher::RetrieveManifestInfo, AsWeakPtr(), |
518 if (rank > unfinished_work_->config_settings().top_sites_count()) | 555 std::move(top_hosts_to_fetch), top_hosts_info.get()); |
519 break; | 556 db_task_runner_->PostTaskAndReply( |
520 AppendManifestURLIfValidAndNew(prefix, host.hostname(), | 557 FROM_HERE, retrieve_manifest_callback, |
521 &seen_manifest_urls, | 558 base::Bind(&PrecacheFetcher::OnManifestInfoRetrieved, |
522 &manifest_urls_to_fetch_); | 559 base::Unretained(this), base::Passed(&top_hosts_info))); |
sclittle
2016/08/16 21:05:05
using base::Unretained(this) here doesn't seem saf
Raj
2016/08/18 00:23:23
Done.
| |
523 } | 560 } |
524 | 561 |
525 for (const std::string& host | 562 void PrecacheFetcher::RetrieveManifestInfo( |
sclittle
2016/08/16 21:05:05
Maybe move this method to the PrecacheDatabase ins
Raj
2016/08/18 00:23:23
Moved it to an anonymous function.
Moving to Prec
sclittle
2016/08/24 02:39:44
Ok, sounds good. Thanks for investigating this.
| |
526 : unfinished_work_->config_settings().forced_site()) { | 563 std::deque<std::string> hosts_to_fetch, |
527 AppendManifestURLIfValidAndNew(prefix, host, &seen_manifest_urls, | 564 std::deque<ManifestHostInfo>* hosts_info) { |
528 &manifest_urls_to_fetch_); | 565 for (const auto& host : hosts_to_fetch) { |
529 } | 566 auto referrer_host_info = precache_database_->GetReferrerHost(host); |
567 if (referrer_host_info.id != PrecacheReferrerHostEntry::kInvalidId) { | |
568 std::vector<GURL> used_urls, unused_urls; | |
569 precache_database_->GetURLListForReferrerHost(referrer_host_info.id, | |
570 &used_urls, &unused_urls); | |
571 hosts_info->emplace_back(referrer_host_info.id, host, | |
sclittle
2016/08/15 20:13:09
nit: use push_back instead of emplace_back, since
Raj
2016/08/16 17:54:37
I have changed it.
But emplace_back seems to be be
sclittle
2016/08/16 21:05:05
Yep, there's slightly less copy, but the extra cop
| |
572 GetResourceURLBase64Hash(used_urls), | |
573 GetResourceURLBase64Hash(unused_urls)); | |
574 } else { | |
575 hosts_info->emplace_back(PrecacheReferrerHostEntry::kInvalidId, host, | |
576 std::string(), std::string()); | |
530 } | 577 } |
531 unfinished_work_->set_num_manifest_urls(manifest_urls_to_fetch_.size()); | 578 } |
532 StartNextFetch(); | |
533 } | 579 } |
534 | 580 |
581 void PrecacheFetcher::OnManifestInfoRetrieved( | |
582 std::unique_ptr<std::deque<ManifestHostInfo>> manifests_info) { | |
583 DCHECK(manifests_info); | |
584 const std::string prefix = manifest_url_prefix_.empty() | |
585 ? GetDefaultManifestURLPrefix() | |
586 : manifest_url_prefix_; | |
587 top_hosts_to_fetch_.reset(new std::deque<ManifestHostInfo>()); | |
sclittle
2016/08/16 21:05:05
Could you just move the existing deque instead of
Raj
2016/08/18 00:23:23
Done.
| |
588 for (auto& manifest : *manifests_info) { | |
589 GURL manifest_url( | |
590 prefix + | |
591 net::EscapeQueryParamValue( | |
592 net::EscapeQueryParamValue(manifest.hostname, false), false)); | |
593 if (manifest_url.is_valid() && | |
sclittle
2016/08/16 21:05:05
It seems like the manifest_url could only be inval
Raj
2016/08/18 00:23:23
Done.
| |
594 manifest.manifest_id != PrecacheReferrerHostEntry::kInvalidId) { | |
595 manifest_url = net::AppendOrReplaceQueryParameter( | |
596 manifest_url, "manifest", std::to_string(manifest.manifest_id)); | |
597 manifest_url = net::AppendOrReplaceQueryParameter( | |
598 manifest_url, "used_resources", manifest.used_url_hash); | |
599 manifest_url = net::AppendOrReplaceQueryParameter( | |
600 manifest_url, "unused_resources", manifest.unused_url_hash); | |
601 DCHECK(manifest_url.is_valid()); | |
602 } | |
603 manifest.manifest_url = manifest_url; | |
604 if (manifest_url.is_valid()) | |
605 top_hosts_to_fetch_->push_back(std::move(manifest)); | |
606 } | |
607 unfinished_work_->set_num_manifest_urls(top_hosts_to_fetch_->size()); | |
608 StartNextFetch(); | |
609 } | |
610 | |
611 ManifestHostInfo::ManifestHostInfo(int64_t manifest_id, | |
612 const std::string& hostname, | |
613 const std::string& used_url_hash, | |
614 const std::string& unused_url_hash) | |
615 : manifest_id(manifest_id), | |
616 hostname(hostname), | |
617 used_url_hash(used_url_hash), | |
618 unused_url_hash(unused_url_hash) {} | |
619 | |
620 ManifestHostInfo::~ManifestHostInfo() {} | |
621 | |
622 ManifestHostInfo::ManifestHostInfo(ManifestHostInfo&&) = default; | |
623 | |
624 ManifestHostInfo& ManifestHostInfo::operator=(ManifestHostInfo&&) = default; | |
625 | |
535 void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) { | 626 void PrecacheFetcher::OnManifestFetchComplete(const Fetcher& source) { |
536 DCHECK(unfinished_work_->has_config_settings()); | 627 DCHECK(unfinished_work_->has_config_settings()); |
537 UpdateStats(source.response_bytes(), source.network_response_bytes()); | 628 UpdateStats(source.response_bytes(), source.network_response_bytes()); |
538 if (source.network_url_fetcher() == nullptr) { | 629 if (source.network_url_fetcher() == nullptr) { |
539 pool_.DeleteAll(); // Cancel any other ongoing request. | 630 pool_.DeleteAll(); // Cancel any other ongoing request. |
540 } else { | 631 } else { |
541 PrecacheManifest manifest; | 632 PrecacheManifest manifest; |
542 | 633 |
543 if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) { | 634 if (ParseProtoFromFetchResponse(*source.network_url_fetcher(), &manifest)) { |
544 const int32_t len = | 635 const int32_t len = |
545 std::min(manifest.resource_size(), | 636 std::min(manifest.resource_size(), |
546 unfinished_work_->config_settings().top_resources_count()); | 637 unfinished_work_->config_settings().top_resources_count()); |
547 const uint64_t resource_bitset = | 638 const uint64_t resource_bitset = |
548 GetResourceBitset(manifest, experiment_id_); | 639 GetResourceBitset(manifest, experiment_id_); |
549 for (int i = 0; i < len; ++i) { | 640 for (int i = 0; i < len; ++i) { |
550 if (((0x1ULL << i) & resource_bitset) && | 641 if (((0x1ULL << i) & resource_bitset) && |
551 manifest.resource(i).has_url()) { | 642 manifest.resource(i).has_url()) { |
552 GURL url(manifest.resource(i).url()); | 643 GURL url(manifest.resource(i).url()); |
553 if (url.is_valid()) | 644 if (url.is_valid()) { |
554 resource_urls_to_fetch_.push_back(url); | 645 resources_to_fetch_.emplace_back(url, source.referrer()); |
646 } | |
555 } | 647 } |
556 } | 648 } |
649 db_task_runner_->PostTask( | |
650 FROM_HERE, | |
651 base::Bind(&PrecacheDatabase::UpdatePrecacheReferrerHost, | |
652 precache_database_, source.referrer(), | |
653 manifest.id().timestamp().seconds(), base::Time::Now())); | |
557 } | 654 } |
558 } | 655 } |
559 | 656 |
560 pool_.Delete(source); | 657 pool_.Delete(source); |
561 StartNextFetch(); | 658 StartNextFetch(); |
562 } | 659 } |
563 | 660 |
564 void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) { | 661 void PrecacheFetcher::OnResourceFetchComplete(const Fetcher& source) { |
565 UpdateStats(source.response_bytes(), source.network_response_bytes()); | 662 UpdateStats(source.response_bytes(), source.network_response_bytes()); |
663 | |
664 db_task_runner_->PostTask( | |
665 FROM_HERE, | |
666 base::Bind(&PrecacheDatabase::RecordURLPrefetch, precache_database_, | |
667 source.url(), source.referrer(), base::Time::Now(), | |
668 source.was_cached(), source.response_bytes())); | |
669 | |
566 pool_.Delete(source); | 670 pool_.Delete(source); |
671 | |
567 // The resource has already been put in the cache during the fetch process, so | 672 // The resource has already been put in the cache during the fetch process, so |
568 // nothing more needs to be done for the resource. | 673 // nothing more needs to be done for the resource. |
569 StartNextFetch(); | 674 StartNextFetch(); |
570 } | 675 } |
571 | 676 |
572 void PrecacheFetcher::UpdateStats(int64_t response_bytes, | 677 void PrecacheFetcher::UpdateStats(int64_t response_bytes, |
573 int64_t network_response_bytes) { | 678 int64_t network_response_bytes) { |
574 unfinished_work_->set_total_bytes( | 679 unfinished_work_->set_total_bytes( |
575 unfinished_work_->total_bytes() + response_bytes); | 680 unfinished_work_->total_bytes() + response_bytes); |
576 unfinished_work_->set_network_bytes( | 681 unfinished_work_->set_network_bytes( |
577 unfinished_work_->network_bytes() + network_response_bytes); | 682 unfinished_work_->network_bytes() + network_response_bytes); |
578 } | 683 } |
579 | 684 |
580 } // namespace precache | 685 } // namespace precache |
OLD | NEW |