| OLD | NEW |
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <map> |
| 6 #include <memory> |
| 5 #include <string> | 7 #include <string> |
| 8 #include <utility> |
| 6 | 9 |
| 7 #include "chrome/browser/predictors/loading_data_collector.h" | 10 #include "chrome/browser/predictors/loading_data_collector.h" |
| 11 #include "chrome/browser/predictors/loading_stats_collector.h" |
| 12 #include "chrome/browser/predictors/resource_prefetch_predictor.h" |
| 13 #include "chrome/browser/predictors/resource_prefetch_predictor_tables.h" |
| 8 #include "chrome/browser/profiles/profile.h" | 14 #include "chrome/browser/profiles/profile.h" |
| 15 #include "components/history/core/browser/history_service.h" |
| 9 #include "components/mime_util/mime_util.h" | 16 #include "components/mime_util/mime_util.h" |
| 17 #include "content/public/browser/browser_thread.h" |
| 10 #include "content/public/browser/resource_request_info.h" | 18 #include "content/public/browser/resource_request_info.h" |
| 11 #include "content/public/common/resource_type.h" | 19 #include "content/public/common/resource_type.h" |
| 20 #include "net/http/http_response_headers.h" |
| 12 #include "net/url_request/url_request.h" | 21 #include "net/url_request/url_request.h" |
| 13 | 22 |
| 23 using content::BrowserThread; |
| 24 |
| 14 namespace predictors { | 25 namespace predictors { |
| 15 | 26 |
| 16 namespace { | 27 namespace { |
| 17 | 28 |
| 18 bool g_allow_port_in_urls = false; | 29 bool g_allow_port_in_urls = false; |
| 19 | 30 |
// MIME types that identify a font resource.
// Sorted by decreasing likelihood according to HTTP archive.
// Both the pointers and the pointed-to characters are const: this is a
// read-only lookup table and must never be reassigned at runtime.
const char* const kFontMimeTypes[] = {"font/woff2",
                                      "application/x-font-woff",
                                      "application/font-woff",
                                      "application/font-woff2",
                                      "font/x-woff",
                                      "application/x-font-ttf",
                                      "font/woff",
                                      "font/ttf",
                                      "application/x-font-otf",
                                      "x-font/woff",
                                      "application/font-sfnt",
                                      "application/font-ttf"};
| 44 |
| 45 bool IsNoStore(const net::URLRequest& response) { |
| 46 if (response.was_cached()) |
| 47 return false; |
| 48 |
| 49 const net::HttpResponseInfo& response_info = response.response_info(); |
| 50 if (!response_info.headers.get()) |
| 51 return false; |
| 52 return response_info.headers->HasHeaderValue("cache-control", "no-store"); |
| 53 } |
| 54 |
| 20 } // namespace | 55 } // namespace |
| 21 | 56 |
| 57 OriginRequestSummary::OriginRequestSummary() |
| 58 : origin(), |
| 59 always_access_network(false), |
| 60 accessed_network(false), |
| 61 first_occurrence(0) {} |
| 62 |
| 63 OriginRequestSummary::OriginRequestSummary(const OriginRequestSummary& other) = |
| 64 default; |
| 65 |
| 66 OriginRequestSummary::~OriginRequestSummary() {} |
| 67 |
| 68 URLRequestSummary::URLRequestSummary() |
| 69 : resource_type(content::RESOURCE_TYPE_LAST_TYPE), |
| 70 priority(net::IDLE), |
| 71 before_first_contentful_paint(false), |
| 72 was_cached(false), |
| 73 has_validators(false), |
| 74 always_revalidate(false), |
| 75 is_no_store(false), |
| 76 network_accessed(false) {} |
| 77 |
| 78 URLRequestSummary::URLRequestSummary(const URLRequestSummary& other) = default; |
| 79 |
| 80 URLRequestSummary::~URLRequestSummary() {} |
| 81 |
| 82 // static |
| 83 bool URLRequestSummary::SummarizeResponse(const net::URLRequest& request, |
| 84 URLRequestSummary* summary) { |
| 85 const content::ResourceRequestInfo* request_info = |
| 86 content::ResourceRequestInfo::ForRequest(&request); |
| 87 if (!request_info) |
| 88 return false; |
| 89 |
| 90 // This method is called when the response is started, so this field reflects |
| 91 // the time at which the response began, not when it finished, as would |
| 92 // arguably be ideal. This means if firstContentfulPaint happens after the |
| 93 // response has started, but before it's finished, we will erroneously mark |
| 94 // the resource as having been loaded before firstContentfulPaint. This is |
| 95 // a rare and insignificant enough occurrence that we opt to record the time |
| 96 // here for the sake of simplicity. |
| 97 summary->response_time = base::TimeTicks::Now(); |
| 98 summary->resource_url = request.original_url(); |
| 99 summary->request_url = request.url(); |
| 100 content::ResourceType resource_type_from_request = |
| 101 request_info->GetResourceType(); |
| 102 summary->priority = request.priority(); |
| 103 request.GetMimeType(&summary->mime_type); |
| 104 summary->was_cached = request.was_cached(); |
| 105 summary->resource_type = LoadingDataCollector::GetResourceType( |
| 106 resource_type_from_request, summary->mime_type); |
| 107 |
| 108 scoped_refptr<net::HttpResponseHeaders> headers = |
| 109 request.response_info().headers; |
| 110 if (headers.get()) { |
| 111 summary->has_validators = headers->HasValidators(); |
| 112 // RFC 2616, section 14.9. |
| 113 summary->always_revalidate = |
| 114 headers->HasHeaderValue("cache-control", "no-cache") || |
| 115 headers->HasHeaderValue("pragma", "no-cache") || |
| 116 headers->HasHeaderValue("vary", "*"); |
| 117 summary->is_no_store = IsNoStore(request); |
| 118 } |
| 119 summary->network_accessed = request.response_info().network_accessed; |
| 120 return true; |
| 121 } |
| 122 |
| 123 PageRequestSummary::PageRequestSummary(const GURL& i_main_frame_url) |
| 124 : main_frame_url(i_main_frame_url), |
| 125 initial_url(i_main_frame_url), |
| 126 first_contentful_paint(base::TimeTicks::Max()) {} |
| 127 |
| 128 PageRequestSummary::PageRequestSummary(const PageRequestSummary& other) = |
| 129 default; |
| 130 |
| 131 void PageRequestSummary::UpdateOrAddToOrigins( |
| 132 const URLRequestSummary& request_summary) { |
| 133 const GURL& request_url = request_summary.request_url; |
| 134 DCHECK(request_url.is_valid()); |
| 135 if (!request_url.is_valid()) |
| 136 return; |
| 137 |
| 138 GURL origin = request_url.GetOrigin(); |
| 139 auto it = origins.find(origin); |
| 140 if (it == origins.end()) { |
| 141 OriginRequestSummary summary; |
| 142 summary.origin = origin; |
| 143 summary.first_occurrence = origins.size(); |
| 144 it = origins.insert({origin, summary}).first; |
| 145 } |
| 146 |
| 147 it->second.always_access_network |= |
| 148 request_summary.always_revalidate || request_summary.is_no_store; |
| 149 it->second.accessed_network |= request_summary.network_accessed; |
| 150 } |
| 151 |
| 152 PageRequestSummary::~PageRequestSummary() {} |
| 153 |
| 154 content::ResourceType LoadingDataCollector::GetResourceTypeFromMimeType( |
| 155 const std::string& mime_type, |
| 156 content::ResourceType fallback) { |
| 157 if (mime_type.empty()) { |
| 158 return fallback; |
| 159 } else if (mime_util::IsSupportedImageMimeType(mime_type)) { |
| 160 return content::RESOURCE_TYPE_IMAGE; |
| 161 } else if (mime_util::IsSupportedJavascriptMimeType(mime_type)) { |
| 162 return content::RESOURCE_TYPE_SCRIPT; |
| 163 } else if (net::MatchesMimeType("text/css", mime_type)) { |
| 164 return content::RESOURCE_TYPE_STYLESHEET; |
| 165 } else { |
| 166 bool found = |
| 167 std::any_of(std::begin(kFontMimeTypes), std::end(kFontMimeTypes), |
| 168 [&mime_type](const std::string& mime) { |
| 169 return net::MatchesMimeType(mime, mime_type); |
| 170 }); |
| 171 if (found) |
| 172 return content::RESOURCE_TYPE_FONT_RESOURCE; |
| 173 } |
| 174 return fallback; |
| 175 } |
| 176 |
| 177 content::ResourceType LoadingDataCollector::GetResourceType( |
| 178 content::ResourceType resource_type, |
| 179 const std::string& mime_type) { |
| 180 // Restricts content::RESOURCE_TYPE_{PREFETCH,SUB_RESOURCE,XHR} to a small set |
| 181 // of mime types, because these resource types don't communicate how the |
| 182 // resources will be used. |
| 183 if (resource_type == content::RESOURCE_TYPE_PREFETCH || |
| 184 resource_type == content::RESOURCE_TYPE_SUB_RESOURCE || |
| 185 resource_type == content::RESOURCE_TYPE_XHR) { |
| 186 return GetResourceTypeFromMimeType(mime_type, |
| 187 content::RESOURCE_TYPE_LAST_TYPE); |
| 188 } |
| 189 return resource_type; |
| 190 } |
| 191 |
| 22 // static | 192 // static |
| 23 bool LoadingDataCollector::ShouldRecordRequest( | 193 bool LoadingDataCollector::ShouldRecordRequest( |
| 24 net::URLRequest* request, | 194 net::URLRequest* request, |
| 25 content::ResourceType resource_type) { | 195 content::ResourceType resource_type) { |
| 26 const content::ResourceRequestInfo* request_info = | 196 const content::ResourceRequestInfo* request_info = |
| 27 content::ResourceRequestInfo::ForRequest(request); | 197 content::ResourceRequestInfo::ForRequest(request); |
| 28 if (!request_info) | 198 if (!request_info) |
| 29 return false; | 199 return false; |
| 30 | 200 |
| 31 if (!request_info->IsMainFrame()) | 201 if (!request_info->IsMainFrame()) |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 91 return false; | 261 return false; |
| 92 | 262 |
| 93 return true; | 263 return true; |
| 94 } | 264 } |
| 95 | 265 |
| 96 // static | 266 // static |
| 97 bool LoadingDataCollector::IsHandledResourceType( | 267 bool LoadingDataCollector::IsHandledResourceType( |
| 98 content::ResourceType resource_type, | 268 content::ResourceType resource_type, |
| 99 const std::string& mime_type) { | 269 const std::string& mime_type) { |
| 100 content::ResourceType actual_resource_type = | 270 content::ResourceType actual_resource_type = |
| 101 ResourcePrefetchPredictor::GetResourceType(resource_type, mime_type); | 271 GetResourceType(resource_type, mime_type); |
| 102 return actual_resource_type == content::RESOURCE_TYPE_STYLESHEET || | 272 return actual_resource_type == content::RESOURCE_TYPE_STYLESHEET || |
| 103 actual_resource_type == content::RESOURCE_TYPE_SCRIPT || | 273 actual_resource_type == content::RESOURCE_TYPE_SCRIPT || |
| 104 actual_resource_type == content::RESOURCE_TYPE_IMAGE || | 274 actual_resource_type == content::RESOURCE_TYPE_IMAGE || |
| 105 actual_resource_type == content::RESOURCE_TYPE_FONT_RESOURCE; | 275 actual_resource_type == content::RESOURCE_TYPE_FONT_RESOURCE; |
| 106 } | 276 } |
| 107 | 277 |
| 108 // static | 278 // static |
| 109 void LoadingDataCollector::SetAllowPortInUrlsForTesting(bool state) { | 279 void LoadingDataCollector::SetAllowPortInUrlsForTesting(bool state) { |
| 110 g_allow_port_in_urls = state; | 280 g_allow_port_in_urls = state; |
| 111 } | 281 } |
| 112 | 282 |
| 113 LoadingDataCollector::LoadingDataCollector(ResourcePrefetchPredictor* predictor) | 283 LoadingDataCollector::LoadingDataCollector( |
| 114 : predictor_(predictor) {} | 284 ResourcePrefetchPredictor* predictor, |
| 285 predictors::LoadingStatsCollector* stats_collector, |
| 286 const LoadingPredictorConfig& config) |
| 287 : predictor_(predictor), |
| 288 stats_collector_(stats_collector), |
| 289 config_(config) {} |
| 115 | 290 |
| 116 LoadingDataCollector::~LoadingDataCollector() {} | 291 LoadingDataCollector::~LoadingDataCollector() {} |
| 117 | 292 |
| 118 void LoadingDataCollector::RecordURLRequest( | 293 void LoadingDataCollector::RecordURLRequest(const URLRequestSummary& request) { |
| 119 const ResourcePrefetchPredictor::URLRequestSummary& request) { | 294 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| 120 predictor_->RecordURLRequest(request); | 295 DCHECK_EQ(request.resource_type, content::RESOURCE_TYPE_MAIN_FRAME); |
| 296 |
| 297 CleanupAbandonedNavigations(request.navigation_id); |
| 298 |
| 299 // New empty navigation entry. |
| 300 const GURL& main_frame_url = request.navigation_id.main_frame_url; |
| 301 inflight_navigations_.emplace( |
| 302 request.navigation_id, |
| 303 base::MakeUnique<PageRequestSummary>(main_frame_url)); |
| 121 } | 304 } |
| 122 | 305 |
| 123 void LoadingDataCollector::RecordURLResponse( | 306 void LoadingDataCollector::RecordURLResponse( |
| 124 const ResourcePrefetchPredictor::URLRequestSummary& response) { | 307 const URLRequestSummary& response) { |
| 125 predictor_->RecordURLResponse(response); | 308 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| 309 |
| 310 if (response.resource_type == content::RESOURCE_TYPE_MAIN_FRAME) |
| 311 return; |
| 312 |
| 313 NavigationMap::const_iterator nav_it = |
| 314 inflight_navigations_.find(response.navigation_id); |
| 315 if (nav_it == inflight_navigations_.end()) |
| 316 return; |
| 317 auto& page_request_summary = *nav_it->second; |
| 318 |
| 319 if (!response.is_no_store) |
| 320 page_request_summary.subresource_requests.push_back(response); |
| 321 |
| 322 if (config_.is_origin_learning_enabled) |
| 323 page_request_summary.UpdateOrAddToOrigins(response); |
| 126 } | 324 } |
| 127 | 325 |
| 128 void LoadingDataCollector::RecordURLRedirect( | 326 void LoadingDataCollector::RecordURLRedirect( |
| 129 const ResourcePrefetchPredictor::URLRequestSummary& response) { | 327 const URLRequestSummary& response) { |
| 130 predictor_->RecordURLRedirect(response); | 328 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| 329 |
| 330 if (response.resource_type == content::RESOURCE_TYPE_MAIN_FRAME) |
| 331 OnMainFrameRedirect(response); |
| 332 else |
| 333 OnSubresourceRedirect(response); |
| 131 } | 334 } |
| 132 | 335 |
| 133 void LoadingDataCollector::RecordMainFrameLoadComplete( | 336 void LoadingDataCollector::RecordMainFrameLoadComplete( |
| 134 const NavigationID& navigation_id) { | 337 const NavigationID& navigation_id) { |
| 135 predictor_->RecordMainFrameLoadComplete(navigation_id); | 338 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| 339 |
| 340 // WebContents can return an empty URL if the navigation entry corresponding |
| 341 // to the navigation has not been created yet. |
| 342 if (navigation_id.main_frame_url.is_empty()) |
| 343 return; |
| 344 |
| 345 NavigationMap::iterator nav_it = inflight_navigations_.find(navigation_id); |
| 346 if (nav_it == inflight_navigations_.end()) |
| 347 return; |
| 348 |
| 349 // Remove the navigation from the inflight navigations. |
| 350 std::unique_ptr<PageRequestSummary> summary = std::move(nav_it->second); |
| 351 inflight_navigations_.erase(nav_it); |
| 352 |
| 353 // Set before_first_contentful paint for each resource. |
| 354 for (auto& request_summary : summary->subresource_requests) { |
| 355 request_summary.before_first_contentful_paint = |
| 356 request_summary.response_time < summary->first_contentful_paint; |
| 357 } |
| 358 |
| 359 if (stats_collector_) |
| 360 stats_collector_->RecordPageRequestSummary(*summary); |
| 361 |
| 362 if (predictor_) { |
| 363 predictor_->RecordPageRequestSummary(std::move(summary)); |
| 364 } |
| 136 } | 365 } |
| 137 | 366 |
| 138 void LoadingDataCollector::RecordFirstContentfulPaint( | 367 void LoadingDataCollector::RecordFirstContentfulPaint( |
| 139 const NavigationID& navigation_id, | 368 const NavigationID& navigation_id, |
| 140 const base::TimeTicks& first_contentful_paint) { | 369 const base::TimeTicks& first_contentful_paint) { |
| 141 predictor_->RecordFirstContentfulPaint(navigation_id, first_contentful_paint); | 370 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| 371 |
| 372 NavigationMap::iterator nav_it = inflight_navigations_.find(navigation_id); |
| 373 if (nav_it != inflight_navigations_.end()) |
| 374 nav_it->second->first_contentful_paint = first_contentful_paint; |
| 375 } |
| 376 |
| 377 void LoadingDataCollector::OnMainFrameRedirect( |
| 378 const URLRequestSummary& response) { |
| 379 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| 380 |
| 381 const GURL& main_frame_url = response.navigation_id.main_frame_url; |
| 382 std::unique_ptr<PageRequestSummary> summary; |
| 383 NavigationMap::iterator nav_it = |
| 384 inflight_navigations_.find(response.navigation_id); |
| 385 if (nav_it != inflight_navigations_.end()) { |
| 386 summary = std::move(nav_it->second); |
| 387 inflight_navigations_.erase(nav_it); |
| 388 } |
| 389 |
| 390 // The redirect url may be empty if the URL was invalid. |
| 391 if (response.redirect_url.is_empty()) |
| 392 return; |
| 393 |
| 394 // If we lost the information about the first hop for some reason. |
| 395 if (!summary) { |
| 396 summary = base::MakeUnique<PageRequestSummary>(main_frame_url); |
| 397 } |
| 398 |
| 399 // A redirect will not lead to another OnMainFrameRequest call, so record the |
| 400 // redirect url as a new navigation id and save the initial url. |
| 401 NavigationID navigation_id(response.navigation_id); |
| 402 navigation_id.main_frame_url = response.redirect_url; |
| 403 summary->main_frame_url = response.redirect_url; |
| 404 inflight_navigations_.emplace(navigation_id, std::move(summary)); |
| 405 } |
| 406 |
| 407 void LoadingDataCollector::OnSubresourceRedirect( |
| 408 const URLRequestSummary& response) { |
| 409 DCHECK_CURRENTLY_ON(BrowserThread::UI); |
| 410 |
| 411 if (!config_.is_origin_learning_enabled) |
| 412 return; |
| 413 |
| 414 NavigationMap::const_iterator nav_it = |
| 415 inflight_navigations_.find(response.navigation_id); |
| 416 if (nav_it == inflight_navigations_.end()) |
| 417 return; |
| 418 auto& page_request_summary = *nav_it->second; |
| 419 page_request_summary.UpdateOrAddToOrigins(response); |
| 420 } |
| 421 |
| 422 void LoadingDataCollector::CleanupAbandonedNavigations( |
| 423 const NavigationID& navigation_id) { |
| 424 if (stats_collector_) |
| 425 stats_collector_->CleanupAbandonedStats(); |
| 426 |
| 427 static const base::TimeDelta max_navigation_age = |
| 428 base::TimeDelta::FromSeconds(config_.max_navigation_lifetime_seconds); |
| 429 |
| 430 base::TimeTicks time_now = base::TimeTicks::Now(); |
| 431 for (NavigationMap::iterator it = inflight_navigations_.begin(); |
| 432 it != inflight_navigations_.end();) { |
| 433 if ((it->first.tab_id == navigation_id.tab_id) || |
| 434 (time_now - it->first.creation_time > max_navigation_age)) { |
| 435 inflight_navigations_.erase(it++); |
| 436 } else { |
| 437 ++it; |
| 438 } |
| 439 } |
| 142 } | 440 } |
| 143 | 441 |
| 144 } // namespace predictors | 442 } // namespace predictors |
| OLD | NEW |