Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
| 6 * met: | 6 * met: |
| 7 * | 7 * |
| 8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
| (...skipping 249 matching lines...) | |
| 260 | 260 |
| 261 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS | 261 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS |
| 262 // documents which leads to bugs like <https://crbug.com/251898>. Not being | 262 // documents which leads to bugs like <https://crbug.com/251898>. Not being |
| 263 // able to rewrite URLs inside CSS documents means that resources imported from | 263 // able to rewrite URLs inside CSS documents means that resources imported from |
| 264 // url(...) statements in CSS might not work when rewriting links for the | 264 // url(...) statements in CSS might not work when rewriting links for the |
| 265 // "Webpage, Complete" method of saving a page. It will take some work but it | 265 // "Webpage, Complete" method of saving a page. It will take some work but it |
| 266 // needs to be done if we want to continue to support non-MHTML saved pages. | 266 // needs to be done if we want to continue to support non-MHTML saved pages. |
| 267 | 267 |
| 268 FrameSerializer::FrameSerializer(Deque<SerializedResource>& resources, | 268 FrameSerializer::FrameSerializer(Deque<SerializedResource>& resources, |
| 269 Delegate& delegate) | 269 Delegate& delegate) |
| 270 : resources_(&resources), is_serializing_css_(false), delegate_(delegate) {} | 270 : resources_(&resources), |
| 271 is_serializing_css_(false), | |
| 272 delegate_(delegate), | |
| 273 total_image_count_(0), | |
| 274 loaded_image_count_(0), | |
| 275 total_css_count_(0), | |
| 276 loaded_css_count_(0), | |
| 277 should_collect_problem_metric_(false) {} | |
| 271 | 278 |
| 272 void FrameSerializer::SerializeFrame(const LocalFrame& frame) { | 279 void FrameSerializer::SerializeFrame(const LocalFrame& frame) { |
| 273 TRACE_EVENT0("page-serialization", "FrameSerializer::serializeFrame"); | 280 TRACE_EVENT0("page-serialization", "FrameSerializer::serializeFrame"); |
| 274 DCHECK(frame.GetDocument()); | 281 DCHECK(frame.GetDocument()); |
| 275 Document& document = *frame.GetDocument(); | 282 Document& document = *frame.GetDocument(); |
| 276 KURL url = document.Url(); | 283 KURL url = document.Url(); |
| 277 | 284 |
| 278 // If frame is an image document, add the image and don't continue | 285 // If frame is an image document, add the image and don't continue |
| 279 if (document.IsImageDocument()) { | 286 if (document.IsImageDocument()) { |
| 280 ImageDocument& image_document = ToImageDocument(document); | 287 ImageDocument& image_document = ToImageDocument(document); |
| (...skipping 11 matching lines...) | |
| 292 String text = | 299 String text = |
| 293 SerializeNodes<EditingStrategy>(accumulator, document, kIncludeNode); | 300 SerializeNodes<EditingStrategy>(accumulator, document, kIncludeNode); |
| 294 | 301 |
| 295 CString frame_html = | 302 CString frame_html = |
| 296 document.Encoding().Encode(text, WTF::kEntitiesForUnencodables); | 303 document.Encoding().Encode(text, WTF::kEntitiesForUnencodables); |
| 297 resources_->push_back(SerializedResource( | 304 resources_->push_back(SerializedResource( |
| 298 url, document.SuggestedMIMEType(), | 305 url, document.SuggestedMIMEType(), |
| 299 SharedBuffer::Create(frame_html.data(), frame_html.length()))); | 306 SharedBuffer::Create(frame_html.data(), frame_html.length()))); |
| 300 } | 307 } |
| 301 | 308 |
| 309 should_collect_problem_metric_ = | |
| 310 delegate_.ShouldCollectProblemMetric() && frame.IsMainFrame(); | |
| 302 for (Node* node : serialized_nodes) { | 311 for (Node* node : serialized_nodes) { |
| 303 DCHECK(node); | 312 DCHECK(node); |
| 304 if (!node->IsElementNode()) | 313 if (!node->IsElementNode()) |
| 305 continue; | 314 continue; |
| 306 | 315 |
| 307 Element& element = ToElement(*node); | 316 Element& element = ToElement(*node); |
| 308 // We have to process in-line style as it might contain some resources | 317 // We have to process in-line style as it might contain some resources |
| 309 // (typically background images). | 318 // (typically background images). |
| 310 if (element.IsStyledElement()) { | 319 if (element.IsStyledElement()) { |
| 311 RetrieveResourcesForProperties(element.InlineStyle(), document); | 320 RetrieveResourcesForProperties(element.InlineStyle(), document); |
| (...skipping 18 matching lines...) | |
| 330 } | 339 } |
| 331 } else if (isHTMLLinkElement(element)) { | 340 } else if (isHTMLLinkElement(element)) { |
| 332 HTMLLinkElement& link_element = toHTMLLinkElement(element); | 341 HTMLLinkElement& link_element = toHTMLLinkElement(element); |
| 333 if (CSSStyleSheet* sheet = link_element.sheet()) { | 342 if (CSSStyleSheet* sheet = link_element.sheet()) { |
| 334 KURL url = document.CompleteURL( | 343 KURL url = document.CompleteURL( |
| 335 link_element.getAttribute(HTMLNames::hrefAttr)); | 344 link_element.getAttribute(HTMLNames::hrefAttr)); |
| 336 SerializeCSSStyleSheet(*sheet, url); | 345 SerializeCSSStyleSheet(*sheet, url); |
| 337 } | 346 } |
| 338 } else if (isHTMLStyleElement(element)) { | 347 } else if (isHTMLStyleElement(element)) { |
| 339 HTMLStyleElement& style_element = toHTMLStyleElement(element); | 348 HTMLStyleElement& style_element = toHTMLStyleElement(element); |
| 340 if (CSSStyleSheet* sheet = style_element.sheet()) | 349 CSSStyleSheet* sheet = style_element.sheet(); |
| 350 if (sheet) | |
| 341 SerializeCSSStyleSheet(*sheet, KURL()); | 351 SerializeCSSStyleSheet(*sheet, KURL()); |
| 342 } | 352 } |
| 343 } | 353 } |
| 354 if (should_collect_problem_metric_) { | |
| 355 // Report detectors through UMA. | |
| 356 // We're having exact 21 buckets for percentage because we want to have 5% | |
| 357 // in each bucket to avoid potential spikes in the distribution. | |
| 358 DCHECK_LE(loaded_image_count_, total_image_count_); | |
| 359 DEFINE_STATIC_LOCAL( | |
| 360 CustomCountHistogram, image_histogram, | |
| 361 ("PageSerialization.ProblemDetection.LoadedImagePercentage", 1, 100, | |
| 362 21)); | |
| 363 image_histogram.Count( | |
| 364 static_cast<int64_t>(loaded_image_count_ * 100 / total_image_count_)); | |
| 365 UMA_HISTOGRAM_COUNTS_100( | |
| 366 "PageSerialization.ProblemDetection.TotalImageCount", | |
| 367 static_cast<int64_t>(total_image_count_)); | |
| 368 | |
| 369 DCHECK_LE(loaded_css_count_, total_css_count_); | |
| 370 DEFINE_STATIC_LOCAL( | |
| 371 CustomCountHistogram, css_histogram, | |
| 372 ("PageSerialization.ProblemDetection.LoadedCSSPercentage", 1, 100, 21)); | |
| 373 css_histogram.Count( | |
| 374 static_cast<int64_t>(loaded_image_count_ * 100 / total_image_count_)); | |
| 375 UMA_HISTOGRAM_COUNTS_100("PageSerialization.ProblemDetection.TotalCSSCount", | |
| 376 static_cast<int64_t>(total_css_count_)); | |
| 377 should_collect_problem_metric_ = false; | |
| 378 } | |
| 344 } | 379 } |
| 345 | 380 |
| 346 void FrameSerializer::SerializeCSSStyleSheet(CSSStyleSheet& style_sheet, | 381 void FrameSerializer::SerializeCSSStyleSheet(CSSStyleSheet& style_sheet, |
| 347 const KURL& url) { | 382 const KURL& url) { |
| 348 // If the URL is invalid or if it is a data URL this means that this CSS is | 383 // If the URL is invalid or if it is a data URL this means that this CSS is |
| 349 // defined inline, respectively in a <style> tag or in the data URL itself. | 384 // defined inline, respectively in a <style> tag or in the data URL itself. |
| 350 bool is_inline_css = !url.IsValid() || url.ProtocolIsData(); | 385 bool is_inline_css = !url.IsValid() || url.ProtocolIsData(); |
| 386 if (should_collect_problem_metric_) | |
| 387 total_css_count_++; | |
| 351 // If this CSS is not inline then it is identifiable by its URL. So just skip | 388 // If this CSS is not inline then it is identifiable by its URL. So just skip |
| 352 // it if it has already been analyzed before. | 389 // it if it has already been analyzed before. |
| 353 if (!is_inline_css && (resource_urls_.Contains(url) || | 390 if (!is_inline_css && (resource_urls_.Contains(url) || |
| 354 delegate_.ShouldSkipResourceWithURL(url))) { | 391 delegate_.ShouldSkipResourceWithURL(url))) { |
| 355 return; | 392 return; |
| 356 } | 393 } |
| 394 if (should_collect_problem_metric_ && style_sheet.LoadCompleted()) | |
| 395 loaded_css_count_++; | |
| 357 | 396 |
| 358 TRACE_EVENT2("page-serialization", "FrameSerializer::serializeCSSStyleSheet", | 397 TRACE_EVENT2("page-serialization", "FrameSerializer::serializeCSSStyleSheet", |
| 359 "type", "CSS", "url", url.ElidedString().Utf8().data()); | 398 "type", "CSS", "url", url.ElidedString().Utf8().data()); |
| 360 // Only report UMA metric if this is not a reentrant CSS serialization call. | 399 // Only report UMA metric if this is not a reentrant CSS serialization call. |
| 361 double css_start_time = 0; | 400 double css_start_time = 0; |
| 362 if (!is_serializing_css_) { | 401 if (!is_serializing_css_) { |
| 363 is_serializing_css_ = true; | 402 is_serializing_css_ = true; |
| 364 css_start_time = MonotonicallyIncreasingTime(); | 403 css_start_time = MonotonicallyIncreasingTime(); |
| 365 } | 404 } |
| 366 | 405 |
| (...skipping 104 matching lines...) | |
| 471 DLOG(ERROR) << "No data for resource " << url.GetString(); | 510 DLOG(ERROR) << "No data for resource " << url.GetString(); |
| 472 return; | 511 return; |
| 473 } | 512 } |
| 474 | 513 |
| 475 resources_->push_back(SerializedResource(url, mime_type, std::move(data))); | 514 resources_->push_back(SerializedResource(url, mime_type, std::move(data))); |
| 476 resource_urls_.insert(url); | 515 resource_urls_.insert(url); |
| 477 } | 516 } |
| 478 | 517 |
| 479 void FrameSerializer::AddImageToResources(ImageResourceContent* image, | 518 void FrameSerializer::AddImageToResources(ImageResourceContent* image, |
| 480 const KURL& url) { | 519 const KURL& url) { |
| 520 if (should_collect_problem_metric_) | |
| 521 total_image_count_++; | |
|
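Review comment by Łukasz Anforowicz (2017/06/08 16:08:45):
I wonder how the data will be calculated in case o… [comment truncated in the review page]

Reply by romax (2017/06/08 20:02:07):
Sorry for lacking knowledge about processing HTMLs… [comment truncated in the review page]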
| |
| 481 if (!image || !image->HasImage() || image->ErrorOccurred() || | 522 if (!image || !image->HasImage() || image->ErrorOccurred() || |
| 482 !ShouldAddURL(url)) | 523 !ShouldAddURL(url)) |
| 483 return; | 524 return; |
| 525 if (should_collect_problem_metric_ && image->IsLoaded()) | |
| 526 loaded_image_count_++; | |
| 484 | 527 |
| 485 TRACE_EVENT2("page-serialization", "FrameSerializer::addImageToResources", | 528 TRACE_EVENT2("page-serialization", "FrameSerializer::addImageToResources", |
| 486 "type", "image", "url", url.ElidedString().Utf8().data()); | 529 "type", "image", "url", url.ElidedString().Utf8().data()); |
| 487 double image_start_time = MonotonicallyIncreasingTime(); | 530 double image_start_time = MonotonicallyIncreasingTime(); |
| 488 | 531 |
| 489 RefPtr<const SharedBuffer> data = image->GetImage()->Data(); | 532 RefPtr<const SharedBuffer> data = image->GetImage()->Data(); |
| 490 AddToResources(image->GetResponse().MimeType(), | 533 AddToResources(image->GetResponse().MimeType(), |
| 491 image->HasCacheControlNoStoreHeader() | 534 image->HasCacheControlNoStoreHeader() |
| 492 ? kHasCacheControlNoStoreHeader | 535 ? kHasCacheControlNoStoreHeader |
| 493 : kNoCacheControlNoStoreHeader, | 536 : kNoCacheControlNoStoreHeader, |
| (...skipping 85 matching lines...) | |
| 579 emits_minus = ch == '-'; | 622 emits_minus = ch == '-'; |
| 580 builder.Append(ch); | 623 builder.Append(ch); |
| 581 } | 624 } |
| 582 CString escaped_url = builder.ToString().Ascii(); | 625 CString escaped_url = builder.ToString().Ascii(); |
| 583 return String::Format("saved from url=(%04d)%s", | 626 return String::Format("saved from url=(%04d)%s", |
| 584 static_cast<int>(escaped_url.length()), | 627 static_cast<int>(escaped_url.length()), |
| 585 escaped_url.data()); | 628 escaped_url.data()); |
| 586 } | 629 } |
| 587 | 630 |
| 588 } // namespace blink | 631 } // namespace blink |
| OLD | NEW |