| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
| 6 * met: | 6 * met: |
| 7 * | 7 * |
| 8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
| (...skipping 249 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 260 | 260 |
| 261 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS | 261 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS |
| 262 // documents which leads to bugs like <https://crbug.com/251898>. Not being | 262 // documents which leads to bugs like <https://crbug.com/251898>. Not being |
| 263 // able to rewrite URLs inside CSS documents means that resources imported from | 263 // able to rewrite URLs inside CSS documents means that resources imported from |
| 264 // url(...) statements in CSS might not work when rewriting links for the | 264 // url(...) statements in CSS might not work when rewriting links for the |
| 265 // "Webpage, Complete" method of saving a page. It will take some work but it | 265 // "Webpage, Complete" method of saving a page. It will take some work but it |
| 266 // needs to be done if we want to continue to support non-MHTML saved pages. | 266 // needs to be done if we want to continue to support non-MHTML saved pages. |
| 267 | 267 |
// FrameSerializer constructor.
// Stores the address of `resources` in resources_ and initializes delegate_
// from `delegate`; is_serializing_css_ starts out false.
// The NEW column splits the initializer list one member per line and
// additionally zero-initializes the problem-metric counters
// (total_image_count_, loaded_image_count_, total_css_count_,
// loaded_css_count_) and starts with should_collect_problem_metric_ == false.
| 268 FrameSerializer::FrameSerializer(Deque<SerializedResource>& resources, | 268 FrameSerializer::FrameSerializer(Deque<SerializedResource>& resources, |
| 269 Delegate& delegate) | 269 Delegate& delegate) |
| 270 : resources_(&resources), is_serializing_css_(false), delegate_(delegate) {} | 270 : resources_(&resources), |
| 271 is_serializing_css_(false), |
| 272 delegate_(delegate), |
| 273 total_image_count_(0), |
| 274 loaded_image_count_(0), |
| 275 total_css_count_(0), |
| 276 loaded_css_count_(0), |
| 277 should_collect_problem_metric_(false) {} |
| 271 | 278 |
| 272 void FrameSerializer::SerializeFrame(const LocalFrame& frame) { | 279 void FrameSerializer::SerializeFrame(const LocalFrame& frame) { |
| 273 TRACE_EVENT0("page-serialization", "FrameSerializer::serializeFrame"); | 280 TRACE_EVENT0("page-serialization", "FrameSerializer::serializeFrame"); |
| 274 DCHECK(frame.GetDocument()); | 281 DCHECK(frame.GetDocument()); |
| 275 Document& document = *frame.GetDocument(); | 282 Document& document = *frame.GetDocument(); |
| 276 KURL url = document.Url(); | 283 KURL url = document.Url(); |
| 277 | 284 |
| 278 // If frame is an image document, add the image and don't continue | 285 // If frame is an image document, add the image and don't continue |
| 279 if (document.IsImageDocument()) { | 286 if (document.IsImageDocument()) { |
| 280 ImageDocument& image_document = ToImageDocument(document); | 287 ImageDocument& image_document = ToImageDocument(document); |
| (...skipping 11 matching lines...) Expand all Loading... |
| 292 String text = | 299 String text = |
| 293 SerializeNodes<EditingStrategy>(accumulator, document, kIncludeNode); | 300 SerializeNodes<EditingStrategy>(accumulator, document, kIncludeNode); |
| 294 | 301 |
| 295 CString frame_html = | 302 CString frame_html = |
| 296 document.Encoding().Encode(text, WTF::kEntitiesForUnencodables); | 303 document.Encoding().Encode(text, WTF::kEntitiesForUnencodables); |
| 297 resources_->push_back(SerializedResource( | 304 resources_->push_back(SerializedResource( |
| 298 url, document.SuggestedMIMEType(), | 305 url, document.SuggestedMIMEType(), |
| 299 SharedBuffer::Create(frame_html.data(), frame_html.length()))); | 306 SharedBuffer::Create(frame_html.data(), frame_html.length()))); |
| 300 } | 307 } |
| 301 | 308 |
| 309 should_collect_problem_metric_ = |
| 310 delegate_.ShouldCollectProblemMetric() && frame.IsMainFrame(); |
| 302 for (Node* node : serialized_nodes) { | 311 for (Node* node : serialized_nodes) { |
| 303 DCHECK(node); | 312 DCHECK(node); |
| 304 if (!node->IsElementNode()) | 313 if (!node->IsElementNode()) |
| 305 continue; | 314 continue; |
| 306 | 315 |
| 307 Element& element = ToElement(*node); | 316 Element& element = ToElement(*node); |
| 308 // We have to process in-line style as it might contain some resources | 317 // We have to process in-line style as it might contain some resources |
| 309 // (typically background images). | 318 // (typically background images). |
| 310 if (element.IsStyledElement()) { | 319 if (element.IsStyledElement()) { |
| 311 RetrieveResourcesForProperties(element.InlineStyle(), document); | 320 RetrieveResourcesForProperties(element.InlineStyle(), document); |
| (...skipping 22 matching lines...) Expand all Loading... |
| 334 KURL url = document.CompleteURL( | 343 KURL url = document.CompleteURL( |
| 335 link_element.getAttribute(HTMLNames::hrefAttr)); | 344 link_element.getAttribute(HTMLNames::hrefAttr)); |
| 336 SerializeCSSStyleSheet(*sheet, url); | 345 SerializeCSSStyleSheet(*sheet, url); |
| 337 } | 346 } |
| 338 } else if (isHTMLStyleElement(element)) { | 347 } else if (isHTMLStyleElement(element)) { |
| 339 HTMLStyleElement& style_element = toHTMLStyleElement(element); | 348 HTMLStyleElement& style_element = toHTMLStyleElement(element); |
| 340 if (CSSStyleSheet* sheet = style_element.sheet()) | 349 if (CSSStyleSheet* sheet = style_element.sheet()) |
| 341 SerializeCSSStyleSheet(*sheet, KURL()); | 350 SerializeCSSStyleSheet(*sheet, KURL()); |
| 342 } | 351 } |
| 343 } | 352 } |
| 353 if (should_collect_problem_metric_) { |
| 354 // Report detectors through UMA. |
| 355 // We use exactly 21 buckets for the percentage histograms because we want |
| 356 // each bucket to cover 5%, avoiding potential spikes in the distribution. |
| 357 UMA_HISTOGRAM_COUNTS_100( |
| 358 "PageSerialization.ProblemDetection.TotalImageCount", |
| 359 static_cast<int64_t>(total_image_count_)); |
| 360 if (total_image_count_ > 0) { |
| 361 DCHECK_LE(loaded_image_count_, total_image_count_); |
| 362 DEFINE_STATIC_LOCAL( |
| 363 LinearHistogram, image_histogram, |
| 364 ("PageSerialization.ProblemDetection.LoadedImagePercentage", 1, 100, |
| 365 21)); |
| 366 image_histogram.Count( |
| 367 static_cast<int64_t>(loaded_image_count_ * 100 / total_image_count_)); |
| 368 } |
| 369 |
| 370 UMA_HISTOGRAM_COUNTS_100("PageSerialization.ProblemDetection.TotalCSSCount", |
| 371 static_cast<int64_t>(total_css_count_)); |
| 372 if (total_css_count_ > 0) { |
| 373 DCHECK_LE(loaded_css_count_, total_css_count_); |
| 374 DEFINE_STATIC_LOCAL( |
| 375 LinearHistogram, css_histogram, |
| 376 ("PageSerialization.ProblemDetection.LoadedCSSPercentage", 1, 100, |
| 377 21)); |
| 378 css_histogram.Count( |
| 379 static_cast<int64_t>(loaded_css_count_ * 100 / total_css_count_)); |
| 380 } |
| 381 should_collect_problem_metric_ = false; |
| 382 } |
| 344 } | 383 } |
| 345 | 384 |
| 346 void FrameSerializer::SerializeCSSStyleSheet(CSSStyleSheet& style_sheet, | 385 void FrameSerializer::SerializeCSSStyleSheet(CSSStyleSheet& style_sheet, |
| 347 const KURL& url) { | 386 const KURL& url) { |
| 348 // If the URL is invalid or if it is a data URL this means that this CSS is | 387 // If the URL is invalid or if it is a data URL this means that this CSS is |
| 349 // defined inline, respectively in a <style> tag or in the data URL itself. | 388 // defined inline, respectively in a <style> tag or in the data URL itself. |
| 350 bool is_inline_css = !url.IsValid() || url.ProtocolIsData(); | 389 bool is_inline_css = !url.IsValid() || url.ProtocolIsData(); |
| 351 // If this CSS is not inline then it is identifiable by its URL. So just skip | 390 // If this CSS is not inline then it is identifiable by its URL. So just skip |
| 352 // it if it has already been analyzed before. | 391 // it if it has already been analyzed before. |
| 353 if (!is_inline_css && (resource_urls_.Contains(url) || | 392 if (!is_inline_css && (resource_urls_.Contains(url) || |
| 354 delegate_.ShouldSkipResourceWithURL(url))) { | 393 delegate_.ShouldSkipResourceWithURL(url))) { |
| 355 return; | 394 return; |
| 356 } | 395 } |
| 396 if (!is_inline_css) |
| 397 resource_urls_.insert(url); |
| 398 if (should_collect_problem_metric_ && !is_inline_css) { |
| 399 total_css_count_++; |
| 400 if (style_sheet.LoadCompleted()) |
| 401 loaded_css_count_++; |
| 402 } |
| 357 | 403 |
| 358 TRACE_EVENT2("page-serialization", "FrameSerializer::serializeCSSStyleSheet", | 404 TRACE_EVENT2("page-serialization", "FrameSerializer::serializeCSSStyleSheet", |
| 359 "type", "CSS", "url", url.ElidedString().Utf8().data()); | 405 "type", "CSS", "url", url.ElidedString().Utf8().data()); |
| 360 // Only report UMA metric if this is not a reentrant CSS serialization call. | 406 // Only report UMA metric if this is not a reentrant CSS serialization call. |
| 361 double css_start_time = 0; | 407 double css_start_time = 0; |
| 362 if (!is_serializing_css_) { | 408 if (!is_serializing_css_) { |
| 363 is_serializing_css_ = true; | 409 is_serializing_css_ = true; |
| 364 css_start_time = MonotonicallyIncreasingTime(); | 410 css_start_time = MonotonicallyIncreasingTime(); |
| 365 } | 411 } |
| 366 | 412 |
| (...skipping 16 matching lines...) Expand all Loading... |
| 383 } | 429 } |
| 384 | 430 |
| 385 WTF::TextEncoding text_encoding(style_sheet.Contents()->Charset()); | 431 WTF::TextEncoding text_encoding(style_sheet.Contents()->Charset()); |
| 386 DCHECK(text_encoding.IsValid()); | 432 DCHECK(text_encoding.IsValid()); |
| 387 String text_string = css_text.ToString(); | 433 String text_string = css_text.ToString(); |
| 388 CString text = text_encoding.Encode( | 434 CString text = text_encoding.Encode( |
| 389 text_string, WTF::kCSSEncodedEntitiesForUnencodables); | 435 text_string, WTF::kCSSEncodedEntitiesForUnencodables); |
| 390 resources_->push_back( | 436 resources_->push_back( |
| 391 SerializedResource(url, String("text/css"), | 437 SerializedResource(url, String("text/css"), |
| 392 SharedBuffer::Create(text.data(), text.length()))); | 438 SharedBuffer::Create(text.data(), text.length()))); |
| 393 resource_urls_.insert(url); | |
| 394 } | 439 } |
| 395 | 440 |
| 396 // Sub resources need to be serialized even if the CSS definition doesn't | 441 // Sub resources need to be serialized even if the CSS definition doesn't |
| 397 // need to be. | 442 // need to be. |
| 398 for (unsigned i = 0; i < style_sheet.length(); ++i) | 443 for (unsigned i = 0; i < style_sheet.length(); ++i) |
| 399 SerializeCSSRule(style_sheet.item(i)); | 444 SerializeCSSRule(style_sheet.item(i)); |
| 400 | 445 |
| 401 if (css_start_time != 0) { | 446 if (css_start_time != 0) { |
| 402 is_serializing_css_ = false; | 447 is_serializing_css_ = false; |
| 403 DEFINE_STATIC_LOCAL(CustomCountHistogram, css_histogram, | 448 DEFINE_STATIC_LOCAL(CustomCountHistogram, css_histogram, |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 466 const KURL& url) { | 511 const KURL& url) { |
| 467 if (delegate_.ShouldSkipResource(has_cache_control_no_store_header)) | 512 if (delegate_.ShouldSkipResource(has_cache_control_no_store_header)) |
| 468 return; | 513 return; |
| 469 | 514 |
| 470 if (!data) { | 515 if (!data) { |
| 471 DLOG(ERROR) << "No data for resource " << url.GetString(); | 516 DLOG(ERROR) << "No data for resource " << url.GetString(); |
| 472 return; | 517 return; |
| 473 } | 518 } |
| 474 | 519 |
| 475 resources_->push_back(SerializedResource(url, mime_type, std::move(data))); | 520 resources_->push_back(SerializedResource(url, mime_type, std::move(data))); |
| 476 resource_urls_.insert(url); | |
| 477 } | 521 } |
| 478 | 522 |
// Adds the image `image`, identified by `url`, to the serialized resources.
//
// NEW-column behaviour, as visible in these rows:
//   - Returns immediately when ShouldAddURL(url) is false.
//   - Otherwise inserts `url` into resource_urls_ before the image itself is
//     validated, so a URL whose image is null, has no image, or reported an
//     error is still recorded. (In the OLD column the insert happened at the
//     end of AddToResources instead.)
//   - While should_collect_problem_metric_ is set, every URL that gets past
//     ShouldAddURL increments total_image_count_; loaded_image_count_ is
//     incremented only for images that pass the null / HasImage() /
//     ErrorOccurred() checks and report IsLoaded().
//   - The image data is forwarded to AddToResources together with the
//     response MIME type and the kHasCacheControlNoStoreHeader /
//     kNoCacheControlNoStoreHeader flag chosen from
//     image->HasCacheControlNoStoreHeader().
//   - The elapsed time is reported to the
//     "PageSerialization.SerializationTime.ImageElement" histogram unless
//     is_serializing_css_ is true.
| 479 void FrameSerializer::AddImageToResources(ImageResourceContent* image, | 523 void FrameSerializer::AddImageToResources(ImageResourceContent* image, |
| 480 const KURL& url) { | 524 const KURL& url) { |
| 481 if (!image || !image->HasImage() || image->ErrorOccurred() || | 525 if (!ShouldAddURL(url)) |
| 482 !ShouldAddURL(url)) | |
| 483 return; | 526 return; |
| 527 resource_urls_.insert(url); |
| 528 if (should_collect_problem_metric_) |
| 529 total_image_count_++; |
| 530 if (!image || !image->HasImage() || image->ErrorOccurred()) |
| 531 return; |
| 532 if (should_collect_problem_metric_ && image->IsLoaded()) |
| 533 loaded_image_count_++; |
| 484 | 534 |
| 485 TRACE_EVENT2("page-serialization", "FrameSerializer::addImageToResources", | 535 TRACE_EVENT2("page-serialization", "FrameSerializer::addImageToResources", |
| 486 "type", "image", "url", url.ElidedString().Utf8().data()); | 536 "type", "image", "url", url.ElidedString().Utf8().data()); |
| 487 double image_start_time = MonotonicallyIncreasingTime(); | 537 double image_start_time = MonotonicallyIncreasingTime(); |
| 488 | 538 |
| 489 RefPtr<const SharedBuffer> data = image->GetImage()->Data(); | 539 RefPtr<const SharedBuffer> data = image->GetImage()->Data(); |
| 490 AddToResources(image->GetResponse().MimeType(), | 540 AddToResources(image->GetResponse().MimeType(), |
| 491 image->HasCacheControlNoStoreHeader() | 541 image->HasCacheControlNoStoreHeader() |
| 492 ? kHasCacheControlNoStoreHeader | 542 ? kHasCacheControlNoStoreHeader |
| 493 : kNoCacheControlNoStoreHeader, | 543 : kNoCacheControlNoStoreHeader, |
| 494 data, url); | 544 data, url); |
| 495 | 545 |
| 496 // If we're already reporting time for CSS serialization don't report it for | 546 // If we're already reporting time for CSS serialization don't report it for |
| 497 // this image to avoid reporting the same time twice. | 547 // this image to avoid reporting the same time twice. |
| 498 if (!is_serializing_css_) { | 548 if (!is_serializing_css_) { |
| 499 DEFINE_STATIC_LOCAL(CustomCountHistogram, image_histogram, | 549 DEFINE_STATIC_LOCAL(CustomCountHistogram, image_histogram, |
| 500 ("PageSerialization.SerializationTime.ImageElement", 0, | 550 ("PageSerialization.SerializationTime.ImageElement", 0, |
| 501 maxSerializationTimeUmaMicroseconds, 50)); | 551 maxSerializationTimeUmaMicroseconds, 50)); |
| 502 image_histogram.Count(static_cast<int64_t>( | 552 image_histogram.Count(static_cast<int64_t>( |
| 503 (MonotonicallyIncreasingTime() - image_start_time) * | 553 (MonotonicallyIncreasingTime() - image_start_time) * |
| 504 secondsToMicroseconds)); | 554 secondsToMicroseconds)); |
| 505 } | 555 } |
| 506 } | 556 } |
| 507 | 557 |
// Adds the font resource `font` to the serialized resources.
//
// NEW-column behaviour:
//   - Returns immediately when `font` is null or ShouldAddURL(font->Url()) is
//     false.
//   - Otherwise inserts font->Url() into resource_urls_ before checking that
//     the font is usable, so a font that is not loaded or has no resource
//     buffer is still recorded.
//   - Returns without adding anything when the font is not loaded or has no
//     resource buffer. `font` is known to be non-null at this point (the first
//     guard already returned for null), so the second guard no longer repeats
//     the dead `!font` test.
//   - The font data is forwarded to AddToResources together with the response
//     MIME type and the kHasCacheControlNoStoreHeader /
//     kNoCacheControlNoStoreHeader flag chosen from
//     font->HasCacheControlNoStoreHeader().
| 508 void FrameSerializer::AddFontToResources(FontResource* font) { | 558 void FrameSerializer::AddFontToResources(FontResource* font) { |
| 509 if (!font || !font->IsLoaded() || !font->ResourceBuffer() || | 559 if (!font || !ShouldAddURL(font->Url())) |
| 510 !ShouldAddURL(font->Url())) | 560 return; |
| 561 resource_urls_.insert(font->Url()); |
| 562 if (!font->IsLoaded() || !font->ResourceBuffer()) |
| 511 return; | 563 return; |
| 512 | 564 |
| 513 RefPtr<const SharedBuffer> data(font->ResourceBuffer()); | 565 RefPtr<const SharedBuffer> data(font->ResourceBuffer()); |
| 514 | 566 |
| 515 AddToResources(font->GetResponse().MimeType(), | 567 AddToResources(font->GetResponse().MimeType(), |
| 516 font->HasCacheControlNoStoreHeader() | 568 font->HasCacheControlNoStoreHeader() |
| 517 ? kHasCacheControlNoStoreHeader | 569 ? kHasCacheControlNoStoreHeader |
| 518 : kNoCacheControlNoStoreHeader, | 570 : kNoCacheControlNoStoreHeader, |
| 519 data, font->Url()); | 571 data, font->Url()); |
| 520 } | 572 } |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 579 emits_minus = ch == '-'; | 631 emits_minus = ch == '-'; |
| 580 builder.Append(ch); | 632 builder.Append(ch); |
| 581 } | 633 } |
| 582 CString escaped_url = builder.ToString().Ascii(); | 634 CString escaped_url = builder.ToString().Ascii(); |
| 583 return String::Format("saved from url=(%04d)%s", | 635 return String::Format("saved from url=(%04d)%s", |
| 584 static_cast<int>(escaped_url.length()), | 636 static_cast<int>(escaped_url.length()), |
| 585 escaped_url.data()); | 637 escaped_url.data()); |
| 586 } | 638 } |
| 587 | 639 |
| 588 } // namespace blink | 640 } // namespace blink |
| OLD | NEW |