OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 249 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
260 | 260 |
261 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS | 261 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS |
262 // documents which leads to bugs like <https://crbug.com/251898>. Not being | 262 // documents which leads to bugs like <https://crbug.com/251898>. Not being |
263 // able to rewrite URLs inside CSS documents means that resources imported from | 263 // able to rewrite URLs inside CSS documents means that resources imported from |
264 // url(...) statements in CSS might not work when rewriting links for the | 264 // url(...) statements in CSS might not work when rewriting links for the |
265 // "Webpage, Complete" method of saving a page. It will take some work but it | 265 // "Webpage, Complete" method of saving a page. It will take some work but it |
266 // needs to be done if we want to continue to support non-MHTML saved pages. | 266 // needs to be done if we want to continue to support non-MHTML saved pages. |
267 | 267 |
268 FrameSerializer::FrameSerializer(Deque<SerializedResource>& resources, | 268 FrameSerializer::FrameSerializer(Deque<SerializedResource>& resources, |
269 Delegate& delegate) | 269 Delegate& delegate) |
270 : resources_(&resources), is_serializing_css_(false), delegate_(delegate) {} | 270 : resources_(&resources), |
| 271 is_serializing_css_(false), |
| 272 delegate_(delegate), |
| 273 total_image_count_(0), |
| 274 loaded_image_count_(0), |
| 275 total_css_count_(0), |
| 276 loaded_css_count_(0), |
| 277 should_collect_problem_metric_(false) {} |
271 | 278 |
272 void FrameSerializer::SerializeFrame(const LocalFrame& frame) { | 279 void FrameSerializer::SerializeFrame(const LocalFrame& frame) { |
273 TRACE_EVENT0("page-serialization", "FrameSerializer::serializeFrame"); | 280 TRACE_EVENT0("page-serialization", "FrameSerializer::serializeFrame"); |
274 DCHECK(frame.GetDocument()); | 281 DCHECK(frame.GetDocument()); |
275 Document& document = *frame.GetDocument(); | 282 Document& document = *frame.GetDocument(); |
276 KURL url = document.Url(); | 283 KURL url = document.Url(); |
277 | 284 |
278 // If frame is an image document, add the image and don't continue | 285 // If frame is an image document, add the image and don't continue |
279 if (document.IsImageDocument()) { | 286 if (document.IsImageDocument()) { |
280 ImageDocument& image_document = ToImageDocument(document); | 287 ImageDocument& image_document = ToImageDocument(document); |
(...skipping 11 matching lines...) Expand all Loading... |
292 String text = | 299 String text = |
293 SerializeNodes<EditingStrategy>(accumulator, document, kIncludeNode); | 300 SerializeNodes<EditingStrategy>(accumulator, document, kIncludeNode); |
294 | 301 |
295 CString frame_html = | 302 CString frame_html = |
296 document.Encoding().Encode(text, WTF::kEntitiesForUnencodables); | 303 document.Encoding().Encode(text, WTF::kEntitiesForUnencodables); |
297 resources_->push_back(SerializedResource( | 304 resources_->push_back(SerializedResource( |
298 url, document.SuggestedMIMEType(), | 305 url, document.SuggestedMIMEType(), |
299 SharedBuffer::Create(frame_html.data(), frame_html.length()))); | 306 SharedBuffer::Create(frame_html.data(), frame_html.length()))); |
300 } | 307 } |
301 | 308 |
| 309 should_collect_problem_metric_ = |
| 310 delegate_.ShouldCollectProblemMetric() && frame.IsMainFrame(); |
302 for (Node* node : serialized_nodes) { | 311 for (Node* node : serialized_nodes) { |
303 DCHECK(node); | 312 DCHECK(node); |
304 if (!node->IsElementNode()) | 313 if (!node->IsElementNode()) |
305 continue; | 314 continue; |
306 | 315 |
307 Element& element = ToElement(*node); | 316 Element& element = ToElement(*node); |
308 // We have to process in-line style as it might contain some resources | 317 // We have to process in-line style as it might contain some resources |
309 // (typically background images). | 318 // (typically background images). |
310 if (element.IsStyledElement()) { | 319 if (element.IsStyledElement()) { |
311 RetrieveResourcesForProperties(element.InlineStyle(), document); | 320 RetrieveResourcesForProperties(element.InlineStyle(), document); |
(...skipping 22 matching lines...) Expand all Loading... |
334 KURL url = document.CompleteURL( | 343 KURL url = document.CompleteURL( |
335 link_element.getAttribute(HTMLNames::hrefAttr)); | 344 link_element.getAttribute(HTMLNames::hrefAttr)); |
336 SerializeCSSStyleSheet(*sheet, url); | 345 SerializeCSSStyleSheet(*sheet, url); |
337 } | 346 } |
338 } else if (isHTMLStyleElement(element)) { | 347 } else if (isHTMLStyleElement(element)) { |
339 HTMLStyleElement& style_element = toHTMLStyleElement(element); | 348 HTMLStyleElement& style_element = toHTMLStyleElement(element); |
340 if (CSSStyleSheet* sheet = style_element.sheet()) | 349 if (CSSStyleSheet* sheet = style_element.sheet()) |
341 SerializeCSSStyleSheet(*sheet, KURL()); | 350 SerializeCSSStyleSheet(*sheet, KURL()); |
342 } | 351 } |
343 } | 352 } |
| 353 if (should_collect_problem_metric_) { |
| 354 // Report the problem-detection counters through UMA. |
| 355 // Use exactly 21 percentage buckets so that each bucket covers 5%, which |
| 356 // avoids potential spikes in the distribution. |
| 357 UMA_HISTOGRAM_COUNTS_100( |
| 358 "PageSerialization.ProblemDetection.TotalImageCount", |
| 359 static_cast<int64_t>(total_image_count_)); |
| 360 if (total_image_count_ > 0) { |
| 361 DCHECK_LE(loaded_image_count_, total_image_count_); |
| 362 DEFINE_STATIC_LOCAL( |
| 363 LinearHistogram, image_histogram, |
| 364 ("PageSerialization.ProblemDetection.LoadedImagePercentage", 1, 100, |
| 365 21)); |
| 366 image_histogram.Count( |
| 367 static_cast<int64_t>(loaded_image_count_ * 100 / total_image_count_)); |
| 368 } |
| 369 |
| 370 UMA_HISTOGRAM_COUNTS_100("PageSerialization.ProblemDetection.TotalCSSCount", |
| 371 static_cast<int64_t>(total_css_count_)); |
| 372 if (total_css_count_ > 0) { |
| 373 DCHECK_LE(loaded_css_count_, total_css_count_); |
| 374 DEFINE_STATIC_LOCAL( |
| 375 LinearHistogram, css_histogram, |
| 376 ("PageSerialization.ProblemDetection.LoadedCSSPercentage", 1, 100, |
| 377 21)); |
| 378 css_histogram.Count( |
| 379 static_cast<int64_t>(loaded_css_count_ * 100 / total_css_count_)); |
| 380 } |
| 381 should_collect_problem_metric_ = false; |
| 382 } |
344 } | 383 } |
345 | 384 |
346 void FrameSerializer::SerializeCSSStyleSheet(CSSStyleSheet& style_sheet, | 385 void FrameSerializer::SerializeCSSStyleSheet(CSSStyleSheet& style_sheet, |
347 const KURL& url) { | 386 const KURL& url) { |
348 // If the URL is invalid or if it is a data URL this means that this CSS is | 387 // If the URL is invalid or if it is a data URL this means that this CSS is |
349 // defined inline, respectively in a <style> tag or in the data URL itself. | 388 // defined inline, respectively in a <style> tag or in the data URL itself. |
350 bool is_inline_css = !url.IsValid() || url.ProtocolIsData(); | 389 bool is_inline_css = !url.IsValid() || url.ProtocolIsData(); |
351 // If this CSS is not inline then it is identifiable by its URL. So just skip | 390 // If this CSS is not inline then it is identifiable by its URL. So just skip |
352 // it if it has already been analyzed before. | 391 // it if it has already been analyzed before. |
353 if (!is_inline_css && (resource_urls_.Contains(url) || | 392 if (!is_inline_css && (resource_urls_.Contains(url) || |
354 delegate_.ShouldSkipResourceWithURL(url))) { | 393 delegate_.ShouldSkipResourceWithURL(url))) { |
355 return; | 394 return; |
356 } | 395 } |
| 396 if (!is_inline_css) |
| 397 resource_urls_.insert(url); |
| 398 if (should_collect_problem_metric_ && !is_inline_css) { |
| 399 total_css_count_++; |
| 400 if (style_sheet.LoadCompleted()) |
| 401 loaded_css_count_++; |
| 402 } |
357 | 403 |
358 TRACE_EVENT2("page-serialization", "FrameSerializer::serializeCSSStyleSheet", | 404 TRACE_EVENT2("page-serialization", "FrameSerializer::serializeCSSStyleSheet", |
359 "type", "CSS", "url", url.ElidedString().Utf8().data()); | 405 "type", "CSS", "url", url.ElidedString().Utf8().data()); |
360 // Only report UMA metric if this is not a reentrant CSS serialization call. | 406 // Only report UMA metric if this is not a reentrant CSS serialization call. |
361 double css_start_time = 0; | 407 double css_start_time = 0; |
362 if (!is_serializing_css_) { | 408 if (!is_serializing_css_) { |
363 is_serializing_css_ = true; | 409 is_serializing_css_ = true; |
364 css_start_time = MonotonicallyIncreasingTime(); | 410 css_start_time = MonotonicallyIncreasingTime(); |
365 } | 411 } |
366 | 412 |
(...skipping 16 matching lines...) Expand all Loading... |
383 } | 429 } |
384 | 430 |
385 WTF::TextEncoding text_encoding(style_sheet.Contents()->Charset()); | 431 WTF::TextEncoding text_encoding(style_sheet.Contents()->Charset()); |
386 DCHECK(text_encoding.IsValid()); | 432 DCHECK(text_encoding.IsValid()); |
387 String text_string = css_text.ToString(); | 433 String text_string = css_text.ToString(); |
388 CString text = text_encoding.Encode( | 434 CString text = text_encoding.Encode( |
389 text_string, WTF::kCSSEncodedEntitiesForUnencodables); | 435 text_string, WTF::kCSSEncodedEntitiesForUnencodables); |
390 resources_->push_back( | 436 resources_->push_back( |
391 SerializedResource(url, String("text/css"), | 437 SerializedResource(url, String("text/css"), |
392 SharedBuffer::Create(text.data(), text.length()))); | 438 SharedBuffer::Create(text.data(), text.length()))); |
393 resource_urls_.insert(url); | |
394 } | 439 } |
395 | 440 |
396 // Sub resources need to be serialized even if the CSS definition doesn't | 441 // Sub resources need to be serialized even if the CSS definition doesn't |
397 // need to be. | 442 // need to be. |
398 for (unsigned i = 0; i < style_sheet.length(); ++i) | 443 for (unsigned i = 0; i < style_sheet.length(); ++i) |
399 SerializeCSSRule(style_sheet.item(i)); | 444 SerializeCSSRule(style_sheet.item(i)); |
400 | 445 |
401 if (css_start_time != 0) { | 446 if (css_start_time != 0) { |
402 is_serializing_css_ = false; | 447 is_serializing_css_ = false; |
403 DEFINE_STATIC_LOCAL(CustomCountHistogram, css_histogram, | 448 DEFINE_STATIC_LOCAL(CustomCountHistogram, css_histogram, |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
466 const KURL& url) { | 511 const KURL& url) { |
467 if (delegate_.ShouldSkipResource(has_cache_control_no_store_header)) | 512 if (delegate_.ShouldSkipResource(has_cache_control_no_store_header)) |
468 return; | 513 return; |
469 | 514 |
470 if (!data) { | 515 if (!data) { |
471 DLOG(ERROR) << "No data for resource " << url.GetString(); | 516 DLOG(ERROR) << "No data for resource " << url.GetString(); |
472 return; | 517 return; |
473 } | 518 } |
474 | 519 |
475 resources_->push_back(SerializedResource(url, mime_type, std::move(data))); | 520 resources_->push_back(SerializedResource(url, mime_type, std::move(data))); |
476 resource_urls_.insert(url); | |
477 } | 521 } |
478 | 522 |
479 void FrameSerializer::AddImageToResources(ImageResourceContent* image, | 523 void FrameSerializer::AddImageToResources(ImageResourceContent* image, |
480 const KURL& url) { | 524 const KURL& url) { |
481 if (!image || !image->HasImage() || image->ErrorOccurred() || | 525 if (!ShouldAddURL(url)) |
482 !ShouldAddURL(url)) | |
483 return; | 526 return; |
| 527 resource_urls_.insert(url); |
| 528 if (should_collect_problem_metric_) |
| 529 total_image_count_++; |
| 530 if (!image || !image->HasImage() || image->ErrorOccurred()) |
| 531 return; |
| 532 if (should_collect_problem_metric_ && image->IsLoaded()) |
| 533 loaded_image_count_++; |
484 | 534 |
485 TRACE_EVENT2("page-serialization", "FrameSerializer::addImageToResources", | 535 TRACE_EVENT2("page-serialization", "FrameSerializer::addImageToResources", |
486 "type", "image", "url", url.ElidedString().Utf8().data()); | 536 "type", "image", "url", url.ElidedString().Utf8().data()); |
487 double image_start_time = MonotonicallyIncreasingTime(); | 537 double image_start_time = MonotonicallyIncreasingTime(); |
488 | 538 |
489 RefPtr<const SharedBuffer> data = image->GetImage()->Data(); | 539 RefPtr<const SharedBuffer> data = image->GetImage()->Data(); |
490 AddToResources(image->GetResponse().MimeType(), | 540 AddToResources(image->GetResponse().MimeType(), |
491 image->HasCacheControlNoStoreHeader() | 541 image->HasCacheControlNoStoreHeader() |
492 ? kHasCacheControlNoStoreHeader | 542 ? kHasCacheControlNoStoreHeader |
493 : kNoCacheControlNoStoreHeader, | 543 : kNoCacheControlNoStoreHeader, |
494 data, url); | 544 data, url); |
495 | 545 |
496 // If we're already reporting time for CSS serialization don't report it for | 546 // If we're already reporting time for CSS serialization don't report it for |
497 // this image to avoid reporting the same time twice. | 547 // this image to avoid reporting the same time twice. |
498 if (!is_serializing_css_) { | 548 if (!is_serializing_css_) { |
499 DEFINE_STATIC_LOCAL(CustomCountHistogram, image_histogram, | 549 DEFINE_STATIC_LOCAL(CustomCountHistogram, image_histogram, |
500 ("PageSerialization.SerializationTime.ImageElement", 0, | 550 ("PageSerialization.SerializationTime.ImageElement", 0, |
501 maxSerializationTimeUmaMicroseconds, 50)); | 551 maxSerializationTimeUmaMicroseconds, 50)); |
502 image_histogram.Count(static_cast<int64_t>( | 552 image_histogram.Count(static_cast<int64_t>( |
503 (MonotonicallyIncreasingTime() - image_start_time) * | 553 (MonotonicallyIncreasingTime() - image_start_time) * |
504 secondsToMicroseconds)); | 554 secondsToMicroseconds)); |
505 } | 555 } |
506 } | 556 } |
507 | 557 |
508 void FrameSerializer::AddFontToResources(FontResource* font) { | 558 void FrameSerializer::AddFontToResources(FontResource* font) { |
509 if (!font || !font->IsLoaded() || !font->ResourceBuffer() || | 559 if (!font || !ShouldAddURL(font->Url())) |
510 !ShouldAddURL(font->Url())) | 560 return; |
| 561 resource_urls_.insert(font->Url()); |
| 562 if (!font || !font->IsLoaded() || !font->ResourceBuffer()) |
511 return; | 563 return; |
512 | 564 |
513 RefPtr<const SharedBuffer> data(font->ResourceBuffer()); | 565 RefPtr<const SharedBuffer> data(font->ResourceBuffer()); |
514 | 566 |
515 AddToResources(font->GetResponse().MimeType(), | 567 AddToResources(font->GetResponse().MimeType(), |
516 font->HasCacheControlNoStoreHeader() | 568 font->HasCacheControlNoStoreHeader() |
517 ? kHasCacheControlNoStoreHeader | 569 ? kHasCacheControlNoStoreHeader |
518 : kNoCacheControlNoStoreHeader, | 570 : kNoCacheControlNoStoreHeader, |
519 data, font->Url()); | 571 data, font->Url()); |
520 } | 572 } |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
579 emits_minus = ch == '-'; | 631 emits_minus = ch == '-'; |
580 builder.Append(ch); | 632 builder.Append(ch); |
581 } | 633 } |
582 CString escaped_url = builder.ToString().Ascii(); | 634 CString escaped_url = builder.ToString().Ascii(); |
583 return String::Format("saved from url=(%04d)%s", | 635 return String::Format("saved from url=(%04d)%s", |
584 static_cast<int>(escaped_url.length()), | 636 static_cast<int>(escaped_url.length()), |
585 escaped_url.data()); | 637 escaped_url.data()); |
586 } | 638 } |
587 | 639 |
588 } // namespace blink | 640 } // namespace blink |
OLD | NEW |