Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(52)

Side by Side Diff: third_party/WebKit/Source/core/frame/FrameSerializer.cpp

Issue 2886943003: [Offline Pages] Adding missing image/CSS detection in FrameSerializer. (Closed)
Patch Set: finally fixed. Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2011 Google Inc. All rights reserved. 2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 249 matching lines...) Expand 10 before | Expand all | Expand 10 after
260 260
261 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS 261 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS
262 // documents which leads to bugs like <https://crbug.com/251898>. Not being 262 // documents which leads to bugs like <https://crbug.com/251898>. Not being
263 // able to rewrite URLs inside CSS documents means that resources imported from 263 // able to rewrite URLs inside CSS documents means that resources imported from
264 // url(...) statements in CSS might not work when rewriting links for the 264 // url(...) statements in CSS might not work when rewriting links for the
265 // "Webpage, Complete" method of saving a page. It will take some work but it 265 // "Webpage, Complete" method of saving a page. It will take some work but it
266 // needs to be done if we want to continue to support non-MHTML saved pages. 266 // needs to be done if we want to continue to support non-MHTML saved pages.
267 267
268 FrameSerializer::FrameSerializer(Deque<SerializedResource>& resources, 268 FrameSerializer::FrameSerializer(Deque<SerializedResource>& resources,
269 Delegate& delegate) 269 Delegate& delegate)
270 : resources_(&resources), is_serializing_css_(false), delegate_(delegate) {} 270 : resources_(&resources),
271 is_serializing_css_(false),
272 delegate_(delegate),
273 total_image_count_(0),
274 loaded_image_count_(0),
275 total_css_count_(0),
276 loaded_css_count_(0),
277 should_collect_problem_metric_(false) {}
271 278
272 void FrameSerializer::SerializeFrame(const LocalFrame& frame) { 279 void FrameSerializer::SerializeFrame(const LocalFrame& frame) {
273 TRACE_EVENT0("page-serialization", "FrameSerializer::serializeFrame"); 280 TRACE_EVENT0("page-serialization", "FrameSerializer::serializeFrame");
274 DCHECK(frame.GetDocument()); 281 DCHECK(frame.GetDocument());
275 Document& document = *frame.GetDocument(); 282 Document& document = *frame.GetDocument();
276 KURL url = document.Url(); 283 KURL url = document.Url();
277 284
278 // If frame is an image document, add the image and don't continue 285 // If frame is an image document, add the image and don't continue
279 if (document.IsImageDocument()) { 286 if (document.IsImageDocument()) {
280 ImageDocument& image_document = ToImageDocument(document); 287 ImageDocument& image_document = ToImageDocument(document);
(...skipping 11 matching lines...) Expand all
292 String text = 299 String text =
293 SerializeNodes<EditingStrategy>(accumulator, document, kIncludeNode); 300 SerializeNodes<EditingStrategy>(accumulator, document, kIncludeNode);
294 301
295 CString frame_html = 302 CString frame_html =
296 document.Encoding().Encode(text, WTF::kEntitiesForUnencodables); 303 document.Encoding().Encode(text, WTF::kEntitiesForUnencodables);
297 resources_->push_back(SerializedResource( 304 resources_->push_back(SerializedResource(
298 url, document.SuggestedMIMEType(), 305 url, document.SuggestedMIMEType(),
299 SharedBuffer::Create(frame_html.data(), frame_html.length()))); 306 SharedBuffer::Create(frame_html.data(), frame_html.length())));
300 } 307 }
301 308
309 should_collect_problem_metric_ =
310 delegate_.ShouldCollectProblemMetric() && frame.IsMainFrame();
302 for (Node* node : serialized_nodes) { 311 for (Node* node : serialized_nodes) {
303 DCHECK(node); 312 DCHECK(node);
304 if (!node->IsElementNode()) 313 if (!node->IsElementNode())
305 continue; 314 continue;
306 315
307 Element& element = ToElement(*node); 316 Element& element = ToElement(*node);
308 // We have to process in-line style as it might contain some resources 317 // We have to process in-line style as it might contain some resources
309 // (typically background images). 318 // (typically background images).
310 if (element.IsStyledElement()) { 319 if (element.IsStyledElement()) {
311 RetrieveResourcesForProperties(element.InlineStyle(), document); 320 RetrieveResourcesForProperties(element.InlineStyle(), document);
(...skipping 22 matching lines...) Expand all
334 KURL url = document.CompleteURL( 343 KURL url = document.CompleteURL(
335 link_element.getAttribute(HTMLNames::hrefAttr)); 344 link_element.getAttribute(HTMLNames::hrefAttr));
336 SerializeCSSStyleSheet(*sheet, url); 345 SerializeCSSStyleSheet(*sheet, url);
337 } 346 }
338 } else if (isHTMLStyleElement(element)) { 347 } else if (isHTMLStyleElement(element)) {
339 HTMLStyleElement& style_element = toHTMLStyleElement(element); 348 HTMLStyleElement& style_element = toHTMLStyleElement(element);
340 if (CSSStyleSheet* sheet = style_element.sheet()) 349 if (CSSStyleSheet* sheet = style_element.sheet())
341 SerializeCSSStyleSheet(*sheet, KURL()); 350 SerializeCSSStyleSheet(*sheet, KURL());
342 } 351 }
343 } 352 }
353 if (should_collect_problem_metric_) {
354 // Report detectors through UMA.
355 // We use exactly 21 buckets for the percentage histograms because we want
356 // each bucket to span 5%, which avoids potential spikes in the distribution.
357 UMA_HISTOGRAM_COUNTS_100(
358 "PageSerialization.ProblemDetection.TotalImageCount",
359 static_cast<int64_t>(total_image_count_));
360 if (total_image_count_ > 0) {
361 DCHECK_LE(loaded_image_count_, total_image_count_);
362 DEFINE_STATIC_LOCAL(
363 LinearHistogram, image_histogram,
364 ("PageSerialization.ProblemDetection.LoadedImagePercentage", 1, 100,
365 21));
366 image_histogram.Count(
367 static_cast<int64_t>(loaded_image_count_ * 100 / total_image_count_));
368 }
369
370 UMA_HISTOGRAM_COUNTS_100("PageSerialization.ProblemDetection.TotalCSSCount",
371 static_cast<int64_t>(total_css_count_));
372 if (total_css_count_ > 0) {
373 DCHECK_LE(loaded_css_count_, total_css_count_);
374 DEFINE_STATIC_LOCAL(
375 LinearHistogram, css_histogram,
376 ("PageSerialization.ProblemDetection.LoadedCSSPercentage", 1, 100,
377 21));
378 css_histogram.Count(
379 static_cast<int64_t>(loaded_css_count_ * 100 / total_css_count_));
380 }
381 should_collect_problem_metric_ = false;
382 }
344 } 383 }
345 384
346 void FrameSerializer::SerializeCSSStyleSheet(CSSStyleSheet& style_sheet, 385 void FrameSerializer::SerializeCSSStyleSheet(CSSStyleSheet& style_sheet,
347 const KURL& url) { 386 const KURL& url) {
348 // If the URL is invalid or if it is a data URL this means that this CSS is 387 // If the URL is invalid or if it is a data URL this means that this CSS is
349 // defined inline, respectively in a <style> tag or in the data URL itself. 388 // defined inline, respectively in a <style> tag or in the data URL itself.
350 bool is_inline_css = !url.IsValid() || url.ProtocolIsData(); 389 bool is_inline_css = !url.IsValid() || url.ProtocolIsData();
351 // If this CSS is not inline then it is identifiable by its URL. So just skip 390 // If this CSS is not inline then it is identifiable by its URL. So just skip
352 // it if it has already been analyzed before. 391 // it if it has already been analyzed before.
353 if (!is_inline_css && (resource_urls_.Contains(url) || 392 if (!is_inline_css && (resource_urls_.Contains(url) ||
354 delegate_.ShouldSkipResourceWithURL(url))) { 393 delegate_.ShouldSkipResourceWithURL(url))) {
355 return; 394 return;
356 } 395 }
396 if (!is_inline_css)
397 resource_urls_.insert(url);
398 if (should_collect_problem_metric_ && !is_inline_css) {
399 total_css_count_++;
400 if (style_sheet.LoadCompleted())
401 loaded_css_count_++;
402 }
357 403
358 TRACE_EVENT2("page-serialization", "FrameSerializer::serializeCSSStyleSheet", 404 TRACE_EVENT2("page-serialization", "FrameSerializer::serializeCSSStyleSheet",
359 "type", "CSS", "url", url.ElidedString().Utf8().data()); 405 "type", "CSS", "url", url.ElidedString().Utf8().data());
360 // Only report UMA metric if this is not a reentrant CSS serialization call. 406 // Only report UMA metric if this is not a reentrant CSS serialization call.
361 double css_start_time = 0; 407 double css_start_time = 0;
362 if (!is_serializing_css_) { 408 if (!is_serializing_css_) {
363 is_serializing_css_ = true; 409 is_serializing_css_ = true;
364 css_start_time = MonotonicallyIncreasingTime(); 410 css_start_time = MonotonicallyIncreasingTime();
365 } 411 }
366 412
(...skipping 16 matching lines...) Expand all
383 } 429 }
384 430
385 WTF::TextEncoding text_encoding(style_sheet.Contents()->Charset()); 431 WTF::TextEncoding text_encoding(style_sheet.Contents()->Charset());
386 DCHECK(text_encoding.IsValid()); 432 DCHECK(text_encoding.IsValid());
387 String text_string = css_text.ToString(); 433 String text_string = css_text.ToString();
388 CString text = text_encoding.Encode( 434 CString text = text_encoding.Encode(
389 text_string, WTF::kCSSEncodedEntitiesForUnencodables); 435 text_string, WTF::kCSSEncodedEntitiesForUnencodables);
390 resources_->push_back( 436 resources_->push_back(
391 SerializedResource(url, String("text/css"), 437 SerializedResource(url, String("text/css"),
392 SharedBuffer::Create(text.data(), text.length()))); 438 SharedBuffer::Create(text.data(), text.length())));
393 resource_urls_.insert(url);
394 } 439 }
395 440
396 // Sub resources need to be serialized even if the CSS definition doesn't 441 // Sub resources need to be serialized even if the CSS definition doesn't
397 // need to be. 442 // need to be.
398 for (unsigned i = 0; i < style_sheet.length(); ++i) 443 for (unsigned i = 0; i < style_sheet.length(); ++i)
399 SerializeCSSRule(style_sheet.item(i)); 444 SerializeCSSRule(style_sheet.item(i));
400 445
401 if (css_start_time != 0) { 446 if (css_start_time != 0) {
402 is_serializing_css_ = false; 447 is_serializing_css_ = false;
403 DEFINE_STATIC_LOCAL(CustomCountHistogram, css_histogram, 448 DEFINE_STATIC_LOCAL(CustomCountHistogram, css_histogram,
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
466 const KURL& url) { 511 const KURL& url) {
467 if (delegate_.ShouldSkipResource(has_cache_control_no_store_header)) 512 if (delegate_.ShouldSkipResource(has_cache_control_no_store_header))
468 return; 513 return;
469 514
470 if (!data) { 515 if (!data) {
471 DLOG(ERROR) << "No data for resource " << url.GetString(); 516 DLOG(ERROR) << "No data for resource " << url.GetString();
472 return; 517 return;
473 } 518 }
474 519
475 resources_->push_back(SerializedResource(url, mime_type, std::move(data))); 520 resources_->push_back(SerializedResource(url, mime_type, std::move(data)));
476 resource_urls_.insert(url);
477 } 521 }
478 522
479 void FrameSerializer::AddImageToResources(ImageResourceContent* image, 523 void FrameSerializer::AddImageToResources(ImageResourceContent* image,
480 const KURL& url) { 524 const KURL& url) {
481 if (!image || !image->HasImage() || image->ErrorOccurred() || 525 if (!ShouldAddURL(url))
482 !ShouldAddURL(url))
483 return; 526 return;
527 resource_urls_.insert(url);
528 if (should_collect_problem_metric_)
529 total_image_count_++;
530 if (!image || !image->HasImage() || image->ErrorOccurred())
531 return;
532 if (should_collect_problem_metric_ && image->IsLoaded())
533 loaded_image_count_++;
484 534
485 TRACE_EVENT2("page-serialization", "FrameSerializer::addImageToResources", 535 TRACE_EVENT2("page-serialization", "FrameSerializer::addImageToResources",
486 "type", "image", "url", url.ElidedString().Utf8().data()); 536 "type", "image", "url", url.ElidedString().Utf8().data());
487 double image_start_time = MonotonicallyIncreasingTime(); 537 double image_start_time = MonotonicallyIncreasingTime();
488 538
489 RefPtr<const SharedBuffer> data = image->GetImage()->Data(); 539 RefPtr<const SharedBuffer> data = image->GetImage()->Data();
490 AddToResources(image->GetResponse().MimeType(), 540 AddToResources(image->GetResponse().MimeType(),
491 image->HasCacheControlNoStoreHeader() 541 image->HasCacheControlNoStoreHeader()
492 ? kHasCacheControlNoStoreHeader 542 ? kHasCacheControlNoStoreHeader
493 : kNoCacheControlNoStoreHeader, 543 : kNoCacheControlNoStoreHeader,
494 data, url); 544 data, url);
495 545
496 // If we're already reporting time for CSS serialization don't report it for 546 // If we're already reporting time for CSS serialization don't report it for
497 // this image to avoid reporting the same time twice. 547 // this image to avoid reporting the same time twice.
498 if (!is_serializing_css_) { 548 if (!is_serializing_css_) {
499 DEFINE_STATIC_LOCAL(CustomCountHistogram, image_histogram, 549 DEFINE_STATIC_LOCAL(CustomCountHistogram, image_histogram,
500 ("PageSerialization.SerializationTime.ImageElement", 0, 550 ("PageSerialization.SerializationTime.ImageElement", 0,
501 maxSerializationTimeUmaMicroseconds, 50)); 551 maxSerializationTimeUmaMicroseconds, 50));
502 image_histogram.Count(static_cast<int64_t>( 552 image_histogram.Count(static_cast<int64_t>(
503 (MonotonicallyIncreasingTime() - image_start_time) * 553 (MonotonicallyIncreasingTime() - image_start_time) *
504 secondsToMicroseconds)); 554 secondsToMicroseconds));
505 } 555 }
506 } 556 }
507 557
508 void FrameSerializer::AddFontToResources(FontResource* font) { 558 void FrameSerializer::AddFontToResources(FontResource* font) {
509 if (!font || !font->IsLoaded() || !font->ResourceBuffer() || 559 if (!font || !ShouldAddURL(font->Url()))
510 !ShouldAddURL(font->Url())) 560 return;
561 resource_urls_.insert(font->Url());
562 if (!font || !font->IsLoaded() || !font->ResourceBuffer())
511 return; 563 return;
512 564
513 RefPtr<const SharedBuffer> data(font->ResourceBuffer()); 565 RefPtr<const SharedBuffer> data(font->ResourceBuffer());
514 566
515 AddToResources(font->GetResponse().MimeType(), 567 AddToResources(font->GetResponse().MimeType(),
516 font->HasCacheControlNoStoreHeader() 568 font->HasCacheControlNoStoreHeader()
517 ? kHasCacheControlNoStoreHeader 569 ? kHasCacheControlNoStoreHeader
518 : kNoCacheControlNoStoreHeader, 570 : kNoCacheControlNoStoreHeader,
519 data, font->Url()); 571 data, font->Url());
520 } 572 }
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
579 emits_minus = ch == '-'; 631 emits_minus = ch == '-';
580 builder.Append(ch); 632 builder.Append(ch);
581 } 633 }
582 CString escaped_url = builder.ToString().Ascii(); 634 CString escaped_url = builder.ToString().Ascii();
583 return String::Format("saved from url=(%04d)%s", 635 return String::Format("saved from url=(%04d)%s",
584 static_cast<int>(escaped_url.length()), 636 static_cast<int>(escaped_url.length()),
585 escaped_url.data()); 637 escaped_url.data());
586 } 638 }
587 639
588 } // namespace blink 640 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/core/frame/FrameSerializer.h ('k') | third_party/WebKit/Source/platform/Histogram.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698