Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(164)

Side by Side Diff: third_party/WebKit/Source/core/frame/FrameSerializer.cpp

Issue 2886943003: [Offline Pages] Adding missing image/CSS detection in FrameSerializer. (Closed)
Patch Set: more comments. Created 3 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2011 Google Inc. All rights reserved. 2 * Copyright (C) 2011 Google Inc. All rights reserved.
3 * 3 *
4 * Redistribution and use in source and binary forms, with or without 4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are 5 * modification, are permitted provided that the following conditions are
6 * met: 6 * met:
7 * 7 *
8 * * Redistributions of source code must retain the above copyright 8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above 10 * * Redistributions in binary form must reproduce the above
(...skipping 249 matching lines...) Expand 10 before | Expand all | Expand 10 after
260 260
261 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS 261 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS
262 // documents which leads to bugs like <https://crbug.com/251898>. Not being 262 // documents which leads to bugs like <https://crbug.com/251898>. Not being
263 // able to rewrite URLs inside CSS documents means that resources imported from 263 // able to rewrite URLs inside CSS documents means that resources imported from
264 // url(...) statements in CSS might not work when rewriting links for the 264 // url(...) statements in CSS might not work when rewriting links for the
265 // "Webpage, Complete" method of saving a page. It will take some work but it 265 // "Webpage, Complete" method of saving a page. It will take some work but it
266 // needs to be done if we want to continue to support non-MHTML saved pages. 266 // needs to be done if we want to continue to support non-MHTML saved pages.
267 267
268 FrameSerializer::FrameSerializer(Deque<SerializedResource>& resources, 268 FrameSerializer::FrameSerializer(Deque<SerializedResource>& resources,
269 Delegate& delegate) 269 Delegate& delegate)
270 : resources_(&resources), is_serializing_css_(false), delegate_(delegate) {} 270 : resources_(&resources),
271 is_serializing_css_(false),
272 delegate_(delegate),
273 total_image_count_(0),
274 loaded_image_count_(0),
275 total_css_count_(0),
276 loaded_css_count_(0),
277 should_collect_problem_metric_(false) {}
271 278
272 void FrameSerializer::SerializeFrame(const LocalFrame& frame) { 279 void FrameSerializer::SerializeFrame(const LocalFrame& frame) {
273 TRACE_EVENT0("page-serialization", "FrameSerializer::serializeFrame"); 280 TRACE_EVENT0("page-serialization", "FrameSerializer::serializeFrame");
274 DCHECK(frame.GetDocument()); 281 DCHECK(frame.GetDocument());
275 Document& document = *frame.GetDocument(); 282 Document& document = *frame.GetDocument();
276 KURL url = document.Url(); 283 KURL url = document.Url();
277 284
278 // If frame is an image document, add the image and don't continue 285 // If frame is an image document, add the image and don't continue
279 if (document.IsImageDocument()) { 286 if (document.IsImageDocument()) {
280 ImageDocument& image_document = ToImageDocument(document); 287 ImageDocument& image_document = ToImageDocument(document);
(...skipping 11 matching lines...) Expand all
292 String text = 299 String text =
293 SerializeNodes<EditingStrategy>(accumulator, document, kIncludeNode); 300 SerializeNodes<EditingStrategy>(accumulator, document, kIncludeNode);
294 301
295 CString frame_html = 302 CString frame_html =
296 document.Encoding().Encode(text, WTF::kEntitiesForUnencodables); 303 document.Encoding().Encode(text, WTF::kEntitiesForUnencodables);
297 resources_->push_back(SerializedResource( 304 resources_->push_back(SerializedResource(
298 url, document.SuggestedMIMEType(), 305 url, document.SuggestedMIMEType(),
299 SharedBuffer::Create(frame_html.data(), frame_html.length()))); 306 SharedBuffer::Create(frame_html.data(), frame_html.length())));
300 } 307 }
301 308
309 should_collect_problem_metric_ =
310 delegate_.ShouldCollectProblemMetric() && frame.IsMainFrame();
302 for (Node* node : serialized_nodes) { 311 for (Node* node : serialized_nodes) {
303 DCHECK(node); 312 DCHECK(node);
304 if (!node->IsElementNode()) 313 if (!node->IsElementNode())
305 continue; 314 continue;
306 315
307 Element& element = ToElement(*node); 316 Element& element = ToElement(*node);
308 // We have to process in-line style as it might contain some resources 317 // We have to process in-line style as it might contain some resources
309 // (typically background images). 318 // (typically background images).
310 if (element.IsStyledElement()) { 319 if (element.IsStyledElement()) {
311 RetrieveResourcesForProperties(element.InlineStyle(), document); 320 RetrieveResourcesForProperties(element.InlineStyle(), document);
(...skipping 18 matching lines...) Expand all
330 } 339 }
331 } else if (isHTMLLinkElement(element)) { 340 } else if (isHTMLLinkElement(element)) {
332 HTMLLinkElement& link_element = toHTMLLinkElement(element); 341 HTMLLinkElement& link_element = toHTMLLinkElement(element);
333 if (CSSStyleSheet* sheet = link_element.sheet()) { 342 if (CSSStyleSheet* sheet = link_element.sheet()) {
334 KURL url = document.CompleteURL( 343 KURL url = document.CompleteURL(
335 link_element.getAttribute(HTMLNames::hrefAttr)); 344 link_element.getAttribute(HTMLNames::hrefAttr));
336 SerializeCSSStyleSheet(*sheet, url); 345 SerializeCSSStyleSheet(*sheet, url);
337 } 346 }
338 } else if (isHTMLStyleElement(element)) { 347 } else if (isHTMLStyleElement(element)) {
339 HTMLStyleElement& style_element = toHTMLStyleElement(element); 348 HTMLStyleElement& style_element = toHTMLStyleElement(element);
340 if (CSSStyleSheet* sheet = style_element.sheet()) 349 CSSStyleSheet* sheet = style_element.sheet();
350 if (sheet)
341 SerializeCSSStyleSheet(*sheet, KURL()); 351 SerializeCSSStyleSheet(*sheet, KURL());
342 } 352 }
343 } 353 }
354 if (should_collect_problem_metric_) {
355 // Report detectors through UMA.
356 // We're having exact 21 buckets for percentage because we want to have 5%
357 // in each bucket to avoid potential spikes in the distribution.
358 DCHECK_LE(loaded_image_count_, total_image_count_);
359 DEFINE_STATIC_LOCAL(
360 CustomCountHistogram, image_histogram,
361 ("PageSerialization.ProblemDetection.LoadedImagePercentage", 1, 100,
362 21));
363 image_histogram.Count(
364 static_cast<int64_t>(loaded_image_count_ * 100 / total_image_count_));
365 UMA_HISTOGRAM_COUNTS_100(
366 "PageSerialization.ProblemDetection.TotalImageCount",
367 static_cast<int64_t>(total_image_count_));
368
369 DCHECK_LE(loaded_css_count_, total_css_count_);
370 DEFINE_STATIC_LOCAL(
371 CustomCountHistogram, css_histogram,
372 ("PageSerialization.ProblemDetection.LoadedCSSPercentage", 1, 100, 21));
373 css_histogram.Count(
374 static_cast<int64_t>(loaded_image_count_ * 100 / total_image_count_));
375 UMA_HISTOGRAM_COUNTS_100("PageSerialization.ProblemDetection.TotalCSSCount",
376 static_cast<int64_t>(total_css_count_));
377 should_collect_problem_metric_ = false;
378 }
344 } 379 }
345 380
346 void FrameSerializer::SerializeCSSStyleSheet(CSSStyleSheet& style_sheet, 381 void FrameSerializer::SerializeCSSStyleSheet(CSSStyleSheet& style_sheet,
347 const KURL& url) { 382 const KURL& url) {
348 // If the URL is invalid or if it is a data URL this means that this CSS is 383 // If the URL is invalid or if it is a data URL this means that this CSS is
349 // defined inline, respectively in a <style> tag or in the data URL itself. 384 // defined inline, respectively in a <style> tag or in the data URL itself.
350 bool is_inline_css = !url.IsValid() || url.ProtocolIsData(); 385 bool is_inline_css = !url.IsValid() || url.ProtocolIsData();
351 // If this CSS is not inline then it is identifiable by its URL. So just skip 386 // If this CSS is not inline then it is identifiable by its URL. So just skip
352 // it if it has already been analyzed before. 387 // it if it has already been analyzed before.
353 if (!is_inline_css && (resource_urls_.Contains(url) || 388 if (!is_inline_css && (resource_urls_.Contains(url) ||
354 delegate_.ShouldSkipResourceWithURL(url))) { 389 delegate_.ShouldSkipResourceWithURL(url))) {
355 return; 390 return;
356 } 391 }
392 if (should_collect_problem_metric_) {
Łukasz Anforowicz 2017/06/08 20:48:54 Should this say if (should_collect_problem_metric
romax 2017/06/08 23:30:46 Done.
393 total_css_count_++;
394 if (style_sheet.LoadCompleted())
395 loaded_css_count_++;
396 }
357 397
358 TRACE_EVENT2("page-serialization", "FrameSerializer::serializeCSSStyleSheet", 398 TRACE_EVENT2("page-serialization", "FrameSerializer::serializeCSSStyleSheet",
359 "type", "CSS", "url", url.ElidedString().Utf8().data()); 399 "type", "CSS", "url", url.ElidedString().Utf8().data());
360 // Only report UMA metric if this is not a reentrant CSS serialization call. 400 // Only report UMA metric if this is not a reentrant CSS serialization call.
361 double css_start_time = 0; 401 double css_start_time = 0;
362 if (!is_serializing_css_) { 402 if (!is_serializing_css_) {
363 is_serializing_css_ = true; 403 is_serializing_css_ = true;
364 css_start_time = MonotonicallyIncreasingTime(); 404 css_start_time = MonotonicallyIncreasingTime();
365 } 405 }
366 406
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after
471 DLOG(ERROR) << "No data for resource " << url.GetString(); 511 DLOG(ERROR) << "No data for resource " << url.GetString();
472 return; 512 return;
473 } 513 }
474 514
475 resources_->push_back(SerializedResource(url, mime_type, std::move(data))); 515 resources_->push_back(SerializedResource(url, mime_type, std::move(data)));
476 resource_urls_.insert(url); 516 resource_urls_.insert(url);
477 } 517 }
478 518
479 void FrameSerializer::AddImageToResources(ImageResourceContent* image, 519 void FrameSerializer::AddImageToResources(ImageResourceContent* image,
480 const KURL& url) { 520 const KURL& url) {
481 if (!image || !image->HasImage() || image->ErrorOccurred() || 521 if (!ShouldAddURL(url))
482 !ShouldAddURL(url))
483 return; 522 return;
523 if (should_collect_problem_metric_)
524 total_image_count_++;
525 if (!image || !image->HasImage() || image->ErrorOccurred())
526 return;
527 if (should_collect_problem_metric_ && image->IsLoaded())
528 loaded_image_count_++;
484 529
485 TRACE_EVENT2("page-serialization", "FrameSerializer::addImageToResources", 530 TRACE_EVENT2("page-serialization", "FrameSerializer::addImageToResources",
486 "type", "image", "url", url.ElidedString().Utf8().data()); 531 "type", "image", "url", url.ElidedString().Utf8().data());
487 double image_start_time = MonotonicallyIncreasingTime(); 532 double image_start_time = MonotonicallyIncreasingTime();
488 533
489 RefPtr<const SharedBuffer> data = image->GetImage()->Data(); 534 RefPtr<const SharedBuffer> data = image->GetImage()->Data();
490 AddToResources(image->GetResponse().MimeType(), 535 AddToResources(image->GetResponse().MimeType(),
491 image->HasCacheControlNoStoreHeader() 536 image->HasCacheControlNoStoreHeader()
492 ? kHasCacheControlNoStoreHeader 537 ? kHasCacheControlNoStoreHeader
493 : kNoCacheControlNoStoreHeader, 538 : kNoCacheControlNoStoreHeader,
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
579 emits_minus = ch == '-'; 624 emits_minus = ch == '-';
580 builder.Append(ch); 625 builder.Append(ch);
581 } 626 }
582 CString escaped_url = builder.ToString().Ascii(); 627 CString escaped_url = builder.ToString().Ascii();
583 return String::Format("saved from url=(%04d)%s", 628 return String::Format("saved from url=(%04d)%s",
584 static_cast<int>(escaped_url.length()), 629 static_cast<int>(escaped_url.length()),
585 escaped_url.data()); 630 escaped_url.data());
586 } 631 }
587 632
588 } // namespace blink 633 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/core/frame/FrameSerializer.h ('k') | third_party/WebKit/Source/web/WebFrameSerializer.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698