OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright (C) 2011 Google Inc. All rights reserved. | 2 * Copyright (C) 2011 Google Inc. All rights reserved. |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions are | 5 * modification, are permitted provided that the following conditions are |
6 * met: | 6 * met: |
7 * | 7 * |
8 * * Redistributions of source code must retain the above copyright | 8 * * Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * * Redistributions in binary form must reproduce the above | 10 * * Redistributions in binary form must reproduce the above |
(...skipping 249 matching lines...) | |
260 | 260 |
261 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS | 261 // TODO(tiger): Right now there is no support for rewriting URLs inside CSS |
262 // documents which leads to bugs like <https://crbug.com/251898>. Not being | 262 // documents which leads to bugs like <https://crbug.com/251898>. Not being |
263 // able to rewrite URLs inside CSS documents means that resources imported from | 263 // able to rewrite URLs inside CSS documents means that resources imported from |
264 // url(...) statements in CSS might not work when rewriting links for the | 264 // url(...) statements in CSS might not work when rewriting links for the |
265 // "Webpage, Complete" method of saving a page. It will take some work but it | 265 // "Webpage, Complete" method of saving a page. It will take some work but it |
266 // needs to be done if we want to continue to support non-MHTML saved pages. | 266 // needs to be done if we want to continue to support non-MHTML saved pages. |
267 | 267 |
268 FrameSerializer::FrameSerializer(Deque<SerializedResource>& resources, | 268 FrameSerializer::FrameSerializer(Deque<SerializedResource>& resources, |
269 Delegate& delegate) | 269 Delegate& delegate) |
270 : resources_(&resources), is_serializing_css_(false), delegate_(delegate) {} | 270 : resources_(&resources), |
271 is_serializing_css_(false), | |
272 delegate_(delegate), | |
273 total_image_count_(0), | |
274 loaded_image_count_(0), | |
275 total_css_count_(0), | |
276 loaded_css_count_(0), | |
277 should_collect_problem_metric_(false) {} | |
271 | 278 |
272 void FrameSerializer::SerializeFrame(const LocalFrame& frame) { | 279 void FrameSerializer::SerializeFrame(const LocalFrame& frame) { |
273 TRACE_EVENT0("page-serialization", "FrameSerializer::serializeFrame"); | 280 TRACE_EVENT0("page-serialization", "FrameSerializer::serializeFrame"); |
274 DCHECK(frame.GetDocument()); | 281 DCHECK(frame.GetDocument()); |
275 Document& document = *frame.GetDocument(); | 282 Document& document = *frame.GetDocument(); |
276 KURL url = document.Url(); | 283 KURL url = document.Url(); |
277 | 284 |
278 // If frame is an image document, add the image and don't continue | 285 // If frame is an image document, add the image and don't continue |
279 if (document.IsImageDocument()) { | 286 if (document.IsImageDocument()) { |
280 ImageDocument& image_document = ToImageDocument(document); | 287 ImageDocument& image_document = ToImageDocument(document); |
(...skipping 11 matching lines...) | |
292 String text = | 299 String text = |
293 SerializeNodes<EditingStrategy>(accumulator, document, kIncludeNode); | 300 SerializeNodes<EditingStrategy>(accumulator, document, kIncludeNode); |
294 | 301 |
295 CString frame_html = | 302 CString frame_html = |
296 document.Encoding().Encode(text, WTF::kEntitiesForUnencodables); | 303 document.Encoding().Encode(text, WTF::kEntitiesForUnencodables); |
297 resources_->push_back(SerializedResource( | 304 resources_->push_back(SerializedResource( |
298 url, document.SuggestedMIMEType(), | 305 url, document.SuggestedMIMEType(), |
299 SharedBuffer::Create(frame_html.data(), frame_html.length()))); | 306 SharedBuffer::Create(frame_html.data(), frame_html.length()))); |
300 } | 307 } |
301 | 308 |
309 should_collect_problem_metric_ = | |
310 delegate_.ShouldCollectProblemMetric() && frame.IsMainFrame(); | |
302 for (Node* node : serialized_nodes) { | 311 for (Node* node : serialized_nodes) { |
303 DCHECK(node); | 312 DCHECK(node); |
304 if (!node->IsElementNode()) | 313 if (!node->IsElementNode()) |
305 continue; | 314 continue; |
306 | 315 |
307 Element& element = ToElement(*node); | 316 Element& element = ToElement(*node); |
308 // We have to process in-line style as it might contain some resources | 317 // We have to process in-line style as it might contain some resources |
309 // (typically background images). | 318 // (typically background images). |
310 if (element.IsStyledElement()) { | 319 if (element.IsStyledElement()) { |
311 RetrieveResourcesForProperties(element.InlineStyle(), document); | 320 RetrieveResourcesForProperties(element.InlineStyle(), document); |
(...skipping 18 matching lines...) | |
330 } | 339 } |
331 } else if (isHTMLLinkElement(element)) { | 340 } else if (isHTMLLinkElement(element)) { |
332 HTMLLinkElement& link_element = toHTMLLinkElement(element); | 341 HTMLLinkElement& link_element = toHTMLLinkElement(element); |
333 if (CSSStyleSheet* sheet = link_element.sheet()) { | 342 if (CSSStyleSheet* sheet = link_element.sheet()) { |
334 KURL url = document.CompleteURL( | 343 KURL url = document.CompleteURL( |
335 link_element.getAttribute(HTMLNames::hrefAttr)); | 344 link_element.getAttribute(HTMLNames::hrefAttr)); |
336 SerializeCSSStyleSheet(*sheet, url); | 345 SerializeCSSStyleSheet(*sheet, url); |
337 } | 346 } |
338 } else if (isHTMLStyleElement(element)) { | 347 } else if (isHTMLStyleElement(element)) { |
339 HTMLStyleElement& style_element = toHTMLStyleElement(element); | 348 HTMLStyleElement& style_element = toHTMLStyleElement(element); |
340 if (CSSStyleSheet* sheet = style_element.sheet()) | 349 CSSStyleSheet* sheet = style_element.sheet(); |
350 if (sheet) | |
341 SerializeCSSStyleSheet(*sheet, KURL()); | 351 SerializeCSSStyleSheet(*sheet, KURL()); |
342 } | 352 } |
343 } | 353 } |
354 if (should_collect_problem_metric_) { | |
355 // Report detectors through UMA. | |
356 // We use exactly 21 buckets for the percentage so that each bucket spans | |
357 // 5%, which avoids potential spikes in the distribution. | |
358 DCHECK_LE(loaded_image_count_, total_image_count_); | |
359 DEFINE_STATIC_LOCAL( | |
360 CustomCountHistogram, image_histogram, | |
361 ("PageSerialization.ProblemDetection.LoadedImagePercentage", 1, 100, | |
362 21)); | |
363 image_histogram.Count( | |
364 static_cast<int64_t>(loaded_image_count_ * 100 / total_image_count_)); | |
365 UMA_HISTOGRAM_COUNTS_100( | |
366 "PageSerialization.ProblemDetection.TotalImageCount", | |
367 static_cast<int64_t>(total_image_count_)); | |
368 | |
369 DCHECK_LE(loaded_css_count_, total_css_count_); | |
370 DEFINE_STATIC_LOCAL( | |
371 CustomCountHistogram, css_histogram, | |
372 ("PageSerialization.ProblemDetection.LoadedCSSPercentage", 1, 100, 21)); | |
373 css_histogram.Count( | |
374 static_cast<int64_t>(loaded_image_count_ * 100 / total_image_count_)); | |
375 UMA_HISTOGRAM_COUNTS_100("PageSerialization.ProblemDetection.TotalCSSCount", | |
376 static_cast<int64_t>(total_css_count_)); | |
377 should_collect_problem_metric_ = false; | |
378 } | |
344 } | 379 } |
345 | 380 |
346 void FrameSerializer::SerializeCSSStyleSheet(CSSStyleSheet& style_sheet, | 381 void FrameSerializer::SerializeCSSStyleSheet(CSSStyleSheet& style_sheet, |
347 const KURL& url) { | 382 const KURL& url) { |
348 // If the URL is invalid or if it is a data URL this means that this CSS is | 383 // If the URL is invalid or if it is a data URL this means that this CSS is |
349 // defined inline, respectively in a <style> tag or in the data URL itself. | 384 // defined inline, respectively in a <style> tag or in the data URL itself. |
350 bool is_inline_css = !url.IsValid() || url.ProtocolIsData(); | 385 bool is_inline_css = !url.IsValid() || url.ProtocolIsData(); |
386 if (should_collect_problem_metric_) | |
387 total_css_count_++; | |
351 // If this CSS is not inline then it is identifiable by its URL. So just skip | 388 // If this CSS is not inline then it is identifiable by its URL. So just skip |
352 // it if it has already been analyzed before. | 389 // it if it has already been analyzed before. |
353 if (!is_inline_css && (resource_urls_.Contains(url) || | 390 if (!is_inline_css && (resource_urls_.Contains(url) || |
354 delegate_.ShouldSkipResourceWithURL(url))) { | 391 delegate_.ShouldSkipResourceWithURL(url))) { |
355 return; | 392 return; |
356 } | 393 } |
394 if (should_collect_problem_metric_ && style_sheet.LoadCompleted()) | |
395 loaded_css_count_++; | |
357 | 396 |
358 TRACE_EVENT2("page-serialization", "FrameSerializer::serializeCSSStyleSheet", | 397 TRACE_EVENT2("page-serialization", "FrameSerializer::serializeCSSStyleSheet", |
359 "type", "CSS", "url", url.ElidedString().Utf8().data()); | 398 "type", "CSS", "url", url.ElidedString().Utf8().data()); |
360 // Only report UMA metric if this is not a reentrant CSS serialization call. | 399 // Only report UMA metric if this is not a reentrant CSS serialization call. |
361 double css_start_time = 0; | 400 double css_start_time = 0; |
362 if (!is_serializing_css_) { | 401 if (!is_serializing_css_) { |
363 is_serializing_css_ = true; | 402 is_serializing_css_ = true; |
364 css_start_time = MonotonicallyIncreasingTime(); | 403 css_start_time = MonotonicallyIncreasingTime(); |
365 } | 404 } |
366 | 405 |
(...skipping 104 matching lines...) | |
471 DLOG(ERROR) << "No data for resource " << url.GetString(); | 510 DLOG(ERROR) << "No data for resource " << url.GetString(); |
472 return; | 511 return; |
473 } | 512 } |
474 | 513 |
475 resources_->push_back(SerializedResource(url, mime_type, std::move(data))); | 514 resources_->push_back(SerializedResource(url, mime_type, std::move(data))); |
476 resource_urls_.insert(url); | 515 resource_urls_.insert(url); |
477 } | 516 } |
478 | 517 |
479 void FrameSerializer::AddImageToResources(ImageResourceContent* image, | 518 void FrameSerializer::AddImageToResources(ImageResourceContent* image, |
480 const KURL& url) { | 519 const KURL& url) { |
520 if (should_collect_problem_metric_) | |
521 total_image_count_++; | |
Łukasz Anforowicz
2017/06/08 16:08:45
I wonder how the data will be calculated in case o…
romax
2017/06/08 20:02:07
Sorry for lacking knowledge about processing HTMLs…
| |
481 if (!image || !image->HasImage() || image->ErrorOccurred() || | 522 if (!image || !image->HasImage() || image->ErrorOccurred() || |
482 !ShouldAddURL(url)) | 523 !ShouldAddURL(url)) |
483 return; | 524 return; |
525 if (should_collect_problem_metric_ && image->IsLoaded()) | |
526 loaded_image_count_++; | |
484 | 527 |
485 TRACE_EVENT2("page-serialization", "FrameSerializer::addImageToResources", | 528 TRACE_EVENT2("page-serialization", "FrameSerializer::addImageToResources", |
486 "type", "image", "url", url.ElidedString().Utf8().data()); | 529 "type", "image", "url", url.ElidedString().Utf8().data()); |
487 double image_start_time = MonotonicallyIncreasingTime(); | 530 double image_start_time = MonotonicallyIncreasingTime(); |
488 | 531 |
489 RefPtr<const SharedBuffer> data = image->GetImage()->Data(); | 532 RefPtr<const SharedBuffer> data = image->GetImage()->Data(); |
490 AddToResources(image->GetResponse().MimeType(), | 533 AddToResources(image->GetResponse().MimeType(), |
491 image->HasCacheControlNoStoreHeader() | 534 image->HasCacheControlNoStoreHeader() |
492 ? kHasCacheControlNoStoreHeader | 535 ? kHasCacheControlNoStoreHeader |
493 : kNoCacheControlNoStoreHeader, | 536 : kNoCacheControlNoStoreHeader, |
(...skipping 85 matching lines...) | |
579 emits_minus = ch == '-'; | 622 emits_minus = ch == '-'; |
580 builder.Append(ch); | 623 builder.Append(ch); |
581 } | 624 } |
582 CString escaped_url = builder.ToString().Ascii(); | 625 CString escaped_url = builder.ToString().Ascii(); |
583 return String::Format("saved from url=(%04d)%s", | 626 return String::Format("saved from url=(%04d)%s", |
584 static_cast<int>(escaped_url.length()), | 627 static_cast<int>(escaped_url.length()), |
585 escaped_url.data()); | 628 escaped_url.data()); |
586 } | 629 } |
587 | 630 |
588 } // namespace blink | 631 } // namespace blink |
OLD | NEW |