| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "DocumentStatisticsCollector.h" | 5 #include "DocumentStatisticsCollector.h" |
| 6 | 6 |
| 7 #include "core/HTMLNames.h" | 7 #include "core/HTMLNames.h" |
| 8 #include "core/InputTypeNames.h" | 8 #include "core/InputTypeNames.h" |
| 9 #include "core/dom/ElementTraversal.h" | 9 #include "core/dom/ElementTraversal.h" |
| 10 #include "core/dom/NodeComputedStyle.h" | 10 #include "core/dom/NodeComputedStyle.h" |
| (...skipping 10 matching lines...) Expand all Loading... |
| 21 | 21 |
| 22 namespace blink { | 22 namespace blink { |
| 23 | 23 |
| 24 using namespace HTMLNames; | 24 using namespace HTMLNames; |
| 25 | 25 |
| 26 namespace { | 26 namespace { |
| 27 | 27 |
| 28 // Saturate the length of a paragraph to save time. | 28 // Saturate the length of a paragraph to save time. |
| 29 const int kTextContentLengthSaturation = 1000; | 29 const int kTextContentLengthSaturation = 1000; |
| 30 | 30 |
| 31 // Filter out short P elements. The threshold is set to around 2 English sentences. | 31 // Filter out short P elements. The threshold is set to around 2 English |
| 32 // sentences. |
| 32 const unsigned kParagraphLengthThreshold = 140; | 33 const unsigned kParagraphLengthThreshold = 140; |
| 33 | 34 |
| 34 // Saturate the scores to save time. The max is the score of 6 long paragraphs. | 35 // Saturate the scores to save time. The max is the score of 6 long paragraphs. |
| 35 const double kMozScoreSaturation = | 36 // 6 * sqrt(kTextContentLengthSaturation - kParagraphLengthThreshold) |
| 36 175.954539583; // 6 * sqrt(kTextContentLengthSaturation - kParagraphLengthThreshold) | 37 const double kMozScoreSaturation = 175.954539583; |
| 37 const double kMozScoreAllSqrtSaturation = | 38 // 6 * sqrt(kTextContentLengthSaturation); |
| 38 189.73665961; // 6 * sqrt(kTextContentLengthSaturation); | 39 const double kMozScoreAllSqrtSaturation = 189.73665961; |
| 39 const double kMozScoreAllLinearSaturation = 6 * kTextContentLengthSaturation; | 40 const double kMozScoreAllLinearSaturation = 6 * kTextContentLengthSaturation; |
| 40 | 41 |
| 41 unsigned textContentLengthSaturated(const Element& root) { | 42 unsigned textContentLengthSaturated(const Element& root) { |
| 42 unsigned length = 0; | 43 unsigned length = 0; |
| 43 // This skips shadow DOM intentionally, to match the JavaScript implementation. | 44 // This skips shadow DOM intentionally, to match the JavaScript |
| 44 // We would like to use the same statistics extracted by the JavaScript implementation | 45 // implementation. We would like to use the same statistics extracted by the |
| 45 // on iOS, and JavaScript cannot peek deeply into shadow DOM except on modern Chrome | 46 // JavaScript implementation on iOS, and JavaScript cannot peek deeply into |
| 46 // versions. | 47 // shadow DOM except on modern Chrome versions. |
| 47 // Given shadow DOM rarely appears in <P> elements in long-form articles, the overall | 48 // Given shadow DOM rarely appears in <P> elements in long-form articles, the |
| 48 // accuracy should not be largely affected. | 49 // overall accuracy should not be largely affected. |
| 49 for (Node& node : NodeTraversal::inclusiveDescendantsOf(root)) { | 50 for (Node& node : NodeTraversal::inclusiveDescendantsOf(root)) { |
| 50 if (!node.isTextNode()) { | 51 if (!node.isTextNode()) { |
| 51 continue; | 52 continue; |
| 52 } | 53 } |
| 53 length += toText(node).length(); | 54 length += toText(node).length(); |
| 54 if (length > kTextContentLengthSaturation) { | 55 if (length > kTextContentLengthSaturation) { |
| 55 return kTextContentLengthSaturation; | 56 return kTextContentLengthSaturation; |
| 56 } | 57 } |
| 57 } | 58 } |
| 58 return length; | 59 return length; |
| (...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 201 HTMLElement* body = document.body(); | 202 HTMLElement* body = document.body(); |
| 202 HTMLElement* head = document.head(); | 203 HTMLElement* head = document.head(); |
| 203 | 204 |
| 204 if (!body || !head) | 205 if (!body || !head) |
| 205 return features; | 206 return features; |
| 206 | 207 |
| 207 features.isMobileFriendly = isMobileFriendly(document); | 208 features.isMobileFriendly = isMobileFriendly(document); |
| 208 | 209 |
| 209 double startTime = monotonicallyIncreasingTime(); | 210 double startTime = monotonicallyIncreasingTime(); |
| 210 | 211 |
| 211 // This should be cheap since collectStatistics is only called right after layout. | 212 // This should be cheap since collectStatistics is only called right after |
| 213 // layout. |
| 212 document.updateStyleAndLayoutTree(); | 214 document.updateStyleAndLayoutTree(); |
| 213 | 215 |
| 214 // Traverse the DOM tree and collect statistics. | 216 // Traverse the DOM tree and collect statistics. |
| 215 collectFeatures(*body, features); | 217 collectFeatures(*body, features); |
| 216 features.openGraph = hasOpenGraphArticle(*head); | 218 features.openGraph = hasOpenGraphArticle(*head); |
| 217 | 219 |
| 218 double elapsedTime = monotonicallyIncreasingTime() - startTime; | 220 double elapsedTime = monotonicallyIncreasingTime() - startTime; |
| 219 | 221 |
| 220 DEFINE_STATIC_LOCAL(CustomCountHistogram, distillabilityHistogram, | 222 DEFINE_STATIC_LOCAL(CustomCountHistogram, distillabilityHistogram, |
| 221 ("WebCore.DistillabilityUs", 1, 1000000, 50)); | 223 ("WebCore.DistillabilityUs", 1, 1000000, 50)); |
| 222 distillabilityHistogram.count(static_cast<int>(1e6 * elapsedTime)); | 224 distillabilityHistogram.count(static_cast<int>(1e6 * elapsedTime)); |
| 223 | 225 |
| 224 return features; | 226 return features; |
| 225 } | 227 } |
| 226 | 228 |
| 227 } // namespace blink | 229 } // namespace blink |
| OLD | NEW |