OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "DocumentStatisticsCollector.h" | 5 #include "DocumentStatisticsCollector.h" |
6 | 6 |
7 #include "core/HTMLNames.h" | 7 #include "core/HTMLNames.h" |
8 #include "core/InputTypeNames.h" | 8 #include "core/InputTypeNames.h" |
9 #include "core/dom/ElementTraversal.h" | 9 #include "core/dom/ElementTraversal.h" |
10 #include "core/dom/NodeComputedStyle.h" | 10 #include "core/dom/NodeComputedStyle.h" |
(...skipping 10 matching lines...) Expand all Loading... |
21 | 21 |
22 namespace blink { | 22 namespace blink { |
23 | 23 |
24 using namespace HTMLNames; | 24 using namespace HTMLNames; |
25 | 25 |
26 namespace { | 26 namespace { |
27 | 27 |
28 // Saturate the length of a paragraph to save time. | 28 // Saturate the length of a paragraph to save time. |
29 const int kTextContentLengthSaturation = 1000; | 29 const int kTextContentLengthSaturation = 1000; |
30 | 30 |
31 // Filter out short P elements. The threshold is set to around 2 English sentenc
es. | 31 // Filter out short P elements. The threshold is set to around 2 English |
| 32 // sentences. |
32 const unsigned kParagraphLengthThreshold = 140; | 33 const unsigned kParagraphLengthThreshold = 140; |
33 | 34 |
34 // Saturate the scores to save time. The max is the score of 6 long paragraphs. | 35 // Saturate the scores to save time. The max is the score of 6 long paragraphs. |
35 const double kMozScoreSaturation = | 36 // 6 * sqrt(kTextContentLengthSaturation - kParagraphLengthThreshold) |
36 175.954539583; // 6 * sqrt(kTextContentLengthSaturation - kParagraphLengthT
hreshold) | 37 const double kMozScoreSaturation = 175.954539583; |
37 const double kMozScoreAllSqrtSaturation = | 38 // 6 * sqrt(kTextContentLengthSaturation); |
38 189.73665961; // 6 * sqrt(kTextContentLengthSaturation); | 39 const double kMozScoreAllSqrtSaturation = 189.73665961; |
39 const double kMozScoreAllLinearSaturation = 6 * kTextContentLengthSaturation; | 40 const double kMozScoreAllLinearSaturation = 6 * kTextContentLengthSaturation; |
40 | 41 |
41 unsigned textContentLengthSaturated(const Element& root) { | 42 unsigned textContentLengthSaturated(const Element& root) { |
42 unsigned length = 0; | 43 unsigned length = 0; |
43 // This skips shadow DOM intentionally, to match the JavaScript implementation
. | 44 // This skips shadow DOM intentionally, to match the JavaScript |
44 // We would like to use the same statistics extracted by the JavaScript implem
entation | 45 // implementation. We would like to use the same statistics extracted by the |
45 // on iOS, and JavaScript cannot peek deeply into shadow DOM except on modern
Chrome | 46 // JavaScript implementation on iOS, and JavaScript cannot peek deeply into |
46 // versions. | 47 // shadow DOM except on modern Chrome versions. |
47 // Given shadow DOM rarely appears in <P> elements in long-form articles, the
overall | 48 // Given shadow DOM rarely appears in <P> elements in long-form articles, the |
48 // accuracy should not be largely affected. | 49 // overall accuracy should not be largely affected. |
49 for (Node& node : NodeTraversal::inclusiveDescendantsOf(root)) { | 50 for (Node& node : NodeTraversal::inclusiveDescendantsOf(root)) { |
50 if (!node.isTextNode()) { | 51 if (!node.isTextNode()) { |
51 continue; | 52 continue; |
52 } | 53 } |
53 length += toText(node).length(); | 54 length += toText(node).length(); |
54 if (length > kTextContentLengthSaturation) { | 55 if (length > kTextContentLengthSaturation) { |
55 return kTextContentLengthSaturation; | 56 return kTextContentLengthSaturation; |
56 } | 57 } |
57 } | 58 } |
58 return length; | 59 return length; |
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
201 HTMLElement* body = document.body(); | 202 HTMLElement* body = document.body(); |
202 HTMLElement* head = document.head(); | 203 HTMLElement* head = document.head(); |
203 | 204 |
204 if (!body || !head) | 205 if (!body || !head) |
205 return features; | 206 return features; |
206 | 207 |
207 features.isMobileFriendly = isMobileFriendly(document); | 208 features.isMobileFriendly = isMobileFriendly(document); |
208 | 209 |
209 double startTime = monotonicallyIncreasingTime(); | 210 double startTime = monotonicallyIncreasingTime(); |
210 | 211 |
211 // This should be cheap since collectStatistics is only called right after lay
out. | 212 // This should be cheap since collectStatistics is only called right after |
| 213 // layout. |
212 document.updateStyleAndLayoutTree(); | 214 document.updateStyleAndLayoutTree(); |
213 | 215 |
214 // Traverse the DOM tree and collect statistics. | 216 // Traverse the DOM tree and collect statistics. |
215 collectFeatures(*body, features); | 217 collectFeatures(*body, features); |
216 features.openGraph = hasOpenGraphArticle(*head); | 218 features.openGraph = hasOpenGraphArticle(*head); |
217 | 219 |
218 double elapsedTime = monotonicallyIncreasingTime() - startTime; | 220 double elapsedTime = monotonicallyIncreasingTime() - startTime; |
219 | 221 |
220 DEFINE_STATIC_LOCAL(CustomCountHistogram, distillabilityHistogram, | 222 DEFINE_STATIC_LOCAL(CustomCountHistogram, distillabilityHistogram, |
221 ("WebCore.DistillabilityUs", 1, 1000000, 50)); | 223 ("WebCore.DistillabilityUs", 1, 1000000, 50)); |
222 distillabilityHistogram.count(static_cast<int>(1e6 * elapsedTime)); | 224 distillabilityHistogram.count(static_cast<int>(1e6 * elapsedTime)); |
223 | 225 |
224 return features; | 226 return features; |
225 } | 227 } |
226 | 228 |
227 } // namespace blink | 229 } // namespace blink |
OLD | NEW |