OLD | NEW |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "core/dom/DocumentStatisticsCollector.h" | 5 #include "core/dom/DocumentStatisticsCollector.h" |
6 | 6 |
7 #include "core/dom/Document.h" | 7 #include "core/dom/Document.h" |
8 #include "core/frame/FrameView.h" | 8 #include "core/frame/FrameView.h" |
9 #include "core/html/HTMLHeadElement.h" | 9 #include "core/html/HTMLHeadElement.h" |
10 #include "core/html/HTMLLinkElement.h" | 10 #include "core/html/HTMLLinkElement.h" |
11 #include "core/testing/DummyPageHolder.h" | 11 #include "core/testing/DummyPageHolder.h" |
12 #include "public/platform/WebDistillability.h" | 12 #include "public/platform/WebDistillability.h" |
13 #include "testing/gmock/include/gmock/gmock.h" | 13 #include "testing/gmock/include/gmock/gmock.h" |
14 #include "testing/gtest/include/gtest/gtest.h" | 14 #include "testing/gtest/include/gtest/gtest.h" |
15 #include "wtf/text/StringBuilder.h" | 15 #include "wtf/text/StringBuilder.h" |
16 #include <memory> | 16 #include <memory> |
17 | 17 |
18 namespace blink { | 18 namespace blink { |
19 | 19 |
20 // Saturate the length of a paragraph to save time. | 20 // Saturate the length of a paragraph to save time. |
21 const unsigned kTextContentLengthSaturation = 1000; | 21 const unsigned kTextContentLengthSaturation = 1000; |
22 | 22 |
23 // Filter out short P elements. The threshold is set to around 2 English sentenc
es. | 23 // Filter out short P elements. The threshold is set to around 2 English |
| 24 // sentences. |
24 const unsigned kParagraphLengthThreshold = 140; | 25 const unsigned kParagraphLengthThreshold = 140; |
25 | 26 |
26 class DocumentStatisticsCollectorTest : public ::testing::Test { | 27 class DocumentStatisticsCollectorTest : public ::testing::Test { |
27 protected: | 28 protected: |
28 void SetUp() override; | 29 void SetUp() override; |
29 | 30 |
30 void TearDown() override { ThreadState::current()->collectAllGarbage(); } | 31 void TearDown() override { ThreadState::current()->collectAllGarbage(); } |
31 | 32 |
32 Document& document() const { return m_dummyPageHolder->document(); } | 33 Document& document() const { return m_dummyPageHolder->document(); } |
33 | 34 |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
93 EXPECT_EQ(1u, features.textInputCount); | 94 EXPECT_EQ(1u, features.textInputCount); |
94 EXPECT_EQ(1u, features.passwordInputCount); | 95 EXPECT_EQ(1u, features.passwordInputCount); |
95 EXPECT_EQ(2u, features.pCount); | 96 EXPECT_EQ(2u, features.pCount); |
96 EXPECT_EQ(1u, features.preCount); | 97 EXPECT_EQ(1u, features.preCount); |
97 } | 98 } |
98 | 99 |
99 // This test checks score calculations are correct. | 100 // This test checks score calculations are correct. |
100 TEST_F(DocumentStatisticsCollectorTest, CountScore) { | 101 TEST_F(DocumentStatisticsCollectorTest, CountScore) { |
101 setHtmlInnerHTML( | 102 setHtmlInnerHTML( |
102 "<p class='menu' id='article'>1</p>" // textContentLength = 1 | 103 "<p class='menu' id='article'>1</p>" // textContentLength = 1 |
103 "<ul><li><p>12</p></li></ul>" // textContentLength = 2, skipped because u
nder li | 104 "<ul><li><p>12</p></li></ul>" // textContentLength = 2, skipped because |
104 "<p class='menu'>123</p>" // textContentLength = 3, skipped because unlik
elyCandidates | 105 // under li |
| 106 "<p class='menu'>123</p>" // textContentLength = 3, skipped because |
| 107 // unlikelyCandidates |
105 "<p>" | 108 "<p>" |
106 "12345678901234567890123456789012345678901234567890" | 109 "12345678901234567890123456789012345678901234567890" |
107 "12345678901234567890123456789012345678901234567890" | 110 "12345678901234567890123456789012345678901234567890" |
108 "12345678901234567890123456789012345678901234" | 111 "12345678901234567890123456789012345678901234" |
109 "</p>" // textContentLength = 144 | 112 "</p>" // textContentLength = 144 |
110 "<p style='display:none'>12345</p>" // textContentLength = 5, skipped bec
ause invisible | 113 "<p style='display:none'>12345</p>" // textContentLength = 5, skipped |
111 "<div style='display:none'><p>123456</p></div>" // textContentLength = 6,
skipped because invisible | 114 // because invisible |
112 "<div style='visibility:hidden'><p>1234567</p></div>" // textContentLengt
h = 7, skipped because invisible | 115 "<div style='display:none'><p>123456</p></div>" // textContentLength = 6, |
113 "<p style='opacity:0'>12345678</p>" // textContentLength = 8, skipped bec
ause invisible | 116 // skipped because |
| 117 // invisible |
| 118 "<div style='visibility:hidden'><p>1234567</p></div>" // textContentLengt
h |
| 119 // = 7, skipped |
| 120 // because |
| 121 // invisible |
| 122 "<p style='opacity:0'>12345678</p>" // textContentLength = 8, skipped |
| 123 // because invisible |
114 "<p><a href='#'>1234 </a>6 <b> 9</b></p>" // textContentLength = 9 | 124 "<p><a href='#'>1234 </a>6 <b> 9</b></p>" // textContentLength = 9 |
115 "<ul><li></li><p>123456789012</p></ul>" // textContentLength = 12 | 125 "<ul><li></li><p>123456789012</p></ul>" // textContentLength = 12 |
116 ); | 126 ); |
117 WebDistillabilityFeatures features = | 127 WebDistillabilityFeatures features = |
118 DocumentStatisticsCollector::collectStatistics(document()); | 128 DocumentStatisticsCollector::collectStatistics(document()); |
119 | 129 |
120 EXPECT_DOUBLE_EQ(features.mozScore, sqrt(144 - kParagraphLengthThreshold)); | 130 EXPECT_DOUBLE_EQ(features.mozScore, sqrt(144 - kParagraphLengthThreshold)); |
121 EXPECT_DOUBLE_EQ(features.mozScoreAllSqrt, | 131 EXPECT_DOUBLE_EQ(features.mozScoreAllSqrt, |
122 1 + sqrt(144) + sqrt(9) + sqrt(12)); | 132 1 + sqrt(144) + sqrt(9) + sqrt(12)); |
123 EXPECT_DOUBLE_EQ(features.mozScoreAllLinear, 1 + 144 + 9 + 12); | 133 EXPECT_DOUBLE_EQ(features.mozScoreAllLinear, 1 + 144 + 9 + 12); |
(...skipping 17 matching lines...) Expand all Loading... |
141 EXPECT_NEAR(features.mozScore, 6 * sqrt(kTextContentLengthSaturation - | 151 EXPECT_NEAR(features.mozScore, 6 * sqrt(kTextContentLengthSaturation - |
142 kParagraphLengthThreshold), | 152 kParagraphLengthThreshold), |
143 error); | 153 error); |
144 EXPECT_NEAR(features.mozScoreAllSqrt, 6 * sqrt(kTextContentLengthSaturation), | 154 EXPECT_NEAR(features.mozScoreAllSqrt, 6 * sqrt(kTextContentLengthSaturation), |
145 error); | 155 error); |
146 EXPECT_NEAR(features.mozScoreAllLinear, 6 * kTextContentLengthSaturation, | 156 EXPECT_NEAR(features.mozScoreAllLinear, 6 * kTextContentLengthSaturation, |
147 error); | 157 error); |
148 } | 158 } |
149 | 159 |
150 } // namespace blink | 160 } // namespace blink |
OLD | NEW |