OLD | NEW |
---|---|
(Empty) | |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
#include "config.h"
#include "core/dom/DocumentStatisticsCollector.h"

#include "core/dom/Document.h"
#include "core/dom/DocumentVisibilityObserver.h"
#include "core/frame/FrameView.h"
#include "core/html/HTMLHeadElement.h"
#include "core/html/HTMLLinkElement.h"
#include "core/testing/DummyPageHolder.h"
#include "public/platform/WebDistillability.h"
#include "wtf/text/StringBuilder.h"

#include <cmath>
#include <gmock/gmock.h>
#include <gtest/gtest.h>
19 | |
20 namespace blink { | |
21 | |
// Saturate the length of a paragraph to save time. The saturation test below
// uses this value as the per-paragraph length in its expected scores.
// NOTE(review): presumably mirrors a constant of the same name in the
// collector's implementation -- confirm and keep the two in sync.
const unsigned kTextContentLengthSaturation = 1000;

// Filter out short P elements. The threshold is set to around 2 English
// sentences. The score tests below subtract this value from a paragraph's
// text length when computing the expected mozScore.
const unsigned kParagraphLengthThreshold = 140;
27 | |
28 class DocumentStatisticsCollectorTest : public ::testing::Test { | |
29 protected: | |
30 void SetUp() override; | |
31 | |
32 #if ENABLE(OILPAN) | |
33 void TearDown() override | |
34 { | |
35 Heap::collectAllGarbage(); | |
36 } | |
37 #endif | |
38 | |
39 Document& document() const { return m_dummyPageHolder->document(); } | |
40 | |
41 void setHtmlInnerHTML(const String&); | |
42 | |
43 private: | |
44 OwnPtr<DummyPageHolder> m_dummyPageHolder; | |
45 }; | |
46 | |
47 void DocumentStatisticsCollectorTest::SetUp() | |
48 { | |
49 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); | |
50 } | |
51 | |
52 void DocumentStatisticsCollectorTest::setHtmlInnerHTML(const String& htmlContent ) | |
53 { | |
54 document().documentElement()->setInnerHTML((htmlContent), ASSERT_NO_EXCEPTIO N); | |
55 document().view()->updateAllLifecyclePhases(); | |
esprehn
2015/11/05 01:54:17
you can remove this if you do that.
wychen
2015/11/05 02:00:00
Right! I forgot to update this one.
| |
56 } | |
57 | |
58 // This test checks open graph articles can be recognized. | |
59 TEST_F(DocumentStatisticsCollectorTest, HasOpenGraphArticle) | |
60 { | |
61 setHtmlInnerHTML( | |
62 "<head>" | |
63 // Note the case-insensitive matching of the word "article". | |
64 " <meta property='og:type' content='arTiclE' />" | |
65 "</head>" | |
66 ); | |
67 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta tistics(document()); | |
68 | |
69 EXPECT_TRUE(features.openGraph); | |
70 } | |
71 | |
72 // This test checks non-existence of open graph articles can be recognized. | |
73 TEST_F(DocumentStatisticsCollectorTest, NoOpenGraphArticle) | |
74 { | |
75 setHtmlInnerHTML( | |
76 "<head>" | |
77 " <meta property='og:type' content='movie' />" | |
78 "</head>" | |
79 ); | |
80 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta tistics(document()); | |
81 | |
82 EXPECT_FALSE(features.openGraph); | |
83 } | |
84 | |
85 // This test checks element counts are correct. | |
86 TEST_F(DocumentStatisticsCollectorTest, CountElements) | |
87 { | |
88 setHtmlInnerHTML( | |
89 "<form>" | |
90 " <input type='text'>" | |
91 " <input type='password'>" | |
92 "</form>" | |
93 "<pre></pre>" | |
94 "<p><a> </a></p>" | |
95 "<ul><li><p><a> </a></p></li></ul>" | |
96 ); | |
97 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta tistics(document()); | |
98 | |
99 EXPECT_FALSE(features.openGraph); | |
100 | |
101 EXPECT_EQ(10u, features.elementCount); | |
102 EXPECT_EQ(2u, features.anchorCount); | |
103 EXPECT_EQ(1u, features.formCount); | |
104 EXPECT_EQ(1u, features.textInputCount); | |
105 EXPECT_EQ(1u, features.passwordInputCount); | |
106 EXPECT_EQ(2u, features.pCount); | |
107 EXPECT_EQ(1u, features.preCount); | |
108 } | |
109 | |
110 // This test checks score calculations are correct. | |
111 TEST_F(DocumentStatisticsCollectorTest, CountScore) | |
112 { | |
113 setHtmlInnerHTML( | |
114 "<p class='menu' id='article'>1</p>" // textContentLength = 1 | |
115 "<ul><li><p>12</p></li></ul>" // textContentLength = 2, skipped because under li | |
116 "<p class='menu'>123</p>" // textContentLength = 3, skipped because unli kelyCandidates | |
117 "<p>" | |
118 "12345678901234567890123456789012345678901234567890" | |
119 "12345678901234567890123456789012345678901234567890" | |
120 "12345678901234567890123456789012345678901234" | |
121 "</p>" // textContentLength = 144 | |
122 "<p style='display:none'>12345</p>" // textContentLength = 5, skipped be cause invisible | |
123 "<div style='display:none'><p>123456</p></div>" // textContentLength = 6 , skipped because invisible | |
124 "<div style='visibility:hidden'><p>1234567</p></div>" // textContentLeng th = 7, skipped because invisible | |
125 "<p style='opacity:0'>12345678</p>" // textContentLength = 8, skipped be cause invisible | |
126 "<p><a href='#'>1234 </a>6 <b> 9</b></p>" // textContentLength = 9 | |
127 ); | |
128 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta tistics(document()); | |
129 | |
130 EXPECT_DOUBLE_EQ(features.mozScore, sqrt(144 - kParagraphLengthThreshold)); | |
131 EXPECT_DOUBLE_EQ(features.mozScoreAllSqrt, 1 + sqrt(144) + sqrt(9)); | |
132 EXPECT_DOUBLE_EQ(features.mozScoreAllLinear, 1 + 144 + 9); | |
133 } | |
134 | |
135 // This test checks score calculations are correct. | |
136 TEST_F(DocumentStatisticsCollectorTest, CountScoreSaturation) | |
137 { | |
138 StringBuilder html; | |
139 for (int i = 0; i < 10; i++) { | |
140 html.append("<p>"); | |
141 for (int j = 0; j < 1000; j++) { | |
142 html.append("0123456789"); | |
143 } | |
144 html.append("</p>"); | |
145 } | |
146 setHtmlInnerHTML( | |
147 html.toString() | |
148 ); | |
149 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta tistics(document()); | |
150 | |
151 double error = 1e-5; | |
152 EXPECT_NEAR(features.mozScore, 6 * sqrt(kTextContentLengthSaturation - kPara graphLengthThreshold), error); | |
153 EXPECT_NEAR(features.mozScoreAllSqrt, 6 * sqrt(kTextContentLengthSaturation) , error); | |
154 EXPECT_NEAR(features.mozScoreAllLinear, 6 * kTextContentLengthSaturation, er ror); | |
155 } | |
156 | |
157 } // namespace blink | |
OLD | NEW |