OLD | NEW |
---|---|
(Empty) | |
1 /* | |
2 * Copyright (c) 2015, Google Inc. All rights reserved. | |
3 * | |
4 * Redistribution and use in source and binary forms, with or without | |
5 * modification, are permitted provided that the following conditions are | |
esprehn
2015/10/26 21:43:09
Use the modern short copyright.
wychen
2015/10/27 23:52:12
Done.
| |
6 * met: | |
7 * | |
8 * * Redistributions of source code must retain the above copyright | |
9 * notice, this list of conditions and the following disclaimer. | |
10 * * Redistributions in binary form must reproduce the above | |
11 * copyright notice, this list of conditions and the following disclaimer | |
12 * in the documentation and/or other materials provided with the | |
13 * distribution. | |
14 * * Neither the name of Google Inc. nor the names of its | |
15 * contributors may be used to endorse or promote products derived from | |
16 * this software without specific prior written permission. | |
17 * | |
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 */ | |
30 | |
31 #include "config.h" | |
32 #include "core/dom/DocumentStatisticsCollector.h" | |
33 | |
34 #include "core/dom/Document.h" | |
35 #include "core/dom/DocumentVisibilityObserver.h" | |
36 #include "core/frame/FrameView.h" | |
37 #include "core/html/HTMLHeadElement.h" | |
38 #include "core/html/HTMLLinkElement.h" | |
39 #include "core/testing/DummyPageHolder.h" | |
40 #include "public/platform/WebDistillability.h" | |
41 #include <gmock/gmock.h> | |
42 #include <gtest/gtest.h> | |
43 | |
44 namespace blink { | |
45 | |
46 class DocumentStatisticsCollectorTest : public ::testing::Test { | |
47 protected: | |
48 void SetUp() override; | |
49 | |
50 #if ENABLE(OILPAN) | |
51 void TearDown() override | |
52 { | |
53 Heap::collectAllGarbage(); | |
54 } | |
55 #endif | |
56 | |
57 Document& document() const { return m_dummyPageHolder->document(); } | |
58 | |
59 void setHtmlInnerHTML(const char*); | |
esprehn
2015/10/26 21:43:09
const String&
wychen
2015/10/27 23:52:12
Done.
| |
60 | |
61 private: | |
62 OwnPtr<DummyPageHolder> m_dummyPageHolder; | |
63 }; | |
64 | |
65 void DocumentStatisticsCollectorTest::SetUp() | |
66 { | |
67 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); | |
68 } | |
69 | |
70 void DocumentStatisticsCollectorTest::setHtmlInnerHTML(const char* htmlContent) | |
71 { | |
72 document().documentElement()->setInnerHTML(String::fromUTF8(htmlContent), ASSERT_NO_EXCEPTION); | |
esprehn
2015/10/26 21:43:09
from fromUTF8
wychen
2015/10/27 23:52:12
I'm not quite sure I understand this comment. For
| |
73 document().view()->updateAllLifecyclePhases(); | |
esprehn
2015/10/26 21:43:09
remove this, you don't need it.
wychen
2015/10/27 23:52:12
Without this line, there's an assertion error:
ASS
| |
74 } | |
75 | |
76 // This test checks open graph articles can be recognized. | |
77 TEST_F(DocumentStatisticsCollectorTest, HasOpenGraphArticle) | |
78 { | |
79 setHtmlInnerHTML( | |
80 "<head>" | |
81 // Note the case-insensitive matching of the word "article". | |
82 " <meta property='og:type' content='arTiclE' />" | |
83 "</head>" | |
84 ); | |
85 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); | |
86 | |
87 EXPECT_EQ(true, features.openGraph); | |
88 } | |
89 | |
90 // This test checks non-existence of open graph articles can be recognized. | |
91 TEST_F(DocumentStatisticsCollectorTest, NoOpenGraphArticle) | |
92 { | |
93 setHtmlInnerHTML( | |
94 "<head>" | |
95 " <meta property='og:type' content='movie' />" | |
96 "</head>" | |
97 ); | |
98 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); | |
99 | |
100 EXPECT_EQ(false, features.openGraph); | |
101 } | |
102 | |
103 // This test checks element counts are correct. | |
104 TEST_F(DocumentStatisticsCollectorTest, CountElements) | |
105 { | |
106 setHtmlInnerHTML( | |
107 "<form>" | |
108 " <input type='text'>" | |
109 " <input type='password'>" | |
110 "</form>" | |
111 "<pre></pre>" | |
112 "<p><a> </a></p>" | |
113 "<ul><li><p><a> </a></p></li></ul>" | |
114 ); | |
115 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); | |
116 | |
117 EXPECT_EQ(false, features.openGraph); | |
118 | |
119 EXPECT_EQ(10u, features.elementCount); | |
120 EXPECT_EQ(2u, features.anchorCount); | |
121 EXPECT_EQ(1u, features.formCount); | |
122 EXPECT_EQ(1u, features.textInputCount); | |
123 EXPECT_EQ(1u, features.passwordInputCount); | |
124 EXPECT_EQ(2u, features.pCount); | |
125 EXPECT_EQ(1u, features.preCount); | |
126 } | |
127 | |
128 // This test checks score calculations are correct. | |
129 TEST_F(DocumentStatisticsCollectorTest, CountScore) | |
130 { | |
131 setHtmlInnerHTML( | |
132 "<p class='menu' id='article'> 1 </p>" // trimmedTextContentLength = 1 | |
133 "<ul><li><p>12</p></li></ul>" // trimmedTextContentLength = 2, skipped because under li | |
134 "<p class='menu'>123</p>" // trimmedTextContentLength = 3, skipped because unlikelyCandidates | |
135 "<p>" | |
136 "12345678901234567890123456789012345678901234567890" | |
137 "12345678901234567890123456789012345678901234567890" | |
138 "12345678901234567890123456789012345678901234" | |
139 "</p>" // trimmedTextContentLength = 144 | |
140 "<p style='display:none'>12345</p>" // trimmedTextContentLength = 5, skipped because invisible | |
141 "<div style='visibility:hidden'><p>123456</p></div>" // trimmedTextContentLength = 6, skipped because invisible | |
142 "<p style='opacity:0'>1234567</p>" // trimmedTextContentLength = 7, skipped because invisible | |
143 "<p> <a href='#'> 12345 </a> 9 <b> </b> </p>" // trimmedTextContentLength = 9 | |
144 ); | |
145 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); | |
146 | |
147 const unsigned kParagraphLengthThreshold = 140; | |
148 | |
149 EXPECT_DOUBLE_EQ(features.mozScore, sqrt(144 - kParagraphLengthThreshold)); | |
150 EXPECT_DOUBLE_EQ(features.mozScoreAllSqrt, 1 + sqrt(144) + sqrt(9)); | |
151 EXPECT_DOUBLE_EQ(features.mozScoreAllLinear, 1 + 144 + 9); | |
152 } | |
153 | |
154 } // namespace blink | |
OLD | NEW |