Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(202)

Side by Side Diff: third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp

Issue 1419033004: Add feature extraction for distillability to Blink (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: address esprehn's comments Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "config.h"
6 #include "core/dom/DocumentStatisticsCollector.h"
7
8 #include "core/dom/Document.h"
9 #include "core/dom/DocumentVisibilityObserver.h"
10 #include "core/frame/FrameView.h"
11 #include "core/html/HTMLHeadElement.h"
12 #include "core/html/HTMLLinkElement.h"
13 #include "core/testing/DummyPageHolder.h"
14 #include "public/platform/WebDistillability.h"
15 #include "wtf/text/StringBuilder.h"
16
17 #include <gmock/gmock.h>
18 #include <gtest/gtest.h>
19
20 namespace blink {
21
// Saturate the length of a paragraph to save time.
// The saturation test in this file builds paragraphs far longer than this and
// derives its expected scores from this value.
// NOTE(review): presumably mirrors a constant of the same name in
// DocumentStatisticsCollector.cpp -- confirm and keep the two in sync.
const unsigned kTextContentLengthSaturation = 1000;

// Filter out short P elements. The threshold is set to around 2 English sentences.
// The score tests in this file subtract this value from a paragraph's text
// length before taking the square root.
// NOTE(review): presumably mirrors the collector's own threshold -- confirm.
const unsigned kParagraphLengthThreshold = 140;
27
28 class DocumentStatisticsCollectorTest : public ::testing::Test {
29 protected:
30 void SetUp() override;
31
32 #if ENABLE(OILPAN)
33 void TearDown() override
34 {
35 Heap::collectAllGarbage();
36 }
37 #endif
38
39 Document& document() const { return m_dummyPageHolder->document(); }
40
41 void setHtmlInnerHTML(const String&);
42
43 private:
44 OwnPtr<DummyPageHolder> m_dummyPageHolder;
45 };
46
47 void DocumentStatisticsCollectorTest::SetUp()
48 {
49 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600));
50 }
51
52 void DocumentStatisticsCollectorTest::setHtmlInnerHTML(const String& htmlContent )
53 {
54 document().documentElement()->setInnerHTML((htmlContent), ASSERT_NO_EXCEPTIO N);
55 document().view()->updateAllLifecyclePhases();
esprehn 2015/11/05 01:54:17 you can remove this if you do that.
wychen 2015/11/05 02:00:00 Right! I forgot to update this one.
56 }
57
58 // This test checks open graph articles can be recognized.
59 TEST_F(DocumentStatisticsCollectorTest, HasOpenGraphArticle)
60 {
61 setHtmlInnerHTML(
62 "<head>"
63 // Note the case-insensitive matching of the word "article".
64 " <meta property='og:type' content='arTiclE' />"
65 "</head>"
66 );
67 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta tistics(document());
68
69 EXPECT_TRUE(features.openGraph);
70 }
71
72 // This test checks non-existence of open graph articles can be recognized.
73 TEST_F(DocumentStatisticsCollectorTest, NoOpenGraphArticle)
74 {
75 setHtmlInnerHTML(
76 "<head>"
77 " <meta property='og:type' content='movie' />"
78 "</head>"
79 );
80 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta tistics(document());
81
82 EXPECT_FALSE(features.openGraph);
83 }
84
85 // This test checks element counts are correct.
86 TEST_F(DocumentStatisticsCollectorTest, CountElements)
87 {
88 setHtmlInnerHTML(
89 "<form>"
90 " <input type='text'>"
91 " <input type='password'>"
92 "</form>"
93 "<pre></pre>"
94 "<p><a> </a></p>"
95 "<ul><li><p><a> </a></p></li></ul>"
96 );
97 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta tistics(document());
98
99 EXPECT_FALSE(features.openGraph);
100
101 EXPECT_EQ(10u, features.elementCount);
102 EXPECT_EQ(2u, features.anchorCount);
103 EXPECT_EQ(1u, features.formCount);
104 EXPECT_EQ(1u, features.textInputCount);
105 EXPECT_EQ(1u, features.passwordInputCount);
106 EXPECT_EQ(2u, features.pCount);
107 EXPECT_EQ(1u, features.preCount);
108 }
109
110 // This test checks score calculations are correct.
111 TEST_F(DocumentStatisticsCollectorTest, CountScore)
112 {
113 setHtmlInnerHTML(
114 "<p class='menu' id='article'>1</p>" // textContentLength = 1
115 "<ul><li><p>12</p></li></ul>" // textContentLength = 2, skipped because under li
116 "<p class='menu'>123</p>" // textContentLength = 3, skipped because unli kelyCandidates
117 "<p>"
118 "12345678901234567890123456789012345678901234567890"
119 "12345678901234567890123456789012345678901234567890"
120 "12345678901234567890123456789012345678901234"
121 "</p>" // textContentLength = 144
122 "<p style='display:none'>12345</p>" // textContentLength = 5, skipped be cause invisible
123 "<div style='display:none'><p>123456</p></div>" // textContentLength = 6 , skipped because invisible
124 "<div style='visibility:hidden'><p>1234567</p></div>" // textContentLeng th = 7, skipped because invisible
125 "<p style='opacity:0'>12345678</p>" // textContentLength = 8, skipped be cause invisible
126 "<p><a href='#'>1234 </a>6 <b> 9</b></p>" // textContentLength = 9
127 );
128 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta tistics(document());
129
130 EXPECT_DOUBLE_EQ(features.mozScore, sqrt(144 - kParagraphLengthThreshold));
131 EXPECT_DOUBLE_EQ(features.mozScoreAllSqrt, 1 + sqrt(144) + sqrt(9));
132 EXPECT_DOUBLE_EQ(features.mozScoreAllLinear, 1 + 144 + 9);
133 }
134
135 // This test checks score calculations are correct.
136 TEST_F(DocumentStatisticsCollectorTest, CountScoreSaturation)
137 {
138 StringBuilder html;
139 for (int i = 0; i < 10; i++) {
140 html.append("<p>");
141 for (int j = 0; j < 1000; j++) {
142 html.append("0123456789");
143 }
144 html.append("</p>");
145 }
146 setHtmlInnerHTML(
147 html.toString()
148 );
149 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta tistics(document());
150
151 double error = 1e-5;
152 EXPECT_NEAR(features.mozScore, 6 * sqrt(kTextContentLengthSaturation - kPara graphLengthThreshold), error);
153 EXPECT_NEAR(features.mozScoreAllSqrt, 6 * sqrt(kTextContentLengthSaturation) , error);
154 EXPECT_NEAR(features.mozScoreAllLinear, 6 * kTextContentLengthSaturation, er ror);
155 }
156
157 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/core/dom/DocumentStatisticsCollector.cpp ('k') | third_party/WebKit/Source/web/WebDocument.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698