Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(235)

Side by Side Diff: third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp

Issue 1419033004: Add feature extraction for distillability to Blink (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: stricter test Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "config.h"
6 #include "core/dom/DocumentStatisticsCollector.h"
7
8 #include "core/dom/Document.h"
9 #include "core/dom/DocumentVisibilityObserver.h"
10 #include "core/frame/FrameView.h"
11 #include "core/html/HTMLHeadElement.h"
12 #include "core/html/HTMLLinkElement.h"
13 #include "core/testing/DummyPageHolder.h"
14 #include "public/platform/WebDistillability.h"
15 #include "wtf/text/StringBuilder.h"
16
17 #include <gmock/gmock.h>
18 #include <gtest/gtest.h>
19
20 namespace blink {
21
22 // Saturate the length of a paragraph to save time. Used below to compute the expected saturated scores.
23 const unsigned kTextContentLengthSaturation = 1000; // NOTE(review): presumably mirrors the collector's own constant; confirm they stay in sync.
24
25 // Filter out short P elements. The threshold is set to around 2 English sentences.
26 const unsigned kParagraphLengthThreshold = 140; // Subtracted from the paragraph length in the expected mozScore values below.
27
28 class DocumentStatisticsCollectorTest : public ::testing::Test { // gtest fixture shared by every TEST_F in this file.
29 protected:
30 void SetUp() override; // Creates the DummyPageHolder that backs document().
31
32 #if ENABLE(OILPAN)
33 void TearDown() override
34 {
35 Heap::collectAllGarbage(); // Only compiled when ENABLE(OILPAN) is true.
36 }
37 #endif
38
39 Document& document() const { return m_dummyPageHolder->document(); } // Document owned by the dummy page; valid once SetUp() has run.
40
41 void setHtmlInnerHTML(const String&); // Replaces the inner HTML of the document element with the given markup.
42
43 private:
44 OwnPtr<DummyPageHolder> m_dummyPageHolder; // Assigned in SetUp().
45 };
46
47 void DocumentStatisticsCollectorTest::SetUp() // Builds the dummy page that the fixture's document() accessor reads from.
48 {
49 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); // NOTE(review): the 800x600 IntSize is presumably the initial view size; confirm against DummyPageHolder.
50 }
51
52 void DocumentStatisticsCollectorTest::setHtmlInnerHTML(const String& htmlContent) // Test helper: installs htmlContent as the markup under the document element.
53 {
54 document().documentElement()->setInnerHTML(htmlContent, ASSERT_NO_EXCEPTION); // ASSERT_NO_EXCEPTION is passed as the exception-state argument.
55 }
56
57 // This test checks open graph articles can be recognized.
58 TEST_F(DocumentStatisticsCollectorTest, HasOpenGraphArticle)
59 {
60 setHtmlInnerHTML(
61 "<head>"
62 // Note the case-insensitive matching of the word "article".
63 " <meta property='og:type' content='arTiclE' />"
64 "</head>"
65 );
66 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
67
68 EXPECT_TRUE(features.openGraph); // The mixed-case 'arTiclE' og:type must still set the openGraph flag.
69 }
70
71 // This test checks non-existence of open graph articles can be recognized.
72 TEST_F(DocumentStatisticsCollectorTest, NoOpenGraphArticle)
73 {
74 setHtmlInnerHTML(
75 "<head>"
76 " <meta property='og:type' content='movie' />" // An og:type other than "article".
77 "</head>"
78 );
79 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
80
81 EXPECT_FALSE(features.openGraph); // A 'movie' og:type must leave the openGraph flag unset.
82 }
83
84 // This test checks element counts are correct.
85 TEST_F(DocumentStatisticsCollectorTest, CountElements)
86 {
87 setHtmlInnerHTML(
88 "<form>"
89 " <input type='text'>"
90 " <input type='password'>"
91 "</form>"
92 "<pre></pre>"
93 "<p><a> </a></p>"
94 "<ul><li><p><a> </a></p></li></ul>"
95 );
96 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
97
98 EXPECT_FALSE(features.openGraph); // The markup contains no og:type meta tag.
99
100 EXPECT_EQ(10u, features.elementCount); // Equals the tags written above: form, 2 inputs, pre, 2 p, 2 a, ul, li.
101 EXPECT_EQ(2u, features.anchorCount);
102 EXPECT_EQ(1u, features.formCount);
103 EXPECT_EQ(1u, features.textInputCount);
104 EXPECT_EQ(1u, features.passwordInputCount);
105 EXPECT_EQ(2u, features.pCount); // Expects both the top-level <p> and the <p> nested under <li>.
106 EXPECT_EQ(1u, features.preCount);
107 }
108
109 // This test checks score calculations are correct.
110 TEST_F(DocumentStatisticsCollectorTest, CountScore)
111 {
112 setHtmlInnerHTML(
113 "<p class='menu' id='article'>1</p>" // textContentLength = 1
114 "<ul><li><p>12</p></li></ul>" // textContentLength = 2, skipped because under li
115 "<p class='menu'>123</p>" // textContentLength = 3, skipped because unlikelyCandidates
116 "<p>"
117 "12345678901234567890123456789012345678901234567890"
118 "12345678901234567890123456789012345678901234567890"
119 "12345678901234567890123456789012345678901234"
120 "</p>" // textContentLength = 144
121 "<p style='display:none'>12345</p>" // textContentLength = 5, skipped because invisible
122 "<div style='display:none'><p>123456</p></div>" // textContentLength = 6, skipped because invisible
123 "<div style='visibility:hidden'><p>1234567</p></div>" // textContentLength = 7, skipped because invisible
124 "<p style='opacity:0'>12345678</p>" // textContentLength = 8, skipped because invisible
125 "<p><a href='#'>1234 </a>6 <b> 9</b></p>" // textContentLength = 9
126 );
127 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
128
129 EXPECT_DOUBLE_EQ(sqrt(144 - kParagraphLengthThreshold), features.mozScore); // Only the 144-char paragraph exceeds kParagraphLengthThreshold.
130 EXPECT_DOUBLE_EQ(1 + sqrt(144) + sqrt(9), features.mozScoreAllSqrt); // Non-skipped paragraphs have lengths 1, 144 and 9; sqrt(1) == 1.
131 EXPECT_DOUBLE_EQ(1 + 144 + 9, features.mozScoreAllLinear); // Linear sum over the same three paragraphs.
132 }
133
134 // This test checks that score calculations saturate correctly for very long paragraphs.
135 TEST_F(DocumentStatisticsCollectorTest, CountScoreSaturation)
136 {
137 StringBuilder html;
138 for (int i = 0; i < 10; i++) { // Ten identical paragraphs.
139 html.append("<p>");
140 for (int j = 0; j < 1000; j++) { // 1000 * 10 chars = 10000 per paragraph, well above kTextContentLengthSaturation.
141 html.append("0123456789");
142 }
143 html.append("</p>");
144 }
145 setHtmlInnerHTML(
146 html.toString()
147 );
148 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
149
150 const double error = 1e-5; // Absolute tolerance for the EXPECT_NEAR checks below.
151 EXPECT_NEAR(6 * sqrt(kTextContentLengthSaturation - kParagraphLengthThreshold), features.mozScore, error); // NOTE(review): factor 6 despite 10 paragraphs; presumably the collector caps the total score. Confirm in DocumentStatisticsCollector.cpp.
152 EXPECT_NEAR(6 * sqrt(kTextContentLengthSaturation), features.mozScoreAllSqrt, error);
153 EXPECT_NEAR(6 * kTextContentLengthSaturation, features.mozScoreAllLinear, error);
154 }
155
156 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/core/dom/DocumentStatisticsCollector.cpp ('k') | third_party/WebKit/Source/web/WebDocument.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698