Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(47)

Side by Side Diff: third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp

Issue 1419033004: Add feature extraction for distillability to Blink (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: add mobile friendly detection Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "config.h"
6 #include "core/dom/DocumentStatisticsCollector.h"
7
8 #include "core/dom/Document.h"
9 #include "core/dom/DocumentVisibilityObserver.h"
10 #include "core/frame/FrameView.h"
11 #include "core/html/HTMLHeadElement.h"
12 #include "core/html/HTMLLinkElement.h"
13 #include "core/testing/DummyPageHolder.h"
14 #include "public/platform/WebDistillability.h"
15 #include <gmock/gmock.h>
16 #include <gtest/gtest.h>
17
18 namespace blink {
19
20 class DocumentStatisticsCollectorTest : public ::testing::Test { // gtest fixture used by every TEST_F in this file.
21 protected:
22 void SetUp() override; // Defined out of line; assigns m_dummyPageHolder.
23
24 #if ENABLE(OILPAN)
25 void TearDown() override // Only compiled when ENABLE(OILPAN) holds.
26 {
27 Heap::collectAllGarbage(); // NOTE(review): presumably keeps heap objects from one test from surviving into the next — confirm.
28 }
29 #endif
30
31 Document& document() const { return m_dummyPageHolder->document(); } // Document of the dummy page; dereferences m_dummyPageHolder, so SetUp() must have run.
32
33 void setHtmlInnerHTML(const String&); // Test helper, defined out of line: sets the document element's inner HTML.
34
35 private:
36 OwnPtr<DummyPageHolder> m_dummyPageHolder; // Owning pointer to the dummy page; assigned in SetUp().
37 };
38
39 void DocumentStatisticsCollectorTest::SetUp() // gtest per-test setup hook: creates the dummy page the tests operate on.
40 {
41 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); // NOTE(review): IntSize(800, 600) is presumably the initial view size — confirm in DummyPageHolder.
42 }
43
44 void DocumentStatisticsCollectorTest::setHtmlInnerHTML(const String& htmlContent) // Test helper: loads |htmlContent| into the dummy document.
45 {
46 document().documentElement()->setInnerHTML(htmlContent, ASSERT_NO_EXCEPTION); // Sets the markup on the document element itself, so tests may pass a <head> section.
47 document().view()->updateAllLifecyclePhases(); // NOTE(review): presumably needed so style-dependent checks (display/visibility/opacity in CountScore) see up-to-date state — confirm.
48 }
49
50 // This test checks that an Open Graph article is recognized: a page whose og:type meta content is "article" must yield features.openGraph == true.
51 TEST_F(DocumentStatisticsCollectorTest, HasOpenGraphArticle)
52 {
53 setHtmlInnerHTML(
54 "<head>"
55 // Note the case-insensitive matching of the word "article".
56 " <meta property='og:type' content='arTiclE' />"
57 "</head>"
58 );
59 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
60
61 EXPECT_TRUE(features.openGraph);
62 }
63
64 // This test checks the negative case: an og:type meta tag whose content is not "article" (here 'movie') must leave features.openGraph false.
65 TEST_F(DocumentStatisticsCollectorTest, NoOpenGraphArticle)
66 {
67 setHtmlInnerHTML(
68 "<head>"
69 " <meta property='og:type' content='movie' />"
70 "</head>"
71 );
72 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
73
74 EXPECT_FALSE(features.openGraph);
75 }
76
77 // This test checks that the per-tag element counters match the markup fed to the document.
78 TEST_F(DocumentStatisticsCollectorTest, CountElements)
79 {
80 setHtmlInnerHTML(
81 "<form>"
82 " <input type='text'>"
83 " <input type='password'>"
84 "</form>"
85 "<pre></pre>"
86 "<p><a> </a></p>"
87 "<ul><li><p><a> </a></p></li></ul>"
88 );
89 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
90
91 EXPECT_FALSE(features.openGraph); // No og:type meta tag in this markup.
92
93 EXPECT_EQ(10u, features.elementCount); // Equals the ten elements written in the markup above (form, 2 inputs, pre, 2 p, 2 a, ul, li).
94 EXPECT_EQ(2u, features.anchorCount);
95 EXPECT_EQ(1u, features.formCount);
96 EXPECT_EQ(1u, features.textInputCount); // Only the type='text' input; the password input is counted separately below.
97 EXPECT_EQ(1u, features.passwordInputCount);
98 EXPECT_EQ(2u, features.pCount); // Includes the <p> nested under <li>.
99 EXPECT_EQ(1u, features.preCount);
100 }
101
102 // This test checks score calculations are correct. Per the expectations below, only the paragraphs of length 1, 144 and 9 are scored; the rest are skipped for the reason noted beside each one.
103 TEST_F(DocumentStatisticsCollectorTest, CountScore)
104 {
105 setHtmlInnerHTML(
106 "<p class='menu' id='article'>1</p>" // textContentLength = 1
107 "<ul><li><p>12</p></li></ul>" // textContentLength = 2, skipped because under li
108 "<p class='menu'>123</p>" // textContentLength = 3, skipped because unlikelyCandidates
109 "<p>"
110 "12345678901234567890123456789012345678901234567890"
111 "12345678901234567890123456789012345678901234567890"
112 "12345678901234567890123456789012345678901234"
113 "</p>" // textContentLength = 144
114 "<p style='display:none'>12345</p>" // textContentLength = 5, skipped because invisible
115 "<div style='display:none'><p>123456</p></div>" // textContentLength = 6, skipped because invisible
116 "<div style='visibility:hidden'><p>1234567</p></div>" // textContentLength = 7, skipped because invisible
117 "<p style='opacity:0'>12345678</p>" // textContentLength = 8, skipped because invisible
118 "<p><a href='#'>1234 </a>6 <b> 9</b></p>" // textContentLength = 9
119 );
120 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
121
122 const unsigned kParagraphLengthThreshold = 140; // NOTE(review): presumably mirrors the threshold inside DocumentStatisticsCollector — confirm the two stay in sync.
123
124 EXPECT_DOUBLE_EQ(features.mozScore, sqrt(144 - kParagraphLengthThreshold)); // Only the 144-character paragraph exceeds the threshold.
125 EXPECT_DOUBLE_EQ(features.mozScoreAllSqrt, 1 + sqrt(144) + sqrt(9)); // sqrt of each scored length; sqrt(1) is written as 1.
126 EXPECT_DOUBLE_EQ(features.mozScoreAllLinear, 1 + 144 + 9); // Plain sum of the scored lengths.
127 }
128
129 } // namespace blink
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698