Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(13)

Side by Side Diff: third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp

Issue 1419033004: Add feature extraction for distillability to Blink (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: address comments, add saturation Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
(Empty)
1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "config.h"
6 #include "core/dom/DocumentStatisticsCollector.h"
7
8 #include "core/dom/Document.h"
9 #include "core/dom/DocumentVisibilityObserver.h"
10 #include "core/frame/FrameView.h"
11 #include "core/html/HTMLHeadElement.h"
12 #include "core/html/HTMLLinkElement.h"
13 #include "core/testing/DummyPageHolder.h"
14 #include "public/platform/WebDistillability.h"
15 #include <gmock/gmock.h>
16 #include <gtest/gtest.h>
17
18 namespace blink {
19 // gtest fixture for DocumentStatisticsCollector; tests reach the document of a DummyPageHolder through document().
20 class DocumentStatisticsCollectorTest : public ::testing::Test {
21 protected:
22 void SetUp() override;
23 // In ENABLE(OILPAN) builds only, TearDown() calls Heap::collectAllGarbage().
24 #if ENABLE(OILPAN)
25 void TearDown() override
26 {
27 Heap::collectAllGarbage();
28 }
29 #endif
30 // Returns the document of the DummyPageHolder held in m_dummyPageHolder.
31 Document& document() const { return m_dummyPageHolder->document(); }
32 // Replaces the document element's inner HTML with the given markup; defined out of line.
33 void setHtmlInnerHTML(const String&);
34
35 private:
36 OwnPtr<DummyPageHolder> m_dummyPageHolder; // Assigned in SetUp().
37 };
38 // Creates the DummyPageHolder whose document the tests operate on.
39 void DocumentStatisticsCollectorTest::SetUp()
40 {
41 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); // NOTE(review): 800x600 is presumably the page/view size - confirm against DummyPageHolder.
42 }
43 // Sets |htmlContent| as the inner HTML of the document element (passing ASSERT_NO_EXCEPTION), then updates all lifecycle phases of the view.
44 void DocumentStatisticsCollectorTest::setHtmlInnerHTML(const String& htmlContent)
45 {
46 document().documentElement()->setInnerHTML(htmlContent, ASSERT_NO_EXCEPTION);
47 document().view()->updateAllLifecyclePhases(); // NOTE(review): presumably needed so the visibility-dependent checks in CountScore see current style/layout - confirm.
48 }
49
50 // Checks that a <meta property='og:type'> whose content is "article" sets features.openGraph.
51 TEST_F(DocumentStatisticsCollectorTest, HasOpenGraphArticle)
52 {
53 setHtmlInnerHTML(
54 "<head>"
55 // Mixed case on purpose: the word "article" must be matched case-insensitively.
56 " <meta property='og:type' content='arTiclE' />"
57 "</head>"
58 );
59 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
60
61 EXPECT_TRUE(features.openGraph);
62 }
63
64 // Checks that an og:type other than "article" (here 'movie') leaves features.openGraph false.
65 TEST_F(DocumentStatisticsCollectorTest, NoOpenGraphArticle)
66 {
67 setHtmlInnerHTML(
68 "<head>"
69 " <meta property='og:type' content='movie' />"
70 "</head>"
71 );
72 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
73
74 EXPECT_FALSE(features.openGraph);
75 }
76
77 // Checks the element counters against a fragment containing a known number of each counted tag.
78 TEST_F(DocumentStatisticsCollectorTest, CountElements)
79 {
80 setHtmlInnerHTML(
81 "<form>"
82 " <input type='text'>"
83 " <input type='password'>"
84 "</form>"
85 "<pre></pre>"
86 "<p><a> </a></p>"
87 "<ul><li><p><a> </a></p></li></ul>"
88 );
89 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
90
91 EXPECT_FALSE(features.openGraph);
92 // The fragment spells out ten elements: form, input x2, pre, p x2, a x2, ul, li.
93 EXPECT_EQ(10u, features.elementCount); // NOTE(review): html/head/body are apparently not counted - confirm in the collector.
94 EXPECT_EQ(2u, features.anchorCount);
95 EXPECT_EQ(1u, features.formCount);
96 EXPECT_EQ(1u, features.textInputCount);
97 EXPECT_EQ(1u, features.passwordInputCount);
98 EXPECT_EQ(2u, features.pCount);
99 EXPECT_EQ(1u, features.preCount);
100 }
101
102 // Checks the three score features; the trailing comment on each paragraph gives its trimmed text length and, where relevant, why it is skipped.
103 TEST_F(DocumentStatisticsCollectorTest, CountScore)
104 {
105 setHtmlInnerHTML(
106 "<p class='menu' id='article'> 1 </p>" // trimmedTextContentLength = 1 (counted; NOTE(review): presumably id='article' outweighs class='menu' - confirm)
107 "<ul><li><p>12</p></li></ul>" // trimmedTextContentLength = 2, skipped because under li
108 "<p class='menu'>123</p>" // trimmedTextContentLength = 3, skipped because unlikelyCandidates
109 "<p>"
110 "12345678901234567890123456789012345678901234567890"
111 "12345678901234567890123456789012345678901234567890"
112 "12345678901234567890123456789012345678901234"
113 "</p>" // trimmedTextContentLength = 144
114 "<p style='display:none'>12345</p>" // trimmedTextContentLength = 5, skipped because invisible
115 "<div style='visibility:hidden'><p>123456</p></div>" // trimmedTextContentLength = 6, skipped because invisible
116 "<p style='opacity:0'>1234567</p>" // trimmedTextContentLength = 7, skipped because invisible
117 "<p> <a href='#'> 12345 </a> 9 <b> </b> </p>" // trimmedTextContentLength = 9
118 );
119 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
120 // NOTE(review): presumably mirrors the paragraph-length threshold used by the collector - keep in sync.
121 const unsigned kParagraphLengthThreshold = 140;
122 // Counted lengths are 1, 144 and 9. Only 144 exceeds the threshold, so mozScore has a single term; the leading 1 below is sqrt(1).
123 EXPECT_DOUBLE_EQ(sqrt(144 - kParagraphLengthThreshold), features.mozScore);
124 EXPECT_DOUBLE_EQ(1 + sqrt(144) + sqrt(9), features.mozScoreAllSqrt);
125 EXPECT_DOUBLE_EQ(1 + 144 + 9, features.mozScoreAllLinear);
126 }
127
128 } // namespace blink
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698