OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "config.h" |
| 6 #include "core/dom/DocumentStatisticsCollector.h" |
| 7 |
| 8 #include "core/dom/Document.h" |
| 9 #include "core/dom/DocumentVisibilityObserver.h" |
| 10 #include "core/frame/FrameView.h" |
| 11 #include "core/html/HTMLHeadElement.h" |
| 12 #include "core/html/HTMLLinkElement.h" |
| 13 #include "core/testing/DummyPageHolder.h" |
| 14 #include "public/platform/WebDistillability.h" |
| 15 #include "wtf/text/StringBuilder.h" |
| 16 |
| 17 #include <gmock/gmock.h> |
| 18 #include <gtest/gtest.h> |
| 19 |
| 20 namespace blink { |
| 21 |
namespace {

// Cap applied to the length of a single paragraph; saturating saves time.
const unsigned kTextContentLengthSaturation = 1000;

// Short P elements are filtered out. The cut-off corresponds to roughly two
// English sentences.
const unsigned kParagraphLengthThreshold = 140;

} // namespace
| 27 |
| 28 class DocumentStatisticsCollectorTest : public ::testing::Test { |
| 29 protected: |
| 30 void SetUp() override; |
| 31 |
| 32 #if ENABLE(OILPAN) |
| 33 void TearDown() override |
| 34 { |
| 35 Heap::collectAllGarbage(); |
| 36 } |
| 37 #endif |
| 38 |
| 39 Document& document() const { return m_dummyPageHolder->document(); } |
| 40 |
| 41 void setHtmlInnerHTML(const String&); |
| 42 |
| 43 private: |
| 44 OwnPtr<DummyPageHolder> m_dummyPageHolder; |
| 45 }; |
| 46 |
| 47 void DocumentStatisticsCollectorTest::SetUp() |
| 48 { |
| 49 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); |
| 50 } |
| 51 |
| 52 void DocumentStatisticsCollectorTest::setHtmlInnerHTML(const String& htmlContent
) |
| 53 { |
| 54 document().documentElement()->setInnerHTML((htmlContent), ASSERT_NO_EXCEPTIO
N); |
| 55 document().view()->updateAllLifecyclePhases(); |
| 56 } |
| 57 |
| 58 // This test checks open graph articles can be recognized. |
| 59 TEST_F(DocumentStatisticsCollectorTest, HasOpenGraphArticle) |
| 60 { |
| 61 setHtmlInnerHTML( |
| 62 "<head>" |
| 63 // Note the case-insensitive matching of the word "article". |
| 64 " <meta property='og:type' content='arTiclE' />" |
| 65 "</head>" |
| 66 ); |
| 67 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta
tistics(document()); |
| 68 |
| 69 EXPECT_TRUE(features.openGraph); |
| 70 } |
| 71 |
| 72 // This test checks non-existence of open graph articles can be recognized. |
| 73 TEST_F(DocumentStatisticsCollectorTest, NoOpenGraphArticle) |
| 74 { |
| 75 setHtmlInnerHTML( |
| 76 "<head>" |
| 77 " <meta property='og:type' content='movie' />" |
| 78 "</head>" |
| 79 ); |
| 80 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta
tistics(document()); |
| 81 |
| 82 EXPECT_FALSE(features.openGraph); |
| 83 } |
| 84 |
| 85 // This test checks element counts are correct. |
| 86 TEST_F(DocumentStatisticsCollectorTest, CountElements) |
| 87 { |
| 88 setHtmlInnerHTML( |
| 89 "<form>" |
| 90 " <input type='text'>" |
| 91 " <input type='password'>" |
| 92 "</form>" |
| 93 "<pre></pre>" |
| 94 "<p><a> </a></p>" |
| 95 "<ul><li><p><a> </a></p></li></ul>" |
| 96 ); |
| 97 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta
tistics(document()); |
| 98 |
| 99 EXPECT_FALSE(features.openGraph); |
| 100 |
| 101 EXPECT_EQ(10u, features.elementCount); |
| 102 EXPECT_EQ(2u, features.anchorCount); |
| 103 EXPECT_EQ(1u, features.formCount); |
| 104 EXPECT_EQ(1u, features.textInputCount); |
| 105 EXPECT_EQ(1u, features.passwordInputCount); |
| 106 EXPECT_EQ(2u, features.pCount); |
| 107 EXPECT_EQ(1u, features.preCount); |
| 108 } |
| 109 |
| 110 // This test checks score calculations are correct. |
| 111 TEST_F(DocumentStatisticsCollectorTest, CountScore) |
| 112 { |
| 113 setHtmlInnerHTML( |
| 114 "<p class='menu' id='article'>1</p>" // textContentLength = 1 |
| 115 "<ul><li><p>12</p></li></ul>" // textContentLength = 2, skipped because
under li |
| 116 "<p class='menu'>123</p>" // textContentLength = 3, skipped because unli
kelyCandidates |
| 117 "<p>" |
| 118 "12345678901234567890123456789012345678901234567890" |
| 119 "12345678901234567890123456789012345678901234567890" |
| 120 "12345678901234567890123456789012345678901234" |
| 121 "</p>" // textContentLength = 144 |
| 122 "<p style='display:none'>12345</p>" // textContentLength = 5, skipped be
cause invisible |
| 123 "<div style='display:none'><p>123456</p></div>" // textContentLength = 6
, skipped because invisible |
| 124 "<div style='visibility:hidden'><p>1234567</p></div>" // textContentLeng
th = 7, skipped because invisible |
| 125 "<p style='opacity:0'>12345678</p>" // textContentLength = 8, skipped be
cause invisible |
| 126 "<p><a href='#'>1234 </a>6 <b> 9</b></p>" // textContentLength = 9 |
| 127 ); |
| 128 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta
tistics(document()); |
| 129 |
| 130 EXPECT_DOUBLE_EQ(features.mozScore, sqrt(144 - kParagraphLengthThreshold)); |
| 131 EXPECT_DOUBLE_EQ(features.mozScoreAllSqrt, 1 + sqrt(144) + sqrt(9)); |
| 132 EXPECT_DOUBLE_EQ(features.mozScoreAllLinear, 1 + 144 + 9); |
| 133 } |
| 134 |
| 135 // This test checks score calculations are correct. |
| 136 TEST_F(DocumentStatisticsCollectorTest, CountScoreSaturation) |
| 137 { |
| 138 StringBuilder html; |
| 139 for (int i = 0; i < 10; i++) { |
| 140 html.append("<p>"); |
| 141 for (int j = 0; j < 1000; j++) { |
| 142 html.append("0123456789"); |
| 143 } |
| 144 html.append("</p>"); |
| 145 } |
| 146 setHtmlInnerHTML( |
| 147 html.toString() |
| 148 ); |
| 149 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectSta
tistics(document()); |
| 150 |
| 151 double error = 1e-5; |
| 152 EXPECT_NEAR(features.mozScore, 6 * sqrt(kTextContentLengthSaturation - kPara
graphLengthThreshold), error); |
| 153 EXPECT_NEAR(features.mozScoreAllSqrt, 6 * sqrt(kTextContentLengthSaturation)
, error); |
| 154 EXPECT_NEAR(features.mozScoreAllLinear, 6 * kTextContentLengthSaturation, er
ror); |
| 155 } |
| 156 |
| 157 } // namespace blink |
OLD | NEW |