Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "config.h" | |
| 6 #include "core/dom/DocumentStatisticsCollector.h" | |
| 7 | |
| 8 #include "core/dom/Document.h" | |
| 9 #include "core/dom/DocumentVisibilityObserver.h" | |
| 10 #include "core/frame/FrameView.h" | |
| 11 #include "core/html/HTMLHeadElement.h" | |
| 12 #include "core/html/HTMLLinkElement.h" | |
| 13 #include "core/testing/DummyPageHolder.h" | |
| 14 #include "public/platform/WebDistillability.h" | |
| 15 #include "wtf/text/StringBuilder.h" | |
| 16 | |
| 17 #include <gmock/gmock.h> | |
| 18 #include <gtest/gtest.h> | |
| 19 | |
| 20 namespace blink { | |
| 21 | |
| 22 // Saturate the length of a paragraph to save time. | |
| 23 const unsigned kTextContentLengthSaturation = 1000; | |
| 24 | |
| 25 // Filter out short P elements. The threshold is set to around 2 English sentences. | |
| 26 const unsigned kParagraphLengthThreshold = 140; | |
| 27 | |
| 28 class DocumentStatisticsCollectorTest : public ::testing::Test { | |
| 29 protected: | |
| 30 void SetUp() override; | |
| 31 | |
| 32 #if ENABLE(OILPAN) | |
| 33 void TearDown() override | |
| 34 { | |
| 35 Heap::collectAllGarbage(); | |
| 36 } | |
| 37 #endif | |
| 38 | |
| 39 Document& document() const { return m_dummyPageHolder->document(); } | |
| 40 | |
| 41 void setHtmlInnerHTML(const String&); | |
| 42 | |
| 43 private: | |
| 44 OwnPtr<DummyPageHolder> m_dummyPageHolder; | |
| 45 }; | |
| 46 | |
| 47 void DocumentStatisticsCollectorTest::SetUp() | |
| 48 { | |
| 49 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); | |
| 50 } | |
| 51 | |
| 52 void DocumentStatisticsCollectorTest::setHtmlInnerHTML(const String& htmlContent) | |
| 53 { | |
| 54 document().documentElement()->setInnerHTML((htmlContent), ASSERT_NO_EXCEPTION); | |
| 55 document().view()->updateAllLifecyclePhases(); | |
|
esprehn
2015/11/05 01:54:17
you can remove this if you do that.
wychen
2015/11/05 02:00:00
Right! I forgot to update this one.
| |
| 56 } | |
| 57 | |
| 58 // This test checks open graph articles can be recognized. | |
| 59 TEST_F(DocumentStatisticsCollectorTest, HasOpenGraphArticle) | |
| 60 { | |
| 61 setHtmlInnerHTML( | |
| 62 "<head>" | |
| 63 // Note the case-insensitive matching of the word "article". | |
| 64 " <meta property='og:type' content='arTiclE' />" | |
| 65 "</head>" | |
| 66 ); | |
| 67 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); | |
| 68 | |
| 69 EXPECT_TRUE(features.openGraph); | |
| 70 } | |
| 71 | |
| 72 // This test checks non-existence of open graph articles can be recognized. | |
| 73 TEST_F(DocumentStatisticsCollectorTest, NoOpenGraphArticle) | |
| 74 { | |
| 75 setHtmlInnerHTML( | |
| 76 "<head>" | |
| 77 " <meta property='og:type' content='movie' />" | |
| 78 "</head>" | |
| 79 ); | |
| 80 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); | |
| 81 | |
| 82 EXPECT_FALSE(features.openGraph); | |
| 83 } | |
| 84 | |
| 85 // This test checks element counts are correct. | |
| 86 TEST_F(DocumentStatisticsCollectorTest, CountElements) | |
| 87 { | |
| 88 setHtmlInnerHTML( | |
| 89 "<form>" | |
| 90 " <input type='text'>" | |
| 91 " <input type='password'>" | |
| 92 "</form>" | |
| 93 "<pre></pre>" | |
| 94 "<p><a> </a></p>" | |
| 95 "<ul><li><p><a> </a></p></li></ul>" | |
| 96 ); | |
| 97 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); | |
| 98 | |
| 99 EXPECT_FALSE(features.openGraph); | |
| 100 | |
| 101 EXPECT_EQ(10u, features.elementCount); | |
| 102 EXPECT_EQ(2u, features.anchorCount); | |
| 103 EXPECT_EQ(1u, features.formCount); | |
| 104 EXPECT_EQ(1u, features.textInputCount); | |
| 105 EXPECT_EQ(1u, features.passwordInputCount); | |
| 106 EXPECT_EQ(2u, features.pCount); | |
| 107 EXPECT_EQ(1u, features.preCount); | |
| 108 } | |
| 109 | |
| 110 // This test checks score calculations are correct. | |
| 111 TEST_F(DocumentStatisticsCollectorTest, CountScore) | |
| 112 { | |
| 113 setHtmlInnerHTML( | |
| 114 "<p class='menu' id='article'>1</p>" // textContentLength = 1 | |
| 115 "<ul><li><p>12</p></li></ul>" // textContentLength = 2, skipped because under li | |
| 116 "<p class='menu'>123</p>" // textContentLength = 3, skipped because unlikelyCandidates | |
| 117 "<p>" | |
| 118 "12345678901234567890123456789012345678901234567890" | |
| 119 "12345678901234567890123456789012345678901234567890" | |
| 120 "12345678901234567890123456789012345678901234" | |
| 121 "</p>" // textContentLength = 144 | |
| 122 "<p style='display:none'>12345</p>" // textContentLength = 5, skipped because invisible | |
| 123 "<div style='display:none'><p>123456</p></div>" // textContentLength = 6, skipped because invisible | |
| 124 "<div style='visibility:hidden'><p>1234567</p></div>" // textContentLength = 7, skipped because invisible | |
| 125 "<p style='opacity:0'>12345678</p>" // textContentLength = 8, skipped because invisible | |
| 126 "<p><a href='#'>1234 </a>6 <b> 9</b></p>" // textContentLength = 9 | |
| 127 ); | |
| 128 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); | |
| 129 | |
| 130 EXPECT_DOUBLE_EQ(features.mozScore, sqrt(144 - kParagraphLengthThreshold)); | |
| 131 EXPECT_DOUBLE_EQ(features.mozScoreAllSqrt, 1 + sqrt(144) + sqrt(9)); | |
| 132 EXPECT_DOUBLE_EQ(features.mozScoreAllLinear, 1 + 144 + 9); | |
| 133 } | |
| 134 | |
| 135 // This test checks score calculations are correct. | |
| 136 TEST_F(DocumentStatisticsCollectorTest, CountScoreSaturation) | |
| 137 { | |
| 138 StringBuilder html; | |
| 139 for (int i = 0; i < 10; i++) { | |
| 140 html.append("<p>"); | |
| 141 for (int j = 0; j < 1000; j++) { | |
| 142 html.append("0123456789"); | |
| 143 } | |
| 144 html.append("</p>"); | |
| 145 } | |
| 146 setHtmlInnerHTML( | |
| 147 html.toString() | |
| 148 ); | |
| 149 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); | |
| 150 | |
| 151 double error = 1e-5; | |
| 152 EXPECT_NEAR(features.mozScore, 6 * sqrt(kTextContentLengthSaturation - kParagraphLengthThreshold), error); | |
| 153 EXPECT_NEAR(features.mozScoreAllSqrt, 6 * sqrt(kTextContentLengthSaturation), error); | |
| 154 EXPECT_NEAR(features.mozScoreAllLinear, 6 * kTextContentLengthSaturation, error); | |
| 155 } | |
| 156 | |
| 157 } // namespace blink | |
| OLD | NEW |