| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "core/dom/DocumentStatisticsCollector.h" | 5 #include "core/dom/DocumentStatisticsCollector.h" |
| 6 | 6 |
| 7 #include "core/dom/Document.h" | 7 #include "core/dom/Document.h" |
| 8 #include "core/frame/FrameView.h" | 8 #include "core/frame/FrameView.h" |
| 9 #include "core/html/HTMLHeadElement.h" | 9 #include "core/html/HTMLHeadElement.h" |
| 10 #include "core/html/HTMLLinkElement.h" | 10 #include "core/html/HTMLLinkElement.h" |
| 11 #include "core/testing/DummyPageHolder.h" | 11 #include "core/testing/DummyPageHolder.h" |
| 12 #include "public/platform/WebDistillability.h" | 12 #include "public/platform/WebDistillability.h" |
| 13 #include "testing/gmock/include/gmock/gmock.h" | 13 #include "testing/gmock/include/gmock/gmock.h" |
| 14 #include "testing/gtest/include/gtest/gtest.h" | 14 #include "testing/gtest/include/gtest/gtest.h" |
| 15 #include "wtf/text/StringBuilder.h" | 15 #include "wtf/text/StringBuilder.h" |
| 16 #include <memory> | 16 #include <memory> |
| 17 | 17 |
| 18 namespace blink { | 18 namespace blink { |
| 19 | 19 |
| 20 // Saturate the length of a paragraph to save time. | 20 // Saturate the length of a paragraph to save time. |
| 21 const unsigned kTextContentLengthSaturation = 1000; | 21 const unsigned kTextContentLengthSaturation = 1000; |
| 22 | 22 |
| 23 // Filter out short P elements. The threshold is set to around 2 English sentenc
es. | 23 // Filter out short P elements. The threshold is set to around 2 English |
| 24 // sentences. |
| 24 const unsigned kParagraphLengthThreshold = 140; | 25 const unsigned kParagraphLengthThreshold = 140; |
| 25 | 26 |
| 26 class DocumentStatisticsCollectorTest : public ::testing::Test { | 27 class DocumentStatisticsCollectorTest : public ::testing::Test { |
| 27 protected: | 28 protected: |
| 28 void SetUp() override; | 29 void SetUp() override; |
| 29 | 30 |
| 30 void TearDown() override { ThreadState::current()->collectAllGarbage(); } | 31 void TearDown() override { ThreadState::current()->collectAllGarbage(); } |
| 31 | 32 |
| 32 Document& document() const { return m_dummyPageHolder->document(); } | 33 Document& document() const { return m_dummyPageHolder->document(); } |
| 33 | 34 |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 93 EXPECT_EQ(1u, features.textInputCount); | 94 EXPECT_EQ(1u, features.textInputCount); |
| 94 EXPECT_EQ(1u, features.passwordInputCount); | 95 EXPECT_EQ(1u, features.passwordInputCount); |
| 95 EXPECT_EQ(2u, features.pCount); | 96 EXPECT_EQ(2u, features.pCount); |
| 96 EXPECT_EQ(1u, features.preCount); | 97 EXPECT_EQ(1u, features.preCount); |
| 97 } | 98 } |
| 98 | 99 |
| 99 // This test checks score calculations are correct. | 100 // This test checks score calculations are correct. |
| 100 TEST_F(DocumentStatisticsCollectorTest, CountScore) { | 101 TEST_F(DocumentStatisticsCollectorTest, CountScore) { |
| 101 setHtmlInnerHTML( | 102 setHtmlInnerHTML( |
| 102 "<p class='menu' id='article'>1</p>" // textContentLength = 1 | 103 "<p class='menu' id='article'>1</p>" // textContentLength = 1 |
| 103 "<ul><li><p>12</p></li></ul>" // textContentLength = 2, skipped because u
nder li | 104 "<ul><li><p>12</p></li></ul>" // textContentLength = 2, skipped because |
| 104 "<p class='menu'>123</p>" // textContentLength = 3, skipped because unlik
elyCandidates | 105 // under li |
| 106 "<p class='menu'>123</p>" // textContentLength = 3, skipped because |
| 107 // unlikelyCandidates |
| 105 "<p>" | 108 "<p>" |
| 106 "12345678901234567890123456789012345678901234567890" | 109 "12345678901234567890123456789012345678901234567890" |
| 107 "12345678901234567890123456789012345678901234567890" | 110 "12345678901234567890123456789012345678901234567890" |
| 108 "12345678901234567890123456789012345678901234" | 111 "12345678901234567890123456789012345678901234" |
| 109 "</p>" // textContentLength = 144 | 112 "</p>" // textContentLength = 144 |
| 110 "<p style='display:none'>12345</p>" // textContentLength = 5, skipped bec
ause invisible | 113 "<p style='display:none'>12345</p>" // textContentLength = 5, skipped |
| 111 "<div style='display:none'><p>123456</p></div>" // textContentLength = 6,
skipped because invisible | 114 // because invisible |
| 112 "<div style='visibility:hidden'><p>1234567</p></div>" // textContentLengt
h = 7, skipped because invisible | 115 "<div style='display:none'><p>123456</p></div>" // textContentLength = 6, |
| 113 "<p style='opacity:0'>12345678</p>" // textContentLength = 8, skipped bec
ause invisible | 116 // skipped because |
| 117 // invisible |
| 118 "<div style='visibility:hidden'><p>1234567</p></div>" // textContentLengt
h |
| 119 // = 7, skipped |
| 120 // because |
| 121 // invisible |
| 122 "<p style='opacity:0'>12345678</p>" // textContentLength = 8, skipped |
| 123 // because invisible |
| 114 "<p><a href='#'>1234 </a>6 <b> 9</b></p>" // textContentLength = 9 | 124 "<p><a href='#'>1234 </a>6 <b> 9</b></p>" // textContentLength = 9 |
| 115 "<ul><li></li><p>123456789012</p></ul>" // textContentLength = 12 | 125 "<ul><li></li><p>123456789012</p></ul>" // textContentLength = 12 |
| 116 ); | 126 ); |
| 117 WebDistillabilityFeatures features = | 127 WebDistillabilityFeatures features = |
| 118 DocumentStatisticsCollector::collectStatistics(document()); | 128 DocumentStatisticsCollector::collectStatistics(document()); |
| 119 | 129 |
| 120 EXPECT_DOUBLE_EQ(features.mozScore, sqrt(144 - kParagraphLengthThreshold)); | 130 EXPECT_DOUBLE_EQ(features.mozScore, sqrt(144 - kParagraphLengthThreshold)); |
| 121 EXPECT_DOUBLE_EQ(features.mozScoreAllSqrt, | 131 EXPECT_DOUBLE_EQ(features.mozScoreAllSqrt, |
| 122 1 + sqrt(144) + sqrt(9) + sqrt(12)); | 132 1 + sqrt(144) + sqrt(9) + sqrt(12)); |
| 123 EXPECT_DOUBLE_EQ(features.mozScoreAllLinear, 1 + 144 + 9 + 12); | 133 EXPECT_DOUBLE_EQ(features.mozScoreAllLinear, 1 + 144 + 9 + 12); |
| (...skipping 17 matching lines...) Expand all Loading... |
| 141 EXPECT_NEAR(features.mozScore, 6 * sqrt(kTextContentLengthSaturation - | 151 EXPECT_NEAR(features.mozScore, 6 * sqrt(kTextContentLengthSaturation - |
| 142 kParagraphLengthThreshold), | 152 kParagraphLengthThreshold), |
| 143 error); | 153 error); |
| 144 EXPECT_NEAR(features.mozScoreAllSqrt, 6 * sqrt(kTextContentLengthSaturation), | 154 EXPECT_NEAR(features.mozScoreAllSqrt, 6 * sqrt(kTextContentLengthSaturation), |
| 145 error); | 155 error); |
| 146 EXPECT_NEAR(features.mozScoreAllLinear, 6 * kTextContentLengthSaturation, | 156 EXPECT_NEAR(features.mozScoreAllLinear, 6 * kTextContentLengthSaturation, |
| 147 error); | 157 error); |
| 148 } | 158 } |
| 149 | 159 |
| 150 } // namespace blink | 160 } // namespace blink |
| OLD | NEW |