Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 /* | |
| 2 * Copyright (c) 2015, Google Inc. All rights reserved. | |
| 3 * | |
| 4 * Redistribution and use in source and binary forms, with or without | |
| 5 * modification, are permitted provided that the following conditions are | |
|
esprehn
2015/10/26 21:43:09
Use the modern short copyright.
wychen
2015/10/27 23:52:12
Done.
| |
| 6 * met: | |
| 7 * | |
| 8 * * Redistributions of source code must retain the above copyright | |
| 9 * notice, this list of conditions and the following disclaimer. | |
| 10 * * Redistributions in binary form must reproduce the above | |
| 11 * copyright notice, this list of conditions and the following disclaimer | |
| 12 * in the documentation and/or other materials provided with the | |
| 13 * distribution. | |
| 14 * * Neither the name of Google Inc. nor the names of its | |
| 15 * contributors may be used to endorse or promote products derived from | |
| 16 * this software without specific prior written permission. | |
| 17 * | |
| 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
| 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
| 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
| 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
| 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
| 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
| 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
| 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
| 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 29 */ | |
| 30 | |
| 31 #include "config.h" | |
| 32 #include "core/dom/DocumentStatisticsCollector.h" | |
| 33 | |
| 34 #include "core/dom/Document.h" | |
| 35 #include "core/dom/DocumentVisibilityObserver.h" | |
| 36 #include "core/frame/FrameView.h" | |
| 37 #include "core/html/HTMLHeadElement.h" | |
| 38 #include "core/html/HTMLLinkElement.h" | |
| 39 #include "core/testing/DummyPageHolder.h" | |
| 40 #include "public/platform/WebDistillability.h" | |
| 41 #include <gmock/gmock.h> | |
| 42 #include <gtest/gtest.h> | |
| 43 | |
| 44 namespace blink { | |
| 45 | |
| 46 class DocumentStatisticsCollectorTest : public ::testing::Test { | |
| 47 protected: | |
| 48 void SetUp() override; | |
| 49 | |
| 50 #if ENABLE(OILPAN) | |
| 51 void TearDown() override | |
| 52 { | |
| 53 Heap::collectAllGarbage(); | |
| 54 } | |
| 55 #endif | |
| 56 | |
| 57 Document& document() const { return m_dummyPageHolder->document(); } | |
| 58 | |
| 59 void setHtmlInnerHTML(const char*); | |
|
esprehn
2015/10/26 21:43:09
const String&
wychen
2015/10/27 23:52:12
Done.
| |
| 60 | |
| 61 private: | |
| 62 OwnPtr<DummyPageHolder> m_dummyPageHolder; | |
| 63 }; | |
| 64 | |
| 65 void DocumentStatisticsCollectorTest::SetUp() | |
| 66 { | |
| 67 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); | |
| 68 } | |
| 69 | |
| 70 void DocumentStatisticsCollectorTest::setHtmlInnerHTML(const char* htmlContent) | |
| 71 { | |
| 72 document().documentElement()->setInnerHTML(String::fromUTF8(htmlContent), ASSERT_NO_EXCEPTION); | |
|
esprehn
2015/10/26 21:43:09
from fromtUTF8
wychen
2015/10/27 23:52:12
I'm not quite sure I understand this comment. For
| |
| 73 document().view()->updateAllLifecyclePhases(); | |
|
esprehn
2015/10/26 21:43:09
remove this, you don't need it.
wychen
2015/10/27 23:52:12
Without this line, there's an assertion error:
ASS
| |
| 74 } | |
| 75 | |
| 76 // This test checks open graph articles can be recognized. | |
| 77 TEST_F(DocumentStatisticsCollectorTest, HasOpenGraphArticle) | |
| 78 { | |
| 79 setHtmlInnerHTML( | |
| 80 "<head>" | |
| 81 // Note the case-insensitive matching of the word "article". | |
| 82 " <meta property='og:type' content='arTiclE' />" | |
| 83 "</head>" | |
| 84 ); | |
| 85 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); | |
| 86 | |
| 87 EXPECT_EQ(true, features.openGraph); | |
| 88 } | |
| 89 | |
| 90 // This test checks non-existence of open graph articles can be recognized. | |
| 91 TEST_F(DocumentStatisticsCollectorTest, NoOpenGraphArticle) | |
| 92 { | |
| 93 setHtmlInnerHTML( | |
| 94 "<head>" | |
| 95 " <meta property='og:type' content='movie' />" | |
| 96 "</head>" | |
| 97 ); | |
| 98 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); | |
| 99 | |
| 100 EXPECT_EQ(false, features.openGraph); | |
| 101 } | |
| 102 | |
| 103 // This test checks element counts are correct. | |
| 104 TEST_F(DocumentStatisticsCollectorTest, CountElements) | |
| 105 { | |
| 106 setHtmlInnerHTML( | |
| 107 "<form>" | |
| 108 " <input type='text'>" | |
| 109 " <input type='password'>" | |
| 110 "</form>" | |
| 111 "<pre></pre>" | |
| 112 "<p><a> </a></p>" | |
| 113 "<ul><li><p><a> </a></p></li></ul>" | |
| 114 ); | |
| 115 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); | |
| 116 | |
| 117 EXPECT_EQ(false, features.openGraph); | |
| 118 | |
| 119 EXPECT_EQ(10u, features.elementCount); | |
| 120 EXPECT_EQ(2u, features.anchorCount); | |
| 121 EXPECT_EQ(1u, features.formCount); | |
| 122 EXPECT_EQ(1u, features.textInputCount); | |
| 123 EXPECT_EQ(1u, features.passwordInputCount); | |
| 124 EXPECT_EQ(2u, features.pCount); | |
| 125 EXPECT_EQ(1u, features.preCount); | |
| 126 } | |
| 127 | |
| 128 // This test checks score calculations are correct. | |
| 129 TEST_F(DocumentStatisticsCollectorTest, CountScore) | |
| 130 { | |
| 131 setHtmlInnerHTML( | |
| 132 "<p class='menu' id='article'> 1 </p>" // trimmedTextContentLength = 1 | |
| 133 "<ul><li><p>12</p></li></ul>" // trimmedTextContentLength = 2, skipped because under li | |
| 134 "<p class='menu'>123</p>" // trimmedTextContentLength = 3, skipped because unlikelyCandidates | |
| 135 "<p>" | |
| 136 "12345678901234567890123456789012345678901234567890" | |
| 137 "12345678901234567890123456789012345678901234567890" | |
| 138 "12345678901234567890123456789012345678901234" | |
| 139 "</p>" // trimmedTextContentLength = 144 | |
| 140 "<p style='display:none'>12345</p>" // trimmedTextContentLength = 5, skipped because invisible | |
| 141 "<div style='visibility:hidden'><p>123456</p></div>" // trimmedTextContentLength = 6, skipped because invisible | |
| 142 "<p style='opacity:0'>1234567</p>" // trimmedTextContentLength = 7, skipped because invisible | |
| 143 "<p> <a href='#'> 12345 </a> 9 <b> </b> </p>" // trimmedTextContentLength = 9 | |
| 144 ); | |
| 145 WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); | |
| 146 | |
| 147 const unsigned kParagraphLengthThreshold = 140; | |
| 148 | |
| 149 EXPECT_DOUBLE_EQ(features.mozScore, sqrt(144 - kParagraphLengthThreshold)); | |
| 150 EXPECT_DOUBLE_EQ(features.mozScoreAllSqrt, 1 + sqrt(144) + sqrt(9)); | |
| 151 EXPECT_DOUBLE_EQ(features.mozScoreAllLinear, 1 + 144 + 9); | |
| 152 } | |
| 153 | |
| 154 } // namespace blink | |
| OLD | NEW |