Chromium Code Reviews| Index: third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp |
| diff --git a/third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp b/third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..986f6bd1ff4a2f357c234b2a1256d56495352dfd |
| --- /dev/null |
| +++ b/third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp |
| @@ -0,0 +1,154 @@ |
| +/* |
| + * Copyright (c) 2015, Google Inc. All rights reserved. |
| + * |
| + * Redistribution and use in source and binary forms, with or without |
| + * modification, are permitted provided that the following conditions are |
|
esprehn
2015/10/26 21:43:09
Use the modern short copyright.
wychen
2015/10/27 23:52:12
Done.
|
| + * met: |
| + * |
| + * * Redistributions of source code must retain the above copyright |
| + * notice, this list of conditions and the following disclaimer. |
| + * * Redistributions in binary form must reproduce the above |
| + * copyright notice, this list of conditions and the following disclaimer |
| + * in the documentation and/or other materials provided with the |
| + * distribution. |
| + * * Neither the name of Google Inc. nor the names of its |
| + * contributors may be used to endorse or promote products derived from |
| + * this software without specific prior written permission. |
| + * |
| + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| + */ |
| + |
| +#include "config.h" |
| +#include "core/dom/DocumentStatisticsCollector.h" |
| + |
| +#include "core/dom/Document.h" |
| +#include "core/dom/DocumentVisibilityObserver.h" |
| +#include "core/frame/FrameView.h" |
| +#include "core/html/HTMLHeadElement.h" |
| +#include "core/html/HTMLLinkElement.h" |
| +#include "core/testing/DummyPageHolder.h" |
| +#include "public/platform/WebDistillability.h" |
| +#include <gmock/gmock.h> |
| +#include <gtest/gtest.h> |
| + |
| +namespace blink { |
| + |
| +// Test fixture for DocumentStatisticsCollector. It owns a DummyPageHolder and |
| +// hands that holder's Document to the individual test cases. |
| +class DocumentStatisticsCollectorTest : public ::testing::Test { |
| +protected: |
| + // gtest hook; defined out of line, it creates the DummyPageHolder. |
| + void SetUp() override; |
| + |
| +#if ENABLE(OILPAN) |
| + // gtest hook; with Oilpan enabled, calls Heap::collectAllGarbage() after each test. |
| + void TearDown() override { Heap::collectAllGarbage(); } |
| +#endif |
| + |
| + // Returns the Document that belongs to the dummy page. |
| + Document& document() const |
| + { |
| + return m_dummyPageHolder->document(); |
| + } |
| + |
| + // Sets the inner HTML of the document element; defined out of line. |
| + void setHtmlInnerHTML(const char*); |

esprehn
2015/10/26 21:43:09
const String&
wychen
2015/10/27 23:52:12
Done.
|
| + |
| +private: |
| + // Page/document scaffolding used by the tests in this fixture. |
| + OwnPtr<DummyPageHolder> m_dummyPageHolder; |
| +}; |
| + |
| +// Creates the dummy page, passing an 800x600 IntSize, before each test case. |
| +void DocumentStatisticsCollectorTest::SetUp() |
| +{ |
| + const IntSize pageSize(800, 600); |
| + m_dummyPageHolder = DummyPageHolder::create(pageSize); |
| +} |
| + |
| +// Replaces the inner HTML of the document element with htmlContent (decoded as |
| +// UTF-8) and then updates all lifecycle phases of the document's view. |
| +void DocumentStatisticsCollectorTest::setHtmlInnerHTML(const char* htmlContent) |
| +{ |
| + const String markup = String::fromUTF8(htmlContent); |
| + Document& doc = document(); |
| + doc.documentElement()->setInnerHTML(markup, ASSERT_NO_EXCEPTION); |

esprehn
2015/10/26 21:43:09
from fromtUTF8
wychen
2015/10/27 23:52:12
I'm not quite sure I understand this comment. For
|
| + doc.view()->updateAllLifecyclePhases(); |

esprehn
2015/10/26 21:43:09
remove this, you don't need it.
wychen
2015/10/27 23:52:12
Without this line, there's an assertion error:
ASS
|
| +} |
| + |
| +// Checks that a document whose og:type meta tag is "article" is reported as an |
| +// Open Graph article. |
| +TEST_F(DocumentStatisticsCollectorTest, HasOpenGraphArticle) |
| +{ |
| + setHtmlInnerHTML( |
| + "<head>" |
| + // Note the case-insensitive matching of the word "article". |
| + " <meta property='og:type' content='arTiclE' />" |
| + "</head>" |
| + ); |
| + WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); |
| + |
| + // Boolean results are checked with EXPECT_TRUE rather than EXPECT_EQ(true, ...). |
| + EXPECT_TRUE(features.openGraph); |
| +} |
| + |
| +// Checks that a document whose og:type meta tag is not "article" (here: "movie") |
| +// is not reported as an Open Graph article. |
| +TEST_F(DocumentStatisticsCollectorTest, NoOpenGraphArticle) |
| +{ |
| + setHtmlInnerHTML( |
| + "<head>" |
| + " <meta property='og:type' content='movie' />" |
| + "</head>" |
| + ); |
| + WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); |
| + |
| + // EXPECT_FALSE avoids passing a literal false as the first EXPECT_EQ argument. |
| + EXPECT_FALSE(features.openGraph); |
| +} |
| + |
| +// Checks that the per-tag element counters match the markup below. |
| +TEST_F(DocumentStatisticsCollectorTest, CountElements) |
| +{ |
| + setHtmlInnerHTML( |
| + "<form>" |
| + " <input type='text'>" |
| + " <input type='password'>" |
| + "</form>" |
| + "<pre></pre>" |
| + "<p><a> </a></p>" |
| + "<ul><li><p><a> </a></p></li></ul>" |
| + ); |
| + WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); |
| + |
| + // The markup contains no og:type meta tag. |
| + EXPECT_FALSE(features.openGraph); |
| + |
| + // form, 2 x input, pre, 2 x p, 2 x a, ul and li add up to 10 elements. |
| + EXPECT_EQ(10u, features.elementCount); |
| + EXPECT_EQ(2u, features.anchorCount); |
| + EXPECT_EQ(1u, features.formCount); |
| + EXPECT_EQ(1u, features.textInputCount); |
| + EXPECT_EQ(1u, features.passwordInputCount); |
| + EXPECT_EQ(2u, features.pCount); |
| + EXPECT_EQ(1u, features.preCount); |
| +} |
| + |
| +// Checks the three mozScore variants. Per the notes on each paragraph below, |
| +// only the paragraphs with trimmed lengths 1, 144 and 9 are expected to count. |
| +TEST_F(DocumentStatisticsCollectorTest, CountScore) |
| +{ |
| + setHtmlInnerHTML( |
| + "<p class='menu' id='article'> 1 </p>" // trimmedTextContentLength = 1 |
| + "<ul><li><p>12</p></li></ul>" // trimmedTextContentLength = 2, skipped because under li |
| + "<p class='menu'>123</p>" // trimmedTextContentLength = 3, skipped because unlikelyCandidates |
| + "<p>" |
| + "12345678901234567890123456789012345678901234567890" |
| + "12345678901234567890123456789012345678901234567890" |
| + "12345678901234567890123456789012345678901234" |
| + "</p>" // trimmedTextContentLength = 144 |
| + "<p style='display:none'>12345</p>" // trimmedTextContentLength = 5, skipped because invisible |
| + "<div style='visibility:hidden'><p>123456</p></div>" // trimmedTextContentLength = 6, skipped because invisible |
| + "<p style='opacity:0'>1234567</p>" // trimmedTextContentLength = 7, skipped because invisible |
| + "<p> <a href='#'> 12345 </a> 9 <b> </b> </p>" // trimmedTextContentLength = 9 |
| + ); |
| + WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); |
| + |
| + // Test-local threshold; this test expects only the 144-character paragraph to exceed it. |
| + const unsigned kParagraphLengthThreshold = 140; |
| + |
| + // Expected value first, actual value second, matching the EXPECT_EQ calls in this file. |
| + EXPECT_DOUBLE_EQ(sqrt(144 - kParagraphLengthThreshold), features.mozScore); |
| + EXPECT_DOUBLE_EQ(1 + sqrt(144) + sqrt(9), features.mozScoreAllSqrt); |
| + EXPECT_DOUBLE_EQ(1 + 144 + 9, features.mozScoreAllLinear); |
| +} |
| + |
| +} // namespace blink |