| Index: third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp
 | 
| diff --git a/third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp b/third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp
 | 
| new file mode 100644
 | 
| index 0000000000000000000000000000000000000000..d3b5de685d3a78308130aafa89723ec204f448a1
 | 
| --- /dev/null
 | 
| +++ b/third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp
 | 
| @@ -0,0 +1,157 @@
 | 
| +// Copyright 2015 The Chromium Authors. All rights reserved.
 | 
| +// Use of this source code is governed by a BSD-style license that can be
 | 
| +// found in the LICENSE file.
 | 
| +
 | 
| +#include "config.h"
 | 
| +#include "core/dom/DocumentStatisticsCollector.h"
 | 
| +
 | 
| +#include "core/dom/Document.h"
 | 
| +#include "core/dom/DocumentVisibilityObserver.h"
 | 
| +#include "core/frame/FrameView.h"
 | 
| +#include "core/html/HTMLHeadElement.h"
 | 
| +#include "core/html/HTMLLinkElement.h"
 | 
| +#include "core/testing/DummyPageHolder.h"
 | 
| +#include "public/platform/WebDistillability.h"
 | 
| +#include "wtf/text/StringBuilder.h"
 | 
| +
 | 
| +#include <gmock/gmock.h>
 | 
| +#include <gtest/gtest.h>
 | 
| +
 | 
| +namespace blink {
 | 
| +
 | 
| +// Saturate the length of a paragraph to save time. Used in the CountScoreSaturation expectations.
 | 
| +const unsigned kTextContentLengthSaturation = 1000;
 | 
| +
 | 
| +// Filter out short P elements. The threshold is set to around 2 English sentences. Subtracted from the paragraph length in the expected mozScore values.
 | 
| +const unsigned kParagraphLengthThreshold = 140;
 | 
| +
 | 
| +class DocumentStatisticsCollectorTest : public ::testing::Test { // Fixture that owns a DummyPageHolder and exposes its document to the tests.
 | 
| +protected:
 | 
| +    void SetUp() override; // Creates m_dummyPageHolder.
 | 
| +
 | 
| +#if ENABLE(OILPAN)
 | 
| +    void TearDown() override
 | 
| +    {
 | 
| +        Heap::collectAllGarbage(); // Only compiled when ENABLE(OILPAN) is set.
 | 
| +    }
 | 
| +#endif
 | 
| +
 | 
| +    Document& document() const { return m_dummyPageHolder->document(); } // Document held by m_dummyPageHolder.
 | 
| +
 | 
| +    void setHtmlInnerHTML(const String&); // Sets the document element's inner HTML, then updates all lifecycle phases.
 | 
| +
 | 
| +private:
 | 
| +    OwnPtr<DummyPageHolder> m_dummyPageHolder; // Assigned in SetUp().
 | 
| +};
 | 
| +
 | 
| +void DocumentStatisticsCollectorTest::SetUp()
 | 
| +{
 | 
| +    m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); // NOTE(review): 800x600 is presumably the initial view size - confirm.
 | 
| +}
 | 
| +
 | 
| +void DocumentStatisticsCollectorTest::setHtmlInnerHTML(const String& htmlContent)
 | 
| +{
 | 
| +    document().documentElement()->setInnerHTML((htmlContent), ASSERT_NO_EXCEPTION); // Replaces everything inside the document element with the test markup.
 | 
| +    document().view()->updateAllLifecyclePhases(); // Run lifecycle updates on the view before the test collects statistics.
 | 
| +}
 | 
| +
 | 
| +// This test checks that Open Graph articles are recognized.
 | 
| +TEST_F(DocumentStatisticsCollectorTest, HasOpenGraphArticle)
 | 
| +{
 | 
| +    setHtmlInnerHTML(
 | 
| +        "<head>"
 | 
| +        // Note the case-insensitive matching of the word "article".
 | 
| +        "    <meta property='og:type' content='arTiclE' />"
 | 
| +        "</head>"
 | 
| +    );
 | 
| +    WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
 | 
| +
 | 
| +    EXPECT_TRUE(features.openGraph);
 | 
| +}
 | 
| +
 | 
| +// This test checks that an og:type other than "article" is not reported as an Open Graph article.
 | 
| +TEST_F(DocumentStatisticsCollectorTest, NoOpenGraphArticle)
 | 
| +{
 | 
| +    setHtmlInnerHTML(
 | 
| +        "<head>"
 | 
| +        "    <meta property='og:type' content='movie' />"
 | 
| +        "</head>"
 | 
| +    );
 | 
| +    WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
 | 
| +
 | 
| +    EXPECT_FALSE(features.openGraph);
 | 
| +}
 | 
| +
 | 
| +// This test checks that the per-type element counts are correct.
 | 
| +TEST_F(DocumentStatisticsCollectorTest, CountElements)
 | 
| +{
 | 
| +    setHtmlInnerHTML(
 | 
| +        "<form>"
 | 
| +        "    <input type='text'>"
 | 
| +        "    <input type='password'>"
 | 
| +        "</form>"
 | 
| +        "<pre></pre>"
 | 
| +        "<p><a>    </a></p>"
 | 
| +        "<ul><li><p><a>    </a></p></li></ul>"
 | 
| +    );
 | 
| +    WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
 | 
| +
 | 
| +    EXPECT_FALSE(features.openGraph);
 | 
| +
 | 
| +    EXPECT_EQ(10u, features.elementCount); // form, input x2, pre, p x2, a x2, ul, li: the elements written in the markup above.
 | 
| +    EXPECT_EQ(2u, features.anchorCount);
 | 
| +    EXPECT_EQ(1u, features.formCount);
 | 
| +    EXPECT_EQ(1u, features.textInputCount);
 | 
| +    EXPECT_EQ(1u, features.passwordInputCount);
 | 
| +    EXPECT_EQ(2u, features.pCount); // Includes the <p> nested under <li>.
 | 
| +    EXPECT_EQ(1u, features.preCount);
 | 
| +}
 | 
| +
 | 
| +// This test checks that score calculations are correct, including which paragraphs are skipped.
 | 
| +TEST_F(DocumentStatisticsCollectorTest, CountScore)
 | 
| +{
 | 
| +    setHtmlInnerHTML(
 | 
| +        "<p class='menu' id='article'>1</p>" // textContentLength = 1, counted (the "1 +" terms in the expectations below)
 | 
| +        "<ul><li><p>12</p></li></ul>" // textContentLength = 2, skipped because under li
 | 
| +        "<p class='menu'>123</p>" // textContentLength = 3, skipped because unlikelyCandidates
 | 
| +        "<p>"
 | 
| +            "12345678901234567890123456789012345678901234567890"
 | 
| +            "12345678901234567890123456789012345678901234567890"
 | 
| +            "12345678901234567890123456789012345678901234"
 | 
| +        "</p>" // textContentLength = 144
 | 
| +        "<p style='display:none'>12345</p>" // textContentLength = 5, skipped because invisible
 | 
| +        "<div style='display:none'><p>123456</p></div>" // textContentLength = 6, skipped because invisible
 | 
| +        "<div style='visibility:hidden'><p>1234567</p></div>" // textContentLength = 7, skipped because invisible
 | 
| +        "<p style='opacity:0'>12345678</p>" // textContentLength = 8, skipped because invisible
 | 
| +        "<p><a href='#'>1234 </a>6 <b> 9</b></p>" // textContentLength = 9, counting the spaces and the text inside <a> and <b>
 | 
| +    );
 | 
| +    WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
 | 
| +
 | 
| +    EXPECT_DOUBLE_EQ(features.mozScore, sqrt(144 - kParagraphLengthThreshold)); // Uses only the 144-character paragraph, the one longer than kParagraphLengthThreshold.
 | 
| +    EXPECT_DOUBLE_EQ(features.mozScoreAllSqrt, 1 + sqrt(144) + sqrt(9)); // Paragraphs of length 1, 144 and 9 are the ones not marked as skipped above.
 | 
| +    EXPECT_DOUBLE_EQ(features.mozScoreAllLinear, 1 + 144 + 9);
 | 
| +}
 | 
| +
 | 
| +// This test checks that score calculations saturate for very long content.
 | 
| +TEST_F(DocumentStatisticsCollectorTest, CountScoreSaturation)
 | 
| +{
 | 
| +    StringBuilder html;
 | 
| +    for (int i = 0; i < 10; i++) { // 10 paragraphs...
 | 
| +        html.append("<p>");
 | 
| +        for (int j = 0; j < 1000; j++) { // ...of 1000 * 10 = 10000 characters each, well above kTextContentLengthSaturation.
 | 
| +            html.append("0123456789");
 | 
| +        }
 | 
| +        html.append("</p>");
 | 
| +    }
 | 
| +    setHtmlInnerHTML(
 | 
| +        html.toString()
 | 
| +    );
 | 
| +    WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
 | 
| +
 | 
| +    double error = 1e-5; // Absolute tolerance passed to EXPECT_NEAR.
 | 
| +    EXPECT_NEAR(features.mozScore, 6 * sqrt(kTextContentLengthSaturation - kParagraphLengthThreshold), error); // NOTE(review): 10 paragraphs are generated but a factor of 6 is expected; presumably the collector also caps the total score - confirm.
 | 
| +    EXPECT_NEAR(features.mozScoreAllSqrt, 6 * sqrt(kTextContentLengthSaturation), error);
 | 
| +    EXPECT_NEAR(features.mozScoreAllLinear, 6 * kTextContentLengthSaturation, error);
 | 
| +}
 | 
| +
 | 
| +} // namespace blink
 | 
| 
 |