Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(180)

Unified Diff: third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp

Issue 1419033004: Add feature extraction for distillability to Blink (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: address comments, remove innerText Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp
diff --git a/third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp b/third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..292fd3820297c066d9c348a8aa6c0d2c1f3c8f71
--- /dev/null
+++ b/third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp
@@ -0,0 +1,129 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "config.h"
+#include "core/dom/DocumentStatisticsCollector.h"
+
+#include "core/dom/Document.h"
+#include "core/dom/DocumentVisibilityObserver.h"
+#include "core/frame/FrameView.h"
+#include "core/html/HTMLHeadElement.h"
+#include "core/html/HTMLLinkElement.h"
+#include "core/testing/DummyPageHolder.h"
+#include "public/platform/WebDistillability.h"
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace blink {
+
+// Fixture shared by the DocumentStatisticsCollector tests. It holds a
+// DummyPageHolder and hands out that holder's Document to the test bodies.
+class DocumentStatisticsCollectorTest : public ::testing::Test {
+protected:
+    // Defined out of line; populates m_dummyPageHolder.
+    void SetUp() override;
+
+#if ENABLE(OILPAN)
+    // Only compiled with Oilpan: runs a full garbage collection in TearDown.
+    void TearDown() override
+    {
+        Heap::collectAllGarbage();
+    }
+#endif
+
+    // Defined out of line; takes the HTML to install in the document.
+    void setHtmlInnerHTML(const String&);
+
+    // The Document that belongs to the dummy page holder.
+    Document& document() const { return m_dummyPageHolder->document(); }
+
+private:
+    OwnPtr<DummyPageHolder> m_dummyPageHolder;
+};
+
+// Creates the DummyPageHolder for the test, passing an 800x600 IntSize.
+void DocumentStatisticsCollectorTest::SetUp()
+{
+    const IntSize pageSize(800, 600);
+    m_dummyPageHolder = DummyPageHolder::create(pageSize);
+}
+
+// Sets the inner HTML of the document element to |htmlContent|, then updates
+// all lifecycle phases on the document's view.
+void DocumentStatisticsCollectorTest::setHtmlInnerHTML(const String& htmlContent)
+{
+    Document& doc = document();
+    doc.documentElement()->setInnerHTML(htmlContent, ASSERT_NO_EXCEPTION);
+    doc.view()->updateAllLifecyclePhases();
+}
+
+// Checks that an og:type meta tag whose content is "article" is reported
+// through the openGraph feature.
+TEST_F(DocumentStatisticsCollectorTest, HasOpenGraphArticle)
+{
+    setHtmlInnerHTML(
+        "<head>"
+        // Note the case-insensitive matching of the word "article".
+        " <meta property='og:type' content='arTiclE' />"
+        "</head>"
+    );
+    WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
+
+    // Boolean expectation: use EXPECT_TRUE rather than comparing against true.
+    EXPECT_TRUE(features.openGraph);
+}
+
+// Checks that an og:type meta tag with a non-article content ("movie") leaves
+// the openGraph feature unset.
+TEST_F(DocumentStatisticsCollectorTest, NoOpenGraphArticle)
+{
+    setHtmlInnerHTML(
+        "<head>"
+        " <meta property='og:type' content='movie' />"
+        "</head>"
+    );
+    WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
+
+    // Boolean expectation: use EXPECT_FALSE rather than comparing against false.
+    EXPECT_FALSE(features.openGraph);
+}
+
+// Checks the per-tag counters reported for a small document containing a
+// form with two inputs, a pre, and two paragraphs that each wrap an anchor.
+TEST_F(DocumentStatisticsCollectorTest, CountElements)
+{
+    setHtmlInnerHTML(
+        "<form>"
+        " <input type='text'>"
+        " <input type='password'>"
+        "</form>"
+        "<pre></pre>"
+        "<p><a> </a></p>"
+        "<ul><li><p><a> </a></p></li></ul>"
+    );
+    WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
+
+    // No og:type meta tag is present in this markup.
+    EXPECT_FALSE(features.openGraph);
+
+    // form, 2 x input, pre, 2 x p, 2 x a, ul, li.
+    EXPECT_EQ(10u, features.elementCount);
+    EXPECT_EQ(2u, features.anchorCount);
+    EXPECT_EQ(1u, features.formCount);
+    EXPECT_EQ(1u, features.textInputCount);
+    EXPECT_EQ(1u, features.passwordInputCount);
+    EXPECT_EQ(2u, features.pCount);
+    EXPECT_EQ(1u, features.preCount);
+}
+
+// Checks the three score features against values derived from the paragraph
+// lengths annotated next to each line of markup below. Per those annotations,
+// only the paragraphs of length 1, 144 and 9 are expected to be scored.
+TEST_F(DocumentStatisticsCollectorTest, CountScore)
+{
+    setHtmlInnerHTML(
+        "<p class='menu' id='article'> 1 </p>" // trimmedTextContentLength = 1
+        "<ul><li><p>12</p></li></ul>" // trimmedTextContentLength = 2, skipped because under li
+        "<p class='menu'>123</p>" // trimmedTextContentLength = 3, skipped because unlikelyCandidates
+        "<p>"
+        "12345678901234567890123456789012345678901234567890"
+        "12345678901234567890123456789012345678901234567890"
+        "12345678901234567890123456789012345678901234"
+        "</p>" // trimmedTextContentLength = 144
+        "<p style='display:none'>12345</p>" // trimmedTextContentLength = 5, skipped because invisible
+        "<div style='display:none'><p>123456</p></div>" // trimmedTextContentLength = 6, skipped because invisible
+        "<div style='visibility:hidden'><p>1234567</p></div>" // trimmedTextContentLength = 7, skipped because invisible
+        "<p style='opacity:0'>12345678</p>" // trimmedTextContentLength = 8, skipped because invisible
+        "<p> <a href='#'> 12345 </a> 9 <b> </b> </p>" // trimmedTextContentLength = 9
+    );
+    WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document());
+
+    // Local to this test; presumably mirrors the threshold the collector
+    // applies to mozScore -- confirm against DocumentStatisticsCollector.
+    const unsigned kParagraphLengthThreshold = 140;
+
+    // Expected value first, matching the argument order of the other
+    // assertions in this file.
+    EXPECT_DOUBLE_EQ(sqrt(144 - kParagraphLengthThreshold), features.mozScore);
+    EXPECT_DOUBLE_EQ(1 + sqrt(144) + sqrt(9), features.mozScoreAllSqrt);
+    EXPECT_DOUBLE_EQ(1 + 144 + 9, features.mozScoreAllLinear);
+}
+
+} // namespace blink

Powered by Google App Engine
This is Rietveld 408576698