Index: third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp |
diff --git a/third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp b/third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp |
new file mode 100644 |
index 0000000000000000000000000000000000000000..986f6bd1ff4a2f357c234b2a1256d56495352dfd |
--- /dev/null |
+++ b/third_party/WebKit/Source/core/dom/DocumentStatisticsCollectorTest.cpp |
@@ -0,0 +1,154 @@ |
+/* |
+ * Copyright (c) 2015, Google Inc. All rights reserved. |
+ * |
+ * Redistribution and use in source and binary forms, with or without |
+ * modification, are permitted provided that the following conditions are |
esprehn
2015/10/26 21:43:09
Use the modern short copyright.
wychen
2015/10/27 23:52:12
Done.
|
+ * met: |
+ * |
+ * * Redistributions of source code must retain the above copyright |
+ * notice, this list of conditions and the following disclaimer. |
+ * * Redistributions in binary form must reproduce the above |
+ * copyright notice, this list of conditions and the following disclaimer |
+ * in the documentation and/or other materials provided with the |
+ * distribution. |
+ * * Neither the name of Google Inc. nor the names of its |
+ * contributors may be used to endorse or promote products derived from |
+ * this software without specific prior written permission. |
+ * |
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
+ */ |
+ |
+#include "config.h" |
+#include "core/dom/DocumentStatisticsCollector.h" |
+ |
+#include "core/dom/Document.h" |
+#include "core/dom/DocumentVisibilityObserver.h" |
+#include "core/frame/FrameView.h" |
+#include "core/html/HTMLHeadElement.h" |
+#include "core/html/HTMLLinkElement.h" |
+#include "core/testing/DummyPageHolder.h" |
+#include "public/platform/WebDistillability.h" |
+#include <gmock/gmock.h> |
+#include <gtest/gtest.h> |
+ |
+namespace blink { |
+ |
+// Test fixture for DocumentStatisticsCollector. It owns a DummyPageHolder and |
+// gives each test access to that page's document. |
+class DocumentStatisticsCollectorTest : public ::testing::Test { |
+protected: |
+    // Creates the dummy page (constructed with IntSize(800, 600)) before each test. |
+    void SetUp() override |
+    { |
+        m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); |
+    } |
+ |
+#if ENABLE(OILPAN) |
+    // On Oilpan builds, calls Heap::collectAllGarbage() after each test. |
+    void TearDown() override |
+    { |
+        Heap::collectAllGarbage(); |
+    } |
+#endif |
+ |
+    // Returns the document that belongs to the dummy page created in SetUp(). |
+    Document& document() const { return m_dummyPageHolder->document(); } |
+ |
+    // Replaces the inner HTML of the document element with the given markup. |
+    void setHtmlInnerHTML(const char*); |
esprehn
2015/10/26 21:43:09
const String&
wychen
2015/10/27 23:52:12
Done.
|
+ |
+private: |
+    // Holder for the page whose document the tests inspect. |
+    OwnPtr<DummyPageHolder> m_dummyPageHolder; |
+}; |
+ |
+// Converts htmlContent from UTF-8, installs it as the inner HTML of the |
+// document element, and then updates all lifecycle phases of the view. |
+void DocumentStatisticsCollectorTest::setHtmlInnerHTML(const char* htmlContent) |
+{ |
+    const String markup = String::fromUTF8(htmlContent); |
+    document().documentElement()->setInnerHTML(markup, ASSERT_NO_EXCEPTION); |
esprehn
2015/10/26 21:43:09
from fromtUTF8
wychen
2015/10/27 23:52:12
I'm not quite sure I understand this comment. For
|
+    // Kept deliberately: per the review discussion, dropping this call was |
+    // reported to trigger an assertion failure. |
+    document().view()->updateAllLifecyclePhases(); |
esprehn
2015/10/26 21:43:09
remove this, you don't need it.
wychen
2015/10/27 23:52:12
Without this line, there's an assertion error:
ASS
|
+} |
+ |
+// Verifies that a document whose og:type meta tag is "article" is reported as |
+// an Open Graph article by DocumentStatisticsCollector::collectStatistics(). |
+TEST_F(DocumentStatisticsCollectorTest, HasOpenGraphArticle) |
+{ |
+    setHtmlInnerHTML( |
+        "<head>" |
+        // Note the case-insensitive matching of the word "article". |
+        " <meta property='og:type' content='arTiclE' />" |
+        "</head>" |
+    ); |
+    WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); |
+ |
+    // Boolean expectation: EXPECT_TRUE states the intent directly and avoids |
+    // comparing against a bool literal. |
+    EXPECT_TRUE(features.openGraph); |
+} |
+ |
+// Verifies that a document whose og:type meta tag is not "article" (here |
+// "movie") is not reported as an Open Graph article. |
+TEST_F(DocumentStatisticsCollectorTest, NoOpenGraphArticle) |
+{ |
+    setHtmlInnerHTML( |
+        "<head>" |
+        " <meta property='og:type' content='movie' />" |
+        "</head>" |
+    ); |
+    WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); |
+ |
+    // Boolean expectation: EXPECT_FALSE states the intent directly and avoids |
+    // comparing against a bool literal. |
+    EXPECT_FALSE(features.openGraph); |
+} |
+ |
+// Verifies the per-tag element counters reported by collectStatistics() for a |
+// small document containing a form, inputs, a pre, paragraphs, anchors and a list. |
+TEST_F(DocumentStatisticsCollectorTest, CountElements) |
+{ |
+    setHtmlInnerHTML( |
+        "<form>" |
+        " <input type='text'>" |
+        " <input type='password'>" |
+        "</form>" |
+        "<pre></pre>" |
+        "<p><a> </a></p>" |
+        "<ul><li><p><a> </a></p></li></ul>" |
+    ); |
+    WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); |
+ |
+    // The markup carries no og:type meta tag. |
+    EXPECT_FALSE(features.openGraph); |
+ |
+    // The markup above spells out ten elements in total: form, two inputs, |
+    // pre, two p, two a, ul and li. |
+    EXPECT_EQ(10u, features.elementCount); |
+    EXPECT_EQ(2u, features.anchorCount); |
+    EXPECT_EQ(1u, features.formCount); |
+    EXPECT_EQ(1u, features.textInputCount); |
+    EXPECT_EQ(1u, features.passwordInputCount); |
+    EXPECT_EQ(2u, features.pCount); |
+    EXPECT_EQ(1u, features.preCount); |
+} |
+ |
+// Verifies the three score values (mozScore, mozScoreAllSqrt and |
+// mozScoreAllLinear) computed from the trimmed text lengths of paragraphs. |
+// Per the inline notes, paragraphs under <li>, unlikely candidates and |
+// invisible paragraphs are expected to be skipped. |
+TEST_F(DocumentStatisticsCollectorTest, CountScore) |
+{ |
+    setHtmlInnerHTML( |
+        "<p class='menu' id='article'> 1 </p>" // trimmedTextContentLength = 1 |
+        "<ul><li><p>12</p></li></ul>" // trimmedTextContentLength = 2, skipped because under li |
+        "<p class='menu'>123</p>" // trimmedTextContentLength = 3, skipped because unlikelyCandidates |
+        "<p>" |
+        "12345678901234567890123456789012345678901234567890" |
+        "12345678901234567890123456789012345678901234567890" |
+        "12345678901234567890123456789012345678901234" |
+        "</p>" // trimmedTextContentLength = 144 |
+        "<p style='display:none'>12345</p>" // trimmedTextContentLength = 5, skipped because invisible |
+        "<div style='visibility:hidden'><p>123456</p></div>" // trimmedTextContentLength = 6, skipped because invisible |
+        "<p style='opacity:0'>1234567</p>" // trimmedTextContentLength = 7, skipped because invisible |
+        "<p> <a href='#'> 12345 </a> 9 <b> </b> </p>" // trimmedTextContentLength = 9 |
+    ); |
+    WebDistillabilityFeatures features = DocumentStatisticsCollector::collectStatistics(document()); |
+ |
+    // Threshold used in the expected mozScore below; presumably mirrors the |
+    // constant used by the collector (verify against the implementation). |
+    const unsigned kParagraphLengthThreshold = 140; |
+ |
+    // Expected value first, actual value second, matching the argument order |
+    // of the EXPECT_EQ checks in this file. Only the lengths 1, 144 and 9 |
+    // contribute; of those, only 144 exceeds the threshold. |
+    EXPECT_DOUBLE_EQ(sqrt(144 - kParagraphLengthThreshold), features.mozScore); |
+    EXPECT_DOUBLE_EQ(1 + sqrt(144) + sqrt(9), features.mozScoreAllSqrt); |
+    EXPECT_DOUBLE_EQ(1 + 144 + 9, features.mozScoreAllLinear); |
+} |
+ |
+} // namespace blink |