Index: third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp |
diff --git a/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp b/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp |
index e65edf0ee4b28723b4ce837e5a924173b82b2694..972e25761b1741373aedafb115b5928ccdd9f5bd 100644 |
--- a/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp |
+++ b/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp |
@@ -2,12 +2,16 @@ |
// Use of this source code is governed by a BSD-style license that can be |
// found in the LICENSE file. |
+#include "modules/document_metadata/CopylessPasteExtractor.cpp" |
#include "modules/document_metadata/CopylessPasteExtractor.h" |
#include <memory> |
+#include <string> |
#include "core/dom/Document.h" |
#include "core/dom/Element.h" |
#include "core/testing/DummyPageHolder.h" |
+#include "platform/json/JSONValues.h" |
+#include "platform/testing/URLTestHelpers.h" |
#include "testing/gtest/include/gtest/gtest.h" |
#include "wtf/text/StringBuilder.h" |
@@ -17,14 +21,7 @@ namespace { |
class CopylessPasteExtractorTest : public ::testing::Test { |
public: |
- CopylessPasteExtractorTest() |
- : m_content( |
- "\n" |
- "\n" |
- "{\"@type\": \"NewsArticle\"," |
- "\"headline\": \"Special characters for ya >_<;\"\n" |
- "}\n" |
- "\n") {} |
+ // Nothing to initialize: the shared JSON-LD fixture string is gone and the |
+ // tests in this patch spell out their own markup inline. |
+ // NOTE(review): the m_content member declaration is still present but no |
+ // added line visible in this patch reads or writes it - confirm and remove. |
+ CopylessPasteExtractorTest() {} |
protected: |
void SetUp() override; |
@@ -33,10 +30,16 @@ class CopylessPasteExtractorTest : public ::testing::Test { |
+ // Returns the Document held by m_dummyPageHolder. |
Document& document() const { return m_dummyPageHolder->document(); } |
- String extract() { return CopylessPasteExtractor::extract(document()); } |
+ // Runs CopylessPasteExtractor::extract() on the test document, filling in |
+ // |page|, and forwards its bool result. The tests assert true when they |
+ // expect entities and false when they expect |page| to stay default. |
+ bool extract(WebPage* page) { |
+ return CopylessPasteExtractor::extract(document(), page); |
+ } |
void setHtmlInnerHTML(const String&); |
+ void setURL(const std::string); |
+ |
+ void setTitle(const String&); |
+ |
String m_content; |
private: |
@@ -51,56 +54,566 @@ void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) { |
document().documentElement()->setInnerHTML((htmlContent)); |
} |
+// Sets the test document's URL to |url|, converted via URLTestHelpers::toKURL. |
+// NOTE(review): |url| is taken as a const value, so the string is copied on |
+// every call; `const std::string&` would avoid that, but the in-class |
+// declaration has to change in the same edit. |
+void CopylessPasteExtractorTest::setURL(const std::string url) { |
+ document().setURL(URLTestHelpers::toKURL(url)); |
+} |
+ |
+// Sets the test document's title to |title|. |
+void CopylessPasteExtractorTest::setTitle(const String& title) { |
+ document().setTitle(title); |
+} |
+ |
+// This test adds no JSON-LD markup of its own: extract() must return false |
+// and leave |page| equal to a default-constructed WebPage. |
TEST_F(CopylessPasteExtractorTest, empty) { |
- String extracted = extract(); |
- String expected = "[]"; |
- EXPECT_EQ(expected, extracted); |
+ WebPage page; |
+ ASSERT_FALSE(extract(&page)); |
+ EXPECT_EQ(WebPage(), page); |
} |
+// One JSON-LD script inside <body>: the result must carry the document URL |
+// and title plus a single entity with its @type and name string properties. |
TEST_F(CopylessPasteExtractorTest, basic) { |
setHtmlInnerHTML( |
"<body>" |
- "<script type=\"application/ld+json\">" + |
- m_content + |
+ "<script type=\"application/ld+json\">" |
+ "\n" |
+ "\n" |
+ "{\"@type\": \"Restaurant\"," |
+ "\"name\": \"Special characters for ya >_<;\"" |
+ "}\n" |
+ "\n" |
"</script>" |
"</body>"); |
+ setURL("http://www.test.com/"); |
+ setTitle("My neat website about cool stuff"); |
+ |
+ WebPage extracted; |
+ ASSERT_TRUE(extract(&extracted)); |
+ WebPage expected; |
+ expected.url = "http://www.test.com/"; |
+ expected.title = "My neat website about cool stuff"; |
+ Entity restaurant; |
+ |
+ Property type; |
+ type.name = "@type"; |
+ type.type = JSONValue::TypeString; |
+ type.strVal.push_back("Restaurant"); |
- String extracted = extract(); |
- String expected = "[" + m_content + "]"; |
+ Property nameProperty; |
+ nameProperty.name = "name"; |
+ nameProperty.type = JSONValue::TypeString; |
+ nameProperty.strVal.push_back("Special characters for ya >_<;"); |
+ |
+ restaurant.properties.push_back(type); |
+ restaurant.properties.push_back(nameProperty); |
+ |
+ expected.entities.push_back(restaurant); |
EXPECT_EQ(expected, extracted); |
} |
+// Same markup as the basic test, but placed inside <head>: the expected |
+// WebPage is identical (URL, title and one Restaurant entity). |
TEST_F(CopylessPasteExtractorTest, header) { |
setHtmlInnerHTML( |
"<head>" |
- "<script type=\"application/ld+json\">" + |
- m_content + |
+ "<script type=\"application/ld+json\">" |
+ "\n" |
+ "\n" |
+ "{\"@type\": \"Restaurant\"," |
+ "\"name\": \"Special characters for ya >_<;\"" |
+ "}\n" |
+ "\n" |
"</script>" |
"</head>"); |
- String extracted = extract(); |
- String expected = "[" + m_content + "]"; |
+ setURL("http://www.test.com/"); |
+ setTitle("My neat website about cool stuff"); |
+ |
+ WebPage extracted; |
+ ASSERT_TRUE(extract(&extracted)); |
+ WebPage expected; |
+ expected.url = "http://www.test.com/"; |
+ expected.title = "My neat website about cool stuff"; |
+ Entity restaurant; |
+ |
+ Property type; |
+ type.name = "@type"; |
+ type.type = JSONValue::TypeString; |
+ type.strVal.push_back("Restaurant"); |
+ |
+ Property nameProperty; |
+ nameProperty.name = "name"; |
+ nameProperty.type = JSONValue::TypeString; |
+ nameProperty.strVal.push_back("Special characters for ya >_<;"); |
+ |
+ restaurant.properties.push_back(type); |
+ restaurant.properties.push_back(nameProperty); |
+ |
+ expected.entities.push_back(restaurant); |
EXPECT_EQ(expected, extracted); |
} |
+// One JSON-LD script in <head> and two in <body>, all with the same content: |
+// the expected WebPage holds three equal Restaurant entities. |
TEST_F(CopylessPasteExtractorTest, multiple) { |
setHtmlInnerHTML( |
"<head>" |
- "<script type=\"application/ld+json\">" + |
- m_content + |
+ "<script type=\"application/ld+json\">" |
+ "\n" |
+ "\n" |
+ "{\"@type\": \"Restaurant\"," |
+ "\"name\": \"Special characters for ya >_<;\"" |
+ "}\n" |
+ "\n" |
"</script>" |
"</head>" |
"<body>" |
- "<script type=\"application/ld+json\">" + |
- m_content + |
+ "<script type=\"application/ld+json\">" |
+ "\n" |
+ "\n" |
+ "{\"@type\": \"Restaurant\"," |
+ "\"name\": \"Special characters for ya >_<;\"" |
+ "}\n" |
+ "\n" |
+ "</script>" |
+ "<script type=\"application/ld+json\">" |
+ "\n" |
+ "\n" |
+ "{\"@type\": \"Restaurant\"," |
+ "\"name\": \"Special characters for ya >_<;\"" |
+ "}\n" |
+ "\n" |
+ "</script>" |
+ "</body>"); |
+ |
+ setURL("http://www.test.com/"); |
+ setTitle("My neat website about cool stuff"); |
+ |
+ WebPage extracted; |
+ ASSERT_TRUE(extract(&extracted)); |
+ WebPage expected; |
+ expected.url = "http://www.test.com/"; |
+ expected.title = "My neat website about cool stuff"; |
+ Entity restaurant; |
+ |
+ Property type; |
+ type.name = "@type"; |
+ type.type = JSONValue::TypeString; |
+ type.strVal.push_back("Restaurant"); |
+ |
+ Property nameProperty; |
+ nameProperty.name = "name"; |
+ nameProperty.type = JSONValue::TypeString; |
+ nameProperty.strVal.push_back("Special characters for ya >_<;"); |
+ |
+ restaurant.properties.push_back(type); |
+ restaurant.properties.push_back(nameProperty); |
+ |
+ // One expected entity per <script> block in the markup above. |
+ expected.entities.push_back(restaurant); |
+ expected.entities.push_back(restaurant); |
+ expected.entities.push_back(restaurant); |
+ |
+ EXPECT_EQ(expected, extracted); |
+} |
+ |
+// A JSON object nested under "address" is expected as a TypeObject property |
+// whose entityVal holds one Entity with the nested string properties. |
+TEST_F(CopylessPasteExtractorTest, nested) { |
+ setHtmlInnerHTML( |
+ "<body>" |
+ "<script type=\"application/ld+json\">" |
+ "\n" |
+ "\n" |
+ "{\"@type\": \"Restaurant\"," |
+ "\"name\": \"Ye ol greasy diner\"," |
+ "\"address\": {" |
+ "\n" |
+ " \"streetAddress\": \"123 Big Oak Road\"," |
+ " \"addressLocality\": \"San Francisco\"" |
+ " }\n" |
+ "}\n" |
+ "\n" |
+ "</script>" |
+ "</body>"); |
+ setURL("http://www.test.com/"); |
+ setTitle("My neat website about cool stuff"); |
+ |
+ WebPage extracted; |
+ ASSERT_TRUE(extract(&extracted)); |
+ WebPage expected; |
+ expected.url = "http://www.test.com/"; |
+ expected.title = "My neat website about cool stuff"; |
+ Entity restaurant; |
+ |
+ Property type; |
+ type.name = "@type"; |
+ type.type = JSONValue::TypeString; |
+ type.strVal.push_back("Restaurant"); |
+ |
+ Property name; |
+ name.name = "name"; |
+ name.type = JSONValue::TypeString; |
+ name.strVal.push_back("Ye ol greasy diner"); |
+ |
+ Property streetAddress; |
+ streetAddress.name = "streetAddress"; |
+ streetAddress.type = JSONValue::TypeString; |
+ streetAddress.strVal.push_back("123 Big Oak Road"); |
+ |
+ Property addressLocality; |
+ addressLocality.name = "addressLocality"; |
+ addressLocality.type = JSONValue::TypeString; |
+ addressLocality.strVal.push_back("San Francisco"); |
+ |
+ Entity address; |
+ address.properties.push_back(streetAddress); |
+ address.properties.push_back(addressLocality); |
+ |
+ Property addressProperty; |
+ addressProperty.name = "address"; |
+ addressProperty.type = JSONValue::TypeObject; |
+ addressProperty.entityVal.push_back(address); |
+ |
+ restaurant.properties.push_back(type); |
+ restaurant.properties.push_back(name); |
+ restaurant.properties.push_back(addressProperty); |
+ |
+ expected.entities.push_back(restaurant); |
+ EXPECT_EQ(expected, extracted); |
+} |
+ |
+// A JSON array of strings under "name" is expected as one TypeString property |
+// with one strVal entry per array element, in the same order. |
+TEST_F(CopylessPasteExtractorTest, repeated) { |
+ setHtmlInnerHTML( |
+ "<body>" |
+ "<script type=\"application/ld+json\">" |
+ "\n" |
+ "\n" |
+ "{\"@type\": \"Restaurant\"," |
+ "\"name\": [ \"First name\", \"Second name\"]" |
+ "}\n" |
+ "\n" |
"</script>" |
- "<script type=\"application/ld+json\">" + |
- m_content + |
+ "</body>"); |
+ setURL("http://www.test.com/"); |
+ setTitle("My neat website about cool stuff"); |
+ |
+ WebPage extracted; |
+ ASSERT_TRUE(extract(&extracted)); |
+ WebPage expected; |
+ expected.url = "http://www.test.com/"; |
+ expected.title = "My neat website about cool stuff"; |
+ Entity restaurant; |
+ |
+ Property type; |
+ type.name = "@type"; |
+ type.type = JSONValue::TypeString; |
+ type.strVal.push_back("Restaurant"); |
+ |
+ Property nameProperty; |
+ nameProperty.name = "name"; |
+ nameProperty.type = JSONValue::TypeString; |
+ nameProperty.strVal.push_back("First name"); |
+ nameProperty.strVal.push_back("Second name"); |
+ |
+ restaurant.properties.push_back(type); |
+ restaurant.properties.push_back(nameProperty); |
+ |
+ expected.entities.push_back(restaurant); |
+ EXPECT_EQ(expected, extracted); |
+} |
+ |
+// A JSON array of two objects under "address" is expected as one TypeObject |
+// property whose entityVal holds two Entity values. |
+TEST_F(CopylessPasteExtractorTest, repeatedObject) { |
+ setHtmlInnerHTML( |
+ "<body>" |
+ "<script type=\"application/ld+json\">" |
+ "\n" |
+ "\n" |
+ "{\"@type\": \"Restaurant\"," |
+ "\"name\": \"Ye ol greasy diner\"," |
+ "\"address\": [" |
+ "\n" |
+ " {" |
+ " \"streetAddress\": \"123 Big Oak Road\"," |
+ " \"addressLocality\": \"San Francisco\"" |
+ " },\n" |
+ " {" |
+ " \"streetAddress\": \"123 Big Oak Road\"," |
+ " \"addressLocality\": \"San Francisco\"" |
+ " }\n" |
+ "]\n" |
+ "}\n" |
+ "\n" |
"</script>" |
"</body>"); |
+ setURL("http://www.test.com/"); |
+ setTitle("My neat website about cool stuff"); |
+ |
+ WebPage extracted; |
+ ASSERT_TRUE(extract(&extracted)); |
+ WebPage expected; |
+ expected.url = "http://www.test.com/"; |
+ expected.title = "My neat website about cool stuff"; |
+ Entity restaurant; |
+ |
+ Property type; |
+ type.name = "@type"; |
+ type.type = JSONValue::TypeString; |
+ type.strVal.push_back("Restaurant"); |
+ |
+ Property name; |
+ name.name = "name"; |
+ name.type = JSONValue::TypeString; |
+ name.strVal.push_back("Ye ol greasy diner"); |
+ |
+ Property streetAddress; |
+ streetAddress.name = "streetAddress"; |
+ streetAddress.type = JSONValue::TypeString; |
+ streetAddress.strVal.push_back("123 Big Oak Road"); |
+ |
+ Property addressLocality; |
+ addressLocality.name = "addressLocality"; |
+ addressLocality.type = JSONValue::TypeString; |
+ addressLocality.strVal.push_back("San Francisco"); |
+ |
+ Entity address; |
+ address.properties.push_back(streetAddress); |
+ address.properties.push_back(addressLocality); |
+ |
+ Property addressProperty; |
+ addressProperty.name = "address"; |
+ addressProperty.type = JSONValue::TypeObject; |
+ // Both array elements in the markup are identical, so the same Entity is |
+ // pushed twice. |
+ addressProperty.entityVal.push_back(address); |
+ addressProperty.entityVal.push_back(address); |
+ |
+ restaurant.properties.push_back(type); |
+ restaurant.properties.push_back(name); |
+ restaurant.properties.push_back(addressProperty); |
+ |
+ expected.entities.push_back(restaurant); |
+ EXPECT_EQ(expected, extracted); |
+} |
+ |
+// A 201-character "name" value is expected to come back cut down to its |
+// first 200 characters. |
+TEST_F(CopylessPasteExtractorTest, truncateLongString) { |
+ String maxLengthString; |
+ for (int i = 0; i < 200; ++i) { |
+ maxLengthString.append("a"); |
+ } |
+ // One character longer than the longest value the test expects back. |
+ String tooLongString(maxLengthString); |
+ tooLongString.append("a"); |
+ setHtmlInnerHTML( |
+ "<body>" |
+ "<script type=\"application/ld+json\">" |
+ "\n" |
+ "\n" |
+ "{\"@type\": \"Restaurant\"," |
+ "\"name\": \"" + |
+ tooLongString + |
+ "\"" |
+ "}\n" |
+ "\n" |
+ "</script>" |
+ "</body>"); |
+ setURL("http://www.test.com/"); |
+ setTitle("My neat website about cool stuff"); |
+ |
+ WebPage extracted; |
+ ASSERT_TRUE(extract(&extracted)); |
+ WebPage expected; |
+ expected.url = "http://www.test.com/"; |
+ expected.title = "My neat website about cool stuff"; |
+ Entity restaurant; |
+ |
+ Property type; |
+ type.name = "@type"; |
+ type.type = JSONValue::TypeString; |
+ type.strVal.push_back("Restaurant"); |
+ |
+ Property nameProperty; |
+ nameProperty.name = "name"; |
+ nameProperty.type = JSONValue::TypeString; |
+ nameProperty.strVal.push_back(maxLengthString); |
+ |
+ restaurant.properties.push_back(type); |
+ restaurant.properties.push_back(nameProperty); |
+ |
+ expected.entities.push_back(restaurant); |
+ EXPECT_EQ(expected, extracted); |
+} |
+ |
+// The JSON-LD object has no "@type" key: extract() must return false and the |
+// output WebPage must stay default (no URL, title or entities filled in). |
+TEST_F(CopylessPasteExtractorTest, enforceTypeExists) { |
+ setHtmlInnerHTML( |
+ "<body>" |
+ "<script type=\"application/ld+json\">" |
+ "\n" |
+ "\n" |
+ "{\"name\": \"Special characters for ya >_<;\"" |
+ "}\n" |
+ "\n" |
+ "</script>" |
+ "</body>"); |
+ setURL("http://www.test.com/"); |
+ setTitle("My neat website about cool stuff"); |
+ |
+ WebPage extracted; |
+ ASSERT_FALSE(extract(&extracted)); |
+ WebPage expected; |
+ EXPECT_EQ(expected, extracted); |
+} |
+ |
+// The JSON-LD object declares "@type": "UnsupportedType": extract() must |
+// return false and the output WebPage must stay default. |
+TEST_F(CopylessPasteExtractorTest, enforceTypeWhitelist) { |
+ setHtmlInnerHTML( |
+ "<body>" |
+ "<script type=\"application/ld+json\">" |
+ "\n" |
+ "\n" |
+ "{\"@type\": \"UnsupportedType\"," |
+ "\"name\": \"Special characters for ya >_<;\"" |
+ "}\n" |
+ "\n" |
+ "</script>" |
+ "</body>"); |
+ setURL("http://www.test.com/"); |
+ setTitle("My neat website about cool stuff"); |
+ |
+ WebPage extracted; |
+ ASSERT_FALSE(extract(&extracted)); |
+ WebPage expected; |
+ EXPECT_EQ(expected, extracted); |
+} |
+ |
+// The "name" array carries 101 values; the extracted property is expected to |
+// keep only 100 of them. |
+TEST_F(CopylessPasteExtractorTest, truncateTooManyValuesInField) { |
+ // Builds the JSON array text ["a", "a", ...] with 101 elements. |
+ String largeRepeatedField = "["; |
+ for (int i = 0; i < 101; ++i) { |
+ largeRepeatedField.append("\"a\""); |
+ if (i != 100) { |
+ largeRepeatedField.append(", "); |
+ } |
+ } |
+ largeRepeatedField.append("]"); |
+ setHtmlInnerHTML( |
+ "<body>" |
+ "<script type=\"application/ld+json\">" |
+ "\n" |
+ "\n" |
+ "{\"@type\": \"Restaurant\"," |
+ "\"name\": " + |
+ largeRepeatedField + |
+ "}\n" |
+ "\n" |
+ "</script>" |
+ "</body>"); |
+ setURL("http://www.test.com/"); |
+ setTitle("My neat website about cool stuff"); |
+ |
+ WebPage extracted; |
+ ASSERT_TRUE(extract(&extracted)); |
+ WebPage expected; |
+ expected.url = "http://www.test.com/"; |
+ expected.title = "My neat website about cool stuff"; |
+ Entity restaurant; |
+ |
+ Property type; |
+ type.name = "@type"; |
+ type.type = JSONValue::TypeString; |
+ type.strVal.push_back("Restaurant"); |
+ |
+ Property nameProperty; |
+ nameProperty.name = "name"; |
+ nameProperty.type = JSONValue::TypeString; |
+ for (int i = 0; i < 100; ++i) { |
+ nameProperty.strVal.push_back("a"); |
+ } |
+ |
+ restaurant.properties.push_back(type); |
+ restaurant.properties.push_back(nameProperty); |
+ |
+ expected.entities.push_back(restaurant); |
+ EXPECT_EQ(expected, extracted); |
+} |
+ |
+// The entity has "@type" plus 20 numbered fields ("0" to "19"); the expected |
+// result keeps "@type" and only the numbered fields "0" to "18". |
+TEST_F(CopylessPasteExtractorTest, truncateTooManyFields) { |
+ // Builds the text of 20 key/value pairs: "0": "a", "1": "a", ... |
+ String tooManyFields; |
+ for (int i = 0; i < 20; ++i) { |
+ tooManyFields.append(String::format("\"%d\": \"a\"", i)); |
+ if (i != 19) { |
+ tooManyFields.append(",\n"); |
+ } |
+ } |
+ setHtmlInnerHTML( |
+ "<body>" |
+ "<script type=\"application/ld+json\">" |
+ "\n" |
+ "\n" |
+ "{\"@type\": \"Restaurant\"," + |
+ tooManyFields + |
+ "}\n" |
+ "\n" |
+ "</script>" |
+ "</body>"); |
+ setURL("http://www.test.com/"); |
+ setTitle("My neat website about cool stuff"); |
+ |
+ WebPage extracted; |
+ ASSERT_TRUE(extract(&extracted)); |
+ WebPage expected; |
+ expected.url = "http://www.test.com/"; |
+ expected.title = "My neat website about cool stuff"; |
+ Entity restaurant; |
+ |
+ Property type; |
+ type.name = "@type"; |
+ type.type = JSONValue::TypeString; |
+ type.strVal.push_back("Restaurant"); |
+ |
+ restaurant.properties.push_back(type); |
+ |
+ // App Indexing limits an entity to 20 fields. @type takes one of them, so |
+ // only the first 19 numbered fields are expected to survive. |
+ for (int i = 0; i < 19; ++i) { |
+ Property p; |
+ p.name = String::number(i); |
+ p.type = JSONValue::TypeString; |
+ p.strVal.push_back("a"); |
+ restaurant.properties.push_back(p); |
+ } |
+ |
+ expected.entities.push_back(restaurant); |
+ EXPECT_EQ(expected, extracted); |
+} |
+ |
+// Numeric JSON values: the literal 1 is expected as a TypeInteger property |
+// (stored in intVal) and 1.5 as a TypeDouble property (stored in doubleVal). |
+TEST_F(CopylessPasteExtractorTest, numbers) { |
+ setHtmlInnerHTML( |
+ "<body>" |
+ "<script type=\"application/ld+json\">" |
+ "\n" |
+ "\n" |
+ "{\"@type\": \"Restaurant\"," |
+ "\"int\": 1," |
+ "\"double\": 1.5" |
+ "}\n" |
+ "\n" |
+ "</script>" |
+ "</body>"); |
+ setURL("http://www.test.com/"); |
+ setTitle("My neat website about cool stuff"); |
+ |
+ WebPage extracted; |
+ ASSERT_TRUE(extract(&extracted)); |
+ WebPage expected; |
+ expected.url = "http://www.test.com/"; |
+ expected.title = "My neat website about cool stuff"; |
+ Entity restaurant; |
+ |
+ Property type; |
+ type.name = "@type"; |
+ type.type = JSONValue::TypeString; |
+ type.strVal.push_back("Restaurant"); |
+ |
+ Property intProperty; |
+ intProperty.name = "int"; |
+ intProperty.type = JSONValue::TypeInteger; |
+ intProperty.intVal.push_back(1); |
+ |
+ Property doubleProperty; |
+ doubleProperty.name = "double"; |
+ doubleProperty.type = JSONValue::TypeDouble; |
+ doubleProperty.doubleVal.push_back(1.5); |
+ |
+ restaurant.properties.push_back(type); |
+ restaurant.properties.push_back(intProperty); |
+ restaurant.properties.push_back(doubleProperty); |
- String extracted = extract(); |
- String expected = "[" + m_content + "," + m_content + "," + m_content + "]"; |
+ expected.entities.push_back(restaurant); |
EXPECT_EQ(expected, extracted); |
} |