Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(245)

Unified Diff: third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp

Issue 2777623002: Move json-ld parsing to Blink.
Patch Set: update policy enforcement in blink, clank handling of repeated values Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractor.cpp ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp
diff --git a/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp b/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp
index e65edf0ee4b28723b4ce837e5a924173b82b2694..972e25761b1741373aedafb115b5928ccdd9f5bd 100644
--- a/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp
+++ b/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp
@@ -2,12 +2,16 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
+#include "modules/document_metadata/CopylessPasteExtractor.cpp"
#include "modules/document_metadata/CopylessPasteExtractor.h"
#include <memory>
+#include <string>
#include "core/dom/Document.h"
#include "core/dom/Element.h"
#include "core/testing/DummyPageHolder.h"
+#include "platform/json/JSONValues.h"
+#include "platform/testing/URLTestHelpers.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "wtf/text/StringBuilder.h"
@@ -17,14 +21,7 @@ namespace {
class CopylessPasteExtractorTest : public ::testing::Test {
public:
- CopylessPasteExtractorTest()
- : m_content(
- "\n"
- "\n"
- "{\"@type\": \"NewsArticle\","
- "\"headline\": \"Special characters for ya >_<;\"\n"
- "}\n"
- "\n") {}
+ CopylessPasteExtractorTest() {}
protected:
void SetUp() override;
@@ -33,10 +30,16 @@ class CopylessPasteExtractorTest : public ::testing::Test {
Document& document() const { return m_dummyPageHolder->document(); }
- String extract() { return CopylessPasteExtractor::extract(document()); }
+ bool extract(WebPage* page) {
+ return CopylessPasteExtractor::extract(document(), page);
+ }
void setHtmlInnerHTML(const String&);
+ void setURL(const std::string);
+
+ void setTitle(const String&);
+
String m_content;
private:
@@ -51,56 +54,566 @@ void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) {
document().documentElement()->setInnerHTML((htmlContent));
}
// Test helper: converts |url| with URLTestHelpers::toKURL() and installs the
// result as the URL of the fixture's document.
+void CopylessPasteExtractorTest::setURL(const std::string url) {
+ document().setURL(URLTestHelpers::toKURL(url));
+}
+
// Test helper: sets the title of the fixture's document to |title|.
+void CopylessPasteExtractorTest::setTitle(const String& title) {
+ document().setTitle(title);
+}
+
// The test body installs no markup, URL or title. The new expectation is that
// extract() returns false and leaves |page| equal to a default-constructed
// WebPage (the removed lines expected the string "[]" instead).
TEST_F(CopylessPasteExtractorTest, empty) {
- String extracted = extract();
- String expected = "[]";
- EXPECT_EQ(expected, extracted);
+ WebPage page;
+ ASSERT_FALSE(extract(&page));
+ EXPECT_EQ(WebPage(), page);
}
// One JSON-LD <script> inside <body>. Extraction is expected to succeed and to
// produce the document URL, the document title, and a single entity holding
// two string properties: "@type" ("Restaurant") and "name".
TEST_F(CopylessPasteExtractorTest, basic) {
setHtmlInnerHTML(
"<body>"
- "<script type=\"application/ld+json\">" +
- m_content +
+ "<script type=\"application/ld+json\">"
+ "\n"
+ "\n"
+ "{\"@type\": \"Restaurant\","
+ "\"name\": \"Special characters for ya >_<;\""
+ "}\n"
+ "\n"
"</script>"
"</body>");
+ setURL("http://www.test.com/");
+ setTitle("My neat website about cool stuff");
+
+ WebPage extracted;
+ ASSERT_TRUE(extract(&extracted));
+ WebPage expected;
+ expected.url = "http://www.test.com/";
+ expected.title = "My neat website about cool stuff";
+ Entity restaurant;
+
+ Property type;
+ type.name = "@type";
+ type.type = JSONValue::TypeString;
+ type.strVal.push_back("Restaurant");
- String extracted = extract();
- String expected = "[" + m_content + "]";
+ Property nameProperty;
+ nameProperty.name = "name";
+ nameProperty.type = JSONValue::TypeString;
+ nameProperty.strVal.push_back("Special characters for ya >_<;");
+
+ restaurant.properties.push_back(type);
+ restaurant.properties.push_back(nameProperty);
+
+ expected.entities.push_back(restaurant);
EXPECT_EQ(expected, extracted);
}
// Same JSON-LD payload as the "basic" case, but the <script> is placed inside
// <head>. The expected WebPage is identical: URL, title, and one Restaurant
// entity with "@type" and "name" string properties.
TEST_F(CopylessPasteExtractorTest, header) {
setHtmlInnerHTML(
"<head>"
- "<script type=\"application/ld+json\">" +
- m_content +
+ "<script type=\"application/ld+json\">"
+ "\n"
+ "\n"
+ "{\"@type\": \"Restaurant\","
+ "\"name\": \"Special characters for ya >_<;\""
+ "}\n"
+ "\n"
"</script>"
"</head>");
- String extracted = extract();
- String expected = "[" + m_content + "]";
+ setURL("http://www.test.com/");
+ setTitle("My neat website about cool stuff");
+
+ WebPage extracted;
+ ASSERT_TRUE(extract(&extracted));
+ WebPage expected;
+ expected.url = "http://www.test.com/";
+ expected.title = "My neat website about cool stuff";
+ Entity restaurant;
+
+ Property type;
+ type.name = "@type";
+ type.type = JSONValue::TypeString;
+ type.strVal.push_back("Restaurant");
+
+ Property nameProperty;
+ nameProperty.name = "name";
+ nameProperty.type = JSONValue::TypeString;
+ nameProperty.strVal.push_back("Special characters for ya >_<;");
+
+ restaurant.properties.push_back(type);
+ restaurant.properties.push_back(nameProperty);
+
+ expected.entities.push_back(restaurant);
EXPECT_EQ(expected, extracted);
}
// Three identical JSON-LD scripts, one in <head> and two in <body>. The
// expected WebPage carries the same Restaurant entity three times, so each
// script is expected to contribute its own entry to |entities|.
TEST_F(CopylessPasteExtractorTest, multiple) {
setHtmlInnerHTML(
"<head>"
- "<script type=\"application/ld+json\">" +
- m_content +
+ "<script type=\"application/ld+json\">"
+ "\n"
+ "\n"
+ "{\"@type\": \"Restaurant\","
+ "\"name\": \"Special characters for ya >_<;\""
+ "}\n"
+ "\n"
"</script>"
"</head>"
"<body>"
- "<script type=\"application/ld+json\">" +
- m_content +
+ "<script type=\"application/ld+json\">"
+ "\n"
+ "\n"
+ "{\"@type\": \"Restaurant\","
+ "\"name\": \"Special characters for ya >_<;\""
+ "}\n"
+ "\n"
+ "</script>"
+ "<script type=\"application/ld+json\">"
+ "\n"
+ "\n"
+ "{\"@type\": \"Restaurant\","
+ "\"name\": \"Special characters for ya >_<;\""
+ "}\n"
+ "\n"
+ "</script>"
+ "</body>");
+
+ setURL("http://www.test.com/");
+ setTitle("My neat website about cool stuff");
+
+ WebPage extracted;
+ ASSERT_TRUE(extract(&extracted));
+ WebPage expected;
+ expected.url = "http://www.test.com/";
+ expected.title = "My neat website about cool stuff";
+ Entity restaurant;
+
+ Property type;
+ type.name = "@type";
+ type.type = JSONValue::TypeString;
+ type.strVal.push_back("Restaurant");
+
+ Property nameProperty;
+ nameProperty.name = "name";
+ nameProperty.type = JSONValue::TypeString;
+ nameProperty.strVal.push_back("Special characters for ya >_<;");
+
+ restaurant.properties.push_back(type);
+ restaurant.properties.push_back(nameProperty);
+
// One expected entity per <script> block in the markup above.
+ expected.entities.push_back(restaurant);
+ expected.entities.push_back(restaurant);
+ expected.entities.push_back(restaurant);
+
+ EXPECT_EQ(expected, extracted);
+}
+
// A JSON object nested under the "address" key. The expected result models it
// as a single property of type JSONValue::TypeObject whose entityVal holds one
// Entity with the nested "streetAddress" and "addressLocality" strings.
+TEST_F(CopylessPasteExtractorTest, nested) {
+ setHtmlInnerHTML(
+ "<body>"
+ "<script type=\"application/ld+json\">"
+ "\n"
+ "\n"
+ "{\"@type\": \"Restaurant\","
+ "\"name\": \"Ye ol greasy diner\","
+ "\"address\": {"
+ "\n"
+ " \"streetAddress\": \"123 Big Oak Road\","
+ " \"addressLocality\": \"San Francisco\""
+ " }\n"
+ "}\n"
+ "\n"
+ "</script>"
+ "</body>");
+ setURL("http://www.test.com/");
+ setTitle("My neat website about cool stuff");
+
+ WebPage extracted;
+ ASSERT_TRUE(extract(&extracted));
+ WebPage expected;
+ expected.url = "http://www.test.com/";
+ expected.title = "My neat website about cool stuff";
+ Entity restaurant;
+
+ Property type;
+ type.name = "@type";
+ type.type = JSONValue::TypeString;
+ type.strVal.push_back("Restaurant");
+
+ Property name;
+ name.name = "name";
+ name.type = JSONValue::TypeString;
+ name.strVal.push_back("Ye ol greasy diner");
+
+ Property streetAddress;
+ streetAddress.name = "streetAddress";
+ streetAddress.type = JSONValue::TypeString;
+ streetAddress.strVal.push_back("123 Big Oak Road");
+
+ Property addressLocality;
+ addressLocality.name = "addressLocality";
+ addressLocality.type = JSONValue::TypeString;
+ addressLocality.strVal.push_back("San Francisco");
+
+ Entity address;
+ address.properties.push_back(streetAddress);
+ address.properties.push_back(addressLocality);
+
+ Property addressProperty;
+ addressProperty.name = "address";
+ addressProperty.type = JSONValue::TypeObject;
+ addressProperty.entityVal.push_back(address);
+
+ restaurant.properties.push_back(type);
+ restaurant.properties.push_back(name);
+ restaurant.properties.push_back(addressProperty);
+
+ expected.entities.push_back(restaurant);
+ EXPECT_EQ(expected, extracted);
+}
+
// A JSON array of strings under "name". The expected result is one property of
// type JSONValue::TypeString whose strVal holds both array elements in order.
+TEST_F(CopylessPasteExtractorTest, repeated) {
+ setHtmlInnerHTML(
+ "<body>"
+ "<script type=\"application/ld+json\">"
+ "\n"
+ "\n"
+ "{\"@type\": \"Restaurant\","
+ "\"name\": [ \"First name\", \"Second name\"]"
+ "}\n"
+ "\n"
"</script>"
- "<script type=\"application/ld+json\">" +
- m_content +
+ "</body>");
+ setURL("http://www.test.com/");
+ setTitle("My neat website about cool stuff");
+
+ WebPage extracted;
+ ASSERT_TRUE(extract(&extracted));
+ WebPage expected;
+ expected.url = "http://www.test.com/";
+ expected.title = "My neat website about cool stuff";
+ Entity restaurant;
+
+ Property type;
+ type.name = "@type";
+ type.type = JSONValue::TypeString;
+ type.strVal.push_back("Restaurant");
+
+ Property nameProperty;
+ nameProperty.name = "name";
+ nameProperty.type = JSONValue::TypeString;
+ nameProperty.strVal.push_back("First name");
+ nameProperty.strVal.push_back("Second name");
+
+ restaurant.properties.push_back(type);
+ restaurant.properties.push_back(nameProperty);
+
+ expected.entities.push_back(restaurant);
+ EXPECT_EQ(expected, extracted);
+}
+
// A JSON array of two identical objects under "address". The expected result
// is one property of type JSONValue::TypeObject whose entityVal holds one
// Entity per array element.
+TEST_F(CopylessPasteExtractorTest, repeatedObject) {
+ setHtmlInnerHTML(
+ "<body>"
+ "<script type=\"application/ld+json\">"
+ "\n"
+ "\n"
+ "{\"@type\": \"Restaurant\","
+ "\"name\": \"Ye ol greasy diner\","
+ "\"address\": ["
+ "\n"
+ " {"
+ " \"streetAddress\": \"123 Big Oak Road\","
+ " \"addressLocality\": \"San Francisco\""
+ " },\n"
+ " {"
+ " \"streetAddress\": \"123 Big Oak Road\","
+ " \"addressLocality\": \"San Francisco\""
+ " }\n"
+ "]\n"
+ "}\n"
+ "\n"
"</script>"
"</body>");
+ setURL("http://www.test.com/");
+ setTitle("My neat website about cool stuff");
+
+ WebPage extracted;
+ ASSERT_TRUE(extract(&extracted));
+ WebPage expected;
+ expected.url = "http://www.test.com/";
+ expected.title = "My neat website about cool stuff";
+ Entity restaurant;
+
+ Property type;
+ type.name = "@type";
+ type.type = JSONValue::TypeString;
+ type.strVal.push_back("Restaurant");
+
+ Property name;
+ name.name = "name";
+ name.type = JSONValue::TypeString;
+ name.strVal.push_back("Ye ol greasy diner");
+
+ Property streetAddress;
+ streetAddress.name = "streetAddress";
+ streetAddress.type = JSONValue::TypeString;
+ streetAddress.strVal.push_back("123 Big Oak Road");
+
+ Property addressLocality;
+ addressLocality.name = "addressLocality";
+ addressLocality.type = JSONValue::TypeString;
+ addressLocality.strVal.push_back("San Francisco");
+
+ Entity address;
+ address.properties.push_back(streetAddress);
+ address.properties.push_back(addressLocality);
+
+ Property addressProperty;
+ addressProperty.name = "address";
+ addressProperty.type = JSONValue::TypeObject;
// The same Entity is pushed twice because both array elements are identical.
+ addressProperty.entityVal.push_back(address);
+ addressProperty.entityVal.push_back(address);
+
+ restaurant.properties.push_back(type);
+ restaurant.properties.push_back(name);
+ restaurant.properties.push_back(addressProperty);
+
+ expected.entities.push_back(restaurant);
+ EXPECT_EQ(expected, extracted);
+}
+
// Feeds a 201-character "name" value and expects the extracted value to be the
// 200-character string, i.e. the test expects over-long strings to be cut to
// 200 characters.
+TEST_F(CopylessPasteExtractorTest, truncateLongString) {
// 200 'a' characters: the value the test expects to get back.
+ String maxLengthString;
+ for (int i = 0; i < 200; ++i) {
+ maxLengthString.append("a");
+ }
// One character longer than the expected result; this is what goes into the
// JSON-LD payload.
+ String tooLongString(maxLengthString);
+ tooLongString.append("a");
+ setHtmlInnerHTML(
+ "<body>"
+ "<script type=\"application/ld+json\">"
+ "\n"
+ "\n"
+ "{\"@type\": \"Restaurant\","
+ "\"name\": \"" +
+ tooLongString +
+ "\""
+ "}\n"
+ "\n"
+ "</script>"
+ "</body>");
+ setURL("http://www.test.com/");
+ setTitle("My neat website about cool stuff");
+
+ WebPage extracted;
+ ASSERT_TRUE(extract(&extracted));
+ WebPage expected;
+ expected.url = "http://www.test.com/";
+ expected.title = "My neat website about cool stuff";
+ Entity restaurant;
+
+ Property type;
+ type.name = "@type";
+ type.type = JSONValue::TypeString;
+ type.strVal.push_back("Restaurant");
+
+ Property nameProperty;
+ nameProperty.name = "name";
+ nameProperty.type = JSONValue::TypeString;
+ nameProperty.strVal.push_back(maxLengthString);
+
+ restaurant.properties.push_back(type);
+ restaurant.properties.push_back(nameProperty);
+
+ expected.entities.push_back(restaurant);
+ EXPECT_EQ(expected, extracted);
+}
+
// JSON-LD object with a "name" but no "@type" key. extract() is expected to
// return false and to leave the output equal to a default-constructed WebPage,
// even though a URL and title were set on the document.
+TEST_F(CopylessPasteExtractorTest, enforceTypeExists) {
+ setHtmlInnerHTML(
+ "<body>"
+ "<script type=\"application/ld+json\">"
+ "\n"
+ "\n"
+ "{\"name\": \"Special characters for ya >_<;\""
+ "}\n"
+ "\n"
+ "</script>"
+ "</body>");
+ setURL("http://www.test.com/");
+ setTitle("My neat website about cool stuff");
+
+ WebPage extracted;
+ ASSERT_FALSE(extract(&extracted));
+ WebPage expected;
+ EXPECT_EQ(expected, extracted);
+}
+
// JSON-LD object whose "@type" is "UnsupportedType". extract() is expected to
// return false and to leave the output equal to a default-constructed WebPage,
// even though a URL and title were set on the document.
+TEST_F(CopylessPasteExtractorTest, enforceTypeWhitelist) {
+ setHtmlInnerHTML(
+ "<body>"
+ "<script type=\"application/ld+json\">"
+ "\n"
+ "\n"
+ "{\"@type\": \"UnsupportedType\","
+ "\"name\": \"Special characters for ya >_<;\""
+ "}\n"
+ "\n"
+ "</script>"
+ "</body>");
+ setURL("http://www.test.com/");
+ setTitle("My neat website about cool stuff");
+
+ WebPage extracted;
+ ASSERT_FALSE(extract(&extracted));
+ WebPage expected;
+ EXPECT_EQ(expected, extracted);
+}
+
// Feeds a "name" array with 101 string elements and expects only 100 values in
// the extracted property, i.e. the test expects repeated values to be capped
// at 100 per field.
+TEST_F(CopylessPasteExtractorTest, truncateTooManyValuesInField) {
// Builds the JSON array text ["a", "a", ...] with 101 elements; the separator
// is skipped after the last element (i == 100).
+ String largeRepeatedField = "[";
+ for (int i = 0; i < 101; ++i) {
+ largeRepeatedField.append("\"a\"");
+ if (i != 100) {
+ largeRepeatedField.append(", ");
+ }
+ }
+ largeRepeatedField.append("]");
+ setHtmlInnerHTML(
+ "<body>"
+ "<script type=\"application/ld+json\">"
+ "\n"
+ "\n"
+ "{\"@type\": \"Restaurant\","
+ "\"name\": " +
+ largeRepeatedField +
+ "}\n"
+ "\n"
+ "</script>"
+ "</body>");
+ setURL("http://www.test.com/");
+ setTitle("My neat website about cool stuff");
+
+ WebPage extracted;
+ ASSERT_TRUE(extract(&extracted));
+ WebPage expected;
+ expected.url = "http://www.test.com/";
+ expected.title = "My neat website about cool stuff";
+ Entity restaurant;
+
+ Property type;
+ type.name = "@type";
+ type.type = JSONValue::TypeString;
+ type.strVal.push_back("Restaurant");
+
+ Property nameProperty;
+ nameProperty.name = "name";
+ nameProperty.type = JSONValue::TypeString;
// Only 100 of the 101 input values are expected to survive.
+ for (int i = 0; i < 100; ++i) {
+ nameProperty.strVal.push_back("a");
+ }
+
+ restaurant.properties.push_back(type);
+ restaurant.properties.push_back(nameProperty);
+
+ expected.entities.push_back(restaurant);
+ EXPECT_EQ(expected, extracted);
+}
+
// Feeds an object with "@type" plus 20 further fields (21 in total) and
// expects only "@type" and the fields keyed "0" through "18" to be extracted.
+TEST_F(CopylessPasteExtractorTest, truncateTooManyFields) {
// Builds 20 key/value pairs of the form "<i>": "a", separated by ",\n" with no
// separator after the last pair (i == 19).
+ String tooManyFields;
+ for (int i = 0; i < 20; ++i) {
+ tooManyFields.append(String::format("\"%d\": \"a\"", i));
+ if (i != 19) {
+ tooManyFields.append(",\n");
+ }
+ }
+ setHtmlInnerHTML(
+ "<body>"
+ "<script type=\"application/ld+json\">"
+ "\n"
+ "\n"
+ "{\"@type\": \"Restaurant\"," +
+ tooManyFields +
+ "}\n"
+ "\n"
+ "</script>"
+ "</body>");
+ setURL("http://www.test.com/");
+ setTitle("My neat website about cool stuff");
+
+ WebPage extracted;
+ ASSERT_TRUE(extract(&extracted));
+ WebPage expected;
+ expected.url = "http://www.test.com/";
+ expected.title = "My neat website about cool stuff";
+ Entity restaurant;
+
+ Property type;
+ type.name = "@type";
+ type.type = JSONValue::TypeString;
+ type.strVal.push_back("Restaurant");
+
+ restaurant.properties.push_back(type);
+
+ // App Indexing limits to 20 fields. One of these is the @type, so there are
+ // 19 left.
+ for (int i = 0; i < 19; ++i) {
+ Property p;
+ p.name = String::number(i);
+ p.type = JSONValue::TypeString;
+ p.strVal.push_back("a");
+ restaurant.properties.push_back(p);
+ }
+
+ expected.entities.push_back(restaurant);
+ EXPECT_EQ(expected, extracted);
+}
+
// JSON numeric values. The literal 1 is expected as a JSONValue::TypeInteger
// property stored in intVal, and 1.5 as a JSONValue::TypeDouble property
// stored in doubleVal.
+TEST_F(CopylessPasteExtractorTest, numbers) {
+ setHtmlInnerHTML(
+ "<body>"
+ "<script type=\"application/ld+json\">"
+ "\n"
+ "\n"
+ "{\"@type\": \"Restaurant\","
+ "\"int\": 1,"
+ "\"double\": 1.5"
+ "}\n"
+ "\n"
+ "</script>"
+ "</body>");
+ setURL("http://www.test.com/");
+ setTitle("My neat website about cool stuff");
+
+ WebPage extracted;
+ ASSERT_TRUE(extract(&extracted));
+ WebPage expected;
+ expected.url = "http://www.test.com/";
+ expected.title = "My neat website about cool stuff";
+ Entity restaurant;
+
+ Property type;
+ type.name = "@type";
+ type.type = JSONValue::TypeString;
+ type.strVal.push_back("Restaurant");
+
+ Property intProperty;
+ intProperty.name = "int";
+ intProperty.type = JSONValue::TypeInteger;
+ intProperty.intVal.push_back(1);
+
// NOTE(review): despite its name, |longProperty| describes the "double" field.
+ Property longProperty;
+ longProperty.name = "double";
+ longProperty.type = JSONValue::TypeDouble;
+ longProperty.doubleVal.push_back(1.5);
+
+ restaurant.properties.push_back(type);
+ restaurant.properties.push_back(intProperty);
+ restaurant.properties.push_back(longProperty);
- String extracted = extract();
- String expected = "[" + m_content + "," + m_content + "," + m_content + "]";
+ expected.entities.push_back(restaurant);
EXPECT_EQ(expected, extracted);
}
« no previous file with comments | « third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractor.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698