| Index: third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp
|
| diff --git a/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp b/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp
|
| index e65edf0ee4b28723b4ce837e5a924173b82b2694..972e25761b1741373aedafb115b5928ccdd9f5bd 100644
|
| --- a/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp
|
| +++ b/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp
|
| @@ -2,12 +2,16 @@
|
| // Use of this source code is governed by a BSD-style license that can be
|
| // found in the LICENSE file.
|
|
|
| +#include "modules/document_metadata/CopylessPasteExtractor.cpp"
|
| #include "modules/document_metadata/CopylessPasteExtractor.h"
|
|
|
| #include <memory>
|
| +#include <string>
|
| #include "core/dom/Document.h"
|
| #include "core/dom/Element.h"
|
| #include "core/testing/DummyPageHolder.h"
|
| +#include "platform/json/JSONValues.h"
|
| +#include "platform/testing/URLTestHelpers.h"
|
| #include "testing/gtest/include/gtest/gtest.h"
|
| #include "wtf/text/StringBuilder.h"
|
|
|
| @@ -17,14 +21,7 @@ namespace {
|
|
|
| class CopylessPasteExtractorTest : public ::testing::Test {
|
| public:
|
| - CopylessPasteExtractorTest()
|
| - : m_content(
|
| - "\n"
|
| - "\n"
|
| - "{\"@type\": \"NewsArticle\","
|
| - "\"headline\": \"Special characters for ya >_<;\"\n"
|
| - "}\n"
|
| - "\n") {}
|
| + CopylessPasteExtractorTest() {}  // m_content is left default-constructed; each test inlines its own markup.
|
|
|
| protected:
|
| void SetUp() override;
|
| @@ -33,10 +30,16 @@ class CopylessPasteExtractorTest : public ::testing::Test {
|
|
|
| Document& document() const { return m_dummyPageHolder->document(); }
|
|
|
| - String extract() { return CopylessPasteExtractor::extract(document()); }
|
| + bool extract(WebPage* page) {  // Runs the extractor on the fixture document; |page| receives the result.
|
| + return CopylessPasteExtractor::extract(document(), page);
|
| + }
|
|
|
| void setHtmlInnerHTML(const String&);
|
|
|
| + void setURL(const std::string);
|
| +
|
| + void setTitle(const String&);
|
| +
|
| String m_content;
|
|
|
| private:
|
| @@ -51,56 +54,566 @@ void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) {
|
| document().documentElement()->setInnerHTML((htmlContent));
|
| }
|
|
|
| +void CopylessPasteExtractorTest::setURL(const std::string url) {  // NOTE(review): |url| is copied (by-value const); a const reference would avoid that.
|
| + document().setURL(URLTestHelpers::toKURL(url));  // Sets the URL of the fixture document.
|
| +}
|
| +
|
| +void CopylessPasteExtractorTest::setTitle(const String& title) {  // Thin wrapper used by the tests below.
|
| + document().setTitle(title);  // Sets the title of the fixture document.
|
| +}
|
| +
|
| TEST_F(CopylessPasteExtractorTest, empty) {
|
| - String extracted = extract();
|
| - String expected = "[]";
|
| - EXPECT_EQ(expected, extracted);
|
| + WebPage page;
|
| + ASSERT_FALSE(extract(&page));  // No markup was set, so extraction is expected to report failure.
|
| + EXPECT_EQ(WebPage(), page);  // The out-param must still equal a default-constructed WebPage.
|
| }
|
|
|
| TEST_F(CopylessPasteExtractorTest, basic) {
|
| setHtmlInnerHTML(
|
| "<body>"
|
| - "<script type=\"application/ld+json\">" +
|
| - m_content +
|
| + "<script type=\"application/ld+json\">"  // JSON-LD block placed inside <body>.
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Special characters for ya >_<;\""
|
| + "}\n"
|
| + "\n"
|
| "</script>"
|
| "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPage extracted;
|
| + ASSERT_TRUE(extract(&extracted));
|
| + WebPage expected;  // Expected result: the document URL and title plus one Restaurant entity.
|
| + expected.url = "http://www.test.com/";
|
| + expected.title = "My neat website about cool stuff";
|
| + Entity restaurant;
|
| +
|
| + Property type;
|
| + type.name = "@type";
|
| + type.type = JSONValue::TypeString;
|
| + type.strVal.push_back("Restaurant");
|
|
|
| - String extracted = extract();
|
| - String expected = "[" + m_content + "]";
|
| + Property nameProperty;
|
| + nameProperty.name = "name";
|
| + nameProperty.type = JSONValue::TypeString;
|
| + nameProperty.strVal.push_back("Special characters for ya >_<;");  // Special characters are expected to come through unmodified.
|
| +
|
| + restaurant.properties.push_back(type);
|
| + restaurant.properties.push_back(nameProperty);
|
| +
|
| + expected.entities.push_back(restaurant);
|
| EXPECT_EQ(expected, extracted);
|
| }
|
|
|
| TEST_F(CopylessPasteExtractorTest, header) {
|
| setHtmlInnerHTML(
|
| "<head>"
|
| - "<script type=\"application/ld+json\">" +
|
| - m_content +
|
| + "<script type=\"application/ld+json\">"  // Same JSON-LD as the basic test, but placed inside <head>.
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Special characters for ya >_<;\""
|
| + "}\n"
|
| + "\n"
|
| "</script>"
|
| "</head>");
|
|
|
| - String extracted = extract();
|
| - String expected = "[" + m_content + "]";
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPage extracted;
|
| + ASSERT_TRUE(extract(&extracted));
|
| + WebPage expected;  // Expected result: the document URL and title plus one Restaurant entity.
|
| + expected.url = "http://www.test.com/";
|
| + expected.title = "My neat website about cool stuff";
|
| + Entity restaurant;
|
| +
|
| + Property type;
|
| + type.name = "@type";
|
| + type.type = JSONValue::TypeString;
|
| + type.strVal.push_back("Restaurant");
|
| +
|
| + Property nameProperty;
|
| + nameProperty.name = "name";
|
| + nameProperty.type = JSONValue::TypeString;
|
| + nameProperty.strVal.push_back("Special characters for ya >_<;");
|
| +
|
| + restaurant.properties.push_back(type);
|
| + restaurant.properties.push_back(nameProperty);
|
| +
|
| + expected.entities.push_back(restaurant);
|
| EXPECT_EQ(expected, extracted);
|
| }
|
|
|
| TEST_F(CopylessPasteExtractorTest, multiple) {
|
| setHtmlInnerHTML(
|
| "<head>"
|
| - "<script type=\"application/ld+json\">" +
|
| - m_content +
|
| + "<script type=\"application/ld+json\">"  // First JSON-LD block: inside <head>.
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Special characters for ya >_<;\""
|
| + "}\n"
|
| + "\n"
|
| "</script>"
|
| "</head>"
|
| "<body>"
|
| - "<script type=\"application/ld+json\">" +
|
| - m_content +
|
| + "<script type=\"application/ld+json\">"  // Second JSON-LD block: inside <body>.
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Special characters for ya >_<;\""
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "<script type=\"application/ld+json\">"  // Third JSON-LD block: also inside <body>.
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Special characters for ya >_<;\""
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| +
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPage extracted;
|
| + ASSERT_TRUE(extract(&extracted));
|
| + WebPage expected;
|
| + expected.url = "http://www.test.com/";
|
| + expected.title = "My neat website about cool stuff";
|
| + Entity restaurant;
|
| +
|
| + Property type;
|
| + type.name = "@type";
|
| + type.type = JSONValue::TypeString;
|
| + type.strVal.push_back("Restaurant");
|
| +
|
| + Property nameProperty;
|
| + nameProperty.name = "name";
|
| + nameProperty.type = JSONValue::TypeString;
|
| + nameProperty.strVal.push_back("Special characters for ya >_<;");
|
| +
|
| + restaurant.properties.push_back(type);
|
| + restaurant.properties.push_back(nameProperty);
|
| +
|
| + expected.entities.push_back(restaurant);  // One identical entity is expected per script block (three in total).
|
| + expected.entities.push_back(restaurant);
|
| + expected.entities.push_back(restaurant);
|
| +
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, nested) {  // A JSON object nested under a key is expected to become an object-typed property.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Ye ol greasy diner\","
|
| + "\"address\": {"
|
| + "\n"
|
| + " \"streetAddress\": \"123 Big Oak Road\","
|
| + " \"addressLocality\": \"San Francisco\""
|
| + " }\n"
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPage extracted;
|
| + ASSERT_TRUE(extract(&extracted));
|
| + WebPage expected;
|
| + expected.url = "http://www.test.com/";
|
| + expected.title = "My neat website about cool stuff";
|
| + Entity restaurant;
|
| +
|
| + Property type;
|
| + type.name = "@type";
|
| + type.type = JSONValue::TypeString;
|
| + type.strVal.push_back("Restaurant");
|
| +
|
| + Property name;
|
| + name.name = "name";
|
| + name.type = JSONValue::TypeString;
|
| + name.strVal.push_back("Ye ol greasy diner");
|
| +
|
| + Property streetAddress;
|
| + streetAddress.name = "streetAddress";
|
| + streetAddress.type = JSONValue::TypeString;
|
| + streetAddress.strVal.push_back("123 Big Oak Road");
|
| +
|
| + Property addressLocality;
|
| + addressLocality.name = "addressLocality";
|
| + addressLocality.type = JSONValue::TypeString;
|
| + addressLocality.strVal.push_back("San Francisco");
|
| +
|
| + Entity address;  // The nested object is modelled as its own Entity; note the input gives it no @type.
|
| + address.properties.push_back(streetAddress);
|
| + address.properties.push_back(addressLocality);
|
| +
|
| + Property addressProperty;
|
| + addressProperty.name = "address";
|
| + addressProperty.type = JSONValue::TypeObject;
|
| + addressProperty.entityVal.push_back(address);  // Object values are carried in entityVal rather than strVal.
|
| +
|
| + restaurant.properties.push_back(type);
|
| + restaurant.properties.push_back(name);
|
| + restaurant.properties.push_back(addressProperty);
|
| +
|
| + expected.entities.push_back(restaurant);
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, repeated) {  // A JSON array of strings is expected to become one property with several values.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": [ \"First name\", \"Second name\"]"
|
| + "}\n"
|
| + "\n"
|
| "</script>"
|
| - "<script type=\"application/ld+json\">" +
|
| - m_content +
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPage extracted;
|
| + ASSERT_TRUE(extract(&extracted));
|
| + WebPage expected;
|
| + expected.url = "http://www.test.com/";
|
| + expected.title = "My neat website about cool stuff";
|
| + Entity restaurant;
|
| +
|
| + Property type;
|
| + type.name = "@type";
|
| + type.type = JSONValue::TypeString;
|
| + type.strVal.push_back("Restaurant");
|
| +
|
| + Property nameProperty;
|
| + nameProperty.name = "name";
|
| + nameProperty.type = JSONValue::TypeString;  // The property keeps the element type (string), not an array type.
|
| + nameProperty.strVal.push_back("First name");  // Values are expected in the same order as the array.
|
| + nameProperty.strVal.push_back("Second name");
|
| +
|
| + restaurant.properties.push_back(type);
|
| + restaurant.properties.push_back(nameProperty);
|
| +
|
| + expected.entities.push_back(restaurant);
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, repeatedObject) {  // A JSON array of objects is expected to become one property holding several entities.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Ye ol greasy diner\","
|
| + "\"address\": ["
|
| + "\n"
|
| + " {"
|
| + " \"streetAddress\": \"123 Big Oak Road\","
|
| + " \"addressLocality\": \"San Francisco\""
|
| + " },\n"
|
| + " {"
|
| + " \"streetAddress\": \"123 Big Oak Road\","
|
| + " \"addressLocality\": \"San Francisco\""
|
| + " }\n"
|
| + "]\n"
|
| + "}\n"
|
| + "\n"
|
| "</script>"
|
| "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPage extracted;
|
| + ASSERT_TRUE(extract(&extracted));
|
| + WebPage expected;
|
| + expected.url = "http://www.test.com/";
|
| + expected.title = "My neat website about cool stuff";
|
| + Entity restaurant;
|
| +
|
| + Property type;
|
| + type.name = "@type";
|
| + type.type = JSONValue::TypeString;
|
| + type.strVal.push_back("Restaurant");
|
| +
|
| + Property name;
|
| + name.name = "name";
|
| + name.type = JSONValue::TypeString;
|
| + name.strVal.push_back("Ye ol greasy diner");
|
| +
|
| + Property streetAddress;
|
| + streetAddress.name = "streetAddress";
|
| + streetAddress.type = JSONValue::TypeString;
|
| + streetAddress.strVal.push_back("123 Big Oak Road");
|
| +
|
| + Property addressLocality;
|
| + addressLocality.name = "addressLocality";
|
| + addressLocality.type = JSONValue::TypeString;
|
| + addressLocality.strVal.push_back("San Francisco");
|
| +
|
| + Entity address;
|
| + address.properties.push_back(streetAddress);
|
| + address.properties.push_back(addressLocality);
|
| +
|
| + Property addressProperty;
|
| + addressProperty.name = "address";
|
| + addressProperty.type = JSONValue::TypeObject;
|
| + addressProperty.entityVal.push_back(address);  // Both array elements are identical, so the same Entity is pushed twice.
|
| + addressProperty.entityVal.push_back(address);
|
| +
|
| + restaurant.properties.push_back(type);
|
| + restaurant.properties.push_back(name);
|
| + restaurant.properties.push_back(addressProperty);
|
| +
|
| + expected.entities.push_back(restaurant);
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, truncateLongString) {  // A 201-character string value is expected to be cut to 200 characters.
|
| + String maxLengthString;
|
| + for (int i = 0; i < 200; ++i) {
|
| + maxLengthString.append("a");
|
| + }
|
| + String tooLongString(maxLengthString);
|
| + tooLongString.append("a");  // 201 characters: one more than maxLengthString.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"" +
|
| + tooLongString +
|
| + "\""
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPage extracted;
|
| + ASSERT_TRUE(extract(&extracted));  // An over-long value must not make extraction fail.
|
| + WebPage expected;
|
| + expected.url = "http://www.test.com/";
|
| + expected.title = "My neat website about cool stuff";
|
| + Entity restaurant;
|
| +
|
| + Property type;
|
| + type.name = "@type";
|
| + type.type = JSONValue::TypeString;
|
| + type.strVal.push_back("Restaurant");
|
| +
|
| + Property nameProperty;
|
| + nameProperty.name = "name";
|
| + nameProperty.type = JSONValue::TypeString;
|
| + nameProperty.strVal.push_back(maxLengthString);  // Only the first 200 characters are expected to survive.
|
| +
|
| + restaurant.properties.push_back(type);
|
| + restaurant.properties.push_back(nameProperty);
|
| +
|
| + expected.entities.push_back(restaurant);
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, enforceTypeExists) {  // JSON-LD without an @type key is expected to be rejected.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"name\": \"Special characters for ya >_<;\""
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPage extracted;
|
| + ASSERT_FALSE(extract(&extracted));
|
| + WebPage expected;  // Left default-constructed: no URL or title is expected even though both were set on the document.
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, enforceTypeWhitelist) {  // An @type of "UnsupportedType" is expected to be rejected.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"UnsupportedType\","
|
| + "\"name\": \"Special characters for ya >_<;\""
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPage extracted;
|
| + ASSERT_FALSE(extract(&extracted));
|
| + WebPage expected;  // Left default-constructed: no URL or title is expected even though both were set on the document.
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, truncateTooManyValuesInField) {  // A 101-element array is expected to be cut to 100 values.
|
| + String largeRepeatedField = "[";
|
| + for (int i = 0; i < 101; ++i) {
|
| + largeRepeatedField.append("\"a\"");
|
| + if (i != 100) {  // No separator after the last element, keeping the JSON array well-formed.
|
| + largeRepeatedField.append(", ");
|
| + }
|
| + }
|
| + largeRepeatedField.append("]");
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": " +
|
| + largeRepeatedField +
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPage extracted;
|
| + ASSERT_TRUE(extract(&extracted));
|
| + WebPage expected;
|
| + expected.url = "http://www.test.com/";
|
| + expected.title = "My neat website about cool stuff";
|
| + Entity restaurant;
|
| +
|
| + Property type;
|
| + type.name = "@type";
|
| + type.type = JSONValue::TypeString;
|
| + type.strVal.push_back("Restaurant");
|
| +
|
| + Property nameProperty;
|
| + nameProperty.name = "name";
|
| + nameProperty.type = JSONValue::TypeString;
|
| + for (int i = 0; i < 100; ++i) {  // One fewer than the 101 values supplied in the markup.
|
| + nameProperty.strVal.push_back("a");
|
| + }
|
| +
|
| + restaurant.properties.push_back(type);
|
| + restaurant.properties.push_back(nameProperty);
|
| +
|
| + expected.entities.push_back(restaurant);
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, truncateTooManyFields) {  // 21 keys in the markup (@type plus 20) are expected to be cut to 20 properties.
|
| + String tooManyFields;
|
| + for (int i = 0; i < 20; ++i) {
|
| + tooManyFields.append(String::format("\"%d\": \"a\"", i));  // Keys are the decimal indices "0" through "19".
|
| + if (i != 19) {
|
| + tooManyFields.append(",\n");
|
| + }
|
| + }
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\"," +
|
| + tooManyFields +
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPage extracted;
|
| + ASSERT_TRUE(extract(&extracted));
|
| + WebPage expected;
|
| + expected.url = "http://www.test.com/";
|
| + expected.title = "My neat website about cool stuff";
|
| + Entity restaurant;
|
| +
|
| + Property type;
|
| + type.name = "@type";
|
| + type.type = JSONValue::TypeString;
|
| + type.strVal.push_back("Restaurant");
|
| +
|
| + restaurant.properties.push_back(type);
|
| +
|
| + // App Indexing limits to 20 fields. One of these is the @type, so there are
|
| + // 19 left.
|
| + for (int i = 0; i < 19; ++i) {
|
| + Property p;
|
| + p.name = String::number(i);  // Matches keys "0" through "18"; key "19" is the one expected to be dropped.
|
| + p.type = JSONValue::TypeString;
|
| + p.strVal.push_back("a");
|
| + restaurant.properties.push_back(p);
|
| + }
|
| +
|
| + expected.entities.push_back(restaurant);
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, numbers) {  // Integer and fractional JSON numbers are expected to map to distinct property types.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"int\": 1,"
|
| + "\"double\": 1.5"
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPage extracted;
|
| + ASSERT_TRUE(extract(&extracted));
|
| + WebPage expected;
|
| + expected.url = "http://www.test.com/";
|
| + expected.title = "My neat website about cool stuff";
|
| + Entity restaurant;
|
| +
|
| + Property type;
|
| + type.name = "@type";
|
| + type.type = JSONValue::TypeString;
|
| + type.strVal.push_back("Restaurant");
|
| +
|
| + Property intProperty;
|
| + intProperty.name = "int";
|
| + intProperty.type = JSONValue::TypeInteger;
|
| + intProperty.intVal.push_back(1);  // Integer values are carried in intVal.
|
| +
|
| + Property longProperty;  // NOTE(review): despite the name, this holds the "double" property.
|
| + longProperty.name = "double";
|
| + longProperty.type = JSONValue::TypeDouble;
|
| + longProperty.doubleVal.push_back(1.5);  // Fractional values are carried in doubleVal.
|
| +
|
| + restaurant.properties.push_back(type);
|
| + restaurant.properties.push_back(intProperty);
|
| + restaurant.properties.push_back(longProperty);
|
|
|
| - String extracted = extract();
|
| - String expected = "[" + m_content + "," + m_content + "," + m_content + "]";
|
| + expected.entities.push_back(restaurant);
|
| EXPECT_EQ(expected, extracted);
|
| }
|
|
|
|
|