| Index: third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp
|
| diff --git a/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp b/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp
|
| index e65edf0ee4b28723b4ce837e5a924173b82b2694..319b496a1a12033bffcb094a78429b2995a06d97 100644
|
| --- a/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp
|
| +++ b/third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp
|
| @@ -2,12 +2,17 @@
|
| // Use of this source code is governed by a BSD-style license that can be
|
| // found in the LICENSE file.
|
|
|
| -#include "modules/document_metadata/CopylessPasteExtractor.h"
|
|
|
| #include <memory>
|
| +#include <string>
|
| +#include <utility>
|
| #include "core/dom/Document.h"
|
| #include "core/dom/Element.h"
|
| #include "core/testing/DummyPageHolder.h"
|
| +#include "modules/document_metadata/CopylessPasteExtractor.h"
|
| +#include "platform/json/JSONValues.h"
|
| +#include "platform/testing/URLTestHelpers.h"
|
| +#include "public/platform/modules/document_metadata/copyless_paste.mojom-blink.h"
|
| #include "testing/gtest/include/gtest/gtest.h"
|
| #include "wtf/text/StringBuilder.h"
|
|
|
| @@ -15,16 +20,18 @@ namespace blink {
|
|
|
| namespace {
|
|
|
| +using mojom::blink::Entity;
|
| +using mojom::blink::EntityPtr;
|
| +using mojom::blink::Property;
|
| +using mojom::blink::PropertyPtr;
|
| +using mojom::blink::Values;
|
| +using mojom::blink::ValuesPtr;
|
| +using mojom::blink::WebPage;
|
| +using mojom::blink::WebPagePtr;
|
| +
|
| class CopylessPasteExtractorTest : public ::testing::Test {
|
| public:
|
| - CopylessPasteExtractorTest()
|
| - : m_content(
|
| - "\n"
|
| - "\n"
|
| - "{\"@type\": \"NewsArticle\","
|
| - "\"headline\": \"Special characters for ya >_<;\"\n"
|
| - "}\n"
|
| - "\n") {}
|
| + CopylessPasteExtractorTest() {}
|
|
|
| protected:
|
| void SetUp() override;
|
| @@ -33,11 +40,25 @@ class CopylessPasteExtractorTest : public ::testing::Test {
|
|
|
| Document& document() const { return m_dummyPageHolder->document(); }
|
|
|
| - String extract() { return CopylessPasteExtractor::extract(document()); }
|
| + bool extract(WebPagePtr& page) {
|
| + return CopylessPasteExtractor::extract(document(), *page);
|
| + }
|
|
|
| void setHtmlInnerHTML(const String&);
|
|
|
| - String m_content;
|
| + void setURL(const std::string);
|
| +
|
| + void setTitle(const String&);
|
| +
|
| + PropertyPtr createStringProperty(const String&, const String&);
|
| +
|
| + PropertyPtr createBooleanProperty(const String&, const bool&);
|
| +
|
| + PropertyPtr createLongProperty(const String&, const int64_t&);
|
| +
|
| + PropertyPtr createEntityProperty(const String&, EntityPtr);
|
| +
|
| + WebPagePtr createWebPage(const String&, const String&);
|
|
|
| private:
|
| std::unique_ptr<DummyPageHolder> m_dummyPageHolder;
|
| @@ -51,59 +72,771 @@ void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) {
|
| document().documentElement()->setInnerHTML((htmlContent));
|
| }
|
|
|
| +void CopylessPasteExtractorTest::setURL(const std::string url) {  // Test helper: sets the URL of the fixture's document.
|
| + document().setURL(URLTestHelpers::toKURL(url));  // toKURL turns the std::string into the KURL passed to Document::setURL.
|
| +}
|
| +
|
| +void CopylessPasteExtractorTest::setTitle(const String& title) {  // Test helper: sets the title of the fixture's document.
|
| + document().setTitle(title);  // Forwards |title| unchanged to the document.
|
| +}
|
| +
|
| +PropertyPtr CopylessPasteExtractorTest::createStringProperty(  // Builds a Property called |name| holding exactly one string value.
|
| + const String& name,
|
| + const String& value) {
|
| + PropertyPtr p = Property::New();
|
| + p->name = name;
|
| + p->values = Values::New();
|
| + p->values->set_string_values(Vector<String>(1, value));  // One-element vector containing |value|.
|
| + return p;
|
| +}
|
| +
|
| +PropertyPtr CopylessPasteExtractorTest::createBooleanProperty(  // Builds a Property called |name| holding exactly one bool value.
|
| + const String& name,
|
| + const bool& value) {
|
| + PropertyPtr p = Property::New();
|
| + p->name = name;
|
| + p->values = Values::New();
|
| + p->values->set_bool_values(Vector<bool>(1, value));  // One-element vector containing |value|.
|
| + return p;
|
| +}
|
| +
|
| +PropertyPtr CopylessPasteExtractorTest::createLongProperty(  // Builds a Property called |name| holding exactly one int64_t value.
|
| + const String& name,
|
| + const int64_t& value) {
|
| + PropertyPtr p = Property::New();
|
| + p->name = name;
|
| + p->values = Values::New();
|
| + p->values->set_long_values(Vector<int64_t>(1, value));  // One-element vector containing |value|.
|
| + return p;
|
| +}
|
| +
|
| +PropertyPtr CopylessPasteExtractorTest::createEntityProperty(const String& name,  // Builds a Property called |name| holding one nested entity.
|
| + EntityPtr value) {
|
| + PropertyPtr p = Property::New();
|
| + p->name = name;
|
| + p->values = Values::New();
|
| + p->values->set_entity_values(Vector<EntityPtr>());  // Start from an empty entity list.
|
| + p->values->get_entity_values().push_back(std::move(value));  // |value| is moved into the list rather than copied.
|
| + return p;
|
| +}
|
| +
|
| +WebPagePtr CopylessPasteExtractorTest::createWebPage(const String& url,  // Builds a WebPage with only url and title filled in.
|
| + const String& title) {
|
| + WebPagePtr wp = WebPage::New();
|
| + wp->url = url;
|
| + wp->title = title;
|
| + return wp;  // Callers append the expected entities themselves.
|
| +}
|
| +
|
| TEST_F(CopylessPasteExtractorTest, empty) {
|
| - String extracted = extract();
|
| - String expected = "[]";
|
| - EXPECT_EQ(expected, extracted);
|
| + WebPagePtr page = WebPage::New();
|
| + ASSERT_FALSE(extract(page));  // This test sets no markup itself; extract() is expected to return false.
|
| + EXPECT_EQ(WebPage::New(), page);  // |page| must still compare equal to a freshly created WebPage.
|
| }
|
|
|
| TEST_F(CopylessPasteExtractorTest, basic) {
|
| setHtmlInnerHTML(
|
| "<body>"
|
| - "<script type=\"application/ld+json\">" +
|
| - m_content +
|
| + "<script type=\"application/ld+json\">"  // The JSON-LD payload is now spelled out inline in the test.
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Special characters for ya >_<;\""
|
| + "}\n"
|
| + "\n"
|
| "</script>"
|
| "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction must succeed before the contents are compared.
|
| +
|
| + WebPagePtr expected =  // Expected page carries the URL and title set above.
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
|
|
| - String extracted = extract();
|
| - String expected = "[" + m_content + "]";
|
| + EntityPtr restaurant = Entity::New();  // One entity for the single script block in <body>.
|
| + restaurant->type = "Restaurant";
|
| + restaurant->properties.push_back(
|
| + createStringProperty("name", "Special characters for ya >_<;"));
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| EXPECT_EQ(expected, extracted);
|
| }
|
|
|
| TEST_F(CopylessPasteExtractorTest, header) {
|
| setHtmlInnerHTML(
|
| "<head>"
|
| - "<script type=\"application/ld+json\">" +
|
| - m_content +
|
| + "<script type=\"application/ld+json\">"  // Same payload as the basic test, but placed inside <head>.
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Special characters for ya >_<;\""
|
| + "}\n"
|
| + "\n"
|
| "</script>"
|
| "</head>");
|
|
|
| - String extracted = extract();
|
| - String expected = "[" + m_content + "]";
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction must succeed before the contents are compared.
|
| +
|
| + WebPagePtr expected =  // Expected page carries the URL and title set above.
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";
|
| + restaurant->properties.push_back(
|
| + createStringProperty("name", "Special characters for ya >_<;"));
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, booleanValue) {  // A JSON true literal is expected as a bool-valued property.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"open\": true"
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction must succeed before the contents are compared.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";
|
| + restaurant->properties.push_back(createBooleanProperty("open", true));  // Mirrors "open": true in the payload.
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, longValue) {  // An integral JSON number is expected as a long-valued property.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"long\": 1"
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction must succeed before the contents are compared.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";
|
| + restaurant->properties.push_back(createLongProperty("long", 1ll));  // Mirrors "long": 1 in the payload.
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, doubleValue) {  // A fractional JSON number is expected as a string-valued property.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"double\": 1.5"
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction must succeed before the contents are compared.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";
|
| + restaurant->properties.push_back(createStringProperty("double", "1.5"));  // The number 1.5 is expected as the text "1.5".
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| EXPECT_EQ(expected, extracted);
|
| }
|
|
|
| TEST_F(CopylessPasteExtractorTest, multiple) {
|
| setHtmlInnerHTML(
|
| "<head>"
|
| - "<script type=\"application/ld+json\">" +
|
| - m_content +
|
| + "<script type=\"application/ld+json\">"  // First of three identical script blocks (this one in <head>).
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Special characters for ya >_<;\""
|
| + "}\n"
|
| + "\n"
|
| "</script>"
|
| "</head>"
|
| "<body>"
|
| - "<script type=\"application/ld+json\">" +
|
| - m_content +
|
| + "<script type=\"application/ld+json\">"  // Second block, first one in <body>.
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Special characters for ya >_<;\""
|
| + "}\n"
|
| + "\n"
|
| "</script>"
|
| - "<script type=\"application/ld+json\">" +
|
| - m_content +
|
| + "<script type=\"application/ld+json\">"  // Third block, second one in <body>.
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Special characters for ya >_<;\""
|
| + "}\n"
|
| + "\n"
|
| "</script>"
|
| "</body>");
|
|
|
| - String extracted = extract();
|
| - String expected = "[" + m_content + "," + m_content + "," + m_content + "]";
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction must succeed before the contents are compared.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + for (int i = 0; i < 3; ++i) {  // One expected entity per script block above.
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";
|
| + restaurant->properties.push_back(
|
| + createStringProperty("name", "Special characters for ya >_<;"));
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| + }
|
| EXPECT_EQ(expected, extracted);
|
| }
|
|
|
| -} // namespace
|
| +TEST_F(CopylessPasteExtractorTest, nested) {  // A JSON object used as a property value is expected as a nested entity.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Ye ol greasy diner\","
|
| + "\"address\": {"
|
| + "\n"
|
| + " \"streetAddress\": \"123 Big Oak Road\","
|
| + " \"addressLocality\": \"San Francisco\""
|
| + " }\n"
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction must succeed before the contents are compared.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";
|
| + restaurant->properties.push_back(
|
| + createStringProperty("name", "Ye ol greasy diner"));
|
| +
|
| + EntityPtr address = Entity::New();
|
| + address->type = "Thing";  // The address object has no "@type" in the payload; "Thing" is the expected type.
|
| + address->properties.push_back(
|
| + createStringProperty("streetAddress", "123 Big Oak Road"));
|
| + address->properties.push_back(
|
| + createStringProperty("addressLocality", "San Francisco"));
|
| +
|
| + restaurant->properties.push_back(
|
| + createEntityProperty("address", std::move(address)));
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, repeated) {  // A JSON array of strings is expected as one property with several values.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": [ \"First name\", \"Second name\" ]"
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction must succeed before the contents are compared.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";
|
| +
|
| + PropertyPtr name = Property::New();  // Built by hand because createStringProperty only makes single-value properties.
|
| + name->name = "name";
|
| + name->values = Values::New();
|
| + Vector<String> nameValues;
|
| + nameValues.push_back("First name");
|
| + nameValues.push_back("Second name");
|
| + name->values->set_string_values(nameValues);  // Both values, in the same order as the payload array.
|
| +
|
| + restaurant->properties.push_back(std::move(name));
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| +
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, repeatedObject) {  // A JSON array of objects is expected as one property with several entities.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Ye ol greasy diner\","
|
| + "\"address\": ["
|
| + "\n"
|
| + " {"
|
| + " \"streetAddress\": \"123 Big Oak Road\","
|
| + " \"addressLocality\": \"San Francisco\""
|
| + " },\n"
|
| + " {"
|
| + " \"streetAddress\": \"123 Big Oak Road\","
|
| + " \"addressLocality\": \"San Francisco\""
|
| + " }\n"
|
| + "]\n"
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction must succeed before the contents are compared.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";
|
| + restaurant->properties.push_back(
|
| + createStringProperty("name", "Ye ol greasy diner"));
|
| +
|
| + PropertyPtr addressProperty = Property::New();  // Built by hand because createEntityProperty only makes single-entity properties.
|
| + addressProperty->name = "address";
|
| + addressProperty->values = Values::New();
|
| + addressProperty->values->set_entity_values(Vector<EntityPtr>());
|
| + for (int i = 0; i < 2; ++i) {  // The payload array holds two identical address objects.
|
| + EntityPtr address = Entity::New();
|
| + address->type = "Thing";  // Neither address object has "@type" in the payload; "Thing" is the expected type.
|
| + address->properties.push_back(
|
| + createStringProperty("streetAddress", "123 Big Oak Road"));
|
| + address->properties.push_back(
|
| + createStringProperty("addressLocality", "San Francisco"));
|
| + addressProperty->values->get_entity_values().push_back(std::move(address));
|
| + }
|
| + restaurant->properties.push_back(std::move(addressProperty));
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, truncateLongString) {  // A 201-character string value is expected to come back cut to 200 characters.
|
| + String maxLengthString;
|
| + for (int i = 0; i < 200; ++i) {  // Builds a string of 200 'a' characters.
|
| + maxLengthString.append("a");
|
| + }
|
| + String tooLongString(maxLengthString);
|
| + tooLongString.append("a");  // One character longer than maxLengthString (201 in total).
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"" +
|
| + tooLongString +
|
| + "\""
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction must succeed before the contents are compared.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";
|
| + restaurant->properties.push_back(
|
| + createStringProperty("name", maxLengthString));  // The 200-character string, not the 201-character input.
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, enforceTypeExists) {  // A top-level object without "@type" is expected to make extraction fail.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"name\": \"Special characters for ya >_<;\""
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_FALSE(extract(extracted));  // extract() must return false for this payload.
|
| + WebPagePtr expected = WebPage::New();
|
| + EXPECT_EQ(expected, extracted);  // The output must equal a fresh WebPage even though URL and title were set on the document.
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, enforceTypeWhitelist) {  // A top-level "@type" of "UnsupportedType" is expected to make extraction fail.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"UnsupportedType\","
|
| + "\"name\": \"Special characters for ya >_<;\""
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_FALSE(extract(extracted));  // extract() must return false for this payload.
|
| + WebPagePtr expected = WebPage::New();
|
| + EXPECT_EQ(expected, extracted);  // The output must equal a fresh WebPage even though URL and title were set on the document.
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, truncateTooManyValuesInField) {  // A 101-element array is expected to come back with only 100 values.
|
| + String largeRepeatedField = "[";
|
| + for (int i = 0; i < 101; ++i) {  // Emits 101 "a" elements.
|
| + largeRepeatedField.append("\"a\"");
|
| + if (i != 100) {  // No separator after the last element.
|
| + largeRepeatedField.append(", ");
|
| + }
|
| + }
|
| + largeRepeatedField.append("]");
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": " +
|
| + largeRepeatedField +
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction must succeed before the contents are compared.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";
|
| +
|
| + PropertyPtr name = Property::New();
|
| + name->name = "name";
|
| + name->values = Values::New();
|
| + Vector<String> nameValues;
|
| + for (int i = 0; i < 100; ++i) {  // Only 100 of the 101 input values are expected.
|
| + nameValues.push_back("a");
|
| + }
|
| + name->values->set_string_values(nameValues);
|
| +
|
| + restaurant->properties.push_back(std::move(name));
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| +
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, truncateTooManyFields) {  // With "@type" plus 20 numbered fields, only 19 properties are expected.
|
| + String tooManyFields;
|
| + for (int i = 0; i < 20; ++i) {  // Emits fields "0" through "19", each with value "a".
|
| + tooManyFields.append(String::format("\"%d\": \"a\"", i));
|
| + if (i != 19) {  // No separator after the last field.
|
| + tooManyFields.append(",\n");
|
| + }
|
| + }
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\"," +
|
| + tooManyFields +
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction must succeed before the contents are compared.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";
|
| +
|
| + for (int i = 0; i < 19; ++i) {  // Fields "0" through "18" are expected; field "19" is expected to be dropped.
|
| + restaurant->properties.push_back(
|
| + createStringProperty(String::number(i), "a"));
|
| + }
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, ignorePropertyWithEmptyArray) {  // A property whose value is [] is expected to be omitted.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": []"
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction still succeeds; only the property is dropped.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";  // No properties are added: the entity is expected with its type only.
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| +
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, ignorePropertyWithMixedTypes) {  // An array mixing a string and a number is expected to be omitted.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": [ \"Name\", 1 ]"
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction still succeeds; only the property is dropped.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";  // No properties are added: the entity is expected with its type only.
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| +
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, ignorePropertyWithNestedArray) {  // An array that contains another array is expected to be omitted.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": [ [ \"Name\" ] ]"
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction still succeeds; only the property is dropped.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";  // No properties are added: the entity is expected with its type only.
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| +
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, enforceMaxNestingDepth) {  // Objects nested under "1" > "2" > "3" are kept; the object under "4" is expected to be dropped.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Ye ol greasy diner\","
|
| + "\"1\": {"
|
| + " \"2\": {"
|
| + " \"3\": {"
|
| + " \"4\": {"
|
| + " \"5\": 6"
|
| + " }\n"
|
| + " }\n"
|
| + " }\n"
|
| + "}\n"
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction must succeed before the contents are compared.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
|
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";
|
| + restaurant->properties.push_back(
|
| + createStringProperty("name", "Ye ol greasy diner"));
|
| +
|
| + EntityPtr entity1 = Entity::New();
|
| + entity1->type = "Thing";
|
| +
|
| + EntityPtr entity2 = Entity::New();
|
| + entity2->type = "Thing";
|
| +
|
| + EntityPtr entity3 = Entity::New();
|
| + entity3->type = "Thing";  // entity3 gets no properties: the "4" object from the payload is not expected in the output.
|
| +
|
| + entity2->properties.push_back(createEntityProperty("3", std::move(entity3)));  // Expected tree is assembled innermost-first.
|
| +
|
| + entity1->properties.push_back(createEntityProperty("2", std::move(entity2)));
|
| +
|
| + restaurant->properties.push_back(
|
| + createEntityProperty("1", std::move(entity1)));
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +TEST_F(CopylessPasteExtractorTest, maxNestingDepthWithTerminalProperty) {  // A non-object value inside the "3" object ("4": 5) is expected to be kept.
|
| + setHtmlInnerHTML(
|
| + "<body>"
|
| + "<script type=\"application/ld+json\">"
|
| + "\n"
|
| + "\n"
|
| + "{\"@type\": \"Restaurant\","
|
| + "\"name\": \"Ye ol greasy diner\","
|
| + "\"1\": {"
|
| + " \"2\": {"
|
| + " \"3\": {"
|
| + " \"4\": 5"
|
| + " }\n"
|
| + " }\n"
|
| + "}\n"
|
| + "}\n"
|
| + "\n"
|
| + "</script>"
|
| + "</body>");
|
| + setURL("http://www.test.com/");
|
| + setTitle("My neat website about cool stuff");
|
| +
|
| + WebPagePtr extracted = WebPage::New();
|
| + ASSERT_TRUE(extract(extracted));  // Extraction must succeed before the contents are compared.
|
| +
|
| + WebPagePtr expected =
|
| + createWebPage("http://www.test.com/", "My neat website about cool stuff");
|
| +
|
| + EntityPtr restaurant = Entity::New();
|
| + restaurant->type = "Restaurant";
|
| + restaurant->properties.push_back(
|
| + createStringProperty("name", "Ye ol greasy diner"));
|
| +
|
| + EntityPtr entity1 = Entity::New();
|
| + entity1->type = "Thing";
|
| +
|
| + EntityPtr entity2 = Entity::New();
|
| + entity2->type = "Thing";
|
| +
|
| + EntityPtr entity3 = Entity::New();
|
| + entity3->type = "Thing";
|
| +
|
| + entity3->properties.push_back(createLongProperty("4", 5));  // Mirrors "4": 5 from the payload as a long-valued property.
|
| +
|
| + entity2->properties.push_back(createEntityProperty("3", std::move(entity3)));  // Expected tree is assembled innermost-first.
|
| +
|
| + entity1->properties.push_back(createEntityProperty("2", std::move(entity2)));
|
| +
|
| + restaurant->properties.push_back(
|
| + createEntityProperty("1", std::move(entity1)));
|
| +
|
| + expected->entities.push_back(std::move(restaurant));
|
| + EXPECT_EQ(expected, extracted);
|
| +}
|
| +
|
| +} // namespace
|
| } // namespace blink
|
|
|