OLD | NEW |
1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "modules/document_metadata/CopylessPasteExtractor.h" | |
6 | 5 |
7 #include <memory> | 6 #include <memory> |
| 7 #include <string> |
| 8 #include <utility> |
8 #include "core/dom/Document.h" | 9 #include "core/dom/Document.h" |
9 #include "core/dom/Element.h" | 10 #include "core/dom/Element.h" |
10 #include "core/testing/DummyPageHolder.h" | 11 #include "core/testing/DummyPageHolder.h" |
| 12 #include "modules/document_metadata/CopylessPasteExtractor.h" |
| 13 #include "platform/json/JSONValues.h" |
| 14 #include "platform/testing/URLTestHelpers.h" |
| 15 #include "public/platform/modules/document_metadata/copyless_paste.mojom-blink.h" |
11 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
12 #include "wtf/text/StringBuilder.h" | 17 #include "wtf/text/StringBuilder.h" |
13 | 18 |
14 namespace blink { | 19 namespace blink { |
15 | 20 |
16 namespace { | 21 namespace { |
17 | 22 |
| 23 using mojom::blink::Entity; |
| 24 using mojom::blink::EntityPtr; |
| 25 using mojom::blink::Property; |
| 26 using mojom::blink::PropertyPtr; |
| 27 using mojom::blink::Values; |
| 28 using mojom::blink::ValuesPtr; |
| 29 using mojom::blink::WebPage; |
| 30 using mojom::blink::WebPagePtr; |
| 31 |
// Test fixture for CopylessPasteExtractor, shown as a side-by-side diff.
// OLD (left column): the fixture owned a shared JSON-LD snippet (m_content)
// and extract() returned the extractor result as a String.
// NEW (right column): m_content is gone, extract() fills a WebPagePtr
// out-parameter, and helper methods are declared for setting the document
// URL/title and for building the expected mojom Property/WebPage structures.
18 class CopylessPasteExtractorTest : public ::testing::Test { | 32 class CopylessPasteExtractorTest : public ::testing::Test { |
19 public: | 33 public: |
20 CopylessPasteExtractorTest() | 34 CopylessPasteExtractorTest() {} |
21 : m_content( | |
22 "\n" | |
23 "\n" | |
24 "{\"@type\": \"NewsArticle\"," | |
25 "\"headline\": \"Special characters for ya >_<;\"\n" | |
26 "}\n" | |
27 "\n") {} | |
28 | 35 |
29 protected: | 36 protected: |
// Defined out of line; creates the DummyPageHolder used by document().
30 void SetUp() override; | 37 void SetUp() override; |
31 | 38 |
// Runs a full garbage collection on the current ThreadState after each test.
32 void TearDown() override { ThreadState::current()->collectAllGarbage(); } | 39 void TearDown() override { ThreadState::current()->collectAllGarbage(); } |
33 | 40 |
// Returns the document owned by the dummy page holder.
34 Document& document() const { return m_dummyPageHolder->document(); } | 41 Document& document() const { return m_dummyPageHolder->document(); } |
35 | 42 |
// NEW: overwrites |page| with the extractor's result for document() and
// returns true only when that result is non-null. Any value the caller put
// in |page| beforehand is replaced.
36 String extract() { return CopylessPasteExtractor::extract(document()); } | 43 bool extract(WebPagePtr& page) { |
| 44 page = CopylessPasteExtractor::extract(document()); |
| 45 return !page.is_null(); |
| 46 } |
37 | 47 |
38 void setHtmlInnerHTML(const String&); | 48 void setHtmlInnerHTML(const String&); |
39 | 49 |
// NOTE(review): setURL takes std::string by (const) value while
// createWebPage takes const std::string&; the two could be made consistent.
40 String m_content; | 50 void setURL(const std::string); |
| 51 |
| 52 void setTitle(const String&); |
| 53 |
// Builders for the expected value side of the tests. Each returns a
// Property holding a single value of the named kind.
| 54 PropertyPtr createStringProperty(const String&, const String&); |
| 55 |
| 56 PropertyPtr createBooleanProperty(const String&, const bool&); |
| 57 |
| 58 PropertyPtr createLongProperty(const String&, const int64_t&); |
| 59 |
| 60 PropertyPtr createEntityProperty(const String&, EntityPtr); |
| 61 |
// Builds a WebPage with the given URL and title.
| 62 WebPagePtr createWebPage(const std::string&, const String&); |
41 | 63 |
42 private: | 64 private: |
43 std::unique_ptr<DummyPageHolder> m_dummyPageHolder; | 65 std::unique_ptr<DummyPageHolder> m_dummyPageHolder; |
44 }; | 66 }; |
45 | 67 |
// Creates a fresh DummyPageHolder (constructed with IntSize(800, 600)) for
// each test. Unchanged between the OLD and NEW columns.
46 void CopylessPasteExtractorTest::SetUp() { | 68 void CopylessPasteExtractorTest::SetUp() { |
47 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); | 69 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); |
48 } | 70 } |
49 | 71 |
// Replaces the inner HTML of the document element with |htmlContent|.
// Unchanged between the OLD and NEW columns.
50 void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) { | 72 void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) { |
51 document().documentElement()->setInnerHTML((htmlContent)); | 73 document().documentElement()->setInnerHTML((htmlContent)); |
52 } | 74 } |
53 | 75 |
// NEW: converts |url| with URLTestHelpers::toKURL and sets it as the
// document's URL.
// NOTE(review): |url| is passed by const value, which copies the string on
// every call; `const std::string&` would avoid that but requires changing the
// declaration in the fixture class at the same time.
| 76 void CopylessPasteExtractorTest::setURL(const std::string url) { |
| 77 document().setURL(URLTestHelpers::toKURL(url)); |
| 78 } |
| 79 |
// NEW: forwards |title| to Document::setTitle on the test document.
| 80 void CopylessPasteExtractorTest::setTitle(const String& title) { |
| 81 document().setTitle(title); |
| 82 } |
| 83 |
// NEW: builds a Property called |name| whose Values are set to string values
// via Vector<String>(1, value), i.e. a vector of size 1 filled with |value|.
| 84 PropertyPtr CopylessPasteExtractorTest::createStringProperty( |
| 85 const String& name, |
| 86 const String& value) { |
| 87 PropertyPtr p = Property::New(); |
| 88 p->name = name; |
| 89 p->values = Values::New(); |
| 90 p->values->set_string_values(Vector<String>(1, value)); |
| 91 return p; |
| 92 } |
| 93 |
// NEW: builds a Property called |name| whose Values are set to bool values
// via Vector<bool>(1, value), i.e. a vector of size 1 filled with |value|.
| 94 PropertyPtr CopylessPasteExtractorTest::createBooleanProperty( |
| 95 const String& name, |
| 96 const bool& value) { |
| 97 PropertyPtr p = Property::New(); |
| 98 p->name = name; |
| 99 p->values = Values::New(); |
| 100 p->values->set_bool_values(Vector<bool>(1, value)); |
| 101 return p; |
| 102 } |
| 103 |
// NEW: builds a Property called |name| whose Values are set to long values
// via Vector<int64_t>(1, value), i.e. a vector of size 1 filled with |value|.
| 104 PropertyPtr CopylessPasteExtractorTest::createLongProperty( |
| 105 const String& name, |
| 106 const int64_t& value) { |
| 107 PropertyPtr p = Property::New(); |
| 108 p->name = name; |
| 109 p->values = Values::New(); |
| 110 p->values->set_long_values(Vector<int64_t>(1, value)); |
| 111 return p; |
| 112 } |
| 113 |
// NEW: builds a Property called |name| whose Values hold exactly one entity.
// Unlike the scalar builders, the vector is first set empty and |value| is
// then moved into it with push_back.
// NOTE(review): presumably this two-step form is needed because EntityPtr is
// move-only and cannot be used with the (size, value) fill constructor —
// confirm against the mojom bindings.
| 114 PropertyPtr CopylessPasteExtractorTest::createEntityProperty(const String& name, |
| 115 EntityPtr value) { |
| 116 PropertyPtr p = Property::New(); |
| 117 p->name = name; |
| 118 p->values = Values::New(); |
| 119 p->values->set_entity_values(Vector<EntityPtr>()); |
| 120 p->values->get_entity_values().push_back(std::move(value)); |
| 121 return p; |
| 122 } |
| 123 |
// NEW: builds a WebPage whose url is URLTestHelpers::toKURL(url) and whose
// title is |title|. The entities list is not touched here; tests append the
// expected entities themselves.
| 124 WebPagePtr CopylessPasteExtractorTest::createWebPage(const std::string& url, |
| 125 const String& title) { |
| 126 WebPagePtr wp = WebPage::New(); |
| 127 wp->url = URLTestHelpers::toKURL(url); |
| 128 wp->title = title; |
| 129 return wp; |
| 130 } |
| 131 |
// No HTML is set on the document.
// OLD: the extracted string was expected to equal "[]".
// NEW: extract() must return false and leave |page| null, even though |page|
// was initialised with WebPage::New() before the call.
54 TEST_F(CopylessPasteExtractorTest, empty) { | 132 TEST_F(CopylessPasteExtractorTest, empty) { |
55 String extracted = extract(); | 133 WebPagePtr page = WebPage::New(); |
56 String expected = "[]"; | 134 ASSERT_FALSE(extract(page)); |
57 EXPECT_EQ(expected, extracted); | 135 ASSERT_TRUE(page.is_null()); |
58 } | 136 } |
59 | 137 |
// A single application/ld+json script inside <body>.
// OLD: the script body was the shared m_content and the expected result was
// the string "[" + m_content + "]".
// NEW: the JSON-LD is inlined ("@type": "Restaurant" plus a "name" string)
// and the expected result is a WebPage with the test URL/title containing one
// Restaurant entity with a single string property "name".
60 TEST_F(CopylessPasteExtractorTest, basic) { | 138 TEST_F(CopylessPasteExtractorTest, basic) { |
61 setHtmlInnerHTML( | 139 setHtmlInnerHTML( |
62 "<body>" | 140 "<body>" |
63 "<script type=\"application/ld+json\">" + | 141 "<script type=\"application/ld+json\">" |
64 m_content + | 142 "\n" |
65 "</script>" | 143 "\n" |
66 "</body>"); | 144 "{\"@type\": \"Restaurant\"," |
67 | 145 "\"name\": \"Special characters for ya >_<;\"" |
68 String extracted = extract(); | 146 "}\n" |
69 String expected = "[" + m_content + "]"; | 147 "\n" |
| 148 "</script>" |
| 149 "</body>"); |
| 150 setURL("http://www.test.com/"); |
| 151 setTitle("My neat website about cool stuff"); |
| 152 |
| 153 WebPagePtr extracted = WebPage::New(); |
| 154 ASSERT_TRUE(extract(extracted)); |
| 155 |
| 156 WebPagePtr expected = |
| 157 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 158 |
| 159 EntityPtr restaurant = Entity::New(); |
| 160 restaurant->type = "Restaurant"; |
| 161 restaurant->properties.push_back( |
| 162 createStringProperty("name", "Special characters for ya >_<;")); |
| 163 |
| 164 expected->entities.push_back(std::move(restaurant)); |
70 EXPECT_EQ(expected, extracted); | 165 EXPECT_EQ(expected, extracted); |
71 } | 166 } |
72 | 167 |
// Same JSON-LD payload as the "basic" test, but the script lives in <head>.
// NEW: expects one Restaurant entity with a single string property "name".
// The OLD column's version of this test continues past the end of this block
// in the diff table (its closing lines are aligned with a later NEW test).
73 TEST_F(CopylessPasteExtractorTest, header) { | 168 TEST_F(CopylessPasteExtractorTest, header) { |
74 setHtmlInnerHTML( | 169 setHtmlInnerHTML( |
75 "<head>" | 170 "<head>" |
76 "<script type=\"application/ld+json\">" + | 171 "<script type=\"application/ld+json\">" |
77 m_content + | 172 "\n" |
| 173 "\n" |
| 174 "{\"@type\": \"Restaurant\"," |
| 175 "\"name\": \"Special characters for ya >_<;\"" |
| 176 "}\n" |
| 177 "\n" |
78 "</script>" | 178 "</script>" |
79 "</head>"); | 179 "</head>"); |
80 | 180 |
81 String extracted = extract(); | 181 setURL("http://www.test.com/"); |
82 String expected = "[" + m_content + "]"; | 182 setTitle("My neat website about cool stuff"); |
| 183 |
| 184 WebPagePtr extracted = WebPage::New(); |
| 185 ASSERT_TRUE(extract(extracted)); |
| 186 |
| 187 WebPagePtr expected = |
| 188 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 189 |
| 190 EntityPtr restaurant = Entity::New(); |
| 191 restaurant->type = "Restaurant"; |
| 192 restaurant->properties.push_back( |
| 193 createStringProperty("name", "Special characters for ya >_<;")); |
| 194 |
| 195 expected->entities.push_back(std::move(restaurant)); |
| 196 EXPECT_EQ(expected, extracted); |
| 197 } |
| 198 |
// NEW test: a JSON boolean ("open": true) is expected to come back as a
// Property carrying a single bool value.
| 199 TEST_F(CopylessPasteExtractorTest, booleanValue) { |
| 200 setHtmlInnerHTML( |
| 201 "<body>" |
| 202 "<script type=\"application/ld+json\">" |
| 203 "\n" |
| 204 "\n" |
| 205 "{\"@type\": \"Restaurant\"," |
| 206 "\"open\": true" |
| 207 "}\n" |
| 208 "\n" |
| 209 "</script>" |
| 210 "</body>"); |
| 211 setURL("http://www.test.com/"); |
| 212 setTitle("My neat website about cool stuff"); |
| 213 |
| 214 WebPagePtr extracted = WebPage::New(); |
| 215 ASSERT_TRUE(extract(extracted)); |
| 216 |
| 217 WebPagePtr expected = |
| 218 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 219 |
| 220 EntityPtr restaurant = Entity::New(); |
| 221 restaurant->type = "Restaurant"; |
| 222 restaurant->properties.push_back(createBooleanProperty("open", true)); |
| 223 |
| 224 expected->entities.push_back(std::move(restaurant)); |
| 225 EXPECT_EQ(expected, extracted); |
| 226 } |
| 227 |
// NEW test: a JSON integer ("long": 1) is expected to come back as a
// Property carrying a single long value of 1.
| 228 TEST_F(CopylessPasteExtractorTest, longValue) { |
| 229 setHtmlInnerHTML( |
| 230 "<body>" |
| 231 "<script type=\"application/ld+json\">" |
| 232 "\n" |
| 233 "\n" |
| 234 "{\"@type\": \"Restaurant\"," |
| 235 "\"long\": 1" |
| 236 "}\n" |
| 237 "\n" |
| 238 "</script>" |
| 239 "</body>"); |
| 240 setURL("http://www.test.com/"); |
| 241 setTitle("My neat website about cool stuff"); |
| 242 |
| 243 WebPagePtr extracted = WebPage::New(); |
| 244 ASSERT_TRUE(extract(extracted)); |
| 245 |
| 246 WebPagePtr expected = |
| 247 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 248 |
| 249 EntityPtr restaurant = Entity::New(); |
| 250 restaurant->type = "Restaurant"; |
| 251 restaurant->properties.push_back(createLongProperty("long", 1ll)); |
| 252 |
| 253 expected->entities.push_back(std::move(restaurant)); |
| 254 EXPECT_EQ(expected, extracted); |
| 255 } |
| 256 |
// NEW test: a JSON floating-point number ("double": 1.5) is expected to come
// back as a *string* property with the value "1.5"; no double-typed builder
// is used in the expectation.
// The last two rows also carry the OLD column's closing lines of the old
// "header" test, which the diff aligned here.
| 257 TEST_F(CopylessPasteExtractorTest, doubleValue) { |
| 258 setHtmlInnerHTML( |
| 259 "<body>" |
| 260 "<script type=\"application/ld+json\">" |
| 261 "\n" |
| 262 "\n" |
| 263 "{\"@type\": \"Restaurant\"," |
| 264 "\"double\": 1.5" |
| 265 "}\n" |
| 266 "\n" |
| 267 "</script>" |
| 268 "</body>"); |
| 269 setURL("http://www.test.com/"); |
| 270 setTitle("My neat website about cool stuff"); |
| 271 |
| 272 WebPagePtr extracted = WebPage::New(); |
| 273 ASSERT_TRUE(extract(extracted)); |
| 274 |
| 275 WebPagePtr expected = |
| 276 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 277 |
| 278 EntityPtr restaurant = Entity::New(); |
| 279 restaurant->type = "Restaurant"; |
| 280 restaurant->properties.push_back(createStringProperty("double", "1.5")); |
| 281 |
| 282 expected->entities.push_back(std::move(restaurant)); |
83 EXPECT_EQ(expected, extracted); | 283 EXPECT_EQ(expected, extracted); |
84 } | 284 } |
85 | 285 |
// Three application/ld+json scripts: one in <head> and two in <body>, all
// with the same Restaurant payload.
// OLD: expected the three m_content strings joined by "," inside "[...]".
// NEW: expects a WebPage with three identical Restaurant entities, each with
// one string property "name".
// The OLD column's version of this test closes much later in the diff table.
86 TEST_F(CopylessPasteExtractorTest, multiple) { | 286 TEST_F(CopylessPasteExtractorTest, multiple) { |
87 setHtmlInnerHTML( | 287 setHtmlInnerHTML( |
88 "<head>" | 288 "<head>" |
89 "<script type=\"application/ld+json\">" + | 289 "<script type=\"application/ld+json\">" |
90 m_content + | 290 "\n" |
| 291 "\n" |
| 292 "{\"@type\": \"Restaurant\"," |
| 293 "\"name\": \"Special characters for ya >_<;\"" |
| 294 "}\n" |
| 295 "\n" |
91 "</script>" | 296 "</script>" |
92 "</head>" | 297 "</head>" |
93 "<body>" | 298 "<body>" |
94 "<script type=\"application/ld+json\">" + | 299 "<script type=\"application/ld+json\">" |
95 m_content + | 300 "\n" |
96 "</script>" | 301 "\n" |
97 "<script type=\"application/ld+json\">" + | 302 "{\"@type\": \"Restaurant\"," |
98 m_content + | 303 "\"name\": \"Special characters for ya >_<;\"" |
99 "</script>" | 304 "}\n" |
100 "</body>"); | 305 "\n" |
101 | 306 "</script>" |
102 String extracted = extract(); | 307 "<script type=\"application/ld+json\">" |
103 String expected = "[" + m_content + "," + m_content + "," + m_content + "]"; | 308 "\n" |
| 309 "\n" |
| 310 "{\"@type\": \"Restaurant\"," |
| 311 "\"name\": \"Special characters for ya >_<;\"" |
| 312 "}\n" |
| 313 "\n" |
| 314 "</script>" |
| 315 "</body>"); |
| 316 |
| 317 setURL("http://www.test.com/"); |
| 318 setTitle("My neat website about cool stuff"); |
| 319 |
| 320 WebPagePtr extracted = WebPage::New(); |
| 321 ASSERT_TRUE(extract(extracted)); |
| 322 |
| 323 WebPagePtr expected = |
| 324 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 325 |
// One expected entity per script element.
| 326 for (int i = 0; i < 3; ++i) { |
| 327 EntityPtr restaurant = Entity::New(); |
| 328 restaurant->type = "Restaurant"; |
| 329 restaurant->properties.push_back( |
| 330 createStringProperty("name", "Special characters for ya >_<;")); |
| 331 |
| 332 expected->entities.push_back(std::move(restaurant)); |
| 333 } |
| 334 EXPECT_EQ(expected, extracted); |
| 335 } |
| 336 |
// NEW test: a property whose value is a nested JSON object ("address").
// The nested object has no "@type" in the input, and the expectation gives
// the resulting entity the type "Thing" with two string properties.
| 337 TEST_F(CopylessPasteExtractorTest, nested) { |
| 338 setHtmlInnerHTML( |
| 339 "<body>" |
| 340 "<script type=\"application/ld+json\">" |
| 341 "\n" |
| 342 "\n" |
| 343 "{\"@type\": \"Restaurant\"," |
| 344 "\"name\": \"Ye ol greasy diner\"," |
| 345 "\"address\": {" |
| 346 "\n" |
| 347 " \"streetAddress\": \"123 Big Oak Road\"," |
| 348 " \"addressLocality\": \"San Francisco\"" |
| 349 " }\n" |
| 350 "}\n" |
| 351 "\n" |
| 352 "</script>" |
| 353 "</body>"); |
| 354 setURL("http://www.test.com/"); |
| 355 setTitle("My neat website about cool stuff"); |
| 356 |
| 357 WebPagePtr extracted = WebPage::New(); |
| 358 ASSERT_TRUE(extract(extracted)); |
| 359 |
| 360 WebPagePtr expected = |
| 361 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 362 |
| 363 EntityPtr restaurant = Entity::New(); |
| 364 restaurant->type = "Restaurant"; |
| 365 restaurant->properties.push_back( |
| 366 createStringProperty("name", "Ye ol greasy diner")); |
| 367 |
| 368 EntityPtr address = Entity::New(); |
| 369 address->type = "Thing"; |
| 370 address->properties.push_back( |
| 371 createStringProperty("streetAddress", "123 Big Oak Road")); |
| 372 address->properties.push_back( |
| 373 createStringProperty("addressLocality", "San Francisco")); |
| 374 |
| 375 restaurant->properties.push_back( |
| 376 createEntityProperty("address", std::move(address))); |
| 377 |
| 378 expected->entities.push_back(std::move(restaurant)); |
| 379 EXPECT_EQ(expected, extracted); |
| 380 } |
| 381 |
// NEW test: a property whose value is a JSON array of two strings. The
// expectation is a single "name" property holding both strings, in input
// order, as its string values.
| 382 TEST_F(CopylessPasteExtractorTest, repeated) { |
| 383 setHtmlInnerHTML( |
| 384 "<body>" |
| 385 "<script type=\"application/ld+json\">" |
| 386 "\n" |
| 387 "\n" |
| 388 "{\"@type\": \"Restaurant\"," |
| 389 "\"name\": [ \"First name\", \"Second name\" ]" |
| 390 "}\n" |
| 391 "\n" |
| 392 "</script>" |
| 393 "</body>"); |
| 394 setURL("http://www.test.com/"); |
| 395 setTitle("My neat website about cool stuff"); |
| 396 |
| 397 WebPagePtr extracted = WebPage::New(); |
| 398 ASSERT_TRUE(extract(extracted)); |
| 399 |
| 400 WebPagePtr expected = |
| 401 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 402 |
| 403 EntityPtr restaurant = Entity::New(); |
| 404 restaurant->type = "Restaurant"; |
| 405 |
// Built by hand because createStringProperty only produces one value.
| 406 PropertyPtr name = Property::New(); |
| 407 name->name = "name"; |
| 408 name->values = Values::New(); |
| 409 Vector<String> nameValues; |
| 410 nameValues.push_back("First name"); |
| 411 nameValues.push_back("Second name"); |
| 412 name->values->set_string_values(nameValues); |
| 413 |
| 414 restaurant->properties.push_back(std::move(name)); |
| 415 |
| 416 expected->entities.push_back(std::move(restaurant)); |
| 417 |
| 418 EXPECT_EQ(expected, extracted); |
| 419 } |
| 420 |
// NEW test: a property whose value is a JSON array of two objects. The
// expectation is a single "address" property whose entity values contain two
// "Thing" entities, each with the same two string properties.
| 421 TEST_F(CopylessPasteExtractorTest, repeatedObject) { |
| 422 setHtmlInnerHTML( |
| 423 "<body>" |
| 424 "<script type=\"application/ld+json\">" |
| 425 "\n" |
| 426 "\n" |
| 427 "{\"@type\": \"Restaurant\"," |
| 428 "\"name\": \"Ye ol greasy diner\"," |
| 429 "\"address\": [" |
| 430 "\n" |
| 431 " {" |
| 432 " \"streetAddress\": \"123 Big Oak Road\"," |
| 433 " \"addressLocality\": \"San Francisco\"" |
| 434 " },\n" |
| 435 " {" |
| 436 " \"streetAddress\": \"123 Big Oak Road\"," |
| 437 " \"addressLocality\": \"San Francisco\"" |
| 438 " }\n" |
| 439 "]\n" |
| 440 "}\n" |
| 441 "\n" |
| 442 "</script>" |
| 443 "</body>"); |
| 444 setURL("http://www.test.com/"); |
| 445 setTitle("My neat website about cool stuff"); |
| 446 |
| 447 WebPagePtr extracted = WebPage::New(); |
| 448 ASSERT_TRUE(extract(extracted)); |
| 449 |
| 450 WebPagePtr expected = |
| 451 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 452 |
| 453 EntityPtr restaurant = Entity::New(); |
| 454 restaurant->type = "Restaurant"; |
| 455 restaurant->properties.push_back( |
| 456 createStringProperty("name", "Ye ol greasy diner")); |
| 457 |
// Built by hand because createEntityProperty only accepts one entity.
| 458 PropertyPtr addressProperty = Property::New(); |
| 459 addressProperty->name = "address"; |
| 460 addressProperty->values = Values::New(); |
| 461 addressProperty->values->set_entity_values(Vector<EntityPtr>()); |
| 462 for (int i = 0; i < 2; ++i) { |
| 463 EntityPtr address = Entity::New(); |
| 464 address->type = "Thing"; |
| 465 address->properties.push_back( |
| 466 createStringProperty("streetAddress", "123 Big Oak Road")); |
| 467 address->properties.push_back( |
| 468 createStringProperty("addressLocality", "San Francisco")); |
| 469 addressProperty->values->get_entity_values().push_back(std::move(address)); |
| 470 } |
| 471 restaurant->properties.push_back(std::move(addressProperty)); |
| 472 |
| 473 expected->entities.push_back(std::move(restaurant)); |
| 474 EXPECT_EQ(expected, extracted); |
| 475 } |
| 476 |
// NEW test: a string value one character longer than 200 characters is
// expected to come back cut down to the 200-character string.
| 477 TEST_F(CopylessPasteExtractorTest, truncateLongString) { |
// maxLengthString is 200 'a' characters; tooLongString is 201.
| 478 String maxLengthString; |
| 479 for (int i = 0; i < 200; ++i) { |
| 480 maxLengthString.append("a"); |
| 481 } |
| 482 String tooLongString(maxLengthString); |
| 483 tooLongString.append("a"); |
| 484 setHtmlInnerHTML( |
| 485 "<body>" |
| 486 "<script type=\"application/ld+json\">" |
| 487 "\n" |
| 488 "\n" |
| 489 "{\"@type\": \"Restaurant\"," |
| 490 "\"name\": \"" + |
| 491 tooLongString + |
| 492 "\"" |
| 493 "}\n" |
| 494 "\n" |
| 495 "</script>" |
| 496 "</body>"); |
| 497 setURL("http://www.test.com/"); |
| 498 setTitle("My neat website about cool stuff"); |
| 499 |
| 500 WebPagePtr extracted = WebPage::New(); |
| 501 ASSERT_TRUE(extract(extracted)); |
| 502 |
| 503 WebPagePtr expected = |
| 504 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 505 |
| 506 EntityPtr restaurant = Entity::New(); |
| 507 restaurant->type = "Restaurant"; |
| 508 restaurant->properties.push_back( |
| 509 createStringProperty("name", maxLengthString)); |
| 510 |
| 511 expected->entities.push_back(std::move(restaurant)); |
| 512 EXPECT_EQ(expected, extracted); |
| 513 } |
| 514 |
// NEW test: the JSON-LD object has a "name" but no "@type". Extraction is
// expected to fail: extract() returns false and |extracted| ends up null.
| 515 TEST_F(CopylessPasteExtractorTest, enforceTypeExists) { |
| 516 setHtmlInnerHTML( |
| 517 "<body>" |
| 518 "<script type=\"application/ld+json\">" |
| 519 "\n" |
| 520 "\n" |
| 521 "{\"name\": \"Special characters for ya >_<;\"" |
| 522 "}\n" |
| 523 "\n" |
| 524 "</script>" |
| 525 "</body>"); |
| 526 setURL("http://www.test.com/"); |
| 527 setTitle("My neat website about cool stuff"); |
| 528 |
| 529 WebPagePtr extracted = WebPage::New(); |
| 530 ASSERT_FALSE(extract(extracted)); |
| 531 ASSERT_TRUE(extracted.is_null()); |
| 532 } |
| 533 |
// NEW test: the JSON-LD object declares "@type": "UnsupportedType".
// Extraction is expected to fail: extract() returns false and |extracted|
// ends up null.
| 534 TEST_F(CopylessPasteExtractorTest, enforceTypeWhitelist) { |
| 535 setHtmlInnerHTML( |
| 536 "<body>" |
| 537 "<script type=\"application/ld+json\">" |
| 538 "\n" |
| 539 "\n" |
| 540 "{\"@type\": \"UnsupportedType\"," |
| 541 "\"name\": \"Special characters for ya >_<;\"" |
| 542 "}\n" |
| 543 "\n" |
| 544 "</script>" |
| 545 "</body>"); |
| 546 setURL("http://www.test.com/"); |
| 547 setTitle("My neat website about cool stuff"); |
| 548 |
| 549 WebPagePtr extracted = WebPage::New(); |
| 550 ASSERT_FALSE(extract(extracted)); |
| 551 ASSERT_TRUE(extracted.is_null()); |
| 552 } |
| 553 |
// NEW test: the "name" property is a JSON array of 101 "a" strings. The
// expectation is a "name" property holding only 100 of them.
| 554 TEST_F(CopylessPasteExtractorTest, truncateTooManyValuesInField) { |
// Builds the array literal ["a", "a", ...] with 101 elements; the separator
// is skipped after the last element.
| 555 String largeRepeatedField = "["; |
| 556 for (int i = 0; i < 101; ++i) { |
| 557 largeRepeatedField.append("\"a\""); |
| 558 if (i != 100) { |
| 559 largeRepeatedField.append(", "); |
| 560 } |
| 561 } |
| 562 largeRepeatedField.append("]"); |
| 563 setHtmlInnerHTML( |
| 564 "<body>" |
| 565 "<script type=\"application/ld+json\">" |
| 566 "\n" |
| 567 "\n" |
| 568 "{\"@type\": \"Restaurant\"," |
| 569 "\"name\": " + |
| 570 largeRepeatedField + |
| 571 "}\n" |
| 572 "\n" |
| 573 "</script>" |
| 574 "</body>"); |
| 575 setURL("http://www.test.com/"); |
| 576 setTitle("My neat website about cool stuff"); |
| 577 |
| 578 WebPagePtr extracted = WebPage::New(); |
| 579 ASSERT_TRUE(extract(extracted)); |
| 580 |
| 581 WebPagePtr expected = |
| 582 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 583 |
| 584 EntityPtr restaurant = Entity::New(); |
| 585 restaurant->type = "Restaurant"; |
| 586 |
| 587 PropertyPtr name = Property::New(); |
| 588 name->name = "name"; |
| 589 name->values = Values::New(); |
| 590 Vector<String> nameValues; |
| 591 for (int i = 0; i < 100; ++i) { |
| 592 nameValues.push_back("a"); |
| 593 } |
| 594 name->values->set_string_values(nameValues); |
| 595 |
| 596 restaurant->properties.push_back(std::move(name)); |
| 597 |
| 598 expected->entities.push_back(std::move(restaurant)); |
| 599 |
| 600 EXPECT_EQ(expected, extracted); |
| 601 } |
| 602 |
// NEW test: the object has "@type" plus 20 further fields named "0".."19",
// each with the value "a". The expectation keeps only the properties
// "0".."18" (19 of them), in input order.
// NOTE(review): presumably "@type" counts toward a 20-field limit in the
// extractor, which would explain 19 surviving properties — confirm against
// the extractor implementation.
| 603 TEST_F(CopylessPasteExtractorTest, truncateTooManyFields) { |
| 604 String tooManyFields; |
| 605 for (int i = 0; i < 20; ++i) { |
| 606 tooManyFields.append(String::format("\"%d\": \"a\"", i)); |
| 607 if (i != 19) { |
| 608 tooManyFields.append(",\n"); |
| 609 } |
| 610 } |
| 611 setHtmlInnerHTML( |
| 612 "<body>" |
| 613 "<script type=\"application/ld+json\">" |
| 614 "\n" |
| 615 "\n" |
| 616 "{\"@type\": \"Restaurant\"," + |
| 617 tooManyFields + |
| 618 "}\n" |
| 619 "\n" |
| 620 "</script>" |
| 621 "</body>"); |
| 622 setURL("http://www.test.com/"); |
| 623 setTitle("My neat website about cool stuff"); |
| 624 |
| 625 WebPagePtr extracted = WebPage::New(); |
| 626 ASSERT_TRUE(extract(extracted)); |
| 627 |
| 628 WebPagePtr expected = |
| 629 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 630 |
| 631 EntityPtr restaurant = Entity::New(); |
| 632 restaurant->type = "Restaurant"; |
| 633 |
| 634 for (int i = 0; i < 19; ++i) { |
| 635 restaurant->properties.push_back( |
| 636 createStringProperty(String::number(i), "a")); |
| 637 } |
| 638 |
| 639 expected->entities.push_back(std::move(restaurant)); |
| 640 EXPECT_EQ(expected, extracted); |
| 641 } |
| 642 |
// NEW test: "name" is an empty JSON array. Extraction still succeeds, and the
// expected Restaurant entity has no properties at all.
| 643 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithEmptyArray) { |
| 644 setHtmlInnerHTML( |
| 645 "<body>" |
| 646 "<script type=\"application/ld+json\">" |
| 647 "\n" |
| 648 "\n" |
| 649 "{\"@type\": \"Restaurant\"," |
| 650 "\"name\": []" |
| 651 "}\n" |
| 652 "\n" |
| 653 "</script>" |
| 654 "</body>"); |
| 655 setURL("http://www.test.com/"); |
| 656 setTitle("My neat website about cool stuff"); |
| 657 |
| 658 WebPagePtr extracted = WebPage::New(); |
| 659 ASSERT_TRUE(extract(extracted)); |
| 660 |
| 661 WebPagePtr expected = |
| 662 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 663 |
| 664 EntityPtr restaurant = Entity::New(); |
| 665 restaurant->type = "Restaurant"; |
| 666 |
| 667 expected->entities.push_back(std::move(restaurant)); |
| 668 |
| 669 EXPECT_EQ(expected, extracted); |
| 670 } |
| 671 |
// NEW test: "name" is an array mixing a string and a number. Extraction
// still succeeds, and the expected Restaurant entity has no properties.
| 672 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithMixedTypes) { |
| 673 setHtmlInnerHTML( |
| 674 "<body>" |
| 675 "<script type=\"application/ld+json\">" |
| 676 "\n" |
| 677 "\n" |
| 678 "{\"@type\": \"Restaurant\"," |
| 679 "\"name\": [ \"Name\", 1 ]" |
| 680 "}\n" |
| 681 "\n" |
| 682 "</script>" |
| 683 "</body>"); |
| 684 setURL("http://www.test.com/"); |
| 685 setTitle("My neat website about cool stuff"); |
| 686 |
| 687 WebPagePtr extracted = WebPage::New(); |
| 688 ASSERT_TRUE(extract(extracted)); |
| 689 |
| 690 WebPagePtr expected = |
| 691 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 692 |
| 693 EntityPtr restaurant = Entity::New(); |
| 694 restaurant->type = "Restaurant"; |
| 695 |
| 696 expected->entities.push_back(std::move(restaurant)); |
| 697 |
| 698 EXPECT_EQ(expected, extracted); |
| 699 } |
| 700 |
// NEW test: "name" is an array containing another array. Extraction still
// succeeds, and the expected Restaurant entity has no properties.
| 701 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithNestedArray) { |
| 702 setHtmlInnerHTML( |
| 703 "<body>" |
| 704 "<script type=\"application/ld+json\">" |
| 705 "\n" |
| 706 "\n" |
| 707 "{\"@type\": \"Restaurant\"," |
| 708 "\"name\": [ [ \"Name\" ] ]" |
| 709 "}\n" |
| 710 "\n" |
| 711 "</script>" |
| 712 "</body>"); |
| 713 setURL("http://www.test.com/"); |
| 714 setTitle("My neat website about cool stuff"); |
| 715 |
| 716 WebPagePtr extracted = WebPage::New(); |
| 717 ASSERT_TRUE(extract(extracted)); |
| 718 |
| 719 WebPagePtr expected = |
| 720 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 721 |
| 722 EntityPtr restaurant = Entity::New(); |
| 723 restaurant->type = "Restaurant"; |
| 724 |
| 725 expected->entities.push_back(std::move(restaurant)); |
| 726 |
| 727 EXPECT_EQ(expected, extracted); |
| 728 } |
| 729 |
// NEW test: objects nested as "1" -> "2" -> "3" -> "4" -> {"5": 6}. The
// expectation keeps entities for "1", "2" and "3" only; the entity for "3"
// has no properties, so the object under "4" is dropped entirely.
| 730 TEST_F(CopylessPasteExtractorTest, enforceMaxNestingDepth) { |
| 731 setHtmlInnerHTML( |
| 732 "<body>" |
| 733 "<script type=\"application/ld+json\">" |
| 734 "\n" |
| 735 "\n" |
| 736 "{\"@type\": \"Restaurant\"," |
| 737 "\"name\": \"Ye ol greasy diner\"," |
| 738 "\"1\": {" |
| 739 " \"2\": {" |
| 740 " \"3\": {" |
| 741 " \"4\": {" |
| 742 " \"5\": 6" |
| 743 " }\n" |
| 744 " }\n" |
| 745 " }\n" |
| 746 "}\n" |
| 747 "}\n" |
| 748 "\n" |
| 749 "</script>" |
| 750 "</body>"); |
| 751 setURL("http://www.test.com/"); |
| 752 setTitle("My neat website about cool stuff"); |
| 753 |
| 754 WebPagePtr extracted = WebPage::New(); |
| 755 ASSERT_TRUE(extract(extracted)); |
| 756 |
| 757 WebPagePtr expected = |
| 758 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 759 |
| 760 EntityPtr restaurant = Entity::New(); |
| 761 restaurant->type = "Restaurant"; |
| 762 restaurant->properties.push_back( |
| 763 createStringProperty("name", "Ye ol greasy diner")); |
| 764 |
| 765 EntityPtr entity1 = Entity::New(); |
| 766 entity1->type = "Thing"; |
| 767 |
| 768 EntityPtr entity2 = Entity::New(); |
| 769 entity2->type = "Thing"; |
| 770 |
| 771 EntityPtr entity3 = Entity::New(); |
| 772 entity3->type = "Thing"; |
| 773 |
// Assemble innermost-first, since each entity is moved into its parent.
| 774 entity2->properties.push_back(createEntityProperty("3", std::move(entity3))); |
| 775 |
| 776 entity1->properties.push_back(createEntityProperty("2", std::move(entity2))); |
| 777 |
| 778 restaurant->properties.push_back( |
| 779 createEntityProperty("1", std::move(entity1))); |
| 780 |
| 781 expected->entities.push_back(std::move(restaurant)); |
| 782 EXPECT_EQ(expected, extracted); |
| 783 } |
| 784 |
// NEW test: objects nested as "1" -> "2" -> "3" -> {"4": 5}. Here the
// innermost value is a scalar rather than another object, and the
// expectation keeps it: the entity for "3" carries a long property "4" = 5.
// The last two rows also carry the OLD column's closing lines of the old
// "multiple" test, which the diff aligned here.
| 785 TEST_F(CopylessPasteExtractorTest, maxNestingDepthWithTerminalProperty) { |
| 786 setHtmlInnerHTML( |
| 787 "<body>" |
| 788 "<script type=\"application/ld+json\">" |
| 789 "\n" |
| 790 "\n" |
| 791 "{\"@type\": \"Restaurant\"," |
| 792 "\"name\": \"Ye ol greasy diner\"," |
| 793 "\"1\": {" |
| 794 " \"2\": {" |
| 795 " \"3\": {" |
| 796 " \"4\": 5" |
| 797 " }\n" |
| 798 " }\n" |
| 799 "}\n" |
| 800 "}\n" |
| 801 "\n" |
| 802 "</script>" |
| 803 "</body>"); |
| 804 setURL("http://www.test.com/"); |
| 805 setTitle("My neat website about cool stuff"); |
| 806 |
| 807 WebPagePtr extracted = WebPage::New(); |
| 808 ASSERT_TRUE(extract(extracted)); |
| 809 |
| 810 WebPagePtr expected = |
| 811 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 812 |
| 813 EntityPtr restaurant = Entity::New(); |
| 814 restaurant->type = "Restaurant"; |
| 815 restaurant->properties.push_back( |
| 816 createStringProperty("name", "Ye ol greasy diner")); |
| 817 |
| 818 EntityPtr entity1 = Entity::New(); |
| 819 entity1->type = "Thing"; |
| 820 |
| 821 EntityPtr entity2 = Entity::New(); |
| 822 entity2->type = "Thing"; |
| 823 |
| 824 EntityPtr entity3 = Entity::New(); |
| 825 entity3->type = "Thing"; |
| 826 |
| 827 entity3->properties.push_back(createLongProperty("4", 5)); |
| 828 |
// Assemble innermost-first, since each entity is moved into its parent.
| 829 entity2->properties.push_back(createEntityProperty("3", std::move(entity3))); |
| 830 |
| 831 entity1->properties.push_back(createEntityProperty("2", std::move(entity2))); |
| 832 |
| 833 restaurant->properties.push_back( |
| 834 createEntityProperty("1", std::move(entity1))); |
| 835 |
| 836 expected->entities.push_back(std::move(restaurant)); |
104 EXPECT_EQ(expected, extracted); | 837 EXPECT_EQ(expected, extracted); |
105 } | 838 } |
106 | 839 |
107 } // namespace | 840 } // namespace |
108 | |
109 } // namespace blink | 841 } // namespace blink |
OLD | NEW |