OLD | NEW |
1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "modules/document_metadata/CopylessPasteExtractor.h" | |
6 | 5 |
7 #include <memory> | 6 #include <memory> |
| 7 #include <string> |
| 8 #include <utility> |
8 #include "core/dom/Document.h" | 9 #include "core/dom/Document.h" |
9 #include "core/dom/Element.h" | 10 #include "core/dom/Element.h" |
10 #include "core/testing/DummyPageHolder.h" | 11 #include "core/testing/DummyPageHolder.h" |
| 12 #include "modules/document_metadata/CopylessPasteExtractor.h" |
| 13 #include "platform/json/JSONValues.h" |
| 14 #include "public/platform/modules/document_metadata/copyless_paste.mojom-blink.h" |
11 #include "testing/gtest/include/gtest/gtest.h" | 15 #include "testing/gtest/include/gtest/gtest.h" |
12 #include "wtf/text/StringBuilder.h" | 16 #include "wtf/text/StringBuilder.h" |
13 | 17 |
14 namespace blink { | 18 namespace blink { |
15 | 19 |
16 namespace { | 20 namespace { |
17 | 21 |
| 22 using mojom::document_metadata::blink::Entity; |
| 23 using mojom::document_metadata::blink::EntityPtr; |
| 24 using mojom::document_metadata::blink::Property; |
| 25 using mojom::document_metadata::blink::PropertyPtr; |
| 26 using mojom::document_metadata::blink::Values; |
| 27 using mojom::document_metadata::blink::ValuesPtr; |
| 28 using mojom::document_metadata::blink::WebPage; |
| 29 using mojom::document_metadata::blink::WebPagePtr; |
| 30 |
18 class CopylessPasteExtractorTest : public ::testing::Test { | 31 class CopylessPasteExtractorTest : public ::testing::Test { |
19 public: | 32 public: |
20 CopylessPasteExtractorTest() | 33 CopylessPasteExtractorTest() {} |
21 : content_( | |
22 "\n" | |
23 "\n" | |
24 "{\"@type\": \"NewsArticle\"," | |
25 "\"headline\": \"Special characters for ya >_<;\"\n" | |
26 "}\n" | |
27 "\n") {} | |
28 | 34 |
29 protected: | 35 protected: |
30 void SetUp() override; | 36 void SetUp() override; |
31 | 37 |
32 void TearDown() override { ThreadState::Current()->CollectAllGarbage(); } | 38 void TearDown() override { ThreadState::Current()->CollectAllGarbage(); } |
33 | 39 |
34 Document& GetDocument() const { return dummy_page_holder_->GetDocument(); } | 40 Document& GetDocument() const { return dummy_page_holder_->GetDocument(); } |
35 | 41 |
36 String Extract() { return CopylessPasteExtractor::Extract(GetDocument()); } | 42 WebPagePtr Extract() { |
| 43 return CopylessPasteExtractor::extract(GetDocument()); |
| 44 } |
37 | 45 |
38 void SetHtmlInnerHTML(const String&); | 46 void SetHTMLInnerHTML(const String&); |
39 | 47 |
40 String content_; | 48 void SetURL(const String&); |
| 49 |
| 50 void SetTitle(const String&); |
| 51 |
| 52 PropertyPtr createStringProperty(const String& name, const String& value); |
| 53 |
| 54 PropertyPtr createBooleanProperty(const String& name, const bool& value); |
| 55 |
| 56 PropertyPtr createLongProperty(const String& name, const int64_t& value); |
| 57 |
| 58 PropertyPtr createEntityProperty(const String& name, EntityPtr value); |
| 59 |
| 60 WebPagePtr createWebPage(const String& url, const String& title); |
41 | 61 |
42 private: | 62 private: |
43 std::unique_ptr<DummyPageHolder> dummy_page_holder_; | 63 std::unique_ptr<DummyPageHolder> dummy_page_holder_; |
44 }; | 64 }; |
45 | 65 |
46 void CopylessPasteExtractorTest::SetUp() { | 66 void CopylessPasteExtractorTest::SetUp() { |
47 dummy_page_holder_ = DummyPageHolder::Create(IntSize(800, 600)); | 67 dummy_page_holder_ = DummyPageHolder::Create(IntSize(800, 600)); |
48 } | 68 } |
49 | 69 |
50 void CopylessPasteExtractorTest::SetHtmlInnerHTML(const String& html_content) { | 70 void CopylessPasteExtractorTest::SetHTMLInnerHTML(const String& html_content) { |
51 GetDocument().documentElement()->setInnerHTML((html_content)); | 71 GetDocument().documentElement()->setInnerHTML((html_content)); |
52 } | 72 } |
53 | 73 |
| 74 void CopylessPasteExtractorTest::SetURL(const String& url) { |
| 75 GetDocument().SetURL(blink::KURL(blink::kParsedURLString, url)); |
| 76 } |
| 77 |
| 78 void CopylessPasteExtractorTest::SetTitle(const String& title) { |
| 79 GetDocument().setTitle(title); |
| 80 } |
| 81 |
| 82 PropertyPtr CopylessPasteExtractorTest::createStringProperty( |
| 83 const String& name, |
| 84 const String& value) { |
| 85 PropertyPtr property = Property::New(); |
| 86 property->name = name; |
| 87 property->values = Values::New(); |
| 88 property->values->set_string_values({value}); |
| 89 return property; |
| 90 } |
| 91 |
| 92 PropertyPtr CopylessPasteExtractorTest::createBooleanProperty( |
| 93 const String& name, |
| 94 const bool& value) { |
| 95 PropertyPtr property = Property::New(); |
| 96 property->name = name; |
| 97 property->values = Values::New(); |
| 98 property->values->set_bool_values({value}); |
| 99 return property; |
| 100 } |
| 101 |
| 102 PropertyPtr CopylessPasteExtractorTest::createLongProperty( |
| 103 const String& name, |
| 104 const int64_t& value) { |
| 105 PropertyPtr property = Property::New(); |
| 106 property->name = name; |
| 107 property->values = Values::New(); |
| 108 property->values->set_long_values({value}); |
| 109 return property; |
| 110 } |
| 111 |
| 112 PropertyPtr CopylessPasteExtractorTest::createEntityProperty(const String& name, |
| 113 EntityPtr value) { |
| 114 PropertyPtr property = Property::New(); |
| 115 property->name = name; |
| 116 property->values = Values::New(); |
| 117 property->values->set_entity_values(Vector<EntityPtr>()); |
| 118 property->values->get_entity_values().push_back(std::move(value)); |
| 119 return property; |
| 120 } |
| 121 |
| 122 WebPagePtr CopylessPasteExtractorTest::createWebPage(const String& url, |
| 123 const String& title) { |
| 124 WebPagePtr page = WebPage::New(); |
| 125 page->url = blink::KURL(blink::kParsedURLString, url); |
| 126 page->title = title; |
| 127 return page; |
| 128 } |
| 129 |
54 TEST_F(CopylessPasteExtractorTest, empty) { | 130 TEST_F(CopylessPasteExtractorTest, empty) { |
55 String extracted = Extract(); | 131 ASSERT_TRUE(Extract().is_null()); |
56 String expected = "[]"; | |
57 EXPECT_EQ(expected, extracted); | |
58 } | 132 } |
59 | 133 |
60 TEST_F(CopylessPasteExtractorTest, basic) { | 134 TEST_F(CopylessPasteExtractorTest, basic) { |
61 SetHtmlInnerHTML( | 135 SetHTMLInnerHTML( |
62 "<body>" | 136 "<body>" |
63 "<script type=\"application/ld+json\">" + | 137 "<script type=\"application/ld+json\">" |
64 content_ + | 138 "\n" |
65 "</script>" | 139 "\n" |
66 "</body>"); | 140 "{\"@type\": \"Restaurant\"," |
67 | 141 "\"name\": \"Special characters for ya >_<;\"" |
68 String extracted = Extract(); | 142 "}\n" |
69 String expected = "[" + content_ + "]"; | 143 "\n" |
| 144 "</script>" |
| 145 "</body>"); |
| 146 SetURL("http://www.test.com/"); |
| 147 SetTitle("My neat website about cool stuff"); |
| 148 |
| 149 WebPagePtr extracted = Extract(); |
| 150 ASSERT_FALSE(extracted.is_null()); |
| 151 |
| 152 WebPagePtr expected = |
| 153 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 154 |
| 155 EntityPtr restaurant = Entity::New(); |
| 156 restaurant->type = "Restaurant"; |
| 157 restaurant->properties.push_back( |
| 158 createStringProperty("name", "Special characters for ya >_<;")); |
| 159 |
| 160 expected->entities.push_back(std::move(restaurant)); |
70 EXPECT_EQ(expected, extracted); | 161 EXPECT_EQ(expected, extracted); |
71 } | 162 } |
72 | 163 |
73 TEST_F(CopylessPasteExtractorTest, header) { | 164 TEST_F(CopylessPasteExtractorTest, header) { |
74 SetHtmlInnerHTML( | 165 SetHTMLInnerHTML( |
75 "<head>" | 166 "<head>" |
76 "<script type=\"application/ld+json\">" + | 167 "<script type=\"application/ld+json\">" |
77 content_ + | 168 "\n" |
| 169 "\n" |
| 170 "{\"@type\": \"Restaurant\"," |
| 171 "\"name\": \"Special characters for ya >_<;\"" |
| 172 "}\n" |
| 173 "\n" |
78 "</script>" | 174 "</script>" |
79 "</head>"); | 175 "</head>"); |
80 | 176 |
81 String extracted = Extract(); | 177 SetURL("http://www.test.com/"); |
82 String expected = "[" + content_ + "]"; | 178 SetTitle("My neat website about cool stuff"); |
| 179 |
| 180 WebPagePtr extracted = Extract(); |
| 181 ASSERT_FALSE(extracted.is_null()); |
| 182 |
| 183 WebPagePtr expected = |
| 184 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 185 |
| 186 EntityPtr restaurant = Entity::New(); |
| 187 restaurant->type = "Restaurant"; |
| 188 restaurant->properties.push_back( |
| 189 createStringProperty("name", "Special characters for ya >_<;")); |
| 190 |
| 191 expected->entities.push_back(std::move(restaurant)); |
| 192 EXPECT_EQ(expected, extracted); |
| 193 } |
| 194 |
| 195 TEST_F(CopylessPasteExtractorTest, booleanValue) { |
| 196 SetHTMLInnerHTML( |
| 197 "<body>" |
| 198 "<script type=\"application/ld+json\">" |
| 199 "\n" |
| 200 "\n" |
| 201 "{\"@type\": \"Restaurant\"," |
| 202 "\"open\": true" |
| 203 "}\n" |
| 204 "\n" |
| 205 "</script>" |
| 206 "</body>"); |
| 207 SetURL("http://www.test.com/"); |
| 208 SetTitle("My neat website about cool stuff"); |
| 209 |
| 210 WebPagePtr extracted = Extract(); |
| 211 ASSERT_FALSE(extracted.is_null()); |
| 212 |
| 213 WebPagePtr expected = |
| 214 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 215 |
| 216 EntityPtr restaurant = Entity::New(); |
| 217 restaurant->type = "Restaurant"; |
| 218 restaurant->properties.push_back(createBooleanProperty("open", true)); |
| 219 |
| 220 expected->entities.push_back(std::move(restaurant)); |
| 221 EXPECT_EQ(expected, extracted); |
| 222 } |
| 223 |
| 224 TEST_F(CopylessPasteExtractorTest, longValue) { |
| 225 SetHTMLInnerHTML( |
| 226 "<body>" |
| 227 "<script type=\"application/ld+json\">" |
| 228 "\n" |
| 229 "\n" |
| 230 "{\"@type\": \"Restaurant\"," |
| 231 "\"long\": 1" |
| 232 "}\n" |
| 233 "\n" |
| 234 "</script>" |
| 235 "</body>"); |
| 236 SetURL("http://www.test.com/"); |
| 237 SetTitle("My neat website about cool stuff"); |
| 238 |
| 239 WebPagePtr extracted = Extract(); |
| 240 ASSERT_FALSE(extracted.is_null()); |
| 241 |
| 242 WebPagePtr expected = |
| 243 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 244 |
| 245 EntityPtr restaurant = Entity::New(); |
| 246 restaurant->type = "Restaurant"; |
| 247 restaurant->properties.push_back(createLongProperty("long", 1ll)); |
| 248 |
| 249 expected->entities.push_back(std::move(restaurant)); |
| 250 EXPECT_EQ(expected, extracted); |
| 251 } |
| 252 |
| 253 TEST_F(CopylessPasteExtractorTest, doubleValue) { |
| 254 SetHTMLInnerHTML( |
| 255 "<body>" |
| 256 "<script type=\"application/ld+json\">" |
| 257 "\n" |
| 258 "\n" |
| 259 "{\"@type\": \"Restaurant\"," |
| 260 "\"double\": 1.5" |
| 261 "}\n" |
| 262 "\n" |
| 263 "</script>" |
| 264 "</body>"); |
| 265 SetURL("http://www.test.com/"); |
| 266 SetTitle("My neat website about cool stuff"); |
| 267 |
| 268 WebPagePtr extracted = Extract(); |
| 269 ASSERT_FALSE(extracted.is_null()); |
| 270 |
| 271 WebPagePtr expected = |
| 272 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 273 |
| 274 EntityPtr restaurant = Entity::New(); |
| 275 restaurant->type = "Restaurant"; |
| 276 restaurant->properties.push_back(createStringProperty("double", "1.5")); |
| 277 |
| 278 expected->entities.push_back(std::move(restaurant)); |
83 EXPECT_EQ(expected, extracted); | 279 EXPECT_EQ(expected, extracted); |
84 } | 280 } |
85 | 281 |
86 TEST_F(CopylessPasteExtractorTest, multiple) { | 282 TEST_F(CopylessPasteExtractorTest, multiple) { |
87 SetHtmlInnerHTML( | 283 SetHTMLInnerHTML( |
88 "<head>" | 284 "<head>" |
89 "<script type=\"application/ld+json\">" + | 285 "<script type=\"application/ld+json\">" |
90 content_ + | 286 "\n" |
| 287 "\n" |
| 288 "{\"@type\": \"Restaurant\"," |
| 289 "\"name\": \"Special characters for ya >_<;\"" |
| 290 "}\n" |
| 291 "\n" |
91 "</script>" | 292 "</script>" |
92 "</head>" | 293 "</head>" |
93 "<body>" | 294 "<body>" |
94 "<script type=\"application/ld+json\">" + | 295 "<script type=\"application/ld+json\">" |
95 content_ + | 296 "\n" |
96 "</script>" | 297 "\n" |
97 "<script type=\"application/ld+json\">" + | 298 "{\"@type\": \"Restaurant\"," |
98 content_ + | 299 "\"name\": \"Special characters for ya >_<;\"" |
99 "</script>" | 300 "}\n" |
100 "</body>"); | 301 "\n" |
101 | 302 "</script>" |
102 String extracted = Extract(); | 303 "<script type=\"application/ld+json\">" |
103 String expected = "[" + content_ + "," + content_ + "," + content_ + "]"; | 304 "\n" |
| 305 "\n" |
| 306 "{\"@type\": \"Restaurant\"," |
| 307 "\"name\": \"Special characters for ya >_<;\"" |
| 308 "}\n" |
| 309 "\n" |
| 310 "</script>" |
| 311 "</body>"); |
| 312 |
| 313 SetURL("http://www.test.com/"); |
| 314 SetTitle("My neat website about cool stuff"); |
| 315 |
| 316 WebPagePtr extracted = Extract(); |
| 317 ASSERT_FALSE(extracted.is_null()); |
| 318 |
| 319 WebPagePtr expected = |
| 320 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 321 |
| 322 for (int i = 0; i < 3; ++i) { |
| 323 EntityPtr restaurant = Entity::New(); |
| 324 restaurant->type = "Restaurant"; |
| 325 restaurant->properties.push_back( |
| 326 createStringProperty("name", "Special characters for ya >_<;")); |
| 327 |
| 328 expected->entities.push_back(std::move(restaurant)); |
| 329 } |
| 330 EXPECT_EQ(expected, extracted); |
| 331 } |
| 332 |
| 333 TEST_F(CopylessPasteExtractorTest, nested) { |
| 334 SetHTMLInnerHTML( |
| 335 "<body>" |
| 336 "<script type=\"application/ld+json\">" |
| 337 "\n" |
| 338 "\n" |
| 339 "{\"@type\": \"Restaurant\"," |
| 340 "\"name\": \"Ye ol greasy diner\"," |
| 341 "\"address\": {" |
| 342 "\n" |
| 343 " \"streetAddress\": \"123 Big Oak Road\"," |
| 344 " \"addressLocality\": \"San Francisco\"" |
| 345 " }\n" |
| 346 "}\n" |
| 347 "\n" |
| 348 "</script>" |
| 349 "</body>"); |
| 350 SetURL("http://www.test.com/"); |
| 351 SetTitle("My neat website about cool stuff"); |
| 352 |
| 353 WebPagePtr extracted = Extract(); |
| 354 ASSERT_FALSE(extracted.is_null()); |
| 355 |
| 356 WebPagePtr expected = |
| 357 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 358 |
| 359 EntityPtr restaurant = Entity::New(); |
| 360 restaurant->type = "Restaurant"; |
| 361 restaurant->properties.push_back( |
| 362 createStringProperty("name", "Ye ol greasy diner")); |
| 363 |
| 364 EntityPtr address = Entity::New(); |
| 365 address->type = "Thing"; |
| 366 address->properties.push_back( |
| 367 createStringProperty("streetAddress", "123 Big Oak Road")); |
| 368 address->properties.push_back( |
| 369 createStringProperty("addressLocality", "San Francisco")); |
| 370 |
| 371 restaurant->properties.push_back( |
| 372 createEntityProperty("address", std::move(address))); |
| 373 |
| 374 expected->entities.push_back(std::move(restaurant)); |
| 375 EXPECT_EQ(expected, extracted); |
| 376 } |
| 377 |
| 378 TEST_F(CopylessPasteExtractorTest, repeated) { |
| 379 SetHTMLInnerHTML( |
| 380 "<body>" |
| 381 "<script type=\"application/ld+json\">" |
| 382 "\n" |
| 383 "\n" |
| 384 "{\"@type\": \"Restaurant\"," |
| 385 "\"name\": [ \"First name\", \"Second name\" ]" |
| 386 "}\n" |
| 387 "\n" |
| 388 "</script>" |
| 389 "</body>"); |
| 390 SetURL("http://www.test.com/"); |
| 391 SetTitle("My neat website about cool stuff"); |
| 392 |
| 393 WebPagePtr extracted = Extract(); |
| 394 ASSERT_FALSE(extracted.is_null()); |
| 395 |
| 396 WebPagePtr expected = |
| 397 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 398 |
| 399 EntityPtr restaurant = Entity::New(); |
| 400 restaurant->type = "Restaurant"; |
| 401 |
| 402 PropertyPtr name = Property::New(); |
| 403 name->name = "name"; |
| 404 name->values = Values::New(); |
| 405 Vector<String> nameValues; |
| 406 nameValues.push_back("First name"); |
| 407 nameValues.push_back("Second name"); |
| 408 name->values->set_string_values(nameValues); |
| 409 |
| 410 restaurant->properties.push_back(std::move(name)); |
| 411 |
| 412 expected->entities.push_back(std::move(restaurant)); |
| 413 |
| 414 EXPECT_EQ(expected, extracted); |
| 415 } |
| 416 |
| 417 TEST_F(CopylessPasteExtractorTest, repeatedObject) { |
| 418 SetHTMLInnerHTML( |
| 419 "<body>" |
| 420 "<script type=\"application/ld+json\">" |
| 421 "\n" |
| 422 "\n" |
| 423 "{\"@type\": \"Restaurant\"," |
| 424 "\"name\": \"Ye ol greasy diner\"," |
| 425 "\"address\": [" |
| 426 "\n" |
| 427 " {" |
| 428 " \"streetAddress\": \"123 Big Oak Road\"," |
| 429 " \"addressLocality\": \"San Francisco\"" |
| 430 " },\n" |
| 431 " {" |
| 432 " \"streetAddress\": \"123 Big Oak Road\"," |
| 433 " \"addressLocality\": \"San Francisco\"" |
| 434 " }\n" |
| 435 "]\n" |
| 436 "}\n" |
| 437 "\n" |
| 438 "</script>" |
| 439 "</body>"); |
| 440 SetURL("http://www.test.com/"); |
| 441 SetTitle("My neat website about cool stuff"); |
| 442 |
| 443 WebPagePtr extracted = Extract(); |
| 444 ASSERT_FALSE(extracted.is_null()); |
| 445 |
| 446 WebPagePtr expected = |
| 447 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 448 |
| 449 EntityPtr restaurant = Entity::New(); |
| 450 restaurant->type = "Restaurant"; |
| 451 restaurant->properties.push_back( |
| 452 createStringProperty("name", "Ye ol greasy diner")); |
| 453 |
| 454 PropertyPtr addressProperty = Property::New(); |
| 455 addressProperty->name = "address"; |
| 456 addressProperty->values = Values::New(); |
| 457 addressProperty->values->set_entity_values(Vector<EntityPtr>()); |
| 458 for (int i = 0; i < 2; ++i) { |
| 459 EntityPtr address = Entity::New(); |
| 460 address->type = "Thing"; |
| 461 address->properties.push_back( |
| 462 createStringProperty("streetAddress", "123 Big Oak Road")); |
| 463 address->properties.push_back( |
| 464 createStringProperty("addressLocality", "San Francisco")); |
| 465 addressProperty->values->get_entity_values().push_back(std::move(address)); |
| 466 } |
| 467 restaurant->properties.push_back(std::move(addressProperty)); |
| 468 |
| 469 expected->entities.push_back(std::move(restaurant)); |
| 470 EXPECT_EQ(expected, extracted); |
| 471 } |
| 472 |
| 473 TEST_F(CopylessPasteExtractorTest, truncateLongString) { |
| 474 String maxLengthString; |
| 475 for (int i = 0; i < 200; ++i) { |
| 476 maxLengthString.Append("a"); |
| 477 } |
| 478 String tooLongString(maxLengthString); |
| 479 tooLongString.Append("a"); |
| 480 SetHTMLInnerHTML( |
| 481 "<body>" |
| 482 "<script type=\"application/ld+json\">" |
| 483 "\n" |
| 484 "\n" |
| 485 "{\"@type\": \"Restaurant\"," |
| 486 "\"name\": \"" + |
| 487 tooLongString + |
| 488 "\"" |
| 489 "}\n" |
| 490 "\n" |
| 491 "</script>" |
| 492 "</body>"); |
| 493 SetURL("http://www.test.com/"); |
| 494 SetTitle("My neat website about cool stuff"); |
| 495 |
| 496 WebPagePtr extracted = Extract(); |
| 497 ASSERT_FALSE(extracted.is_null()); |
| 498 |
| 499 WebPagePtr expected = |
| 500 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 501 |
| 502 EntityPtr restaurant = Entity::New(); |
| 503 restaurant->type = "Restaurant"; |
| 504 restaurant->properties.push_back( |
| 505 createStringProperty("name", maxLengthString)); |
| 506 |
| 507 expected->entities.push_back(std::move(restaurant)); |
| 508 EXPECT_EQ(expected, extracted); |
| 509 } |
| 510 |
| 511 TEST_F(CopylessPasteExtractorTest, enforceTypeExists) { |
| 512 SetHTMLInnerHTML( |
| 513 "<body>" |
| 514 "<script type=\"application/ld+json\">" |
| 515 "\n" |
| 516 "\n" |
| 517 "{\"name\": \"Special characters for ya >_<;\"" |
| 518 "}\n" |
| 519 "\n" |
| 520 "</script>" |
| 521 "</body>"); |
| 522 SetURL("http://www.test.com/"); |
| 523 SetTitle("My neat website about cool stuff"); |
| 524 |
| 525 WebPagePtr extracted = Extract(); |
| 526 ASSERT_TRUE(extracted.is_null()); |
| 527 } |
| 528 |
| 529 TEST_F(CopylessPasteExtractorTest, enforceTypeWhitelist) { |
| 530 SetHTMLInnerHTML( |
| 531 "<body>" |
| 532 "<script type=\"application/ld+json\">" |
| 533 "\n" |
| 534 "\n" |
| 535 "{\"@type\": \"UnsupportedType\"," |
| 536 "\"name\": \"Special characters for ya >_<;\"" |
| 537 "}\n" |
| 538 "\n" |
| 539 "</script>" |
| 540 "</body>"); |
| 541 SetURL("http://www.test.com/"); |
| 542 SetTitle("My neat website about cool stuff"); |
| 543 |
| 544 WebPagePtr extracted = Extract(); |
| 545 ASSERT_TRUE(extracted.is_null()); |
| 546 } |
| 547 |
| 548 TEST_F(CopylessPasteExtractorTest, truncateTooManyValuesInField) { |
| 549 String largeRepeatedField = "["; |
| 550 for (int i = 0; i < 101; ++i) { |
| 551 largeRepeatedField.Append("\"a\""); |
| 552 if (i != 100) { |
| 553 largeRepeatedField.Append(", "); |
| 554 } |
| 555 } |
| 556 largeRepeatedField.Append("]"); |
| 557 SetHTMLInnerHTML( |
| 558 "<body>" |
| 559 "<script type=\"application/ld+json\">" |
| 560 "\n" |
| 561 "\n" |
| 562 "{\"@type\": \"Restaurant\"," |
| 563 "\"name\": " + |
| 564 largeRepeatedField + |
| 565 "}\n" |
| 566 "\n" |
| 567 "</script>" |
| 568 "</body>"); |
| 569 SetURL("http://www.test.com/"); |
| 570 SetTitle("My neat website about cool stuff"); |
| 571 |
| 572 WebPagePtr extracted = Extract(); |
| 573 ASSERT_FALSE(extracted.is_null()); |
| 574 |
| 575 WebPagePtr expected = |
| 576 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 577 |
| 578 EntityPtr restaurant = Entity::New(); |
| 579 restaurant->type = "Restaurant"; |
| 580 |
| 581 PropertyPtr name = Property::New(); |
| 582 name->name = "name"; |
| 583 name->values = Values::New(); |
| 584 Vector<String> nameValues; |
| 585 for (int i = 0; i < 100; ++i) { |
| 586 nameValues.push_back("a"); |
| 587 } |
| 588 name->values->set_string_values(nameValues); |
| 589 |
| 590 restaurant->properties.push_back(std::move(name)); |
| 591 |
| 592 expected->entities.push_back(std::move(restaurant)); |
| 593 |
| 594 EXPECT_EQ(expected, extracted); |
| 595 } |
| 596 |
| 597 TEST_F(CopylessPasteExtractorTest, truncateTooManyFields) { |
| 598 String tooManyFields; |
| 599 for (int i = 0; i < 20; ++i) { |
| 600 tooManyFields.Append(String::Format("\"%d\": \"a\"", i)); |
| 601 if (i != 19) { |
| 602 tooManyFields.Append(",\n"); |
| 603 } |
| 604 } |
| 605 SetHTMLInnerHTML( |
| 606 "<body>" |
| 607 "<script type=\"application/ld+json\">" |
| 608 "\n" |
| 609 "\n" |
| 610 "{\"@type\": \"Restaurant\"," + |
| 611 tooManyFields + |
| 612 "}\n" |
| 613 "\n" |
| 614 "</script>" |
| 615 "</body>"); |
| 616 SetURL("http://www.test.com/"); |
| 617 SetTitle("My neat website about cool stuff"); |
| 618 |
| 619 WebPagePtr extracted = Extract(); |
| 620 ASSERT_FALSE(extracted.is_null()); |
| 621 |
| 622 WebPagePtr expected = |
| 623 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 624 |
| 625 EntityPtr restaurant = Entity::New(); |
| 626 restaurant->type = "Restaurant"; |
| 627 |
| 628 for (int i = 0; i < 19; ++i) { |
| 629 restaurant->properties.push_back( |
| 630 createStringProperty(String::Number(i), "a")); |
| 631 } |
| 632 |
| 633 expected->entities.push_back(std::move(restaurant)); |
| 634 EXPECT_EQ(expected, extracted); |
| 635 } |
| 636 |
| 637 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithEmptyArray) { |
| 638 SetHTMLInnerHTML( |
| 639 "<body>" |
| 640 "<script type=\"application/ld+json\">" |
| 641 "\n" |
| 642 "\n" |
| 643 "{\"@type\": \"Restaurant\"," |
| 644 "\"name\": []" |
| 645 "}\n" |
| 646 "\n" |
| 647 "</script>" |
| 648 "</body>"); |
| 649 SetURL("http://www.test.com/"); |
| 650 SetTitle("My neat website about cool stuff"); |
| 651 |
| 652 WebPagePtr extracted = Extract(); |
| 653 ASSERT_FALSE(extracted.is_null()); |
| 654 |
| 655 WebPagePtr expected = |
| 656 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 657 |
| 658 EntityPtr restaurant = Entity::New(); |
| 659 restaurant->type = "Restaurant"; |
| 660 |
| 661 expected->entities.push_back(std::move(restaurant)); |
| 662 |
| 663 EXPECT_EQ(expected, extracted); |
| 664 } |
| 665 |
| 666 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithMixedTypes) { |
| 667 SetHTMLInnerHTML( |
| 668 "<body>" |
| 669 "<script type=\"application/ld+json\">" |
| 670 "\n" |
| 671 "\n" |
| 672 "{\"@type\": \"Restaurant\"," |
| 673 "\"name\": [ \"Name\", 1 ]" |
| 674 "}\n" |
| 675 "\n" |
| 676 "</script>" |
| 677 "</body>"); |
| 678 SetURL("http://www.test.com/"); |
| 679 SetTitle("My neat website about cool stuff"); |
| 680 |
| 681 WebPagePtr extracted = Extract(); |
| 682 ASSERT_FALSE(extracted.is_null()); |
| 683 |
| 684 WebPagePtr expected = |
| 685 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 686 |
| 687 EntityPtr restaurant = Entity::New(); |
| 688 restaurant->type = "Restaurant"; |
| 689 |
| 690 expected->entities.push_back(std::move(restaurant)); |
| 691 |
| 692 EXPECT_EQ(expected, extracted); |
| 693 } |
| 694 |
| 695 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithNestedArray) { |
| 696 SetHTMLInnerHTML( |
| 697 "<body>" |
| 698 "<script type=\"application/ld+json\">" |
| 699 "\n" |
| 700 "\n" |
| 701 "{\"@type\": \"Restaurant\"," |
| 702 "\"name\": [ [ \"Name\" ] ]" |
| 703 "}\n" |
| 704 "\n" |
| 705 "</script>" |
| 706 "</body>"); |
| 707 SetURL("http://www.test.com/"); |
| 708 SetTitle("My neat website about cool stuff"); |
| 709 |
| 710 WebPagePtr extracted = Extract(); |
| 711 ASSERT_FALSE(extracted.is_null()); |
| 712 |
| 713 WebPagePtr expected = |
| 714 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 715 |
| 716 EntityPtr restaurant = Entity::New(); |
| 717 restaurant->type = "Restaurant"; |
| 718 |
| 719 expected->entities.push_back(std::move(restaurant)); |
| 720 |
| 721 EXPECT_EQ(expected, extracted); |
| 722 } |
| 723 |
| 724 TEST_F(CopylessPasteExtractorTest, enforceMaxNestingDepth) { |
| 725 SetHTMLInnerHTML( |
| 726 "<body>" |
| 727 "<script type=\"application/ld+json\">" |
| 728 "\n" |
| 729 "\n" |
| 730 "{\"@type\": \"Restaurant\"," |
| 731 "\"name\": \"Ye ol greasy diner\"," |
| 732 "\"1\": {" |
| 733 " \"2\": {" |
| 734 " \"3\": {" |
| 735 " \"4\": {" |
| 736 " \"5\": 6" |
| 737 " }\n" |
| 738 " }\n" |
| 739 " }\n" |
| 740 "}\n" |
| 741 "}\n" |
| 742 "\n" |
| 743 "</script>" |
| 744 "</body>"); |
| 745 SetURL("http://www.test.com/"); |
| 746 SetTitle("My neat website about cool stuff"); |
| 747 |
| 748 WebPagePtr extracted = Extract(); |
| 749 ASSERT_FALSE(extracted.is_null()); |
| 750 |
| 751 WebPagePtr expected = |
| 752 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 753 |
| 754 EntityPtr restaurant = Entity::New(); |
| 755 restaurant->type = "Restaurant"; |
| 756 restaurant->properties.push_back( |
| 757 createStringProperty("name", "Ye ol greasy diner")); |
| 758 |
| 759 EntityPtr entity1 = Entity::New(); |
| 760 entity1->type = "Thing"; |
| 761 |
| 762 EntityPtr entity2 = Entity::New(); |
| 763 entity2->type = "Thing"; |
| 764 |
| 765 EntityPtr entity3 = Entity::New(); |
| 766 entity3->type = "Thing"; |
| 767 |
| 768 entity2->properties.push_back(createEntityProperty("3", std::move(entity3))); |
| 769 |
| 770 entity1->properties.push_back(createEntityProperty("2", std::move(entity2))); |
| 771 |
| 772 restaurant->properties.push_back( |
| 773 createEntityProperty("1", std::move(entity1))); |
| 774 |
| 775 expected->entities.push_back(std::move(restaurant)); |
| 776 EXPECT_EQ(expected, extracted); |
| 777 } |
| 778 |
| 779 TEST_F(CopylessPasteExtractorTest, maxNestingDepthWithTerminalProperty) { |
| 780 SetHTMLInnerHTML( |
| 781 "<body>" |
| 782 "<script type=\"application/ld+json\">" |
| 783 "\n" |
| 784 "\n" |
| 785 "{\"@type\": \"Restaurant\"," |
| 786 "\"name\": \"Ye ol greasy diner\"," |
| 787 "\"1\": {" |
| 788 " \"2\": {" |
| 789 " \"3\": {" |
| 790 " \"4\": 5" |
| 791 " }\n" |
| 792 " }\n" |
| 793 "}\n" |
| 794 "}\n" |
| 795 "\n" |
| 796 "</script>" |
| 797 "</body>"); |
| 798 SetURL("http://www.test.com/"); |
| 799 SetTitle("My neat website about cool stuff"); |
| 800 |
| 801 WebPagePtr extracted = Extract(); |
| 802 ASSERT_FALSE(extracted.is_null()); |
| 803 |
| 804 WebPagePtr expected = |
| 805 createWebPage("http://www.test.com/", "My neat website about cool stuff"); |
| 806 |
| 807 EntityPtr restaurant = Entity::New(); |
| 808 restaurant->type = "Restaurant"; |
| 809 restaurant->properties.push_back( |
| 810 createStringProperty("name", "Ye ol greasy diner")); |
| 811 |
| 812 EntityPtr entity1 = Entity::New(); |
| 813 entity1->type = "Thing"; |
| 814 |
| 815 EntityPtr entity2 = Entity::New(); |
| 816 entity2->type = "Thing"; |
| 817 |
| 818 EntityPtr entity3 = Entity::New(); |
| 819 entity3->type = "Thing"; |
| 820 |
| 821 entity3->properties.push_back(createLongProperty("4", 5)); |
| 822 |
| 823 entity2->properties.push_back(createEntityProperty("3", std::move(entity3))); |
| 824 |
| 825 entity1->properties.push_back(createEntityProperty("2", std::move(entity2))); |
| 826 |
| 827 restaurant->properties.push_back( |
| 828 createEntityProperty("1", std::move(entity1))); |
| 829 |
| 830 expected->entities.push_back(std::move(restaurant)); |
104 EXPECT_EQ(expected, extracted); | 831 EXPECT_EQ(expected, extracted); |
105 } | 832 } |
106 | 833 |
107 } // namespace | 834 } // namespace |
108 | |
109 } // namespace blink | 835 } // namespace blink |
OLD | NEW |