Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "modules/document_metadata/CopylessPasteExtractor.h" | |
| 6 | 5 |
| 7 #include <memory> | 6 #include <memory> |
| 7 #include <string> | |
| 8 #include <utility> | |
| 8 #include "core/dom/Document.h" | 9 #include "core/dom/Document.h" |
| 9 #include "core/dom/Element.h" | 10 #include "core/dom/Element.h" |
| 10 #include "core/testing/DummyPageHolder.h" | 11 #include "core/testing/DummyPageHolder.h" |
| 12 #include "modules/document_metadata/CopylessPasteExtractor.h" | |
| 13 #include "platform/json/JSONValues.h" | |
| 14 #include "public/platform/modules/document_metadata/copyless_paste.mojom-blink.h" | |
| 11 #include "testing/gtest/include/gtest/gtest.h" | 15 #include "testing/gtest/include/gtest/gtest.h" |
| 12 #include "wtf/text/StringBuilder.h" | 16 #include "wtf/text/StringBuilder.h" |
| 13 | 17 |
| 14 namespace blink { | 18 namespace blink { |
| 15 | 19 |
| 16 namespace { | 20 namespace { |
| 17 | 21 |
| 22 using mojom::document_metadata::blink::Entity; | |
| 23 using mojom::document_metadata::blink::EntityPtr; | |
| 24 using mojom::document_metadata::blink::Property; | |
| 25 using mojom::document_metadata::blink::PropertyPtr; | |
| 26 using mojom::document_metadata::blink::Values; | |
| 27 using mojom::document_metadata::blink::ValuesPtr; | |
| 28 using mojom::document_metadata::blink::WebPage; | |
| 29 using mojom::document_metadata::blink::WebPagePtr; | |
| 30 | |
| 18 class CopylessPasteExtractorTest : public ::testing::Test { | 31 class CopylessPasteExtractorTest : public ::testing::Test { |
| 19 public: | 32 public: |
| 20 CopylessPasteExtractorTest() | 33 CopylessPasteExtractorTest() {} |
| 21 : m_content( | |
| 22 "\n" | |
| 23 "\n" | |
| 24 "{\"@type\": \"NewsArticle\"," | |
| 25 "\"headline\": \"Special characters for ya >_<;\"\n" | |
| 26 "}\n" | |
| 27 "\n") {} | |
| 28 | 34 |
| 29 protected: | 35 protected: |
| 30 void SetUp() override; | 36 void SetUp() override; |
| 31 | 37 |
| 32 void TearDown() override { ThreadState::current()->collectAllGarbage(); } | 38 void TearDown() override { ThreadState::current()->collectAllGarbage(); } |
| 33 | 39 |
| 34 Document& document() const { return m_dummyPageHolder->document(); } | 40 Document& document() const { return m_dummyPageHolder->document(); } |
| 35 | 41 |
| 36 String extract() { return CopylessPasteExtractor::extract(document()); } | 42 WebPagePtr extract() { return CopylessPasteExtractor::extract(document()); } |
| 37 | 43 |
| 38 void setHtmlInnerHTML(const String&); | 44 void setHtmlInnerHTML(const String&); |
| 39 | 45 |
| 40 String m_content; | 46 void setURL(const String&); |
| 47 | |
| 48 void setTitle(const String&); | |
| 49 | |
| 50 PropertyPtr createStringProperty(const String& name, const String& value); | |
| 51 | |
| 52 PropertyPtr createBooleanProperty(const String& name, const bool& value); | |
| 53 | |
| 54 PropertyPtr createLongProperty(const String& name, const int64_t& value); | |
| 55 | |
| 56 PropertyPtr createEntityProperty(const String& name, EntityPtr value); | |
| 57 | |
| 58 WebPagePtr createWebPage(const String& url, const String& title); | |
| 41 | 59 |
| 42 private: | 60 private: |
| 43 std::unique_ptr<DummyPageHolder> m_dummyPageHolder; | 61 std::unique_ptr<DummyPageHolder> m_dummyPageHolder; |
| 44 }; | 62 }; |
| 45 | 63 |
| 46 void CopylessPasteExtractorTest::SetUp() { | 64 void CopylessPasteExtractorTest::SetUp() { |
| 47 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); | 65 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); |
| 48 } | 66 } |
| 49 | 67 |
| 50 void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) { | 68 void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) { |
| 51 document().documentElement()->setInnerHTML((htmlContent)); | 69 document().documentElement()->setInnerHTML((htmlContent)); |
| 52 } | 70 } |
| 53 | 71 |
| 72 void CopylessPasteExtractorTest::setURL(const String& url) { | |
| 73 document().setURL(blink::KURL(blink::ParsedURLString, url)); | |
| 74 } | |
| 75 | |
| 76 void CopylessPasteExtractorTest::setTitle(const String& title) { | |
| 77 document().setTitle(title); | |
| 78 } | |
| 79 | |
| 80 PropertyPtr CopylessPasteExtractorTest::createStringProperty( | |
| 81 const String& name, | |
| 82 const String& value) { | |
| 83 PropertyPtr property = Property::New(); | |
| 84 property->name = name; | |
| 85 property->values = Values::New(); | |
| 86 property->values->set_string_values(Vector<String>(1, value)); | |
|
Review comment — dcheng (2017/04/10 22:57:12): Ditto: consider using initializer list syntax here.
Reply — dproctor (2017/04/11 00:22:01): Done.
| |
| 87 return property; | |
| 88 } | |
| 89 | |
| 90 PropertyPtr CopylessPasteExtractorTest::createBooleanProperty( | |
| 91 const String& name, | |
| 92 const bool& value) { | |
| 93 PropertyPtr property = Property::New(); | |
| 94 property->name = name; | |
| 95 property->values = Values::New(); | |
| 96 property->values->set_bool_values(Vector<bool>(1, value)); | |
| 97 return property; | |
| 98 } | |
| 99 | |
| 100 PropertyPtr CopylessPasteExtractorTest::createLongProperty( | |
| 101 const String& name, | |
| 102 const int64_t& value) { | |
| 103 PropertyPtr property = Property::New(); | |
| 104 property->name = name; | |
| 105 property->values = Values::New(); | |
| 106 property->values->set_long_values(Vector<int64_t>(1, value)); | |
| 107 return property; | |
| 108 } | |
| 109 | |
| 110 PropertyPtr CopylessPasteExtractorTest::createEntityProperty(const String& name, | |
| 111 EntityPtr value) { | |
| 112 PropertyPtr property = Property::New(); | |
| 113 property->name = name; | |
| 114 property->values = Values::New(); | |
| 115 property->values->set_entity_values(Vector<EntityPtr>()); | |
| 116 property->values->get_entity_values().push_back(std::move(value)); | |
| 117 return property; | |
| 118 } | |
| 119 | |
| 120 WebPagePtr CopylessPasteExtractorTest::createWebPage(const String& url, | |
| 121 const String& title) { | |
| 122 WebPagePtr page = WebPage::New(); | |
| 123 page->url = blink::KURL(blink::ParsedURLString, url); | |
| 124 page->title = title; | |
| 125 return page; | |
| 126 } | |
| 127 | |
| 54 TEST_F(CopylessPasteExtractorTest, empty) { | 128 TEST_F(CopylessPasteExtractorTest, empty) { |
| 55 String extracted = extract(); | 129 ASSERT_TRUE(extract().is_null()); |
| 56 String expected = "[]"; | |
| 57 EXPECT_EQ(expected, extracted); | |
| 58 } | 130 } |
| 59 | 131 |
| 60 TEST_F(CopylessPasteExtractorTest, basic) { | 132 TEST_F(CopylessPasteExtractorTest, basic) { |
| 61 setHtmlInnerHTML( | 133 setHtmlInnerHTML( |
| 62 "<body>" | 134 "<body>" |
| 63 "<script type=\"application/ld+json\">" + | 135 "<script type=\"application/ld+json\">" |
| 64 m_content + | 136 "\n" |
| 65 "</script>" | 137 "\n" |
| 66 "</body>"); | 138 "{\"@type\": \"Restaurant\"," |
| 67 | 139 "\"name\": \"Special characters for ya >_<;\"" |
| 68 String extracted = extract(); | 140 "}\n" |
| 69 String expected = "[" + m_content + "]"; | 141 "\n" |
| 142 "</script>" | |
| 143 "</body>"); | |
| 144 setURL("http://www.test.com/"); | |
| 145 setTitle("My neat website about cool stuff"); | |
| 146 | |
| 147 WebPagePtr extracted = extract(); | |
| 148 ASSERT_FALSE(extracted.is_null()); | |
| 149 | |
| 150 WebPagePtr expected = | |
| 151 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 152 | |
| 153 EntityPtr restaurant = Entity::New(); | |
| 154 restaurant->type = "Restaurant"; | |
| 155 restaurant->properties.push_back( | |
| 156 createStringProperty("name", "Special characters for ya >_<;")); | |
| 157 | |
| 158 expected->entities.push_back(std::move(restaurant)); | |
| 70 EXPECT_EQ(expected, extracted); | 159 EXPECT_EQ(expected, extracted); |
| 71 } | 160 } |
| 72 | 161 |
| 73 TEST_F(CopylessPasteExtractorTest, header) { | 162 TEST_F(CopylessPasteExtractorTest, header) { |
| 74 setHtmlInnerHTML( | 163 setHtmlInnerHTML( |
| 75 "<head>" | 164 "<head>" |
| 76 "<script type=\"application/ld+json\">" + | 165 "<script type=\"application/ld+json\">" |
| 77 m_content + | 166 "\n" |
| 167 "\n" | |
| 168 "{\"@type\": \"Restaurant\"," | |
| 169 "\"name\": \"Special characters for ya >_<;\"" | |
| 170 "}\n" | |
| 171 "\n" | |
| 78 "</script>" | 172 "</script>" |
| 79 "</head>"); | 173 "</head>"); |
| 80 | 174 |
| 81 String extracted = extract(); | 175 setURL("http://www.test.com/"); |
| 82 String expected = "[" + m_content + "]"; | 176 setTitle("My neat website about cool stuff"); |
| 177 | |
| 178 WebPagePtr extracted = extract(); | |
| 179 ASSERT_FALSE(extracted.is_null()); | |
| 180 | |
| 181 WebPagePtr expected = | |
| 182 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 183 | |
| 184 EntityPtr restaurant = Entity::New(); | |
| 185 restaurant->type = "Restaurant"; | |
| 186 restaurant->properties.push_back( | |
| 187 createStringProperty("name", "Special characters for ya >_<;")); | |
| 188 | |
| 189 expected->entities.push_back(std::move(restaurant)); | |
| 190 EXPECT_EQ(expected, extracted); | |
| 191 } | |
| 192 | |
| 193 TEST_F(CopylessPasteExtractorTest, booleanValue) { | |
| 194 setHtmlInnerHTML( | |
| 195 "<body>" | |
| 196 "<script type=\"application/ld+json\">" | |
| 197 "\n" | |
| 198 "\n" | |
| 199 "{\"@type\": \"Restaurant\"," | |
| 200 "\"open\": true" | |
| 201 "}\n" | |
| 202 "\n" | |
| 203 "</script>" | |
| 204 "</body>"); | |
| 205 setURL("http://www.test.com/"); | |
| 206 setTitle("My neat website about cool stuff"); | |
| 207 | |
| 208 WebPagePtr extracted = extract(); | |
| 209 ASSERT_FALSE(extracted.is_null()); | |
| 210 | |
| 211 WebPagePtr expected = | |
| 212 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 213 | |
| 214 EntityPtr restaurant = Entity::New(); | |
| 215 restaurant->type = "Restaurant"; | |
| 216 restaurant->properties.push_back(createBooleanProperty("open", true)); | |
| 217 | |
| 218 expected->entities.push_back(std::move(restaurant)); | |
| 219 EXPECT_EQ(expected, extracted); | |
| 220 } | |
| 221 | |
| 222 TEST_F(CopylessPasteExtractorTest, longValue) { | |
| 223 setHtmlInnerHTML( | |
| 224 "<body>" | |
| 225 "<script type=\"application/ld+json\">" | |
| 226 "\n" | |
| 227 "\n" | |
| 228 "{\"@type\": \"Restaurant\"," | |
| 229 "\"long\": 1" | |
| 230 "}\n" | |
| 231 "\n" | |
| 232 "</script>" | |
| 233 "</body>"); | |
| 234 setURL("http://www.test.com/"); | |
| 235 setTitle("My neat website about cool stuff"); | |
| 236 | |
| 237 WebPagePtr extracted = extract(); | |
| 238 ASSERT_FALSE(extracted.is_null()); | |
| 239 | |
| 240 WebPagePtr expected = | |
| 241 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 242 | |
| 243 EntityPtr restaurant = Entity::New(); | |
| 244 restaurant->type = "Restaurant"; | |
| 245 restaurant->properties.push_back(createLongProperty("long", 1ll)); | |
| 246 | |
| 247 expected->entities.push_back(std::move(restaurant)); | |
| 248 EXPECT_EQ(expected, extracted); | |
| 249 } | |
| 250 | |
| 251 TEST_F(CopylessPasteExtractorTest, doubleValue) { | |
| 252 setHtmlInnerHTML( | |
| 253 "<body>" | |
| 254 "<script type=\"application/ld+json\">" | |
| 255 "\n" | |
| 256 "\n" | |
| 257 "{\"@type\": \"Restaurant\"," | |
| 258 "\"double\": 1.5" | |
| 259 "}\n" | |
| 260 "\n" | |
| 261 "</script>" | |
| 262 "</body>"); | |
| 263 setURL("http://www.test.com/"); | |
| 264 setTitle("My neat website about cool stuff"); | |
| 265 | |
| 266 WebPagePtr extracted = extract(); | |
| 267 ASSERT_FALSE(extracted.is_null()); | |
| 268 | |
| 269 WebPagePtr expected = | |
| 270 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 271 | |
| 272 EntityPtr restaurant = Entity::New(); | |
| 273 restaurant->type = "Restaurant"; | |
| 274 restaurant->properties.push_back(createStringProperty("double", "1.5")); | |
| 275 | |
| 276 expected->entities.push_back(std::move(restaurant)); | |
| 83 EXPECT_EQ(expected, extracted); | 277 EXPECT_EQ(expected, extracted); |
| 84 } | 278 } |
| 85 | 279 |
| 86 TEST_F(CopylessPasteExtractorTest, multiple) { | 280 TEST_F(CopylessPasteExtractorTest, multiple) { |
| 87 setHtmlInnerHTML( | 281 setHtmlInnerHTML( |
| 88 "<head>" | 282 "<head>" |
| 89 "<script type=\"application/ld+json\">" + | 283 "<script type=\"application/ld+json\">" |
| 90 m_content + | 284 "\n" |
| 285 "\n" | |
| 286 "{\"@type\": \"Restaurant\"," | |
| 287 "\"name\": \"Special characters for ya >_<;\"" | |
| 288 "}\n" | |
| 289 "\n" | |
| 91 "</script>" | 290 "</script>" |
| 92 "</head>" | 291 "</head>" |
| 93 "<body>" | 292 "<body>" |
| 94 "<script type=\"application/ld+json\">" + | 293 "<script type=\"application/ld+json\">" |
| 95 m_content + | 294 "\n" |
| 96 "</script>" | 295 "\n" |
| 97 "<script type=\"application/ld+json\">" + | 296 "{\"@type\": \"Restaurant\"," |
| 98 m_content + | 297 "\"name\": \"Special characters for ya >_<;\"" |
| 99 "</script>" | 298 "}\n" |
| 100 "</body>"); | 299 "\n" |
| 101 | 300 "</script>" |
| 102 String extracted = extract(); | 301 "<script type=\"application/ld+json\">" |
| 103 String expected = "[" + m_content + "," + m_content + "," + m_content + "]"; | 302 "\n" |
| 303 "\n" | |
| 304 "{\"@type\": \"Restaurant\"," | |
| 305 "\"name\": \"Special characters for ya >_<;\"" | |
| 306 "}\n" | |
| 307 "\n" | |
| 308 "</script>" | |
| 309 "</body>"); | |
| 310 | |
| 311 setURL("http://www.test.com/"); | |
| 312 setTitle("My neat website about cool stuff"); | |
| 313 | |
| 314 WebPagePtr extracted = extract(); | |
| 315 ASSERT_FALSE(extracted.is_null()); | |
| 316 | |
| 317 WebPagePtr expected = | |
| 318 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 319 | |
| 320 for (int i = 0; i < 3; ++i) { | |
| 321 EntityPtr restaurant = Entity::New(); | |
| 322 restaurant->type = "Restaurant"; | |
| 323 restaurant->properties.push_back( | |
| 324 createStringProperty("name", "Special characters for ya >_<;")); | |
| 325 | |
| 326 expected->entities.push_back(std::move(restaurant)); | |
| 327 } | |
| 328 EXPECT_EQ(expected, extracted); | |
| 329 } | |
| 330 | |
| 331 TEST_F(CopylessPasteExtractorTest, nested) { | |
| 332 setHtmlInnerHTML( | |
| 333 "<body>" | |
| 334 "<script type=\"application/ld+json\">" | |
| 335 "\n" | |
| 336 "\n" | |
| 337 "{\"@type\": \"Restaurant\"," | |
| 338 "\"name\": \"Ye ol greasy diner\"," | |
| 339 "\"address\": {" | |
| 340 "\n" | |
| 341 " \"streetAddress\": \"123 Big Oak Road\"," | |
| 342 " \"addressLocality\": \"San Francisco\"" | |
| 343 " }\n" | |
| 344 "}\n" | |
| 345 "\n" | |
| 346 "</script>" | |
| 347 "</body>"); | |
| 348 setURL("http://www.test.com/"); | |
| 349 setTitle("My neat website about cool stuff"); | |
| 350 | |
| 351 WebPagePtr extracted = extract(); | |
| 352 ASSERT_FALSE(extracted.is_null()); | |
| 353 | |
| 354 WebPagePtr expected = | |
| 355 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 356 | |
| 357 EntityPtr restaurant = Entity::New(); | |
| 358 restaurant->type = "Restaurant"; | |
| 359 restaurant->properties.push_back( | |
| 360 createStringProperty("name", "Ye ol greasy diner")); | |
| 361 | |
| 362 EntityPtr address = Entity::New(); | |
| 363 address->type = "Thing"; | |
| 364 address->properties.push_back( | |
| 365 createStringProperty("streetAddress", "123 Big Oak Road")); | |
| 366 address->properties.push_back( | |
| 367 createStringProperty("addressLocality", "San Francisco")); | |
| 368 | |
| 369 restaurant->properties.push_back( | |
| 370 createEntityProperty("address", std::move(address))); | |
| 371 | |
| 372 expected->entities.push_back(std::move(restaurant)); | |
| 373 EXPECT_EQ(expected, extracted); | |
| 374 } | |
| 375 | |
| 376 TEST_F(CopylessPasteExtractorTest, repeated) { | |
| 377 setHtmlInnerHTML( | |
| 378 "<body>" | |
| 379 "<script type=\"application/ld+json\">" | |
| 380 "\n" | |
| 381 "\n" | |
| 382 "{\"@type\": \"Restaurant\"," | |
| 383 "\"name\": [ \"First name\", \"Second name\" ]" | |
| 384 "}\n" | |
| 385 "\n" | |
| 386 "</script>" | |
| 387 "</body>"); | |
| 388 setURL("http://www.test.com/"); | |
| 389 setTitle("My neat website about cool stuff"); | |
| 390 | |
| 391 WebPagePtr extracted = extract(); | |
| 392 ASSERT_FALSE(extracted.is_null()); | |
| 393 | |
| 394 WebPagePtr expected = | |
| 395 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 396 | |
| 397 EntityPtr restaurant = Entity::New(); | |
| 398 restaurant->type = "Restaurant"; | |
| 399 | |
| 400 PropertyPtr name = Property::New(); | |
| 401 name->name = "name"; | |
| 402 name->values = Values::New(); | |
| 403 Vector<String> nameValues; | |
| 404 nameValues.push_back("First name"); | |
| 405 nameValues.push_back("Second name"); | |
| 406 name->values->set_string_values(nameValues); | |
| 407 | |
| 408 restaurant->properties.push_back(std::move(name)); | |
| 409 | |
| 410 expected->entities.push_back(std::move(restaurant)); | |
| 411 | |
| 412 EXPECT_EQ(expected, extracted); | |
| 413 } | |
| 414 | |
| 415 TEST_F(CopylessPasteExtractorTest, repeatedObject) { | |
| 416 setHtmlInnerHTML( | |
| 417 "<body>" | |
| 418 "<script type=\"application/ld+json\">" | |
| 419 "\n" | |
| 420 "\n" | |
| 421 "{\"@type\": \"Restaurant\"," | |
| 422 "\"name\": \"Ye ol greasy diner\"," | |
| 423 "\"address\": [" | |
| 424 "\n" | |
| 425 " {" | |
| 426 " \"streetAddress\": \"123 Big Oak Road\"," | |
| 427 " \"addressLocality\": \"San Francisco\"" | |
| 428 " },\n" | |
| 429 " {" | |
| 430 " \"streetAddress\": \"123 Big Oak Road\"," | |
| 431 " \"addressLocality\": \"San Francisco\"" | |
| 432 " }\n" | |
| 433 "]\n" | |
| 434 "}\n" | |
| 435 "\n" | |
| 436 "</script>" | |
| 437 "</body>"); | |
| 438 setURL("http://www.test.com/"); | |
| 439 setTitle("My neat website about cool stuff"); | |
| 440 | |
| 441 WebPagePtr extracted = extract(); | |
| 442 ASSERT_FALSE(extracted.is_null()); | |
| 443 | |
| 444 WebPagePtr expected = | |
| 445 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 446 | |
| 447 EntityPtr restaurant = Entity::New(); | |
| 448 restaurant->type = "Restaurant"; | |
| 449 restaurant->properties.push_back( | |
| 450 createStringProperty("name", "Ye ol greasy diner")); | |
| 451 | |
| 452 PropertyPtr addressProperty = Property::New(); | |
| 453 addressProperty->name = "address"; | |
| 454 addressProperty->values = Values::New(); | |
| 455 addressProperty->values->set_entity_values(Vector<EntityPtr>()); | |
| 456 for (int i = 0; i < 2; ++i) { | |
| 457 EntityPtr address = Entity::New(); | |
| 458 address->type = "Thing"; | |
| 459 address->properties.push_back( | |
| 460 createStringProperty("streetAddress", "123 Big Oak Road")); | |
| 461 address->properties.push_back( | |
| 462 createStringProperty("addressLocality", "San Francisco")); | |
| 463 addressProperty->values->get_entity_values().push_back(std::move(address)); | |
| 464 } | |
| 465 restaurant->properties.push_back(std::move(addressProperty)); | |
| 466 | |
| 467 expected->entities.push_back(std::move(restaurant)); | |
| 468 EXPECT_EQ(expected, extracted); | |
| 469 } | |
| 470 | |
| 471 TEST_F(CopylessPasteExtractorTest, truncateLongString) { | |
| 472 String maxLengthString; | |
| 473 for (int i = 0; i < 200; ++i) { | |
| 474 maxLengthString.append("a"); | |
| 475 } | |
| 476 String tooLongString(maxLengthString); | |
| 477 tooLongString.append("a"); | |
| 478 setHtmlInnerHTML( | |
| 479 "<body>" | |
| 480 "<script type=\"application/ld+json\">" | |
| 481 "\n" | |
| 482 "\n" | |
| 483 "{\"@type\": \"Restaurant\"," | |
| 484 "\"name\": \"" + | |
| 485 tooLongString + | |
| 486 "\"" | |
| 487 "}\n" | |
| 488 "\n" | |
| 489 "</script>" | |
| 490 "</body>"); | |
| 491 setURL("http://www.test.com/"); | |
| 492 setTitle("My neat website about cool stuff"); | |
| 493 | |
| 494 WebPagePtr extracted = extract(); | |
| 495 ASSERT_FALSE(extracted.is_null()); | |
| 496 | |
| 497 WebPagePtr expected = | |
| 498 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 499 | |
| 500 EntityPtr restaurant = Entity::New(); | |
| 501 restaurant->type = "Restaurant"; | |
| 502 restaurant->properties.push_back( | |
| 503 createStringProperty("name", maxLengthString)); | |
| 504 | |
| 505 expected->entities.push_back(std::move(restaurant)); | |
| 506 EXPECT_EQ(expected, extracted); | |
| 507 } | |
| 508 | |
| 509 TEST_F(CopylessPasteExtractorTest, enforceTypeExists) { | |
| 510 setHtmlInnerHTML( | |
| 511 "<body>" | |
| 512 "<script type=\"application/ld+json\">" | |
| 513 "\n" | |
| 514 "\n" | |
| 515 "{\"name\": \"Special characters for ya >_<;\"" | |
| 516 "}\n" | |
| 517 "\n" | |
| 518 "</script>" | |
| 519 "</body>"); | |
| 520 setURL("http://www.test.com/"); | |
| 521 setTitle("My neat website about cool stuff"); | |
| 522 | |
| 523 WebPagePtr extracted = extract(); | |
| 524 ASSERT_TRUE(extracted.is_null()); | |
| 525 } | |
| 526 | |
| 527 TEST_F(CopylessPasteExtractorTest, enforceTypeWhitelist) { | |
| 528 setHtmlInnerHTML( | |
| 529 "<body>" | |
| 530 "<script type=\"application/ld+json\">" | |
| 531 "\n" | |
| 532 "\n" | |
| 533 "{\"@type\": \"UnsupportedType\"," | |
| 534 "\"name\": \"Special characters for ya >_<;\"" | |
| 535 "}\n" | |
| 536 "\n" | |
| 537 "</script>" | |
| 538 "</body>"); | |
| 539 setURL("http://www.test.com/"); | |
| 540 setTitle("My neat website about cool stuff"); | |
| 541 | |
| 542 WebPagePtr extracted = extract(); | |
| 543 ASSERT_TRUE(extracted.is_null()); | |
| 544 } | |
| 545 | |
| 546 TEST_F(CopylessPasteExtractorTest, truncateTooManyValuesInField) { | |
| 547 String largeRepeatedField = "["; | |
| 548 for (int i = 0; i < 101; ++i) { | |
| 549 largeRepeatedField.append("\"a\""); | |
| 550 if (i != 100) { | |
| 551 largeRepeatedField.append(", "); | |
| 552 } | |
| 553 } | |
| 554 largeRepeatedField.append("]"); | |
| 555 setHtmlInnerHTML( | |
| 556 "<body>" | |
| 557 "<script type=\"application/ld+json\">" | |
| 558 "\n" | |
| 559 "\n" | |
| 560 "{\"@type\": \"Restaurant\"," | |
| 561 "\"name\": " + | |
| 562 largeRepeatedField + | |
| 563 "}\n" | |
| 564 "\n" | |
| 565 "</script>" | |
| 566 "</body>"); | |
| 567 setURL("http://www.test.com/"); | |
| 568 setTitle("My neat website about cool stuff"); | |
| 569 | |
| 570 WebPagePtr extracted = extract(); | |
| 571 ASSERT_FALSE(extracted.is_null()); | |
| 572 | |
| 573 WebPagePtr expected = | |
| 574 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 575 | |
| 576 EntityPtr restaurant = Entity::New(); | |
| 577 restaurant->type = "Restaurant"; | |
| 578 | |
| 579 PropertyPtr name = Property::New(); | |
| 580 name->name = "name"; | |
| 581 name->values = Values::New(); | |
| 582 Vector<String> nameValues; | |
| 583 for (int i = 0; i < 100; ++i) { | |
| 584 nameValues.push_back("a"); | |
| 585 } | |
| 586 name->values->set_string_values(nameValues); | |
| 587 | |
| 588 restaurant->properties.push_back(std::move(name)); | |
| 589 | |
| 590 expected->entities.push_back(std::move(restaurant)); | |
| 591 | |
| 592 EXPECT_EQ(expected, extracted); | |
| 593 } | |
| 594 | |
| 595 TEST_F(CopylessPasteExtractorTest, truncateTooManyFields) { | |
| 596 String tooManyFields; | |
| 597 for (int i = 0; i < 20; ++i) { | |
| 598 tooManyFields.append(String::format("\"%d\": \"a\"", i)); | |
| 599 if (i != 19) { | |
| 600 tooManyFields.append(",\n"); | |
| 601 } | |
| 602 } | |
| 603 setHtmlInnerHTML( | |
| 604 "<body>" | |
| 605 "<script type=\"application/ld+json\">" | |
| 606 "\n" | |
| 607 "\n" | |
| 608 "{\"@type\": \"Restaurant\"," + | |
| 609 tooManyFields + | |
| 610 "}\n" | |
| 611 "\n" | |
| 612 "</script>" | |
| 613 "</body>"); | |
| 614 setURL("http://www.test.com/"); | |
| 615 setTitle("My neat website about cool stuff"); | |
| 616 | |
| 617 WebPagePtr extracted = extract(); | |
| 618 ASSERT_FALSE(extracted.is_null()); | |
| 619 | |
| 620 WebPagePtr expected = | |
| 621 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 622 | |
| 623 EntityPtr restaurant = Entity::New(); | |
| 624 restaurant->type = "Restaurant"; | |
| 625 | |
| 626 for (int i = 0; i < 19; ++i) { | |
| 627 restaurant->properties.push_back( | |
| 628 createStringProperty(String::number(i), "a")); | |
| 629 } | |
| 630 | |
| 631 expected->entities.push_back(std::move(restaurant)); | |
| 632 EXPECT_EQ(expected, extracted); | |
| 633 } | |
| 634 | |
| 635 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithEmptyArray) { | |
| 636 setHtmlInnerHTML( | |
| 637 "<body>" | |
| 638 "<script type=\"application/ld+json\">" | |
| 639 "\n" | |
| 640 "\n" | |
| 641 "{\"@type\": \"Restaurant\"," | |
| 642 "\"name\": []" | |
| 643 "}\n" | |
| 644 "\n" | |
| 645 "</script>" | |
| 646 "</body>"); | |
| 647 setURL("http://www.test.com/"); | |
| 648 setTitle("My neat website about cool stuff"); | |
| 649 | |
| 650 WebPagePtr extracted = extract(); | |
| 651 ASSERT_FALSE(extracted.is_null()); | |
| 652 | |
| 653 WebPagePtr expected = | |
| 654 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 655 | |
| 656 EntityPtr restaurant = Entity::New(); | |
| 657 restaurant->type = "Restaurant"; | |
| 658 | |
| 659 expected->entities.push_back(std::move(restaurant)); | |
| 660 | |
| 661 EXPECT_EQ(expected, extracted); | |
| 662 } | |
| 663 | |
| 664 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithMixedTypes) { | |
| 665 setHtmlInnerHTML( | |
| 666 "<body>" | |
| 667 "<script type=\"application/ld+json\">" | |
| 668 "\n" | |
| 669 "\n" | |
| 670 "{\"@type\": \"Restaurant\"," | |
| 671 "\"name\": [ \"Name\", 1 ]" | |
| 672 "}\n" | |
| 673 "\n" | |
| 674 "</script>" | |
| 675 "</body>"); | |
| 676 setURL("http://www.test.com/"); | |
| 677 setTitle("My neat website about cool stuff"); | |
| 678 | |
| 679 WebPagePtr extracted = extract(); | |
| 680 ASSERT_FALSE(extracted.is_null()); | |
| 681 | |
| 682 WebPagePtr expected = | |
| 683 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 684 | |
| 685 EntityPtr restaurant = Entity::New(); | |
| 686 restaurant->type = "Restaurant"; | |
| 687 | |
| 688 expected->entities.push_back(std::move(restaurant)); | |
| 689 | |
| 690 EXPECT_EQ(expected, extracted); | |
| 691 } | |
| 692 | |
| 693 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithNestedArray) { | |
| 694 setHtmlInnerHTML( | |
| 695 "<body>" | |
| 696 "<script type=\"application/ld+json\">" | |
| 697 "\n" | |
| 698 "\n" | |
| 699 "{\"@type\": \"Restaurant\"," | |
| 700 "\"name\": [ [ \"Name\" ] ]" | |
| 701 "}\n" | |
| 702 "\n" | |
| 703 "</script>" | |
| 704 "</body>"); | |
| 705 setURL("http://www.test.com/"); | |
| 706 setTitle("My neat website about cool stuff"); | |
| 707 | |
| 708 WebPagePtr extracted = extract(); | |
| 709 ASSERT_FALSE(extracted.is_null()); | |
| 710 | |
| 711 WebPagePtr expected = | |
| 712 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 713 | |
| 714 EntityPtr restaurant = Entity::New(); | |
| 715 restaurant->type = "Restaurant"; | |
| 716 | |
| 717 expected->entities.push_back(std::move(restaurant)); | |
| 718 | |
| 719 EXPECT_EQ(expected, extracted); | |
| 720 } | |
| 721 | |
| 722 TEST_F(CopylessPasteExtractorTest, enforceMaxNestingDepth) { | |
| 723 setHtmlInnerHTML( | |
| 724 "<body>" | |
| 725 "<script type=\"application/ld+json\">" | |
| 726 "\n" | |
| 727 "\n" | |
| 728 "{\"@type\": \"Restaurant\"," | |
| 729 "\"name\": \"Ye ol greasy diner\"," | |
| 730 "\"1\": {" | |
| 731 " \"2\": {" | |
| 732 " \"3\": {" | |
| 733 " \"4\": {" | |
| 734 " \"5\": 6" | |
| 735 " }\n" | |
| 736 " }\n" | |
| 737 " }\n" | |
| 738 "}\n" | |
| 739 "}\n" | |
| 740 "\n" | |
| 741 "</script>" | |
| 742 "</body>"); | |
| 743 setURL("http://www.test.com/"); | |
| 744 setTitle("My neat website about cool stuff"); | |
| 745 | |
| 746 WebPagePtr extracted = extract(); | |
| 747 ASSERT_FALSE(extracted.is_null()); | |
| 748 | |
| 749 WebPagePtr expected = | |
| 750 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 751 | |
| 752 EntityPtr restaurant = Entity::New(); | |
| 753 restaurant->type = "Restaurant"; | |
| 754 restaurant->properties.push_back( | |
| 755 createStringProperty("name", "Ye ol greasy diner")); | |
| 756 | |
| 757 EntityPtr entity1 = Entity::New(); | |
| 758 entity1->type = "Thing"; | |
| 759 | |
| 760 EntityPtr entity2 = Entity::New(); | |
| 761 entity2->type = "Thing"; | |
| 762 | |
| 763 EntityPtr entity3 = Entity::New(); | |
| 764 entity3->type = "Thing"; | |
| 765 | |
| 766 entity2->properties.push_back(createEntityProperty("3", std::move(entity3))); | |
| 767 | |
| 768 entity1->properties.push_back(createEntityProperty("2", std::move(entity2))); | |
| 769 | |
| 770 restaurant->properties.push_back( | |
| 771 createEntityProperty("1", std::move(entity1))); | |
| 772 | |
| 773 expected->entities.push_back(std::move(restaurant)); | |
| 774 EXPECT_EQ(expected, extracted); | |
| 775 } | |
| 776 | |
| 777 TEST_F(CopylessPasteExtractorTest, maxNestingDepthWithTerminalProperty) { | |
| 778 setHtmlInnerHTML( | |
| 779 "<body>" | |
| 780 "<script type=\"application/ld+json\">" | |
| 781 "\n" | |
| 782 "\n" | |
| 783 "{\"@type\": \"Restaurant\"," | |
| 784 "\"name\": \"Ye ol greasy diner\"," | |
| 785 "\"1\": {" | |
| 786 " \"2\": {" | |
| 787 " \"3\": {" | |
| 788 " \"4\": 5" | |
| 789 " }\n" | |
| 790 " }\n" | |
| 791 "}\n" | |
| 792 "}\n" | |
| 793 "\n" | |
| 794 "</script>" | |
| 795 "</body>"); | |
| 796 setURL("http://www.test.com/"); | |
| 797 setTitle("My neat website about cool stuff"); | |
| 798 | |
| 799 WebPagePtr extracted = extract(); | |
| 800 ASSERT_FALSE(extracted.is_null()); | |
| 801 | |
| 802 WebPagePtr expected = | |
| 803 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 804 | |
| 805 EntityPtr restaurant = Entity::New(); | |
| 806 restaurant->type = "Restaurant"; | |
| 807 restaurant->properties.push_back( | |
| 808 createStringProperty("name", "Ye ol greasy diner")); | |
| 809 | |
| 810 EntityPtr entity1 = Entity::New(); | |
| 811 entity1->type = "Thing"; | |
| 812 | |
| 813 EntityPtr entity2 = Entity::New(); | |
| 814 entity2->type = "Thing"; | |
| 815 | |
| 816 EntityPtr entity3 = Entity::New(); | |
| 817 entity3->type = "Thing"; | |
| 818 | |
| 819 entity3->properties.push_back(createLongProperty("4", 5)); | |
| 820 | |
| 821 entity2->properties.push_back(createEntityProperty("3", std::move(entity3))); | |
| 822 | |
| 823 entity1->properties.push_back(createEntityProperty("2", std::move(entity2))); | |
| 824 | |
| 825 restaurant->properties.push_back( | |
| 826 createEntityProperty("1", std::move(entity1))); | |
| 827 | |
| 828 expected->entities.push_back(std::move(restaurant)); | |
| 104 EXPECT_EQ(expected, extracted); | 829 EXPECT_EQ(expected, extracted); |
| 105 } | 830 } |
| 106 | 831 |
| 107 } // namespace | 832 } // namespace |
| 108 | |
| 109 } // namespace blink | 833 } // namespace blink |
| OLD | NEW |