Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "modules/document_metadata/CopylessPasteExtractor.h" | |
| 6 | 5 |
| 7 #include <memory> | 6 #include <memory> |
| 7 #include <string> | |
| 8 #include <utility> | |
| 8 #include "core/dom/Document.h" | 9 #include "core/dom/Document.h" |
| 9 #include "core/dom/Element.h" | 10 #include "core/dom/Element.h" |
| 10 #include "core/testing/DummyPageHolder.h" | 11 #include "core/testing/DummyPageHolder.h" |
| 12 #include "modules/document_metadata/CopylessPasteExtractor.h" | |
| 13 #include "platform/json/JSONValues.h" | |
| 14 #include "platform/testing/URLTestHelpers.h" | |
| 15 #include "public/platform/modules/document_metadata/copyless_paste.mojom-blink.h" | |
| 11 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
| 12 #include "wtf/text/StringBuilder.h" | 17 #include "wtf/text/StringBuilder.h" |
| 13 | 18 |
| 14 namespace blink { | 19 namespace blink { |
| 15 | 20 |
| 16 namespace { | 21 namespace { |
| 17 | 22 |
| 23 using mojom::blink::Entity; | |
| 24 using mojom::blink::EntityPtr; | |
| 25 using mojom::blink::Property; | |
| 26 using mojom::blink::PropertyPtr; | |
| 27 using mojom::blink::Values; | |
| 28 using mojom::blink::ValuesPtr; | |
| 29 using mojom::blink::WebPage; | |
| 30 using mojom::blink::WebPagePtr; | |
| 31 | |
| 18 class CopylessPasteExtractorTest : public ::testing::Test { | 32 class CopylessPasteExtractorTest : public ::testing::Test { |
|
wychen
2017/04/04 02:07:15
Might make sense to add tests that trigger addProp
dproctor
2017/04/04 06:21:19
Done.
| |
| 19 public: | 33 public: |
| 20 CopylessPasteExtractorTest() | 34 CopylessPasteExtractorTest() {} |
| 21 : m_content( | |
| 22 "\n" | |
| 23 "\n" | |
| 24 "{\"@type\": \"NewsArticle\"," | |
| 25 "\"headline\": \"Special characters for ya >_<;\"\n" | |
| 26 "}\n" | |
| 27 "\n") {} | |
| 28 | 35 |
| 29 protected: | 36 protected: |
| 30 void SetUp() override; | 37 void SetUp() override; |
| 31 | 38 |
| 32 void TearDown() override { ThreadState::current()->collectAllGarbage(); } | 39 void TearDown() override { ThreadState::current()->collectAllGarbage(); } |
| 33 | 40 |
| 34 Document& document() const { return m_dummyPageHolder->document(); } | 41 Document& document() const { return m_dummyPageHolder->document(); } |
| 35 | 42 |
| 36 String extract() { return CopylessPasteExtractor::extract(document()); } | 43 bool extract(WebPagePtr* page) { |
| 44 return CopylessPasteExtractor::extract(document(), page); | |
| 45 } | |
| 37 | 46 |
| 38 void setHtmlInnerHTML(const String&); | 47 void setHtmlInnerHTML(const String&); |
| 39 | 48 |
| 40 String m_content; | 49 void setURL(const std::string); |
| 50 | |
| 51 void setTitle(const String&); | |
| 52 | |
| 53 PropertyPtr createStringProperty(const String&, const String&); | |
| 54 | |
| 55 PropertyPtr createBooleanProperty(const String&, const bool&); | |
| 56 | |
| 57 PropertyPtr createLongProperty(const String&, const int64_t&); | |
| 58 | |
| 59 PropertyPtr createEntityProperty(const String&, EntityPtr); | |
| 60 | |
| 61 WebPagePtr createWebPage(const String&, const String&); | |
| 41 | 62 |
| 42 private: | 63 private: |
| 43 std::unique_ptr<DummyPageHolder> m_dummyPageHolder; | 64 std::unique_ptr<DummyPageHolder> m_dummyPageHolder; |
| 44 }; | 65 }; |
| 45 | 66 |
| 46 void CopylessPasteExtractorTest::SetUp() { | 67 void CopylessPasteExtractorTest::SetUp() { |
| 47 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); | 68 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); |
| 48 } | 69 } |
| 49 | 70 |
| 50 void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) { | 71 void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) { |
| 51 document().documentElement()->setInnerHTML((htmlContent)); | 72 document().documentElement()->setInnerHTML((htmlContent)); |
| 52 } | 73 } |
| 53 | 74 |
| 75 void CopylessPasteExtractorTest::setURL(const std::string url) { | |
| 76 document().setURL(URLTestHelpers::toKURL(url)); | |
| 77 } | |
| 78 | |
| 79 void CopylessPasteExtractorTest::setTitle(const String& title) { | |
| 80 document().setTitle(title); | |
| 81 } | |
| 82 | |
| 83 PropertyPtr CopylessPasteExtractorTest::createStringProperty( | |
| 84 const String& name, | |
| 85 const String& value) { | |
| 86 PropertyPtr p = Property::New(); | |
| 87 p->name = name; | |
| 88 p->values = Values::New(); | |
| 89 p->values->set_string_values(Vector<String>(1, value)); | |
| 90 return p; | |
| 91 } | |
| 92 | |
| 93 PropertyPtr CopylessPasteExtractorTest::createBooleanProperty( | |
| 94 const String& name, | |
| 95 const bool& value) { | |
| 96 PropertyPtr p = Property::New(); | |
| 97 p->name = name; | |
| 98 p->values = Values::New(); | |
| 99 p->values->set_bool_values(Vector<bool>(1, value)); | |
| 100 return p; | |
| 101 } | |
| 102 | |
| 103 PropertyPtr CopylessPasteExtractorTest::createLongProperty( | |
| 104 const String& name, | |
| 105 const int64_t& value) { | |
| 106 PropertyPtr p = Property::New(); | |
| 107 p->name = name; | |
| 108 p->values = Values::New(); | |
| 109 p->values->set_long_values(Vector<int64_t>(1, value)); | |
| 110 return p; | |
| 111 } | |
| 112 | |
| 113 PropertyPtr CopylessPasteExtractorTest::createEntityProperty(const String& name, | |
| 114 EntityPtr value) { | |
| 115 PropertyPtr p = Property::New(); | |
| 116 p->name = name; | |
| 117 p->values = Values::New(); | |
| 118 p->values->set_entity_values(Vector<EntityPtr>()); | |
| 119 p->values->get_entity_values().push_back(std::move(value)); | |
| 120 return p; | |
| 121 } | |
| 122 | |
| 123 WebPagePtr CopylessPasteExtractorTest::createWebPage(const String& url, | |
| 124 const String& title) { | |
| 125 WebPagePtr wp = WebPage::New(); | |
| 126 wp->url = url; | |
| 127 wp->title = title; | |
| 128 return wp; | |
| 129 } | |
| 130 | |
| 54 TEST_F(CopylessPasteExtractorTest, empty) { | 131 TEST_F(CopylessPasteExtractorTest, empty) { |
| 55 String extracted = extract(); | 132 WebPagePtr page = WebPage::New(); |
| 56 String expected = "[]"; | 133 ASSERT_FALSE(extract(&page)); |
| 57 EXPECT_EQ(expected, extracted); | 134 EXPECT_EQ(WebPage::New(), page); |
| 58 } | 135 } |
| 59 | 136 |
| 60 TEST_F(CopylessPasteExtractorTest, basic) { | 137 TEST_F(CopylessPasteExtractorTest, basic) { |
| 61 setHtmlInnerHTML( | 138 setHtmlInnerHTML( |
| 62 "<body>" | 139 "<body>" |
| 63 "<script type=\"application/ld+json\">" + | 140 "<script type=\"application/ld+json\">" |
| 64 m_content + | 141 "\n" |
| 65 "</script>" | 142 "\n" |
| 66 "</body>"); | 143 "{\"@type\": \"Restaurant\"," |
| 67 | 144 "\"name\": \"Special characters for ya >_<;\"" |
| 68 String extracted = extract(); | 145 "}\n" |
| 69 String expected = "[" + m_content + "]"; | 146 "\n" |
| 147 "</script>" | |
| 148 "</body>"); | |
| 149 setURL("http://www.test.com/"); | |
| 150 setTitle("My neat website about cool stuff"); | |
| 151 | |
| 152 WebPagePtr extracted = WebPage::New(); | |
| 153 ASSERT_TRUE(extract(&extracted)); | |
| 154 | |
| 155 WebPagePtr expected = | |
| 156 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 157 | |
| 158 EntityPtr restaurant = Entity::New(); | |
| 159 restaurant->type = "Restaurant"; | |
| 160 restaurant->properties.push_back( | |
| 161 createStringProperty("name", "Special characters for ya >_<;")); | |
| 162 | |
| 163 expected->entities.push_back(std::move(restaurant)); | |
| 70 EXPECT_EQ(expected, extracted); | 164 EXPECT_EQ(expected, extracted); |
| 71 } | 165 } |
| 72 | 166 |
| 73 TEST_F(CopylessPasteExtractorTest, header) { | 167 TEST_F(CopylessPasteExtractorTest, header) { |
| 74 setHtmlInnerHTML( | 168 setHtmlInnerHTML( |
| 75 "<head>" | 169 "<head>" |
| 76 "<script type=\"application/ld+json\">" + | 170 "<script type=\"application/ld+json\">" |
| 77 m_content + | 171 "\n" |
| 172 "\n" | |
| 173 "{\"@type\": \"Restaurant\"," | |
| 174 "\"name\": \"Special characters for ya >_<;\"" | |
| 175 "}\n" | |
| 176 "\n" | |
| 78 "</script>" | 177 "</script>" |
| 79 "</head>"); | 178 "</head>"); |
| 80 | 179 |
| 81 String extracted = extract(); | 180 setURL("http://www.test.com/"); |
| 82 String expected = "[" + m_content + "]"; | 181 setTitle("My neat website about cool stuff"); |
| 182 | |
| 183 WebPagePtr extracted = WebPage::New(); | |
| 184 ASSERT_TRUE(extract(&extracted)); | |
| 185 | |
| 186 WebPagePtr expected = | |
| 187 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 188 | |
| 189 EntityPtr restaurant = Entity::New(); | |
| 190 restaurant->type = "Restaurant"; | |
| 191 restaurant->properties.push_back( | |
| 192 createStringProperty("name", "Special characters for ya >_<;")); | |
| 193 | |
| 194 expected->entities.push_back(std::move(restaurant)); | |
| 195 EXPECT_EQ(expected, extracted); | |
| 196 } | |
| 197 | |
| 198 TEST_F(CopylessPasteExtractorTest, booleanValue) { | |
| 199 setHtmlInnerHTML( | |
| 200 "<body>" | |
| 201 "<script type=\"application/ld+json\">" | |
| 202 "\n" | |
| 203 "\n" | |
| 204 "{\"@type\": \"Restaurant\"," | |
| 205 "\"open\": true" | |
| 206 "}\n" | |
| 207 "\n" | |
| 208 "</script>" | |
| 209 "</body>"); | |
| 210 setURL("http://www.test.com/"); | |
| 211 setTitle("My neat website about cool stuff"); | |
| 212 | |
| 213 WebPagePtr extracted = WebPage::New(); | |
| 214 ASSERT_TRUE(extract(&extracted)); | |
| 215 | |
| 216 WebPagePtr expected = | |
| 217 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 218 | |
| 219 EntityPtr restaurant = Entity::New(); | |
| 220 restaurant->type = "Restaurant"; | |
| 221 restaurant->properties.push_back(createBooleanProperty("open", true)); | |
| 222 | |
| 223 expected->entities.push_back(std::move(restaurant)); | |
| 224 EXPECT_EQ(expected, extracted); | |
| 225 } | |
| 226 | |
| 227 TEST_F(CopylessPasteExtractorTest, longValue) { | |
| 228 setHtmlInnerHTML( | |
| 229 "<body>" | |
| 230 "<script type=\"application/ld+json\">" | |
| 231 "\n" | |
| 232 "\n" | |
| 233 "{\"@type\": \"Restaurant\"," | |
| 234 "\"long\": 1" | |
| 235 "}\n" | |
| 236 "\n" | |
| 237 "</script>" | |
| 238 "</body>"); | |
| 239 setURL("http://www.test.com/"); | |
| 240 setTitle("My neat website about cool stuff"); | |
| 241 | |
| 242 WebPagePtr extracted = WebPage::New(); | |
| 243 ASSERT_TRUE(extract(&extracted)); | |
| 244 | |
| 245 WebPagePtr expected = | |
| 246 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 247 | |
| 248 EntityPtr restaurant = Entity::New(); | |
| 249 restaurant->type = "Restaurant"; | |
| 250 restaurant->properties.push_back(createLongProperty("long", 1ll)); | |
| 251 | |
| 252 expected->entities.push_back(std::move(restaurant)); | |
| 253 EXPECT_EQ(expected, extracted); | |
| 254 } | |
| 255 | |
| 256 TEST_F(CopylessPasteExtractorTest, doubleValue) { | |
| 257 setHtmlInnerHTML( | |
| 258 "<body>" | |
| 259 "<script type=\"application/ld+json\">" | |
| 260 "\n" | |
| 261 "\n" | |
| 262 "{\"@type\": \"Restaurant\"," | |
| 263 "\"double\": 1.5" | |
| 264 "}\n" | |
| 265 "\n" | |
| 266 "</script>" | |
| 267 "</body>"); | |
| 268 setURL("http://www.test.com/"); | |
| 269 setTitle("My neat website about cool stuff"); | |
| 270 | |
| 271 WebPagePtr extracted = WebPage::New(); | |
| 272 ASSERT_TRUE(extract(&extracted)); | |
| 273 | |
| 274 WebPagePtr expected = | |
| 275 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 276 | |
| 277 EntityPtr restaurant = Entity::New(); | |
| 278 restaurant->type = "Restaurant"; | |
| 279 restaurant->properties.push_back(createStringProperty("double", "1.5")); | |
| 280 | |
| 281 expected->entities.push_back(std::move(restaurant)); | |
| 83 EXPECT_EQ(expected, extracted); | 282 EXPECT_EQ(expected, extracted); |
| 84 } | 283 } |
| 85 | 284 |
| 86 TEST_F(CopylessPasteExtractorTest, multiple) { | 285 TEST_F(CopylessPasteExtractorTest, multiple) { |
| 87 setHtmlInnerHTML( | 286 setHtmlInnerHTML( |
| 88 "<head>" | 287 "<head>" |
| 89 "<script type=\"application/ld+json\">" + | 288 "<script type=\"application/ld+json\">" |
| 90 m_content + | 289 "\n" |
| 290 "\n" | |
| 291 "{\"@type\": \"Restaurant\"," | |
| 292 "\"name\": \"Special characters for ya >_<;\"" | |
| 293 "}\n" | |
| 294 "\n" | |
| 91 "</script>" | 295 "</script>" |
| 92 "</head>" | 296 "</head>" |
| 93 "<body>" | 297 "<body>" |
| 94 "<script type=\"application/ld+json\">" + | 298 "<script type=\"application/ld+json\">" |
| 95 m_content + | 299 "\n" |
| 96 "</script>" | 300 "\n" |
| 97 "<script type=\"application/ld+json\">" + | 301 "{\"@type\": \"Restaurant\"," |
| 98 m_content + | 302 "\"name\": \"Special characters for ya >_<;\"" |
| 99 "</script>" | 303 "}\n" |
| 100 "</body>"); | 304 "\n" |
| 101 | 305 "</script>" |
| 102 String extracted = extract(); | 306 "<script type=\"application/ld+json\">" |
| 103 String expected = "[" + m_content + "," + m_content + "," + m_content + "]"; | 307 "\n" |
| 308 "\n" | |
| 309 "{\"@type\": \"Restaurant\"," | |
| 310 "\"name\": \"Special characters for ya >_<;\"" | |
| 311 "}\n" | |
| 312 "\n" | |
| 313 "</script>" | |
| 314 "</body>"); | |
| 315 | |
| 316 setURL("http://www.test.com/"); | |
| 317 setTitle("My neat website about cool stuff"); | |
| 318 | |
| 319 WebPagePtr extracted = WebPage::New(); | |
| 320 ASSERT_TRUE(extract(&extracted)); | |
| 321 | |
| 322 WebPagePtr expected = | |
| 323 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 324 | |
| 325 for (int i = 0; i < 3; ++i) { | |
| 326 EntityPtr restaurant = Entity::New(); | |
| 327 restaurant->type = "Restaurant"; | |
| 328 restaurant->properties.push_back( | |
| 329 createStringProperty("name", "Special characters for ya >_<;")); | |
| 330 | |
| 331 expected->entities.push_back(std::move(restaurant)); | |
| 332 } | |
| 333 EXPECT_EQ(expected, extracted); | |
| 334 } | |
| 335 | |
| 336 TEST_F(CopylessPasteExtractorTest, nested) { | |
| 337 setHtmlInnerHTML( | |
| 338 "<body>" | |
| 339 "<script type=\"application/ld+json\">" | |
| 340 "\n" | |
| 341 "\n" | |
| 342 "{\"@type\": \"Restaurant\"," | |
| 343 "\"name\": \"Ye ol greasy diner\"," | |
| 344 "\"address\": {" | |
| 345 "\n" | |
| 346 " \"streetAddress\": \"123 Big Oak Road\"," | |
| 347 " \"addressLocality\": \"San Francisco\"" | |
| 348 " }\n" | |
| 349 "}\n" | |
| 350 "\n" | |
| 351 "</script>" | |
| 352 "</body>"); | |
| 353 setURL("http://www.test.com/"); | |
| 354 setTitle("My neat website about cool stuff"); | |
| 355 | |
| 356 WebPagePtr extracted = WebPage::New(); | |
| 357 ASSERT_TRUE(extract(&extracted)); | |
| 358 | |
| 359 WebPagePtr expected = | |
| 360 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 361 | |
| 362 EntityPtr restaurant = Entity::New(); | |
| 363 restaurant->type = "Restaurant"; | |
| 364 restaurant->properties.push_back( | |
| 365 createStringProperty("name", "Ye ol greasy diner")); | |
| 366 | |
| 367 EntityPtr address = Entity::New(); | |
| 368 address->type = "Thing"; | |
| 369 address->properties.push_back( | |
| 370 createStringProperty("streetAddress", "123 Big Oak Road")); | |
| 371 address->properties.push_back( | |
| 372 createStringProperty("addressLocality", "San Francisco")); | |
| 373 | |
| 374 restaurant->properties.push_back( | |
| 375 createEntityProperty("address", std::move(address))); | |
| 376 | |
| 377 expected->entities.push_back(std::move(restaurant)); | |
| 378 EXPECT_EQ(expected, extracted); | |
| 379 } | |
| 380 | |
| 381 TEST_F(CopylessPasteExtractorTest, repeated) { | |
| 382 setHtmlInnerHTML( | |
| 383 "<body>" | |
| 384 "<script type=\"application/ld+json\">" | |
| 385 "\n" | |
| 386 "\n" | |
| 387 "{\"@type\": \"Restaurant\"," | |
| 388 "\"name\": [ \"First name\", \"Second name\" ]" | |
| 389 "}\n" | |
| 390 "\n" | |
| 391 "</script>" | |
| 392 "</body>"); | |
| 393 setURL("http://www.test.com/"); | |
| 394 setTitle("My neat website about cool stuff"); | |
| 395 | |
| 396 WebPagePtr extracted = WebPage::New(); | |
| 397 ASSERT_TRUE(extract(&extracted)); | |
| 398 | |
| 399 WebPagePtr expected = | |
| 400 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 401 | |
| 402 EntityPtr restaurant = Entity::New(); | |
| 403 restaurant->type = "Restaurant"; | |
| 404 | |
| 405 PropertyPtr name = Property::New(); | |
| 406 name->name = "name"; | |
| 407 name->values = Values::New(); | |
| 408 Vector<String> nameValues; | |
| 409 nameValues.push_back("First name"); | |
| 410 nameValues.push_back("Second name"); | |
| 411 name->values->set_string_values(nameValues); | |
| 412 | |
| 413 restaurant->properties.push_back(std::move(name)); | |
| 414 | |
| 415 expected->entities.push_back(std::move(restaurant)); | |
| 416 | |
| 417 EXPECT_EQ(expected, extracted); | |
| 418 } | |
| 419 | |
| 420 TEST_F(CopylessPasteExtractorTest, repeatedObject) { | |
| 421 setHtmlInnerHTML( | |
| 422 "<body>" | |
| 423 "<script type=\"application/ld+json\">" | |
| 424 "\n" | |
| 425 "\n" | |
| 426 "{\"@type\": \"Restaurant\"," | |
| 427 "\"name\": \"Ye ol greasy diner\"," | |
| 428 "\"address\": [" | |
| 429 "\n" | |
| 430 " {" | |
| 431 " \"streetAddress\": \"123 Big Oak Road\"," | |
| 432 " \"addressLocality\": \"San Francisco\"" | |
| 433 " },\n" | |
| 434 " {" | |
| 435 " \"streetAddress\": \"123 Big Oak Road\"," | |
| 436 " \"addressLocality\": \"San Francisco\"" | |
| 437 " }\n" | |
| 438 "]\n" | |
| 439 "}\n" | |
| 440 "\n" | |
| 441 "</script>" | |
| 442 "</body>"); | |
| 443 setURL("http://www.test.com/"); | |
| 444 setTitle("My neat website about cool stuff"); | |
| 445 | |
| 446 WebPagePtr extracted = WebPage::New(); | |
| 447 ASSERT_TRUE(extract(&extracted)); | |
| 448 | |
| 449 WebPagePtr expected = | |
| 450 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 451 | |
| 452 EntityPtr restaurant = Entity::New(); | |
| 453 restaurant->type = "Restaurant"; | |
| 454 restaurant->properties.push_back( | |
| 455 createStringProperty("name", "Ye ol greasy diner")); | |
| 456 | |
| 457 PropertyPtr addressProperty = Property::New(); | |
| 458 addressProperty->name = "address"; | |
| 459 addressProperty->values = Values::New(); | |
| 460 addressProperty->values->set_entity_values(Vector<EntityPtr>()); | |
| 461 for (int i = 0; i < 2; ++i) { | |
| 462 EntityPtr address = Entity::New(); | |
| 463 address->type = "Thing"; | |
| 464 address->properties.push_back( | |
| 465 createStringProperty("streetAddress", "123 Big Oak Road")); | |
| 466 address->properties.push_back( | |
| 467 createStringProperty("addressLocality", "San Francisco")); | |
| 468 addressProperty->values->get_entity_values().push_back(std::move(address)); | |
| 469 } | |
| 470 restaurant->properties.push_back(std::move(addressProperty)); | |
| 471 | |
| 472 expected->entities.push_back(std::move(restaurant)); | |
| 473 EXPECT_EQ(expected, extracted); | |
| 474 } | |
| 475 | |
| 476 TEST_F(CopylessPasteExtractorTest, truncateLongString) { | |
| 477 String maxLengthString; | |
| 478 for (int i = 0; i < 200; ++i) { | |
| 479 maxLengthString.append("a"); | |
| 480 } | |
| 481 String tooLongString(maxLengthString); | |
| 482 tooLongString.append("a"); | |
| 483 setHtmlInnerHTML( | |
| 484 "<body>" | |
| 485 "<script type=\"application/ld+json\">" | |
| 486 "\n" | |
| 487 "\n" | |
| 488 "{\"@type\": \"Restaurant\"," | |
| 489 "\"name\": \"" + | |
| 490 tooLongString + | |
| 491 "\"" | |
| 492 "}\n" | |
| 493 "\n" | |
| 494 "</script>" | |
| 495 "</body>"); | |
| 496 setURL("http://www.test.com/"); | |
| 497 setTitle("My neat website about cool stuff"); | |
| 498 | |
| 499 WebPagePtr extracted = WebPage::New(); | |
| 500 ASSERT_TRUE(extract(&extracted)); | |
| 501 | |
| 502 WebPagePtr expected = | |
| 503 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 504 | |
| 505 EntityPtr restaurant = Entity::New(); | |
| 506 restaurant->type = "Restaurant"; | |
| 507 restaurant->properties.push_back( | |
| 508 createStringProperty("name", maxLengthString)); | |
| 509 | |
| 510 expected->entities.push_back(std::move(restaurant)); | |
| 511 EXPECT_EQ(expected, extracted); | |
| 512 } | |
| 513 | |
| 514 TEST_F(CopylessPasteExtractorTest, enforceTypeExists) { | |
| 515 setHtmlInnerHTML( | |
| 516 "<body>" | |
| 517 "<script type=\"application/ld+json\">" | |
| 518 "\n" | |
| 519 "\n" | |
| 520 "{\"name\": \"Special characters for ya >_<;\"" | |
| 521 "}\n" | |
| 522 "\n" | |
| 523 "</script>" | |
| 524 "</body>"); | |
| 525 setURL("http://www.test.com/"); | |
| 526 setTitle("My neat website about cool stuff"); | |
| 527 | |
| 528 WebPagePtr extracted = WebPage::New(); | |
| 529 ASSERT_FALSE(extract(&extracted)); | |
| 530 WebPagePtr expected = WebPage::New(); | |
| 531 EXPECT_EQ(expected, extracted); | |
| 532 } | |
| 533 | |
| 534 TEST_F(CopylessPasteExtractorTest, enforceTypeWhitelist) { | |
| 535 setHtmlInnerHTML( | |
| 536 "<body>" | |
| 537 "<script type=\"application/ld+json\">" | |
| 538 "\n" | |
| 539 "\n" | |
| 540 "{\"@type\": \"UnsupportedType\"," | |
| 541 "\"name\": \"Special characters for ya >_<;\"" | |
| 542 "}\n" | |
| 543 "\n" | |
| 544 "</script>" | |
| 545 "</body>"); | |
| 546 setURL("http://www.test.com/"); | |
| 547 setTitle("My neat website about cool stuff"); | |
| 548 | |
| 549 WebPagePtr extracted = WebPage::New(); | |
| 550 ASSERT_FALSE(extract(&extracted)); | |
| 551 WebPagePtr expected = WebPage::New(); | |
| 552 EXPECT_EQ(expected, extracted); | |
| 553 } | |
| 554 | |
| 555 TEST_F(CopylessPasteExtractorTest, truncateTooManyValuesInField) { | |
| 556 String largeRepeatedField = "["; | |
| 557 for (int i = 0; i < 101; ++i) { | |
| 558 largeRepeatedField.append("\"a\""); | |
| 559 if (i != 100) { | |
| 560 largeRepeatedField.append(", "); | |
| 561 } | |
| 562 } | |
| 563 largeRepeatedField.append("]"); | |
| 564 setHtmlInnerHTML( | |
| 565 "<body>" | |
| 566 "<script type=\"application/ld+json\">" | |
| 567 "\n" | |
| 568 "\n" | |
| 569 "{\"@type\": \"Restaurant\"," | |
| 570 "\"name\": " + | |
| 571 largeRepeatedField + | |
| 572 "}\n" | |
| 573 "\n" | |
| 574 "</script>" | |
| 575 "</body>"); | |
| 576 setURL("http://www.test.com/"); | |
| 577 setTitle("My neat website about cool stuff"); | |
| 578 | |
| 579 WebPagePtr extracted = WebPage::New(); | |
| 580 ASSERT_TRUE(extract(&extracted)); | |
| 581 | |
| 582 WebPagePtr expected = | |
| 583 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 584 | |
| 585 EntityPtr restaurant = Entity::New(); | |
| 586 restaurant->type = "Restaurant"; | |
| 587 | |
| 588 PropertyPtr name = Property::New(); | |
| 589 name->name = "name"; | |
| 590 name->values = Values::New(); | |
| 591 Vector<String> nameValues; | |
| 592 for (int i = 0; i < 100; ++i) { | |
| 593 nameValues.push_back("a"); | |
| 594 } | |
| 595 name->values->set_string_values(nameValues); | |
| 596 | |
| 597 restaurant->properties.push_back(std::move(name)); | |
| 598 | |
| 599 expected->entities.push_back(std::move(restaurant)); | |
| 600 | |
| 601 EXPECT_EQ(expected, extracted); | |
| 602 } | |
| 603 | |
| 604 TEST_F(CopylessPasteExtractorTest, truncateTooManyFields) { | |
| 605 String tooManyFields; | |
| 606 for (int i = 0; i < 20; ++i) { | |
| 607 tooManyFields.append(String::format("\"%d\": \"a\"", i)); | |
| 608 if (i != 19) { | |
| 609 tooManyFields.append(",\n"); | |
| 610 } | |
| 611 } | |
| 612 setHtmlInnerHTML( | |
| 613 "<body>" | |
| 614 "<script type=\"application/ld+json\">" | |
| 615 "\n" | |
| 616 "\n" | |
| 617 "{\"@type\": \"Restaurant\"," + | |
| 618 tooManyFields + | |
| 619 "}\n" | |
| 620 "\n" | |
| 621 "</script>" | |
| 622 "</body>"); | |
| 623 setURL("http://www.test.com/"); | |
| 624 setTitle("My neat website about cool stuff"); | |
| 625 | |
| 626 WebPagePtr extracted = WebPage::New(); | |
| 627 ASSERT_TRUE(extract(&extracted)); | |
| 628 | |
| 629 WebPagePtr expected = | |
| 630 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
| 631 | |
| 632 EntityPtr restaurant = Entity::New(); | |
| 633 restaurant->type = "Restaurant"; | |
| 634 | |
| 635 for (int i = 0; i < 19; ++i) { | |
| 636 restaurant->properties.push_back( | |
| 637 createStringProperty(String::number(i), "a")); | |
| 638 } | |
| 639 | |
| 640 expected->entities.push_back(std::move(restaurant)); | |
| 104 EXPECT_EQ(expected, extracted); | 641 EXPECT_EQ(expected, extracted); |
| 105 } | 642 } |
| 106 | 643 |
| 107 } // namespace | 644 } // namespace |
| 108 | |
| 109 } // namespace blink | 645 } // namespace blink |
| OLD | NEW |