| OLD | NEW |
| 1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "modules/document_metadata/CopylessPasteExtractor.cpp" |
| 5 #include "modules/document_metadata/CopylessPasteExtractor.h" | 6 #include "modules/document_metadata/CopylessPasteExtractor.h" |
| 6 | 7 |
| 7 #include <memory> | 8 #include <memory> |
| 9 #include <string> |
| 8 #include "core/dom/Document.h" | 10 #include "core/dom/Document.h" |
| 9 #include "core/dom/Element.h" | 11 #include "core/dom/Element.h" |
| 10 #include "core/testing/DummyPageHolder.h" | 12 #include "core/testing/DummyPageHolder.h" |
| 13 #include "platform/json/JSONValues.h" |
| 14 #include "platform/testing/URLTestHelpers.h" |
| 11 #include "testing/gtest/include/gtest/gtest.h" | 15 #include "testing/gtest/include/gtest/gtest.h" |
| 12 #include "wtf/text/StringBuilder.h" | 16 #include "wtf/text/StringBuilder.h" |
| 13 | 17 |
| 14 namespace blink { | 18 namespace blink { |
| 15 | 19 |
| 16 namespace { | 20 namespace { |
| 17 | 21 |
| 18 class CopylessPasteExtractorTest : public ::testing::Test { | 22 class CopylessPasteExtractorTest : public ::testing::Test { |
| 19 public: | 23 public: |
| 20 CopylessPasteExtractorTest() | 24 CopylessPasteExtractorTest() {} |
| 21 : m_content( | |
| 22 "\n" | |
| 23 "\n" | |
| 24 "{\"@type\": \"NewsArticle\"," | |
| 25 "\"headline\": \"Special characters for ya >_<;\"\n" | |
| 26 "}\n" | |
| 27 "\n") {} | |
| 28 | 25 |
| 29 protected: | 26 protected: |
| 30 void SetUp() override; | 27 void SetUp() override; |
| 31 | 28 |
| 32 void TearDown() override { ThreadState::current()->collectAllGarbage(); } | 29 void TearDown() override { ThreadState::current()->collectAllGarbage(); } |
| 33 | 30 |
| 34 Document& document() const { return m_dummyPageHolder->document(); } | 31 Document& document() const { return m_dummyPageHolder->document(); } |
| 35 | 32 |
| 36 String extract() { return CopylessPasteExtractor::extract(document()); } | 33 bool extract(WebPage* page) { |
| 34 return CopylessPasteExtractor::extract(document(), page); |
| 35 } |
| 37 | 36 |
| 38 void setHtmlInnerHTML(const String&); | 37 void setHtmlInnerHTML(const String&); |
| 39 | 38 |
| 39 void setURL(const std::string); |
| 40 |
| 41 void setTitle(const String&); |
| 42 |
| 40 String m_content; | 43 String m_content; |
| 41 | 44 |
| 42 private: | 45 private: |
| 43 std::unique_ptr<DummyPageHolder> m_dummyPageHolder; | 46 std::unique_ptr<DummyPageHolder> m_dummyPageHolder; |
| 44 }; | 47 }; |
| 45 | 48 |
| 46 void CopylessPasteExtractorTest::SetUp() { | 49 void CopylessPasteExtractorTest::SetUp() { |
| 47 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); | 50 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); |
| 48 } | 51 } |
| 49 | 52 |
| 50 void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) { | 53 void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) { |
| 51 document().documentElement()->setInnerHTML((htmlContent)); | 54 document().documentElement()->setInnerHTML((htmlContent)); |
| 52 } | 55 } |
| 53 | 56 |
| 57 void CopylessPasteExtractorTest::setURL(const std::string url) { |
| 58 document().setURL(URLTestHelpers::toKURL(url)); |
| 59 } |
| 60 |
| 61 void CopylessPasteExtractorTest::setTitle(const String& title) { |
| 62 document().setTitle(title); |
| 63 } |
| 64 |
| 54 TEST_F(CopylessPasteExtractorTest, empty) { | 65 TEST_F(CopylessPasteExtractorTest, empty) { |
| 55 String extracted = extract(); | 66 WebPage page; |
| 56 String expected = "[]"; | 67 ASSERT_FALSE(extract(&page)); |
| 57 EXPECT_EQ(expected, extracted); | 68 EXPECT_EQ(WebPage(), page); |
| 58 } | 69 } |
| 59 | 70 |
| 60 TEST_F(CopylessPasteExtractorTest, basic) { | 71 TEST_F(CopylessPasteExtractorTest, basic) { |
| 61 setHtmlInnerHTML( | 72 setHtmlInnerHTML( |
| 62 "<body>" | 73 "<body>" |
| 63 "<script type=\"application/ld+json\">" + | 74 "<script type=\"application/ld+json\">" |
| 64 m_content + | 75 "\n" |
| 65 "</script>" | 76 "\n" |
| 66 "</body>"); | 77 "{\"@type\": \"Restaurant\"," |
| 67 | 78 "\"name\": \"Special characters for ya >_<;\"" |
| 68 String extracted = extract(); | 79 "}\n" |
| 69 String expected = "[" + m_content + "]"; | 80 "\n" |
| 81 "</script>" |
| 82 "</body>"); |
| 83 setURL("http://www.test.com/"); |
| 84 setTitle("My neat website about cool stuff"); |
| 85 |
| 86 WebPage extracted; |
| 87 ASSERT_TRUE(extract(&extracted)); |
| 88 WebPage expected; |
| 89 expected.url = "http://www.test.com/"; |
| 90 expected.title = "My neat website about cool stuff"; |
| 91 Entity restaurant; |
| 92 |
| 93 Property type; |
| 94 type.name = "@type"; |
| 95 type.type = JSONValue::TypeString; |
| 96 type.strVal.push_back("Restaurant"); |
| 97 |
| 98 Property nameProperty; |
| 99 nameProperty.name = "name"; |
| 100 nameProperty.type = JSONValue::TypeString; |
| 101 nameProperty.strVal.push_back("Special characters for ya >_<;"); |
| 102 |
| 103 restaurant.properties.push_back(type); |
| 104 restaurant.properties.push_back(nameProperty); |
| 105 |
| 106 expected.entities.push_back(restaurant); |
| 70 EXPECT_EQ(expected, extracted); | 107 EXPECT_EQ(expected, extracted); |
| 71 } | 108 } |
| 72 | 109 |
| 73 TEST_F(CopylessPasteExtractorTest, header) { | 110 TEST_F(CopylessPasteExtractorTest, header) { |
| 74 setHtmlInnerHTML( | 111 setHtmlInnerHTML( |
| 75 "<head>" | 112 "<head>" |
| 76 "<script type=\"application/ld+json\">" + | 113 "<script type=\"application/ld+json\">" |
| 77 m_content + | 114 "\n" |
| 115 "\n" |
| 116 "{\"@type\": \"Restaurant\"," |
| 117 "\"name\": \"Special characters for ya >_<;\"" |
| 118 "}\n" |
| 119 "\n" |
| 78 "</script>" | 120 "</script>" |
| 79 "</head>"); | 121 "</head>"); |
| 80 | 122 |
| 81 String extracted = extract(); | 123 setURL("http://www.test.com/"); |
| 82 String expected = "[" + m_content + "]"; | 124 setTitle("My neat website about cool stuff"); |
| 125 |
| 126 WebPage extracted; |
| 127 ASSERT_TRUE(extract(&extracted)); |
| 128 WebPage expected; |
| 129 expected.url = "http://www.test.com/"; |
| 130 expected.title = "My neat website about cool stuff"; |
| 131 Entity restaurant; |
| 132 |
| 133 Property type; |
| 134 type.name = "@type"; |
| 135 type.type = JSONValue::TypeString; |
| 136 type.strVal.push_back("Restaurant"); |
| 137 |
| 138 Property nameProperty; |
| 139 nameProperty.name = "name"; |
| 140 nameProperty.type = JSONValue::TypeString; |
| 141 nameProperty.strVal.push_back("Special characters for ya >_<;"); |
| 142 |
| 143 restaurant.properties.push_back(type); |
| 144 restaurant.properties.push_back(nameProperty); |
| 145 |
| 146 expected.entities.push_back(restaurant); |
| 83 EXPECT_EQ(expected, extracted); | 147 EXPECT_EQ(expected, extracted); |
| 84 } | 148 } |
| 85 | 149 |
| 86 TEST_F(CopylessPasteExtractorTest, multiple) { | 150 TEST_F(CopylessPasteExtractorTest, multiple) { |
| 87 setHtmlInnerHTML( | 151 setHtmlInnerHTML( |
| 88 "<head>" | 152 "<head>" |
| 89 "<script type=\"application/ld+json\">" + | 153 "<script type=\"application/ld+json\">" |
| 90 m_content + | 154 "\n" |
| 155 "\n" |
| 156 "{\"@type\": \"Restaurant\"," |
| 157 "\"name\": \"Special characters for ya >_<;\"" |
| 158 "}\n" |
| 159 "\n" |
| 91 "</script>" | 160 "</script>" |
| 92 "</head>" | 161 "</head>" |
| 93 "<body>" | 162 "<body>" |
| 94 "<script type=\"application/ld+json\">" + | 163 "<script type=\"application/ld+json\">" |
| 95 m_content + | 164 "\n" |
| 96 "</script>" | 165 "\n" |
| 97 "<script type=\"application/ld+json\">" + | 166 "{\"@type\": \"Restaurant\"," |
| 98 m_content + | 167 "\"name\": \"Special characters for ya >_<;\"" |
| 99 "</script>" | 168 "}\n" |
| 100 "</body>"); | 169 "\n" |
| 101 | 170 "</script>" |
| 102 String extracted = extract(); | 171 "<script type=\"application/ld+json\">" |
| 103 String expected = "[" + m_content + "," + m_content + "," + m_content + "]"; | 172 "\n" |
| 104 EXPECT_EQ(expected, extracted); | 173 "\n" |
| 105 } | 174 "{\"@type\": \"Restaurant\"," |
| 106 | 175 "\"name\": \"Special characters for ya >_<;\"" |
| 176 "}\n" |
| 177 "\n" |
| 178 "</script>" |
| 179 "</body>"); |
| 180 |
| 181 setURL("http://www.test.com/"); |
| 182 setTitle("My neat website about cool stuff"); |
| 183 |
| 184 WebPage extracted; |
| 185 ASSERT_TRUE(extract(&extracted)); |
| 186 WebPage expected; |
| 187 expected.url = "http://www.test.com/"; |
| 188 expected.title = "My neat website about cool stuff"; |
| 189 Entity restaurant; |
| 190 |
| 191 Property type; |
| 192 type.name = "@type"; |
| 193 type.type = JSONValue::TypeString; |
| 194 type.strVal.push_back("Restaurant"); |
| 195 |
| 196 Property nameProperty; |
| 197 nameProperty.name = "name"; |
| 198 nameProperty.type = JSONValue::TypeString; |
| 199 nameProperty.strVal.push_back("Special characters for ya >_<;"); |
| 200 |
| 201 restaurant.properties.push_back(type); |
| 202 restaurant.properties.push_back(nameProperty); |
| 203 |
| 204 expected.entities.push_back(restaurant); |
| 205 expected.entities.push_back(restaurant); |
| 206 expected.entities.push_back(restaurant); |
| 207 |
| 208 EXPECT_EQ(expected, extracted); |
| 209 } |
| 210 |
| 211 TEST_F(CopylessPasteExtractorTest, nested) { |
| 212 setHtmlInnerHTML( |
| 213 "<body>" |
| 214 "<script type=\"application/ld+json\">" |
| 215 "\n" |
| 216 "\n" |
| 217 "{\"@type\": \"Restaurant\"," |
| 218 "\"name\": \"Ye ol greasy diner\"," |
| 219 "\"address\": {" |
| 220 "\n" |
| 221 " \"streetAddress\": \"123 Big Oak Road\"," |
| 222 " \"addressLocality\": \"San Francisco\"" |
| 223 " }\n" |
| 224 "}\n" |
| 225 "\n" |
| 226 "</script>" |
| 227 "</body>"); |
| 228 setURL("http://www.test.com/"); |
| 229 setTitle("My neat website about cool stuff"); |
| 230 |
| 231 WebPage extracted; |
| 232 ASSERT_TRUE(extract(&extracted)); |
| 233 WebPage expected; |
| 234 expected.url = "http://www.test.com/"; |
| 235 expected.title = "My neat website about cool stuff"; |
| 236 Entity restaurant; |
| 237 |
| 238 Property type; |
| 239 type.name = "@type"; |
| 240 type.type = JSONValue::TypeString; |
| 241 type.strVal.push_back("Restaurant"); |
| 242 |
| 243 Property name; |
| 244 name.name = "name"; |
| 245 name.type = JSONValue::TypeString; |
| 246 name.strVal.push_back("Ye ol greasy diner"); |
| 247 |
| 248 Property streetAddress; |
| 249 streetAddress.name = "streetAddress"; |
| 250 streetAddress.type = JSONValue::TypeString; |
| 251 streetAddress.strVal.push_back("123 Big Oak Road"); |
| 252 |
| 253 Property addressLocality; |
| 254 addressLocality.name = "addressLocality"; |
| 255 addressLocality.type = JSONValue::TypeString; |
| 256 addressLocality.strVal.push_back("San Francisco"); |
| 257 |
| 258 Entity address; |
| 259 address.properties.push_back(streetAddress); |
| 260 address.properties.push_back(addressLocality); |
| 261 |
| 262 Property addressProperty; |
| 263 addressProperty.name = "address"; |
| 264 addressProperty.type = JSONValue::TypeObject; |
| 265 addressProperty.entityVal.push_back(address); |
| 266 |
| 267 restaurant.properties.push_back(type); |
| 268 restaurant.properties.push_back(name); |
| 269 restaurant.properties.push_back(addressProperty); |
| 270 |
| 271 expected.entities.push_back(restaurant); |
| 272 EXPECT_EQ(expected, extracted); |
| 273 } |
| 274 |
| 275 TEST_F(CopylessPasteExtractorTest, repeated) { |
| 276 setHtmlInnerHTML( |
| 277 "<body>" |
| 278 "<script type=\"application/ld+json\">" |
| 279 "\n" |
| 280 "\n" |
| 281 "{\"@type\": \"Restaurant\"," |
| 282 "\"name\": [ \"First name\", \"Second name\"]" |
| 283 "}\n" |
| 284 "\n" |
| 285 "</script>" |
| 286 "</body>"); |
| 287 setURL("http://www.test.com/"); |
| 288 setTitle("My neat website about cool stuff"); |
| 289 |
| 290 WebPage extracted; |
| 291 ASSERT_TRUE(extract(&extracted)); |
| 292 WebPage expected; |
| 293 expected.url = "http://www.test.com/"; |
| 294 expected.title = "My neat website about cool stuff"; |
| 295 Entity restaurant; |
| 296 |
| 297 Property type; |
| 298 type.name = "@type"; |
| 299 type.type = JSONValue::TypeString; |
| 300 type.strVal.push_back("Restaurant"); |
| 301 |
| 302 Property nameProperty; |
| 303 nameProperty.name = "name"; |
| 304 nameProperty.type = JSONValue::TypeString; |
| 305 nameProperty.strVal.push_back("First name"); |
| 306 nameProperty.strVal.push_back("Second name"); |
| 307 |
| 308 restaurant.properties.push_back(type); |
| 309 restaurant.properties.push_back(nameProperty); |
| 310 |
| 311 expected.entities.push_back(restaurant); |
| 312 EXPECT_EQ(expected, extracted); |
| 313 } |
| 314 |
| 315 TEST_F(CopylessPasteExtractorTest, repeatedObject) { |
| 316 setHtmlInnerHTML( |
| 317 "<body>" |
| 318 "<script type=\"application/ld+json\">" |
| 319 "\n" |
| 320 "\n" |
| 321 "{\"@type\": \"Restaurant\"," |
| 322 "\"name\": \"Ye ol greasy diner\"," |
| 323 "\"address\": [" |
| 324 "\n" |
| 325 " {" |
| 326 " \"streetAddress\": \"123 Big Oak Road\"," |
| 327 " \"addressLocality\": \"San Francisco\"" |
| 328 " },\n" |
| 329 " {" |
| 330 " \"streetAddress\": \"123 Big Oak Road\"," |
| 331 " \"addressLocality\": \"San Francisco\"" |
| 332 " }\n" |
| 333 "]\n" |
| 334 "}\n" |
| 335 "\n" |
| 336 "</script>" |
| 337 "</body>"); |
| 338 setURL("http://www.test.com/"); |
| 339 setTitle("My neat website about cool stuff"); |
| 340 |
| 341 WebPage extracted; |
| 342 ASSERT_TRUE(extract(&extracted)); |
| 343 WebPage expected; |
| 344 expected.url = "http://www.test.com/"; |
| 345 expected.title = "My neat website about cool stuff"; |
| 346 Entity restaurant; |
| 347 |
| 348 Property type; |
| 349 type.name = "@type"; |
| 350 type.type = JSONValue::TypeString; |
| 351 type.strVal.push_back("Restaurant"); |
| 352 |
| 353 Property name; |
| 354 name.name = "name"; |
| 355 name.type = JSONValue::TypeString; |
| 356 name.strVal.push_back("Ye ol greasy diner"); |
| 357 |
| 358 Property streetAddress; |
| 359 streetAddress.name = "streetAddress"; |
| 360 streetAddress.type = JSONValue::TypeString; |
| 361 streetAddress.strVal.push_back("123 Big Oak Road"); |
| 362 |
| 363 Property addressLocality; |
| 364 addressLocality.name = "addressLocality"; |
| 365 addressLocality.type = JSONValue::TypeString; |
| 366 addressLocality.strVal.push_back("San Francisco"); |
| 367 |
| 368 Entity address; |
| 369 address.properties.push_back(streetAddress); |
| 370 address.properties.push_back(addressLocality); |
| 371 |
| 372 Property addressProperty; |
| 373 addressProperty.name = "address"; |
| 374 addressProperty.type = JSONValue::TypeObject; |
| 375 addressProperty.entityVal.push_back(address); |
| 376 addressProperty.entityVal.push_back(address); |
| 377 |
| 378 restaurant.properties.push_back(type); |
| 379 restaurant.properties.push_back(name); |
| 380 restaurant.properties.push_back(addressProperty); |
| 381 |
| 382 expected.entities.push_back(restaurant); |
| 383 EXPECT_EQ(expected, extracted); |
| 384 } |
| 385 |
| 386 TEST_F(CopylessPasteExtractorTest, truncateLongString) { |
| 387 String maxLengthString; |
| 388 for (int i = 0; i < 200; ++i) { |
| 389 maxLengthString.append("a"); |
| 390 } |
| 391 String tooLongString(maxLengthString); |
| 392 tooLongString.append("a"); |
| 393 setHtmlInnerHTML( |
| 394 "<body>" |
| 395 "<script type=\"application/ld+json\">" |
| 396 "\n" |
| 397 "\n" |
| 398 "{\"@type\": \"Restaurant\"," |
| 399 "\"name\": \"" + |
| 400 tooLongString + |
| 401 "\"" |
| 402 "}\n" |
| 403 "\n" |
| 404 "</script>" |
| 405 "</body>"); |
| 406 setURL("http://www.test.com/"); |
| 407 setTitle("My neat website about cool stuff"); |
| 408 |
| 409 WebPage extracted; |
| 410 ASSERT_TRUE(extract(&extracted)); |
| 411 WebPage expected; |
| 412 expected.url = "http://www.test.com/"; |
| 413 expected.title = "My neat website about cool stuff"; |
| 414 Entity restaurant; |
| 415 |
| 416 Property type; |
| 417 type.name = "@type"; |
| 418 type.type = JSONValue::TypeString; |
| 419 type.strVal.push_back("Restaurant"); |
| 420 |
| 421 Property nameProperty; |
| 422 nameProperty.name = "name"; |
| 423 nameProperty.type = JSONValue::TypeString; |
| 424 nameProperty.strVal.push_back(maxLengthString); |
| 425 |
| 426 restaurant.properties.push_back(type); |
| 427 restaurant.properties.push_back(nameProperty); |
| 428 |
| 429 expected.entities.push_back(restaurant); |
| 430 EXPECT_EQ(expected, extracted); |
| 431 } |
| 432 |
| 433 TEST_F(CopylessPasteExtractorTest, enforceTypeExists) { |
| 434 setHtmlInnerHTML( |
| 435 "<body>" |
| 436 "<script type=\"application/ld+json\">" |
| 437 "\n" |
| 438 "\n" |
| 439 "{\"name\": \"Special characters for ya >_<;\"" |
| 440 "}\n" |
| 441 "\n" |
| 442 "</script>" |
| 443 "</body>"); |
| 444 setURL("http://www.test.com/"); |
| 445 setTitle("My neat website about cool stuff"); |
| 446 |
| 447 WebPage extracted; |
| 448 ASSERT_FALSE(extract(&extracted)); |
| 449 WebPage expected; |
| 450 EXPECT_EQ(expected, extracted); |
| 451 } |
| 452 |
| 453 TEST_F(CopylessPasteExtractorTest, enforceTypeWhitelist) { |
| 454 setHtmlInnerHTML( |
| 455 "<body>" |
| 456 "<script type=\"application/ld+json\">" |
| 457 "\n" |
| 458 "\n" |
| 459 "{\"@type\": \"UnsupportedType\"," |
| 460 "\"name\": \"Special characters for ya >_<;\"" |
| 461 "}\n" |
| 462 "\n" |
| 463 "</script>" |
| 464 "</body>"); |
| 465 setURL("http://www.test.com/"); |
| 466 setTitle("My neat website about cool stuff"); |
| 467 |
| 468 WebPage extracted; |
| 469 ASSERT_FALSE(extract(&extracted)); |
| 470 WebPage expected; |
| 471 EXPECT_EQ(expected, extracted); |
| 472 } |
| 473 |
| 474 TEST_F(CopylessPasteExtractorTest, truncateTooManyValuesInField) { |
| 475 String largeRepeatedField = "["; |
| 476 for (int i = 0; i < 101; ++i) { |
| 477 largeRepeatedField.append("\"a\""); |
| 478 if (i != 100) { |
| 479 largeRepeatedField.append(", "); |
| 480 } |
| 481 } |
| 482 largeRepeatedField.append("]"); |
| 483 setHtmlInnerHTML( |
| 484 "<body>" |
| 485 "<script type=\"application/ld+json\">" |
| 486 "\n" |
| 487 "\n" |
| 488 "{\"@type\": \"Restaurant\"," |
| 489 "\"name\": " + |
| 490 largeRepeatedField + |
| 491 "}\n" |
| 492 "\n" |
| 493 "</script>" |
| 494 "</body>"); |
| 495 setURL("http://www.test.com/"); |
| 496 setTitle("My neat website about cool stuff"); |
| 497 |
| 498 WebPage extracted; |
| 499 ASSERT_TRUE(extract(&extracted)); |
| 500 WebPage expected; |
| 501 expected.url = "http://www.test.com/"; |
| 502 expected.title = "My neat website about cool stuff"; |
| 503 Entity restaurant; |
| 504 |
| 505 Property type; |
| 506 type.name = "@type"; |
| 507 type.type = JSONValue::TypeString; |
| 508 type.strVal.push_back("Restaurant"); |
| 509 |
| 510 Property nameProperty; |
| 511 nameProperty.name = "name"; |
| 512 nameProperty.type = JSONValue::TypeString; |
| 513 for (int i = 0; i < 100; ++i) { |
| 514 nameProperty.strVal.push_back("a"); |
| 515 } |
| 516 |
| 517 restaurant.properties.push_back(type); |
| 518 restaurant.properties.push_back(nameProperty); |
| 519 |
| 520 expected.entities.push_back(restaurant); |
| 521 EXPECT_EQ(expected, extracted); |
| 522 } |
| 523 |
| 524 TEST_F(CopylessPasteExtractorTest, truncateTooManyFields) { |
| 525 String tooManyFields; |
| 526 for (int i = 0; i < 20; ++i) { |
| 527 tooManyFields.append(String::format("\"%d\": \"a\"", i)); |
| 528 if (i != 19) { |
| 529 tooManyFields.append(",\n"); |
| 530 } |
| 531 } |
| 532 setHtmlInnerHTML( |
| 533 "<body>" |
| 534 "<script type=\"application/ld+json\">" |
| 535 "\n" |
| 536 "\n" |
| 537 "{\"@type\": \"Restaurant\"," + |
| 538 tooManyFields + |
| 539 "}\n" |
| 540 "\n" |
| 541 "</script>" |
| 542 "</body>"); |
| 543 setURL("http://www.test.com/"); |
| 544 setTitle("My neat website about cool stuff"); |
| 545 |
| 546 WebPage extracted; |
| 547 ASSERT_TRUE(extract(&extracted)); |
| 548 WebPage expected; |
| 549 expected.url = "http://www.test.com/"; |
| 550 expected.title = "My neat website about cool stuff"; |
| 551 Entity restaurant; |
| 552 |
| 553 Property type; |
| 554 type.name = "@type"; |
| 555 type.type = JSONValue::TypeString; |
| 556 type.strVal.push_back("Restaurant"); |
| 557 |
| 558 restaurant.properties.push_back(type); |
| 559 |
| 560 // App Indexing limits to 20 fields. One of these is the @type, so there are |
| 561 // 19 left. |
| 562 for (int i = 0; i < 19; ++i) { |
| 563 Property p; |
| 564 p.name = String::number(i); |
| 565 p.type = JSONValue::TypeString; |
| 566 p.strVal.push_back("a"); |
| 567 restaurant.properties.push_back(p); |
| 568 } |
| 569 |
| 570 expected.entities.push_back(restaurant); |
| 571 EXPECT_EQ(expected, extracted); |
| 572 } |
| 573 |
| 574 TEST_F(CopylessPasteExtractorTest, numbers) { |
| 575 setHtmlInnerHTML( |
| 576 "<body>" |
| 577 "<script type=\"application/ld+json\">" |
| 578 "\n" |
| 579 "\n" |
| 580 "{\"@type\": \"Restaurant\"," |
| 581 "\"int\": 1," |
| 582 "\"double\": 1.5" |
| 583 "}\n" |
| 584 "\n" |
| 585 "</script>" |
| 586 "</body>"); |
| 587 setURL("http://www.test.com/"); |
| 588 setTitle("My neat website about cool stuff"); |
| 589 |
| 590 WebPage extracted; |
| 591 ASSERT_TRUE(extract(&extracted)); |
| 592 WebPage expected; |
| 593 expected.url = "http://www.test.com/"; |
| 594 expected.title = "My neat website about cool stuff"; |
| 595 Entity restaurant; |
| 596 |
| 597 Property type; |
| 598 type.name = "@type"; |
| 599 type.type = JSONValue::TypeString; |
| 600 type.strVal.push_back("Restaurant"); |
| 601 |
| 602 Property intProperty; |
| 603 intProperty.name = "int"; |
| 604 intProperty.type = JSONValue::TypeInteger; |
| 605 intProperty.intVal.push_back(1); |
| 606 |
| 607 Property longProperty; |
| 608 longProperty.name = "double"; |
| 609 longProperty.type = JSONValue::TypeDouble; |
| 610 longProperty.doubleVal.push_back(1.5); |
| 611 |
| 612 restaurant.properties.push_back(type); |
| 613 restaurant.properties.push_back(intProperty); |
| 614 restaurant.properties.push_back(longProperty); |
| 615 |
| 616 expected.entities.push_back(restaurant); |
| 617 EXPECT_EQ(expected, extracted); |
| 618 } |
| 619 |
| 107 } // namespace | 620 } // namespace |
| 108 | 621 |
| 109 } // namespace blink | 622 } // namespace blink |
| OLD | NEW |