Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(58)

Side by Side Diff: third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp

Issue 2793103002: Parse JSON in Blink for CopylessPaste. (Closed)
Patch Set: more const Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2017 The Chromium Authors. All rights reserved. 1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "modules/document_metadata/CopylessPasteExtractor.h"
6 5
7 #include <memory> 6 #include <memory>
7 #include <string>
8 #include <utility>
8 #include "core/dom/Document.h" 9 #include "core/dom/Document.h"
9 #include "core/dom/Element.h" 10 #include "core/dom/Element.h"
10 #include "core/testing/DummyPageHolder.h" 11 #include "core/testing/DummyPageHolder.h"
12 #include "modules/document_metadata/CopylessPasteExtractor.h"
13 #include "platform/json/JSONValues.h"
14 #include "platform/testing/URLTestHelpers.h"
15 #include "public/platform/modules/document_metadata/copyless_paste.mojom-blink.h "
11 #include "testing/gtest/include/gtest/gtest.h" 16 #include "testing/gtest/include/gtest/gtest.h"
12 #include "wtf/text/StringBuilder.h" 17 #include "wtf/text/StringBuilder.h"
13 18
14 namespace blink { 19 namespace blink {
15 20
16 namespace { 21 namespace {
17 22
23 using mojom::blink::Entity;
24 using mojom::blink::EntityPtr;
25 using mojom::blink::Property;
26 using mojom::blink::PropertyPtr;
27 using mojom::blink::Values;
28 using mojom::blink::ValuesPtr;
29 using mojom::blink::WebPage;
30 using mojom::blink::WebPagePtr;
31
// gtest fixture for CopylessPasteExtractor, shown as a side-by-side diff: on
// each line the first number/text is the old revision and the second is the
// new revision.
//
// Old revision: the fixture stored a JSON-LD snippet in m_content and
// extract() returned a String.
// New revision: m_content is removed, extract() fills a WebPagePtr and
// returns a bool, and helpers are declared for setting the document URL and
// title and for building the expected mojom Property / WebPage values.
//
// NOTE(review): setURL takes `const std::string` by value while createWebPage
// takes `const std::string&`; createBooleanProperty / createLongProperty take
// bool / int64_t by const reference. Consider making these consistent.
18 class CopylessPasteExtractorTest : public ::testing::Test { 32 class CopylessPasteExtractorTest : public ::testing::Test {
19 public: 33 public:
20 CopylessPasteExtractorTest() 34 CopylessPasteExtractorTest() {}
21 : m_content(
22 "\n"
23 "\n"
24 "{\"@type\": \"NewsArticle\","
25 "\"headline\": \"Special characters for ya >_<;\"\n"
26 "}\n"
27 "\n") {}
28 35
29 protected: 36 protected:
30 void SetUp() override; 37 void SetUp() override;
31 38
// Requests a full garbage collection on the current ThreadState after each
// test.
32 void TearDown() override { ThreadState::current()->collectAllGarbage(); } 39 void TearDown() override { ThreadState::current()->collectAllGarbage(); }
33 40
// Document owned by the dummy page holder created in SetUp().
34 Document& document() const { return m_dummyPageHolder->document(); } 41 Document& document() const { return m_dummyPageHolder->document(); }
35 42
// Runs the extractor on document(). The new revision passes the pointee of
// |page| to the extractor and forwards its bool result.
36 String extract() { return CopylessPasteExtractor::extract(document()); } 43 bool extract(WebPagePtr& page) {
44 return CopylessPasteExtractor::extract(document(), *page);
45 }
37 46
38 void setHtmlInnerHTML(const String&); 47 void setHtmlInnerHTML(const String&);
39 48
40 String m_content; 49 void setURL(const std::string);
50
51 void setTitle(const String&);
52
53 PropertyPtr createStringProperty(const String&, const String&);
54
55 PropertyPtr createBooleanProperty(const String&, const bool&);
56
57 PropertyPtr createLongProperty(const String&, const int64_t&);
58
59 PropertyPtr createEntityProperty(const String&, EntityPtr);
60
61 WebPagePtr createWebPage(const std::string&, const String&);
41 62
42 private: 63 private:
43 std::unique_ptr<DummyPageHolder> m_dummyPageHolder; 64 std::unique_ptr<DummyPageHolder> m_dummyPageHolder;
44 }; 65 };
45 66
// Creates the DummyPageHolder (constructed with IntSize(800, 600)) whose
// document each test populates. Unchanged between the old and new revisions.
46 void CopylessPasteExtractorTest::SetUp() { 67 void CopylessPasteExtractorTest::SetUp() {
47 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); 68 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600));
48 } 69 }
49 70
// Sets |htmlContent| as the inner HTML of the test document's root element.
// Unchanged between the old and new revisions.
50 void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) { 71 void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) {
51 document().documentElement()->setInnerHTML((htmlContent)); 72 document().documentElement()->setInnerHTML((htmlContent));
52 } 73 }
53 74
// (New revision only.) Converts |url| with URLTestHelpers::toKURL and sets it
// as the URL of the test document.
// NOTE(review): |url| is taken by value as `const std::string`; a
// `const std::string&` would match createWebPage. The declaration in the
// fixture class would need the same change.
75 void CopylessPasteExtractorTest::setURL(const std::string url) {
76 document().setURL(URLTestHelpers::toKURL(url));
77 }
78
// (New revision only.) Sets |title| as the title of the test document.
79 void CopylessPasteExtractorTest::setTitle(const String& title) {
80 document().setTitle(title);
81 }
82
// (New revision only.) Builds an expected Property named |name| whose Values
// holds string values initialised from Vector<String>(1, value).
83 PropertyPtr CopylessPasteExtractorTest::createStringProperty(
84 const String& name,
85 const String& value) {
86 PropertyPtr p = Property::New();
87 p->name = name;
88 p->values = Values::New();
89 p->values->set_string_values(Vector<String>(1, value));
90 return p;
91 }
92
// (New revision only.) Builds an expected Property named |name| whose Values
// holds bool values initialised from Vector<bool>(1, value).
// NOTE(review): |value| is a bool passed by const reference; passing by value
// would be the usual choice for a primitive.
93 PropertyPtr CopylessPasteExtractorTest::createBooleanProperty(
94 const String& name,
95 const bool& value) {
96 PropertyPtr p = Property::New();
97 p->name = name;
98 p->values = Values::New();
99 p->values->set_bool_values(Vector<bool>(1, value));
100 return p;
101 }
102
// (New revision only.) Builds an expected Property named |name| whose Values
// holds long values initialised from Vector<int64_t>(1, value).
// NOTE(review): |value| is an int64_t passed by const reference; passing by
// value would be the usual choice for a primitive.
103 PropertyPtr CopylessPasteExtractorTest::createLongProperty(
104 const String& name,
105 const int64_t& value) {
106 PropertyPtr p = Property::New();
107 p->name = name;
108 p->values = Values::New();
109 p->values->set_long_values(Vector<int64_t>(1, value));
110 return p;
111 }
112
// (New revision only.) Builds an expected Property named |name| whose Values
// holds entity values containing just |value|. The vector is set empty first
// and |value| is then moved in, since EntityPtr is moved rather than copied
// here.
113 PropertyPtr CopylessPasteExtractorTest::createEntityProperty(const String& name,
114 EntityPtr value) {
115 PropertyPtr p = Property::New();
116 p->name = name;
117 p->values = Values::New();
118 p->values->set_entity_values(Vector<EntityPtr>());
119 p->values->get_entity_values().push_back(std::move(value));
120 return p;
121 }
122
// (New revision only.) Builds an expected WebPage with its url set from
// URLTestHelpers::toKURL(url) and its title set to |title|. Entities are left
// for the caller to append.
123 WebPagePtr CopylessPasteExtractorTest::createWebPage(const std::string& url,
124 const String& title) {
125 WebPagePtr wp = WebPage::New();
126 wp->url = URLTestHelpers::toKURL(url);
127 wp->title = title;
128 return wp;
129 }
130
// No HTML is set on the document.
// Old revision: the extracted string was expected to be "[]".
// New revision: extract() must return false and |page| must still compare
// equal to a fresh WebPage::New().
54 TEST_F(CopylessPasteExtractorTest, empty) { 131 TEST_F(CopylessPasteExtractorTest, empty) {
55 String extracted = extract(); 132 WebPagePtr page = WebPage::New();
56 String expected = "[]"; 133 ASSERT_FALSE(extract(page));
57 EXPECT_EQ(expected, extracted); 134 EXPECT_EQ(WebPage::New(), page);
58 } 135 }
59 136
// A single application/ld+json script inside <body>.
// Old revision: the script body was m_content and the expected string was
// "[" + m_content + "]".
// New revision: the JSON is inlined (a Restaurant with a string "name"), the
// document URL and title are set, and the expected WebPage carries that URL
// and title plus one Restaurant entity with a single string property.
60 TEST_F(CopylessPasteExtractorTest, basic) { 137 TEST_F(CopylessPasteExtractorTest, basic) {
61 setHtmlInnerHTML( 138 setHtmlInnerHTML(
62 "<body>" 139 "<body>"
63 "<script type=\"application/ld+json\">" + 140 "<script type=\"application/ld+json\">"
64 m_content + 141 "\n"
65 "</script>" 142 "\n"
66 "</body>"); 143 "{\"@type\": \"Restaurant\","
67 144 "\"name\": \"Special characters for ya >_<;\""
68 String extracted = extract(); 145 "}\n"
69 String expected = "[" + m_content + "]"; 146 "\n"
147 "</script>"
148 "</body>");
149 setURL("http://www.test.com/");
150 setTitle("My neat website about cool stuff");
151
152 WebPagePtr extracted = WebPage::New();
153 ASSERT_TRUE(extract(extracted));
154
155 WebPagePtr expected =
156 createWebPage("http://www.test.com/", "My neat website about cool stuff");
157
158 EntityPtr restaurant = Entity::New();
159 restaurant->type = "Restaurant";
160 restaurant->properties.push_back(
161 createStringProperty("name", "Special characters for ya >_<;"));
162
163 expected->entities.push_back(std::move(restaurant));
70 EXPECT_EQ(expected, extracted); 164 EXPECT_EQ(expected, extracted);
71 } 165 }
72 166
// Same scenario as the "basic" test, but the application/ld+json script is
// placed inside <head> instead of <body>.
// Old revision: expected string was "[" + m_content + "]".
// New revision: expects a WebPage with the document URL and title and one
// Restaurant entity with a string "name" property.
73 TEST_F(CopylessPasteExtractorTest, header) { 167 TEST_F(CopylessPasteExtractorTest, header) {
74 setHtmlInnerHTML( 168 setHtmlInnerHTML(
75 "<head>" 169 "<head>"
76 "<script type=\"application/ld+json\">" + 170 "<script type=\"application/ld+json\">"
77 m_content + 171 "\n"
172 "\n"
173 "{\"@type\": \"Restaurant\","
174 "\"name\": \"Special characters for ya >_<;\""
175 "}\n"
176 "\n"
78 "</script>" 177 "</script>"
79 "</head>"); 178 "</head>");
80 179
81 String extracted = extract(); 180 setURL("http://www.test.com/");
82 String expected = "[" + m_content + "]"; 181 setTitle("My neat website about cool stuff");
182
183 WebPagePtr extracted = WebPage::New();
184 ASSERT_TRUE(extract(extracted));
185
186 WebPagePtr expected =
187 createWebPage("http://www.test.com/", "My neat website about cool stuff");
188
189 EntityPtr restaurant = Entity::New();
190 restaurant->type = "Restaurant";
191 restaurant->properties.push_back(
192 createStringProperty("name", "Special characters for ya >_<;"));
193
194 expected->entities.push_back(std::move(restaurant));
195 EXPECT_EQ(expected, extracted);
196 }
197
// (New revision only.) A JSON boolean field ("open": true) is expected to be
// extracted as a property holding bool values.
198 TEST_F(CopylessPasteExtractorTest, booleanValue) {
199 setHtmlInnerHTML(
200 "<body>"
201 "<script type=\"application/ld+json\">"
202 "\n"
203 "\n"
204 "{\"@type\": \"Restaurant\","
205 "\"open\": true"
206 "}\n"
207 "\n"
208 "</script>"
209 "</body>");
210 setURL("http://www.test.com/");
211 setTitle("My neat website about cool stuff");
212
213 WebPagePtr extracted = WebPage::New();
214 ASSERT_TRUE(extract(extracted));
215
216 WebPagePtr expected =
217 createWebPage("http://www.test.com/", "My neat website about cool stuff");
218
219 EntityPtr restaurant = Entity::New();
220 restaurant->type = "Restaurant";
221 restaurant->properties.push_back(createBooleanProperty("open", true));
222
223 expected->entities.push_back(std::move(restaurant));
224 EXPECT_EQ(expected, extracted);
225 }
226
// (New revision only.) A JSON integer field ("long": 1) is expected to be
// extracted as a property holding long (int64_t) values.
227 TEST_F(CopylessPasteExtractorTest, longValue) {
228 setHtmlInnerHTML(
229 "<body>"
230 "<script type=\"application/ld+json\">"
231 "\n"
232 "\n"
233 "{\"@type\": \"Restaurant\","
234 "\"long\": 1"
235 "}\n"
236 "\n"
237 "</script>"
238 "</body>");
239 setURL("http://www.test.com/");
240 setTitle("My neat website about cool stuff");
241
242 WebPagePtr extracted = WebPage::New();
243 ASSERT_TRUE(extract(extracted));
244
245 WebPagePtr expected =
246 createWebPage("http://www.test.com/", "My neat website about cool stuff");
247
248 EntityPtr restaurant = Entity::New();
249 restaurant->type = "Restaurant";
250 restaurant->properties.push_back(createLongProperty("long", 1ll));
251
252 expected->entities.push_back(std::move(restaurant));
253 EXPECT_EQ(expected, extracted);
254 }
255
// (New revision only.) A JSON non-integer number ("double": 1.5) is expected
// to be extracted as a string property with the value "1.5", not as a numeric
// property.
256 TEST_F(CopylessPasteExtractorTest, doubleValue) {
257 setHtmlInnerHTML(
258 "<body>"
259 "<script type=\"application/ld+json\">"
260 "\n"
261 "\n"
262 "{\"@type\": \"Restaurant\","
263 "\"double\": 1.5"
264 "}\n"
265 "\n"
266 "</script>"
267 "</body>");
268 setURL("http://www.test.com/");
269 setTitle("My neat website about cool stuff");
270
271 WebPagePtr extracted = WebPage::New();
272 ASSERT_TRUE(extract(extracted));
273
274 WebPagePtr expected =
275 createWebPage("http://www.test.com/", "My neat website about cool stuff");
276
277 EntityPtr restaurant = Entity::New();
278 restaurant->type = "Restaurant";
279 restaurant->properties.push_back(createStringProperty("double", "1.5"));
280
281 expected->entities.push_back(std::move(restaurant));
83 EXPECT_EQ(expected, extracted); 282 EXPECT_EQ(expected, extracted);
84 } 283 }
85 284
// Three application/ld+json scripts: one in <head> and two in <body>.
// Old revision: each script body was m_content and the expected string was the
// three copies joined by "," inside "[" ... "]".
// New revision: each script holds the same Restaurant JSON and the expected
// WebPage contains three identical Restaurant entities.
86 TEST_F(CopylessPasteExtractorTest, multiple) { 285 TEST_F(CopylessPasteExtractorTest, multiple) {
87 setHtmlInnerHTML( 286 setHtmlInnerHTML(
88 "<head>" 287 "<head>"
89 "<script type=\"application/ld+json\">" + 288 "<script type=\"application/ld+json\">"
90 m_content + 289 "\n"
290 "\n"
291 "{\"@type\": \"Restaurant\","
292 "\"name\": \"Special characters for ya >_<;\""
293 "}\n"
294 "\n"
91 "</script>" 295 "</script>"
92 "</head>" 296 "</head>"
93 "<body>" 297 "<body>"
94 "<script type=\"application/ld+json\">" + 298 "<script type=\"application/ld+json\">"
95 m_content + 299 "\n"
96 "</script>" 300 "\n"
97 "<script type=\"application/ld+json\">" + 301 "{\"@type\": \"Restaurant\","
98 m_content + 302 "\"name\": \"Special characters for ya >_<;\""
99 "</script>" 303 "}\n"
100 "</body>"); 304 "\n"
101 305 "</script>"
102 String extracted = extract(); 306 "<script type=\"application/ld+json\">"
103 String expected = "[" + m_content + "," + m_content + "," + m_content + "]"; 307 "\n"
308 "\n"
309 "{\"@type\": \"Restaurant\","
310 "\"name\": \"Special characters for ya >_<;\""
311 "}\n"
312 "\n"
313 "</script>"
314 "</body>");
315
316 setURL("http://www.test.com/");
317 setTitle("My neat website about cool stuff");
318
319 WebPagePtr extracted = WebPage::New();
320 ASSERT_TRUE(extract(extracted));
321
322 WebPagePtr expected =
323 createWebPage("http://www.test.com/", "My neat website about cool stuff");
324
325 for (int i = 0; i < 3; ++i) {
326 EntityPtr restaurant = Entity::New();
327 restaurant->type = "Restaurant";
328 restaurant->properties.push_back(
329 createStringProperty("name", "Special characters for ya >_<;"));
330
331 expected->entities.push_back(std::move(restaurant));
332 }
333 EXPECT_EQ(expected, extracted);
334 }
335
// (New revision only.) A nested JSON object ("address") without its own
// "@type" is expected to be extracted as an entity-valued property whose
// entity has type "Thing" and carries the nested string fields.
336 TEST_F(CopylessPasteExtractorTest, nested) {
337 setHtmlInnerHTML(
338 "<body>"
339 "<script type=\"application/ld+json\">"
340 "\n"
341 "\n"
342 "{\"@type\": \"Restaurant\","
343 "\"name\": \"Ye ol greasy diner\","
344 "\"address\": {"
345 "\n"
346 " \"streetAddress\": \"123 Big Oak Road\","
347 " \"addressLocality\": \"San Francisco\""
348 " }\n"
349 "}\n"
350 "\n"
351 "</script>"
352 "</body>");
353 setURL("http://www.test.com/");
354 setTitle("My neat website about cool stuff");
355
356 WebPagePtr extracted = WebPage::New();
357 ASSERT_TRUE(extract(extracted));
358
359 WebPagePtr expected =
360 createWebPage("http://www.test.com/", "My neat website about cool stuff");
361
362 EntityPtr restaurant = Entity::New();
363 restaurant->type = "Restaurant";
364 restaurant->properties.push_back(
365 createStringProperty("name", "Ye ol greasy diner"));
366
367 EntityPtr address = Entity::New();
368 address->type = "Thing";
369 address->properties.push_back(
370 createStringProperty("streetAddress", "123 Big Oak Road"));
371 address->properties.push_back(
372 createStringProperty("addressLocality", "San Francisco"));
373
374 restaurant->properties.push_back(
375 createEntityProperty("address", std::move(address)));
376
377 expected->entities.push_back(std::move(restaurant));
378 EXPECT_EQ(expected, extracted);
379 }
380
// (New revision only.) A JSON array of strings ("name" with two entries) is
// expected to become a single property whose string values hold both entries
// in order.
381 TEST_F(CopylessPasteExtractorTest, repeated) {
382 setHtmlInnerHTML(
383 "<body>"
384 "<script type=\"application/ld+json\">"
385 "\n"
386 "\n"
387 "{\"@type\": \"Restaurant\","
388 "\"name\": [ \"First name\", \"Second name\" ]"
389 "}\n"
390 "\n"
391 "</script>"
392 "</body>");
393 setURL("http://www.test.com/");
394 setTitle("My neat website about cool stuff");
395
396 WebPagePtr extracted = WebPage::New();
397 ASSERT_TRUE(extract(extracted));
398
399 WebPagePtr expected =
400 createWebPage("http://www.test.com/", "My neat website about cool stuff");
401
402 EntityPtr restaurant = Entity::New();
403 restaurant->type = "Restaurant";
404
405 PropertyPtr name = Property::New();
406 name->name = "name";
407 name->values = Values::New();
408 Vector<String> nameValues;
409 nameValues.push_back("First name");
410 nameValues.push_back("Second name");
411 name->values->set_string_values(nameValues);
412
413 restaurant->properties.push_back(std::move(name));
414
415 expected->entities.push_back(std::move(restaurant));
416
417 EXPECT_EQ(expected, extracted);
418 }
419
// (New revision only.) A JSON array of two objects ("address") is expected to
// become a single entity-valued property holding two "Thing" entities, each
// with the nested string fields.
420 TEST_F(CopylessPasteExtractorTest, repeatedObject) {
421 setHtmlInnerHTML(
422 "<body>"
423 "<script type=\"application/ld+json\">"
424 "\n"
425 "\n"
426 "{\"@type\": \"Restaurant\","
427 "\"name\": \"Ye ol greasy diner\","
428 "\"address\": ["
429 "\n"
430 " {"
431 " \"streetAddress\": \"123 Big Oak Road\","
432 " \"addressLocality\": \"San Francisco\""
433 " },\n"
434 " {"
435 " \"streetAddress\": \"123 Big Oak Road\","
436 " \"addressLocality\": \"San Francisco\""
437 " }\n"
438 "]\n"
439 "}\n"
440 "\n"
441 "</script>"
442 "</body>");
443 setURL("http://www.test.com/");
444 setTitle("My neat website about cool stuff");
445
446 WebPagePtr extracted = WebPage::New();
447 ASSERT_TRUE(extract(extracted));
448
449 WebPagePtr expected =
450 createWebPage("http://www.test.com/", "My neat website about cool stuff");
451
452 EntityPtr restaurant = Entity::New();
453 restaurant->type = "Restaurant";
454 restaurant->properties.push_back(
455 createStringProperty("name", "Ye ol greasy diner"));
456
457 PropertyPtr addressProperty = Property::New();
458 addressProperty->name = "address";
459 addressProperty->values = Values::New();
460 addressProperty->values->set_entity_values(Vector<EntityPtr>());
461 for (int i = 0; i < 2; ++i) {
462 EntityPtr address = Entity::New();
463 address->type = "Thing";
464 address->properties.push_back(
465 createStringProperty("streetAddress", "123 Big Oak Road"));
466 address->properties.push_back(
467 createStringProperty("addressLocality", "San Francisco"));
468 addressProperty->values->get_entity_values().push_back(std::move(address));
469 }
470 restaurant->properties.push_back(std::move(addressProperty));
471
472 expected->entities.push_back(std::move(restaurant));
473 EXPECT_EQ(expected, extracted);
474 }
475
// (New revision only.) The page supplies a "name" of 201 'a' characters; the
// expected property value is the 200-character string, i.e. the test expects
// the extractor to truncate string values to 200 characters.
476 TEST_F(CopylessPasteExtractorTest, truncateLongString) {
477 String maxLengthString;
478 for (int i = 0; i < 200; ++i) {
479 maxLengthString.append("a");
480 }
// One character longer than the expected maximum.
481 String tooLongString(maxLengthString);
482 tooLongString.append("a");
483 setHtmlInnerHTML(
484 "<body>"
485 "<script type=\"application/ld+json\">"
486 "\n"
487 "\n"
488 "{\"@type\": \"Restaurant\","
489 "\"name\": \"" +
490 tooLongString +
491 "\""
492 "}\n"
493 "\n"
494 "</script>"
495 "</body>");
496 setURL("http://www.test.com/");
497 setTitle("My neat website about cool stuff");
498
499 WebPagePtr extracted = WebPage::New();
500 ASSERT_TRUE(extract(extracted));
501
502 WebPagePtr expected =
503 createWebPage("http://www.test.com/", "My neat website about cool stuff");
504
505 EntityPtr restaurant = Entity::New();
506 restaurant->type = "Restaurant";
507 restaurant->properties.push_back(
508 createStringProperty("name", maxLengthString));
509
510 expected->entities.push_back(std::move(restaurant));
511 EXPECT_EQ(expected, extracted);
512 }
513
// (New revision only.) The top-level JSON object has no "@type" field.
// extract() must return false and the output page must still compare equal to
// a fresh WebPage::New(), even though a URL and title were set on the
// document.
514 TEST_F(CopylessPasteExtractorTest, enforceTypeExists) {
515 setHtmlInnerHTML(
516 "<body>"
517 "<script type=\"application/ld+json\">"
518 "\n"
519 "\n"
520 "{\"name\": \"Special characters for ya >_<;\""
521 "}\n"
522 "\n"
523 "</script>"
524 "</body>");
525 setURL("http://www.test.com/");
526 setTitle("My neat website about cool stuff");
527
528 WebPagePtr extracted = WebPage::New();
529 ASSERT_FALSE(extract(extracted));
530 WebPagePtr expected = WebPage::New();
531 EXPECT_EQ(expected, extracted);
532 }
533
// (New revision only.) The top-level JSON object has "@type" set to
// "UnsupportedType". extract() must return false and the output page must
// still compare equal to a fresh WebPage::New().
534 TEST_F(CopylessPasteExtractorTest, enforceTypeWhitelist) {
535 setHtmlInnerHTML(
536 "<body>"
537 "<script type=\"application/ld+json\">"
538 "\n"
539 "\n"
540 "{\"@type\": \"UnsupportedType\","
541 "\"name\": \"Special characters for ya >_<;\""
542 "}\n"
543 "\n"
544 "</script>"
545 "</body>");
546 setURL("http://www.test.com/");
547 setTitle("My neat website about cool stuff");
548
549 WebPagePtr extracted = WebPage::New();
550 ASSERT_FALSE(extract(extracted));
551 WebPagePtr expected = WebPage::New();
552 EXPECT_EQ(expected, extracted);
553 }
554
// (New revision only.) The "name" field is a JSON array of 101 strings; the
// expected property holds only 100 values, i.e. the test expects the extractor
// to cap the number of values per field at 100.
555 TEST_F(CopylessPasteExtractorTest, truncateTooManyValuesInField) {
// Builds the JSON array text: 101 "a" entries separated by ", ".
556 String largeRepeatedField = "[";
557 for (int i = 0; i < 101; ++i) {
558 largeRepeatedField.append("\"a\"");
559 if (i != 100) {
560 largeRepeatedField.append(", ");
561 }
562 }
563 largeRepeatedField.append("]");
564 setHtmlInnerHTML(
565 "<body>"
566 "<script type=\"application/ld+json\">"
567 "\n"
568 "\n"
569 "{\"@type\": \"Restaurant\","
570 "\"name\": " +
571 largeRepeatedField +
572 "}\n"
573 "\n"
574 "</script>"
575 "</body>");
576 setURL("http://www.test.com/");
577 setTitle("My neat website about cool stuff");
578
579 WebPagePtr extracted = WebPage::New();
580 ASSERT_TRUE(extract(extracted));
581
582 WebPagePtr expected =
583 createWebPage("http://www.test.com/", "My neat website about cool stuff");
584
585 EntityPtr restaurant = Entity::New();
586 restaurant->type = "Restaurant";
587
588 PropertyPtr name = Property::New();
589 name->name = "name";
590 name->values = Values::New();
591 Vector<String> nameValues;
592 for (int i = 0; i < 100; ++i) {
593 nameValues.push_back("a");
594 }
595 name->values->set_string_values(nameValues);
596
597 restaurant->properties.push_back(std::move(name));
598
599 expected->entities.push_back(std::move(restaurant));
600
601 EXPECT_EQ(expected, extracted);
602 }
603
// (New revision only.) The JSON object carries "@type" plus 20 string fields
// keyed "0" through "19"; the expected entity has only the 19 properties "0"
// through "18".
// NOTE(review): presumably "@type" counts toward a 20-field limit, which is
// why one fewer property is expected — confirm against the extractor.
604 TEST_F(CopylessPasteExtractorTest, truncateTooManyFields) {
605 String tooManyFields;
606 for (int i = 0; i < 20; ++i) {
607 tooManyFields.append(String::format("\"%d\": \"a\"", i));
608 if (i != 19) {
609 tooManyFields.append(",\n");
610 }
611 }
612 setHtmlInnerHTML(
613 "<body>"
614 "<script type=\"application/ld+json\">"
615 "\n"
616 "\n"
617 "{\"@type\": \"Restaurant\"," +
618 tooManyFields +
619 "}\n"
620 "\n"
621 "</script>"
622 "</body>");
623 setURL("http://www.test.com/");
624 setTitle("My neat website about cool stuff");
625
626 WebPagePtr extracted = WebPage::New();
627 ASSERT_TRUE(extract(extracted));
628
629 WebPagePtr expected =
630 createWebPage("http://www.test.com/", "My neat website about cool stuff");
631
632 EntityPtr restaurant = Entity::New();
633 restaurant->type = "Restaurant";
634
635 for (int i = 0; i < 19; ++i) {
636 restaurant->properties.push_back(
637 createStringProperty(String::number(i), "a"));
638 }
639
640 expected->entities.push_back(std::move(restaurant));
641 EXPECT_EQ(expected, extracted);
642 }
643
// (New revision only.) A field whose value is an empty JSON array is expected
// to be dropped: extraction still succeeds and the Restaurant entity has no
// properties.
644 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithEmptyArray) {
645 setHtmlInnerHTML(
646 "<body>"
647 "<script type=\"application/ld+json\">"
648 "\n"
649 "\n"
650 "{\"@type\": \"Restaurant\","
651 "\"name\": []"
652 "}\n"
653 "\n"
654 "</script>"
655 "</body>");
656 setURL("http://www.test.com/");
657 setTitle("My neat website about cool stuff");
658
659 WebPagePtr extracted = WebPage::New();
660 ASSERT_TRUE(extract(extracted));
661
662 WebPagePtr expected =
663 createWebPage("http://www.test.com/", "My neat website about cool stuff");
664
665 EntityPtr restaurant = Entity::New();
666 restaurant->type = "Restaurant";
667
668 expected->entities.push_back(std::move(restaurant));
669
670 EXPECT_EQ(expected, extracted);
671 }
672
// (New revision only.) A field whose array mixes a string and a number is
// expected to be dropped: extraction still succeeds and the Restaurant entity
// has no properties.
673 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithMixedTypes) {
674 setHtmlInnerHTML(
675 "<body>"
676 "<script type=\"application/ld+json\">"
677 "\n"
678 "\n"
679 "{\"@type\": \"Restaurant\","
680 "\"name\": [ \"Name\", 1 ]"
681 "}\n"
682 "\n"
683 "</script>"
684 "</body>");
685 setURL("http://www.test.com/");
686 setTitle("My neat website about cool stuff");
687
688 WebPagePtr extracted = WebPage::New();
689 ASSERT_TRUE(extract(extracted));
690
691 WebPagePtr expected =
692 createWebPage("http://www.test.com/", "My neat website about cool stuff");
693
694 EntityPtr restaurant = Entity::New();
695 restaurant->type = "Restaurant";
696
697 expected->entities.push_back(std::move(restaurant));
698
699 EXPECT_EQ(expected, extracted);
700 }
701
// (New revision only.) A field whose array contains another array is expected
// to be dropped: extraction still succeeds and the Restaurant entity has no
// properties.
702 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithNestedArray) {
703 setHtmlInnerHTML(
704 "<body>"
705 "<script type=\"application/ld+json\">"
706 "\n"
707 "\n"
708 "{\"@type\": \"Restaurant\","
709 "\"name\": [ [ \"Name\" ] ]"
710 "}\n"
711 "\n"
712 "</script>"
713 "</body>");
714 setURL("http://www.test.com/");
715 setTitle("My neat website about cool stuff");
716
717 WebPagePtr extracted = WebPage::New();
718 ASSERT_TRUE(extract(extracted));
719
720 WebPagePtr expected =
721 createWebPage("http://www.test.com/", "My neat website about cool stuff");
722
723 EntityPtr restaurant = Entity::New();
724 restaurant->type = "Restaurant";
725
726 expected->entities.push_back(std::move(restaurant));
727
728 EXPECT_EQ(expected, extracted);
729 }
730
// (New revision only.) The input nests objects under the keys "1", "2", "3"
// and "4". The expected result keeps the entities for "1", "2" and "3", but
// the entity for "3" has no properties: the object under "4" (and its "5"
// field) is expected to be dropped.
731 TEST_F(CopylessPasteExtractorTest, enforceMaxNestingDepth) {
732 setHtmlInnerHTML(
733 "<body>"
734 "<script type=\"application/ld+json\">"
735 "\n"
736 "\n"
737 "{\"@type\": \"Restaurant\","
738 "\"name\": \"Ye ol greasy diner\","
739 "\"1\": {"
740 " \"2\": {"
741 " \"3\": {"
742 " \"4\": {"
743 " \"5\": 6"
744 " }\n"
745 " }\n"
746 " }\n"
747 "}\n"
748 "}\n"
749 "\n"
750 "</script>"
751 "</body>");
752 setURL("http://www.test.com/");
753 setTitle("My neat website about cool stuff");
754
755 WebPagePtr extracted = WebPage::New();
756 ASSERT_TRUE(extract(extracted));
757
758 WebPagePtr expected =
759 createWebPage("http://www.test.com/", "My neat website about cool stuff");
760
761 EntityPtr restaurant = Entity::New();
762 restaurant->type = "Restaurant";
763 restaurant->properties.push_back(
764 createStringProperty("name", "Ye ol greasy diner"));
765
766 EntityPtr entity1 = Entity::New();
767 entity1->type = "Thing";
768
769 EntityPtr entity2 = Entity::New();
770 entity2->type = "Thing";
771
772 EntityPtr entity3 = Entity::New();
773 entity3->type = "Thing";
774
// The expected entities are linked innermost-first because each EntityPtr is
// moved into its parent's property.
775 entity2->properties.push_back(createEntityProperty("3", std::move(entity3)));
776
777 entity1->properties.push_back(createEntityProperty("2", std::move(entity2)));
778
779 restaurant->properties.push_back(
780 createEntityProperty("1", std::move(entity1)));
781
782 expected->entities.push_back(std::move(restaurant));
783 EXPECT_EQ(expected, extracted);
784 }
785
// (New revision only.) The input nests objects under the keys "1", "2" and
// "3", and the innermost object has a scalar field "4": 5. The expected
// result keeps all three nested entities and the entity for "3" carries the
// long property "4" = 5, i.e. a non-object value at that depth is retained.
786 TEST_F(CopylessPasteExtractorTest, maxNestingDepthWithTerminalProperty) {
787 setHtmlInnerHTML(
788 "<body>"
789 "<script type=\"application/ld+json\">"
790 "\n"
791 "\n"
792 "{\"@type\": \"Restaurant\","
793 "\"name\": \"Ye ol greasy diner\","
794 "\"1\": {"
795 " \"2\": {"
796 " \"3\": {"
797 " \"4\": 5"
798 " }\n"
799 " }\n"
800 "}\n"
801 "}\n"
802 "\n"
803 "</script>"
804 "</body>");
805 setURL("http://www.test.com/");
806 setTitle("My neat website about cool stuff");
807
808 WebPagePtr extracted = WebPage::New();
809 ASSERT_TRUE(extract(extracted));
810
811 WebPagePtr expected =
812 createWebPage("http://www.test.com/", "My neat website about cool stuff");
813
814 EntityPtr restaurant = Entity::New();
815 restaurant->type = "Restaurant";
816 restaurant->properties.push_back(
817 createStringProperty("name", "Ye ol greasy diner"));
818
819 EntityPtr entity1 = Entity::New();
820 entity1->type = "Thing";
821
822 EntityPtr entity2 = Entity::New();
823 entity2->type = "Thing";
824
825 EntityPtr entity3 = Entity::New();
826 entity3->type = "Thing";
827
828 entity3->properties.push_back(createLongProperty("4", 5));
829
// The expected entities are linked innermost-first because each EntityPtr is
// moved into its parent's property.
830 entity2->properties.push_back(createEntityProperty("3", std::move(entity3)));
831
832 entity1->properties.push_back(createEntityProperty("2", std::move(entity2)));
833
834 restaurant->properties.push_back(
835 createEntityProperty("1", std::move(entity1)));
836
837 expected->entities.push_back(std::move(restaurant));
104 EXPECT_EQ(expected, extracted); 838 EXPECT_EQ(expected, extracted);
105 } 839 }
106 840
107 } // namespace 841 } // namespace
108
109 } // namespace blink 842 } // namespace blink
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698