Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(304)

Side by Side Diff: third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractorTest.cpp

Issue 2777623002: Move json-ld parsing to Blink.
Patch Set: update policy enforcement in blink, clank handling of repeated values | Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractor.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 // Copyright 2017 The Chromium Authors. All rights reserved. 1 // Copyright 2017 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "modules/document_metadata/CopylessPasteExtractor.cpp"
5 #include "modules/document_metadata/CopylessPasteExtractor.h" 6 #include "modules/document_metadata/CopylessPasteExtractor.h"
6 7
7 #include <memory> 8 #include <memory>
9 #include <string>
8 #include "core/dom/Document.h" 10 #include "core/dom/Document.h"
9 #include "core/dom/Element.h" 11 #include "core/dom/Element.h"
10 #include "core/testing/DummyPageHolder.h" 12 #include "core/testing/DummyPageHolder.h"
13 #include "platform/json/JSONValues.h"
14 #include "platform/testing/URLTestHelpers.h"
11 #include "testing/gtest/include/gtest/gtest.h" 15 #include "testing/gtest/include/gtest/gtest.h"
12 #include "wtf/text/StringBuilder.h" 16 #include "wtf/text/StringBuilder.h"
13 17
14 namespace blink { 18 namespace blink {
15 19
16 namespace { 20 namespace {
17 21
18 class CopylessPasteExtractorTest : public ::testing::Test { 22 class CopylessPasteExtractorTest : public ::testing::Test {
19 public: 23 public:
20 CopylessPasteExtractorTest() 24 CopylessPasteExtractorTest() {}
21 : m_content(
22 "\n"
23 "\n"
24 "{\"@type\": \"NewsArticle\","
25 "\"headline\": \"Special characters for ya >_<;\"\n"
26 "}\n"
27 "\n") {}
28 25
29 protected: 26 protected:
30 void SetUp() override; 27 void SetUp() override;
31 28
32 void TearDown() override { ThreadState::current()->collectAllGarbage(); } 29 void TearDown() override { ThreadState::current()->collectAllGarbage(); }
33 30
34 Document& document() const { return m_dummyPageHolder->document(); } 31 Document& document() const { return m_dummyPageHolder->document(); }
35 32
36 String extract() { return CopylessPasteExtractor::extract(document()); } 33 bool extract(WebPage* page) {
34 return CopylessPasteExtractor::extract(document(), page);
35 }
37 36
38 void setHtmlInnerHTML(const String&); 37 void setHtmlInnerHTML(const String&);
39 38
39 void setURL(const std::string);
40
41 void setTitle(const String&);
42
40 String m_content; 43 String m_content;
41 44
42 private: 45 private:
43 std::unique_ptr<DummyPageHolder> m_dummyPageHolder; 46 std::unique_ptr<DummyPageHolder> m_dummyPageHolder;
44 }; 47 };
45 48
46 void CopylessPasteExtractorTest::SetUp() { 49 void CopylessPasteExtractorTest::SetUp() {
47 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); 50 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600));
48 } 51 }
49 52
50 void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) { 53 void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) {
51 document().documentElement()->setInnerHTML((htmlContent)); 54 document().documentElement()->setInnerHTML((htmlContent));
52 } 55 }
53 56
57 void CopylessPasteExtractorTest::setURL(const std::string url) {
58 document().setURL(URLTestHelpers::toKURL(url));
59 }
60
61 void CopylessPasteExtractorTest::setTitle(const String& title) {
62 document().setTitle(title);
63 }
64
54 TEST_F(CopylessPasteExtractorTest, empty) { 65 TEST_F(CopylessPasteExtractorTest, empty) {
55 String extracted = extract(); 66 WebPage page;
56 String expected = "[]"; 67 ASSERT_FALSE(extract(&page));
57 EXPECT_EQ(expected, extracted); 68 EXPECT_EQ(WebPage(), page);
58 } 69 }
59 70
60 TEST_F(CopylessPasteExtractorTest, basic) { 71 TEST_F(CopylessPasteExtractorTest, basic) {
61 setHtmlInnerHTML( 72 setHtmlInnerHTML(
62 "<body>" 73 "<body>"
63 "<script type=\"application/ld+json\">" + 74 "<script type=\"application/ld+json\">"
64 m_content + 75 "\n"
65 "</script>" 76 "\n"
66 "</body>"); 77 "{\"@type\": \"Restaurant\","
67 78 "\"name\": \"Special characters for ya >_<;\""
68 String extracted = extract(); 79 "}\n"
69 String expected = "[" + m_content + "]"; 80 "\n"
81 "</script>"
82 "</body>");
83 setURL("http://www.test.com/");
84 setTitle("My neat website about cool stuff");
85
86 WebPage extracted;
87 ASSERT_TRUE(extract(&extracted));
88 WebPage expected;
89 expected.url = "http://www.test.com/";
90 expected.title = "My neat website about cool stuff";
91 Entity restaurant;
92
93 Property type;
94 type.name = "@type";
95 type.type = JSONValue::TypeString;
96 type.strVal.push_back("Restaurant");
97
98 Property nameProperty;
99 nameProperty.name = "name";
100 nameProperty.type = JSONValue::TypeString;
101 nameProperty.strVal.push_back("Special characters for ya >_<;");
102
103 restaurant.properties.push_back(type);
104 restaurant.properties.push_back(nameProperty);
105
106 expected.entities.push_back(restaurant);
70 EXPECT_EQ(expected, extracted); 107 EXPECT_EQ(expected, extracted);
71 } 108 }
72 109
73 TEST_F(CopylessPasteExtractorTest, header) { 110 TEST_F(CopylessPasteExtractorTest, header) {
74 setHtmlInnerHTML( 111 setHtmlInnerHTML(
75 "<head>" 112 "<head>"
76 "<script type=\"application/ld+json\">" + 113 "<script type=\"application/ld+json\">"
77 m_content + 114 "\n"
115 "\n"
116 "{\"@type\": \"Restaurant\","
117 "\"name\": \"Special characters for ya >_<;\""
118 "}\n"
119 "\n"
78 "</script>" 120 "</script>"
79 "</head>"); 121 "</head>");
80 122
81 String extracted = extract(); 123 setURL("http://www.test.com/");
82 String expected = "[" + m_content + "]"; 124 setTitle("My neat website about cool stuff");
125
126 WebPage extracted;
127 ASSERT_TRUE(extract(&extracted));
128 WebPage expected;
129 expected.url = "http://www.test.com/";
130 expected.title = "My neat website about cool stuff";
131 Entity restaurant;
132
133 Property type;
134 type.name = "@type";
135 type.type = JSONValue::TypeString;
136 type.strVal.push_back("Restaurant");
137
138 Property nameProperty;
139 nameProperty.name = "name";
140 nameProperty.type = JSONValue::TypeString;
141 nameProperty.strVal.push_back("Special characters for ya >_<;");
142
143 restaurant.properties.push_back(type);
144 restaurant.properties.push_back(nameProperty);
145
146 expected.entities.push_back(restaurant);
83 EXPECT_EQ(expected, extracted); 147 EXPECT_EQ(expected, extracted);
84 } 148 }
85 149
86 TEST_F(CopylessPasteExtractorTest, multiple) { 150 TEST_F(CopylessPasteExtractorTest, multiple) {
87 setHtmlInnerHTML( 151 setHtmlInnerHTML(
88 "<head>" 152 "<head>"
89 "<script type=\"application/ld+json\">" + 153 "<script type=\"application/ld+json\">"
90 m_content + 154 "\n"
155 "\n"
156 "{\"@type\": \"Restaurant\","
157 "\"name\": \"Special characters for ya >_<;\""
158 "}\n"
159 "\n"
91 "</script>" 160 "</script>"
92 "</head>" 161 "</head>"
93 "<body>" 162 "<body>"
94 "<script type=\"application/ld+json\">" + 163 "<script type=\"application/ld+json\">"
95 m_content + 164 "\n"
96 "</script>" 165 "\n"
97 "<script type=\"application/ld+json\">" + 166 "{\"@type\": \"Restaurant\","
98 m_content + 167 "\"name\": \"Special characters for ya >_<;\""
99 "</script>" 168 "}\n"
100 "</body>"); 169 "\n"
101 170 "</script>"
102 String extracted = extract(); 171 "<script type=\"application/ld+json\">"
103 String expected = "[" + m_content + "," + m_content + "," + m_content + "]"; 172 "\n"
104 EXPECT_EQ(expected, extracted); 173 "\n"
105 } 174 "{\"@type\": \"Restaurant\","
106 175 "\"name\": \"Special characters for ya >_<;\""
176 "}\n"
177 "\n"
178 "</script>"
179 "</body>");
180
181 setURL("http://www.test.com/");
182 setTitle("My neat website about cool stuff");
183
184 WebPage extracted;
185 ASSERT_TRUE(extract(&extracted));
186 WebPage expected;
187 expected.url = "http://www.test.com/";
188 expected.title = "My neat website about cool stuff";
189 Entity restaurant;
190
191 Property type;
192 type.name = "@type";
193 type.type = JSONValue::TypeString;
194 type.strVal.push_back("Restaurant");
195
196 Property nameProperty;
197 nameProperty.name = "name";
198 nameProperty.type = JSONValue::TypeString;
199 nameProperty.strVal.push_back("Special characters for ya >_<;");
200
201 restaurant.properties.push_back(type);
202 restaurant.properties.push_back(nameProperty);
203
204 expected.entities.push_back(restaurant);
205 expected.entities.push_back(restaurant);
206 expected.entities.push_back(restaurant);
207
208 EXPECT_EQ(expected, extracted);
209 }
210
211 TEST_F(CopylessPasteExtractorTest, nested) {
212 setHtmlInnerHTML(
213 "<body>"
214 "<script type=\"application/ld+json\">"
215 "\n"
216 "\n"
217 "{\"@type\": \"Restaurant\","
218 "\"name\": \"Ye ol greasy diner\","
219 "\"address\": {"
220 "\n"
221 " \"streetAddress\": \"123 Big Oak Road\","
222 " \"addressLocality\": \"San Francisco\""
223 " }\n"
224 "}\n"
225 "\n"
226 "</script>"
227 "</body>");
228 setURL("http://www.test.com/");
229 setTitle("My neat website about cool stuff");
230
231 WebPage extracted;
232 ASSERT_TRUE(extract(&extracted));
233 WebPage expected;
234 expected.url = "http://www.test.com/";
235 expected.title = "My neat website about cool stuff";
236 Entity restaurant;
237
238 Property type;
239 type.name = "@type";
240 type.type = JSONValue::TypeString;
241 type.strVal.push_back("Restaurant");
242
243 Property name;
244 name.name = "name";
245 name.type = JSONValue::TypeString;
246 name.strVal.push_back("Ye ol greasy diner");
247
248 Property streetAddress;
249 streetAddress.name = "streetAddress";
250 streetAddress.type = JSONValue::TypeString;
251 streetAddress.strVal.push_back("123 Big Oak Road");
252
253 Property addressLocality;
254 addressLocality.name = "addressLocality";
255 addressLocality.type = JSONValue::TypeString;
256 addressLocality.strVal.push_back("San Francisco");
257
258 Entity address;
259 address.properties.push_back(streetAddress);
260 address.properties.push_back(addressLocality);
261
262 Property addressProperty;
263 addressProperty.name = "address";
264 addressProperty.type = JSONValue::TypeObject;
265 addressProperty.entityVal.push_back(address);
266
267 restaurant.properties.push_back(type);
268 restaurant.properties.push_back(name);
269 restaurant.properties.push_back(addressProperty);
270
271 expected.entities.push_back(restaurant);
272 EXPECT_EQ(expected, extracted);
273 }
274
275 TEST_F(CopylessPasteExtractorTest, repeated) {
276 setHtmlInnerHTML(
277 "<body>"
278 "<script type=\"application/ld+json\">"
279 "\n"
280 "\n"
281 "{\"@type\": \"Restaurant\","
282 "\"name\": [ \"First name\", \"Second name\"]"
283 "}\n"
284 "\n"
285 "</script>"
286 "</body>");
287 setURL("http://www.test.com/");
288 setTitle("My neat website about cool stuff");
289
290 WebPage extracted;
291 ASSERT_TRUE(extract(&extracted));
292 WebPage expected;
293 expected.url = "http://www.test.com/";
294 expected.title = "My neat website about cool stuff";
295 Entity restaurant;
296
297 Property type;
298 type.name = "@type";
299 type.type = JSONValue::TypeString;
300 type.strVal.push_back("Restaurant");
301
302 Property nameProperty;
303 nameProperty.name = "name";
304 nameProperty.type = JSONValue::TypeString;
305 nameProperty.strVal.push_back("First name");
306 nameProperty.strVal.push_back("Second name");
307
308 restaurant.properties.push_back(type);
309 restaurant.properties.push_back(nameProperty);
310
311 expected.entities.push_back(restaurant);
312 EXPECT_EQ(expected, extracted);
313 }
314
315 TEST_F(CopylessPasteExtractorTest, repeatedObject) {
316 setHtmlInnerHTML(
317 "<body>"
318 "<script type=\"application/ld+json\">"
319 "\n"
320 "\n"
321 "{\"@type\": \"Restaurant\","
322 "\"name\": \"Ye ol greasy diner\","
323 "\"address\": ["
324 "\n"
325 " {"
326 " \"streetAddress\": \"123 Big Oak Road\","
327 " \"addressLocality\": \"San Francisco\""
328 " },\n"
329 " {"
330 " \"streetAddress\": \"123 Big Oak Road\","
331 " \"addressLocality\": \"San Francisco\""
332 " }\n"
333 "]\n"
334 "}\n"
335 "\n"
336 "</script>"
337 "</body>");
338 setURL("http://www.test.com/");
339 setTitle("My neat website about cool stuff");
340
341 WebPage extracted;
342 ASSERT_TRUE(extract(&extracted));
343 WebPage expected;
344 expected.url = "http://www.test.com/";
345 expected.title = "My neat website about cool stuff";
346 Entity restaurant;
347
348 Property type;
349 type.name = "@type";
350 type.type = JSONValue::TypeString;
351 type.strVal.push_back("Restaurant");
352
353 Property name;
354 name.name = "name";
355 name.type = JSONValue::TypeString;
356 name.strVal.push_back("Ye ol greasy diner");
357
358 Property streetAddress;
359 streetAddress.name = "streetAddress";
360 streetAddress.type = JSONValue::TypeString;
361 streetAddress.strVal.push_back("123 Big Oak Road");
362
363 Property addressLocality;
364 addressLocality.name = "addressLocality";
365 addressLocality.type = JSONValue::TypeString;
366 addressLocality.strVal.push_back("San Francisco");
367
368 Entity address;
369 address.properties.push_back(streetAddress);
370 address.properties.push_back(addressLocality);
371
372 Property addressProperty;
373 addressProperty.name = "address";
374 addressProperty.type = JSONValue::TypeObject;
375 addressProperty.entityVal.push_back(address);
376 addressProperty.entityVal.push_back(address);
377
378 restaurant.properties.push_back(type);
379 restaurant.properties.push_back(name);
380 restaurant.properties.push_back(addressProperty);
381
382 expected.entities.push_back(restaurant);
383 EXPECT_EQ(expected, extracted);
384 }
385
386 TEST_F(CopylessPasteExtractorTest, truncateLongString) {
387 String maxLengthString;
388 for (int i = 0; i < 200; ++i) {
389 maxLengthString.append("a");
390 }
391 String tooLongString(maxLengthString);
392 tooLongString.append("a");
393 setHtmlInnerHTML(
394 "<body>"
395 "<script type=\"application/ld+json\">"
396 "\n"
397 "\n"
398 "{\"@type\": \"Restaurant\","
399 "\"name\": \"" +
400 tooLongString +
401 "\""
402 "}\n"
403 "\n"
404 "</script>"
405 "</body>");
406 setURL("http://www.test.com/");
407 setTitle("My neat website about cool stuff");
408
409 WebPage extracted;
410 ASSERT_TRUE(extract(&extracted));
411 WebPage expected;
412 expected.url = "http://www.test.com/";
413 expected.title = "My neat website about cool stuff";
414 Entity restaurant;
415
416 Property type;
417 type.name = "@type";
418 type.type = JSONValue::TypeString;
419 type.strVal.push_back("Restaurant");
420
421 Property nameProperty;
422 nameProperty.name = "name";
423 nameProperty.type = JSONValue::TypeString;
424 nameProperty.strVal.push_back(maxLengthString);
425
426 restaurant.properties.push_back(type);
427 restaurant.properties.push_back(nameProperty);
428
429 expected.entities.push_back(restaurant);
430 EXPECT_EQ(expected, extracted);
431 }
432
433 TEST_F(CopylessPasteExtractorTest, enforceTypeExists) {
434 setHtmlInnerHTML(
435 "<body>"
436 "<script type=\"application/ld+json\">"
437 "\n"
438 "\n"
439 "{\"name\": \"Special characters for ya >_<;\""
440 "}\n"
441 "\n"
442 "</script>"
443 "</body>");
444 setURL("http://www.test.com/");
445 setTitle("My neat website about cool stuff");
446
447 WebPage extracted;
448 ASSERT_FALSE(extract(&extracted));
449 WebPage expected;
450 EXPECT_EQ(expected, extracted);
451 }
452
453 TEST_F(CopylessPasteExtractorTest, enforceTypeWhitelist) {
454 setHtmlInnerHTML(
455 "<body>"
456 "<script type=\"application/ld+json\">"
457 "\n"
458 "\n"
459 "{\"@type\": \"UnsupportedType\","
460 "\"name\": \"Special characters for ya >_<;\""
461 "}\n"
462 "\n"
463 "</script>"
464 "</body>");
465 setURL("http://www.test.com/");
466 setTitle("My neat website about cool stuff");
467
468 WebPage extracted;
469 ASSERT_FALSE(extract(&extracted));
470 WebPage expected;
471 EXPECT_EQ(expected, extracted);
472 }
473
474 TEST_F(CopylessPasteExtractorTest, truncateTooManyValuesInField) {
475 String largeRepeatedField = "[";
476 for (int i = 0; i < 101; ++i) {
477 largeRepeatedField.append("\"a\"");
478 if (i != 100) {
479 largeRepeatedField.append(", ");
480 }
481 }
482 largeRepeatedField.append("]");
483 setHtmlInnerHTML(
484 "<body>"
485 "<script type=\"application/ld+json\">"
486 "\n"
487 "\n"
488 "{\"@type\": \"Restaurant\","
489 "\"name\": " +
490 largeRepeatedField +
491 "}\n"
492 "\n"
493 "</script>"
494 "</body>");
495 setURL("http://www.test.com/");
496 setTitle("My neat website about cool stuff");
497
498 WebPage extracted;
499 ASSERT_TRUE(extract(&extracted));
500 WebPage expected;
501 expected.url = "http://www.test.com/";
502 expected.title = "My neat website about cool stuff";
503 Entity restaurant;
504
505 Property type;
506 type.name = "@type";
507 type.type = JSONValue::TypeString;
508 type.strVal.push_back("Restaurant");
509
510 Property nameProperty;
511 nameProperty.name = "name";
512 nameProperty.type = JSONValue::TypeString;
513 for (int i = 0; i < 100; ++i) {
514 nameProperty.strVal.push_back("a");
515 }
516
517 restaurant.properties.push_back(type);
518 restaurant.properties.push_back(nameProperty);
519
520 expected.entities.push_back(restaurant);
521 EXPECT_EQ(expected, extracted);
522 }
523
524 TEST_F(CopylessPasteExtractorTest, truncateTooManyFields) {
525 String tooManyFields;
526 for (int i = 0; i < 20; ++i) {
527 tooManyFields.append(String::format("\"%d\": \"a\"", i));
528 if (i != 19) {
529 tooManyFields.append(",\n");
530 }
531 }
532 setHtmlInnerHTML(
533 "<body>"
534 "<script type=\"application/ld+json\">"
535 "\n"
536 "\n"
537 "{\"@type\": \"Restaurant\"," +
538 tooManyFields +
539 "}\n"
540 "\n"
541 "</script>"
542 "</body>");
543 setURL("http://www.test.com/");
544 setTitle("My neat website about cool stuff");
545
546 WebPage extracted;
547 ASSERT_TRUE(extract(&extracted));
548 WebPage expected;
549 expected.url = "http://www.test.com/";
550 expected.title = "My neat website about cool stuff";
551 Entity restaurant;
552
553 Property type;
554 type.name = "@type";
555 type.type = JSONValue::TypeString;
556 type.strVal.push_back("Restaurant");
557
558 restaurant.properties.push_back(type);
559
560 // App Indexing limits to 20 fields. One of these is the @type, so there are
561 // 19 left.
562 for (int i = 0; i < 19; ++i) {
563 Property p;
564 p.name = String::number(i);
565 p.type = JSONValue::TypeString;
566 p.strVal.push_back("a");
567 restaurant.properties.push_back(p);
568 }
569
570 expected.entities.push_back(restaurant);
571 EXPECT_EQ(expected, extracted);
572 }
573
574 TEST_F(CopylessPasteExtractorTest, numbers) {
575 setHtmlInnerHTML(
576 "<body>"
577 "<script type=\"application/ld+json\">"
578 "\n"
579 "\n"
580 "{\"@type\": \"Restaurant\","
581 "\"int\": 1,"
582 "\"double\": 1.5"
583 "}\n"
584 "\n"
585 "</script>"
586 "</body>");
587 setURL("http://www.test.com/");
588 setTitle("My neat website about cool stuff");
589
590 WebPage extracted;
591 ASSERT_TRUE(extract(&extracted));
592 WebPage expected;
593 expected.url = "http://www.test.com/";
594 expected.title = "My neat website about cool stuff";
595 Entity restaurant;
596
597 Property type;
598 type.name = "@type";
599 type.type = JSONValue::TypeString;
600 type.strVal.push_back("Restaurant");
601
602 Property intProperty;
603 intProperty.name = "int";
604 intProperty.type = JSONValue::TypeInteger;
605 intProperty.intVal.push_back(1);
606
607 Property longProperty;
608 longProperty.name = "double";
609 longProperty.type = JSONValue::TypeDouble;
610 longProperty.doubleVal.push_back(1.5);
611
612 restaurant.properties.push_back(type);
613 restaurant.properties.push_back(intProperty);
614 restaurant.properties.push_back(longProperty);
615
616 expected.entities.push_back(restaurant);
617 EXPECT_EQ(expected, extracted);
618 }
619
107 } // namespace 620 } // namespace
108 621
109 } // namespace blink 622 } // namespace blink
OLD | NEW
« no previous file with comments | « third_party/WebKit/Source/modules/document_metadata/CopylessPasteExtractor.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698