OLD | NEW |
---|---|
1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "modules/document_metadata/CopylessPasteExtractor.h" | |
6 | 5 |
7 #include <memory> | 6 #include <memory> |
7 #include <string> | |
8 #include <utility> | |
8 #include "core/dom/Document.h" | 9 #include "core/dom/Document.h" |
9 #include "core/dom/Element.h" | 10 #include "core/dom/Element.h" |
10 #include "core/testing/DummyPageHolder.h" | 11 #include "core/testing/DummyPageHolder.h" |
12 #include "modules/document_metadata/CopylessPasteExtractor.h" | |
13 #include "platform/json/JSONValues.h" | |
14 #include "platform/testing/URLTestHelpers.h" | |
15 #include "public/platform/modules/document_metadata/copyless_paste.mojom-blink.h" | |
11 #include "testing/gtest/include/gtest/gtest.h" | 16 #include "testing/gtest/include/gtest/gtest.h" |
12 #include "wtf/text/StringBuilder.h" | 17 #include "wtf/text/StringBuilder.h" |
13 | 18 |
14 namespace blink { | 19 namespace blink { |
15 | 20 |
16 namespace { | 21 namespace { |
17 | 22 |
23 using mojom::document_metadata::blink::Entity; | |
24 using mojom::document_metadata::blink::EntityPtr; | |
25 using mojom::document_metadata::blink::Property; | |
26 using mojom::document_metadata::blink::PropertyPtr; | |
27 using mojom::document_metadata::blink::Values; | |
28 using mojom::document_metadata::blink::ValuesPtr; | |
29 using mojom::document_metadata::blink::WebPage; | |
30 using mojom::document_metadata::blink::WebPagePtr; | |
31 | |
18 class CopylessPasteExtractorTest : public ::testing::Test { | 32 class CopylessPasteExtractorTest : public ::testing::Test { |
19 public: | 33 public: |
20 CopylessPasteExtractorTest() | 34 CopylessPasteExtractorTest() {} |
21 : m_content( | |
22 "\n" | |
23 "\n" | |
24 "{\"@type\": \"NewsArticle\"," | |
25 "\"headline\": \"Special characters for ya >_<;\"\n" | |
26 "}\n" | |
27 "\n") {} | |
28 | 35 |
29 protected: | 36 protected: |
30 void SetUp() override; | 37 void SetUp() override; |
31 | 38 |
32 void TearDown() override { ThreadState::current()->collectAllGarbage(); } | 39 void TearDown() override { ThreadState::current()->collectAllGarbage(); } |
33 | 40 |
34 Document& document() const { return m_dummyPageHolder->document(); } | 41 Document& document() const { return m_dummyPageHolder->document(); } |
35 | 42 |
36 String extract() { return CopylessPasteExtractor::extract(document()); } | 43 bool extract(WebPagePtr& page) { |
esprehn
2017/04/07 01:05:30
Can you return WebPagePtr and let the caller do th...
dproctor
2017/04/07 01:41:15
Done.
| |
44 page = CopylessPasteExtractor::extract(document()); | |
45 return !page.is_null(); | |
46 } | |
37 | 47 |
38 void setHtmlInnerHTML(const String&); | 48 void setHtmlInnerHTML(const String&); |
39 | 49 |
40 String m_content; | 50 void setURL(const std::string); |
esprehn
2017/04/07 01:05:30
Can you use WTF::String instead?
dproctor
2017/04/07 01:41:15
Done.
| |
51 | |
52 void setTitle(const String&); | |
53 | |
54 PropertyPtr createStringProperty(const String&, const String&); | |
esprehn
2017/04/07 01:05:30
argument names would be nice here for all these
dproctor
2017/04/07 01:41:15
Done.
| |
55 | |
56 PropertyPtr createBooleanProperty(const String&, const bool&); | |
esprehn
2017/04/07 01:05:30
argument names for primitives like bools are very...
dproctor
2017/04/07 01:41:15
Done.
| |
57 | |
58 PropertyPtr createLongProperty(const String&, const int64_t&); | |
59 | |
60 PropertyPtr createEntityProperty(const String&, EntityPtr); | |
61 | |
62 WebPagePtr createWebPage(const std::string&, const String&); | |
esprehn
2017/04/07 01:05:30
Can you use WTF::String instead?
dproctor
2017/04/07 01:41:15
Done.
| |
41 | 63 |
42 private: | 64 private: |
43 std::unique_ptr<DummyPageHolder> m_dummyPageHolder; | 65 std::unique_ptr<DummyPageHolder> m_dummyPageHolder; |
44 }; | 66 }; |
45 | 67 |
46 void CopylessPasteExtractorTest::SetUp() { | 68 void CopylessPasteExtractorTest::SetUp() { |
47 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); | 69 m_dummyPageHolder = DummyPageHolder::create(IntSize(800, 600)); |
48 } | 70 } |
49 | 71 |
50 void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) { | 72 void CopylessPasteExtractorTest::setHtmlInnerHTML(const String& htmlContent) { |
51 document().documentElement()->setInnerHTML((htmlContent)); | 73 document().documentElement()->setInnerHTML((htmlContent)); |
52 } | 74 } |
53 | 75 |
76 void CopylessPasteExtractorTest::setURL(const std::string url) { | |
77 document().setURL(URLTestHelpers::toKURL(url)); | |
78 } | |
79 | |
80 void CopylessPasteExtractorTest::setTitle(const String& title) { | |
81 document().setTitle(title); | |
82 } | |
83 | |
84 PropertyPtr CopylessPasteExtractorTest::createStringProperty( | |
85 const String& name, | |
86 const String& value) { | |
87 PropertyPtr p = Property::New(); | |
88 p->name = name; | |
89 p->values = Values::New(); | |
90 p->values->set_string_values(Vector<String>(1, value)); | |
91 return p; | |
92 } | |
93 | |
94 PropertyPtr CopylessPasteExtractorTest::createBooleanProperty( | |
95 const String& name, | |
96 const bool& value) { | |
97 PropertyPtr p = Property::New(); | |
98 p->name = name; | |
99 p->values = Values::New(); | |
100 p->values->set_bool_values(Vector<bool>(1, value)); | |
101 return p; | |
102 } | |
103 | |
104 PropertyPtr CopylessPasteExtractorTest::createLongProperty( | |
105 const String& name, | |
106 const int64_t& value) { | |
107 PropertyPtr p = Property::New(); | |
108 p->name = name; | |
109 p->values = Values::New(); | |
110 p->values->set_long_values(Vector<int64_t>(1, value)); | |
111 return p; | |
112 } | |
113 | |
114 PropertyPtr CopylessPasteExtractorTest::createEntityProperty(const String& name, | |
115 EntityPtr value) { | |
116 PropertyPtr p = Property::New(); | |
esprehn
2017/04/07 01:05:30
we usually don't abbreviate, so all these variable...
dproctor
2017/04/07 01:41:15
Done.
| |
117 p->name = name; | |
118 p->values = Values::New(); | |
119 p->values->set_entity_values(Vector<EntityPtr>()); | |
120 p->values->get_entity_values().push_back(std::move(value)); | |
121 return p; | |
122 } | |
123 | |
124 WebPagePtr CopylessPasteExtractorTest::createWebPage(const std::string& url, | |
125 const String& title) { | |
126 WebPagePtr wp = WebPage::New(); | |
127 wp->url = URLTestHelpers::toKURL(url); | |
128 wp->title = title; | |
129 return wp; | |
130 } | |
131 | |
54 TEST_F(CopylessPasteExtractorTest, empty) { | 132 TEST_F(CopylessPasteExtractorTest, empty) { |
55 String extracted = extract(); | 133 WebPagePtr page = WebPage::New(); |
56 String expected = "[]"; | 134 ASSERT_FALSE(extract(page)); |
57 EXPECT_EQ(expected, extracted); | 135 ASSERT_TRUE(page.is_null()); |
58 } | 136 } |
59 | 137 |
60 TEST_F(CopylessPasteExtractorTest, basic) { | 138 TEST_F(CopylessPasteExtractorTest, basic) { |
61 setHtmlInnerHTML( | 139 setHtmlInnerHTML( |
62 "<body>" | 140 "<body>" |
63 "<script type=\"application/ld+json\">" + | 141 "<script type=\"application/ld+json\">" |
64 m_content + | 142 "\n" |
65 "</script>" | 143 "\n" |
66 "</body>"); | 144 "{\"@type\": \"Restaurant\"," |
67 | 145 "\"name\": \"Special characters for ya >_<;\"" |
68 String extracted = extract(); | 146 "}\n" |
69 String expected = "[" + m_content + "]"; | 147 "\n" |
148 "</script>" | |
149 "</body>"); | |
150 setURL("http://www.test.com/"); | |
151 setTitle("My neat website about cool stuff"); | |
152 | |
153 WebPagePtr extracted = WebPage::New(); | |
154 ASSERT_TRUE(extract(extracted)); | |
155 | |
156 WebPagePtr expected = | |
157 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
158 | |
159 EntityPtr restaurant = Entity::New(); | |
160 restaurant->type = "Restaurant"; | |
161 restaurant->properties.push_back( | |
162 createStringProperty("name", "Special characters for ya >_<;")); | |
163 | |
164 expected->entities.push_back(std::move(restaurant)); | |
70 EXPECT_EQ(expected, extracted); | 165 EXPECT_EQ(expected, extracted); |
71 } | 166 } |
72 | 167 |
73 TEST_F(CopylessPasteExtractorTest, header) { | 168 TEST_F(CopylessPasteExtractorTest, header) { |
74 setHtmlInnerHTML( | 169 setHtmlInnerHTML( |
75 "<head>" | 170 "<head>" |
76 "<script type=\"application/ld+json\">" + | 171 "<script type=\"application/ld+json\">" |
77 m_content + | 172 "\n" |
173 "\n" | |
174 "{\"@type\": \"Restaurant\"," | |
175 "\"name\": \"Special characters for ya >_<;\"" | |
176 "}\n" | |
177 "\n" | |
78 "</script>" | 178 "</script>" |
79 "</head>"); | 179 "</head>"); |
80 | 180 |
81 String extracted = extract(); | 181 setURL("http://www.test.com/"); |
82 String expected = "[" + m_content + "]"; | 182 setTitle("My neat website about cool stuff"); |
183 | |
184 WebPagePtr extracted = WebPage::New(); | |
185 ASSERT_TRUE(extract(extracted)); | |
186 | |
187 WebPagePtr expected = | |
188 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
189 | |
190 EntityPtr restaurant = Entity::New(); | |
191 restaurant->type = "Restaurant"; | |
192 restaurant->properties.push_back( | |
193 createStringProperty("name", "Special characters for ya >_<;")); | |
194 | |
195 expected->entities.push_back(std::move(restaurant)); | |
196 EXPECT_EQ(expected, extracted); | |
197 } | |
198 | |
199 TEST_F(CopylessPasteExtractorTest, booleanValue) { | |
200 setHtmlInnerHTML( | |
201 "<body>" | |
202 "<script type=\"application/ld+json\">" | |
203 "\n" | |
204 "\n" | |
205 "{\"@type\": \"Restaurant\"," | |
206 "\"open\": true" | |
207 "}\n" | |
208 "\n" | |
209 "</script>" | |
210 "</body>"); | |
211 setURL("http://www.test.com/"); | |
212 setTitle("My neat website about cool stuff"); | |
213 | |
214 WebPagePtr extracted = WebPage::New(); | |
215 ASSERT_TRUE(extract(extracted)); | |
216 | |
217 WebPagePtr expected = | |
218 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
219 | |
220 EntityPtr restaurant = Entity::New(); | |
221 restaurant->type = "Restaurant"; | |
222 restaurant->properties.push_back(createBooleanProperty("open", true)); | |
223 | |
224 expected->entities.push_back(std::move(restaurant)); | |
225 EXPECT_EQ(expected, extracted); | |
226 } | |
227 | |
228 TEST_F(CopylessPasteExtractorTest, longValue) { | |
229 setHtmlInnerHTML( | |
230 "<body>" | |
231 "<script type=\"application/ld+json\">" | |
232 "\n" | |
233 "\n" | |
234 "{\"@type\": \"Restaurant\"," | |
235 "\"long\": 1" | |
236 "}\n" | |
237 "\n" | |
238 "</script>" | |
239 "</body>"); | |
240 setURL("http://www.test.com/"); | |
241 setTitle("My neat website about cool stuff"); | |
242 | |
243 WebPagePtr extracted = WebPage::New(); | |
244 ASSERT_TRUE(extract(extracted)); | |
245 | |
246 WebPagePtr expected = | |
247 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
248 | |
249 EntityPtr restaurant = Entity::New(); | |
250 restaurant->type = "Restaurant"; | |
251 restaurant->properties.push_back(createLongProperty("long", 1ll)); | |
252 | |
253 expected->entities.push_back(std::move(restaurant)); | |
254 EXPECT_EQ(expected, extracted); | |
255 } | |
256 | |
257 TEST_F(CopylessPasteExtractorTest, doubleValue) { | |
258 setHtmlInnerHTML( | |
259 "<body>" | |
260 "<script type=\"application/ld+json\">" | |
261 "\n" | |
262 "\n" | |
263 "{\"@type\": \"Restaurant\"," | |
264 "\"double\": 1.5" | |
265 "}\n" | |
266 "\n" | |
267 "</script>" | |
268 "</body>"); | |
269 setURL("http://www.test.com/"); | |
270 setTitle("My neat website about cool stuff"); | |
271 | |
272 WebPagePtr extracted = WebPage::New(); | |
273 ASSERT_TRUE(extract(extracted)); | |
274 | |
275 WebPagePtr expected = | |
276 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
277 | |
278 EntityPtr restaurant = Entity::New(); | |
279 restaurant->type = "Restaurant"; | |
280 restaurant->properties.push_back(createStringProperty("double", "1.5")); | |
281 | |
282 expected->entities.push_back(std::move(restaurant)); | |
83 EXPECT_EQ(expected, extracted); | 283 EXPECT_EQ(expected, extracted); |
84 } | 284 } |
85 | 285 |
86 TEST_F(CopylessPasteExtractorTest, multiple) { | 286 TEST_F(CopylessPasteExtractorTest, multiple) { |
87 setHtmlInnerHTML( | 287 setHtmlInnerHTML( |
88 "<head>" | 288 "<head>" |
89 "<script type=\"application/ld+json\">" + | 289 "<script type=\"application/ld+json\">" |
90 m_content + | 290 "\n" |
291 "\n" | |
292 "{\"@type\": \"Restaurant\"," | |
293 "\"name\": \"Special characters for ya >_<;\"" | |
294 "}\n" | |
295 "\n" | |
91 "</script>" | 296 "</script>" |
92 "</head>" | 297 "</head>" |
93 "<body>" | 298 "<body>" |
94 "<script type=\"application/ld+json\">" + | 299 "<script type=\"application/ld+json\">" |
95 m_content + | 300 "\n" |
96 "</script>" | 301 "\n" |
97 "<script type=\"application/ld+json\">" + | 302 "{\"@type\": \"Restaurant\"," |
98 m_content + | 303 "\"name\": \"Special characters for ya >_<;\"" |
99 "</script>" | 304 "}\n" |
100 "</body>"); | 305 "\n" |
101 | 306 "</script>" |
102 String extracted = extract(); | 307 "<script type=\"application/ld+json\">" |
103 String expected = "[" + m_content + "," + m_content + "," + m_content + "]"; | 308 "\n" |
309 "\n" | |
310 "{\"@type\": \"Restaurant\"," | |
311 "\"name\": \"Special characters for ya >_<;\"" | |
312 "}\n" | |
313 "\n" | |
314 "</script>" | |
315 "</body>"); | |
316 | |
317 setURL("http://www.test.com/"); | |
318 setTitle("My neat website about cool stuff"); | |
319 | |
320 WebPagePtr extracted = WebPage::New(); | |
321 ASSERT_TRUE(extract(extracted)); | |
322 | |
323 WebPagePtr expected = | |
324 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
325 | |
326 for (int i = 0; i < 3; ++i) { | |
327 EntityPtr restaurant = Entity::New(); | |
328 restaurant->type = "Restaurant"; | |
329 restaurant->properties.push_back( | |
330 createStringProperty("name", "Special characters for ya >_<;")); | |
331 | |
332 expected->entities.push_back(std::move(restaurant)); | |
333 } | |
334 EXPECT_EQ(expected, extracted); | |
335 } | |
336 | |
337 TEST_F(CopylessPasteExtractorTest, nested) { | |
338 setHtmlInnerHTML( | |
339 "<body>" | |
340 "<script type=\"application/ld+json\">" | |
341 "\n" | |
342 "\n" | |
343 "{\"@type\": \"Restaurant\"," | |
344 "\"name\": \"Ye ol greasy diner\"," | |
345 "\"address\": {" | |
346 "\n" | |
347 " \"streetAddress\": \"123 Big Oak Road\"," | |
348 " \"addressLocality\": \"San Francisco\"" | |
349 " }\n" | |
350 "}\n" | |
351 "\n" | |
352 "</script>" | |
353 "</body>"); | |
354 setURL("http://www.test.com/"); | |
355 setTitle("My neat website about cool stuff"); | |
356 | |
357 WebPagePtr extracted = WebPage::New(); | |
358 ASSERT_TRUE(extract(extracted)); | |
359 | |
360 WebPagePtr expected = | |
361 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
362 | |
363 EntityPtr restaurant = Entity::New(); | |
364 restaurant->type = "Restaurant"; | |
365 restaurant->properties.push_back( | |
366 createStringProperty("name", "Ye ol greasy diner")); | |
367 | |
368 EntityPtr address = Entity::New(); | |
369 address->type = "Thing"; | |
370 address->properties.push_back( | |
371 createStringProperty("streetAddress", "123 Big Oak Road")); | |
372 address->properties.push_back( | |
373 createStringProperty("addressLocality", "San Francisco")); | |
374 | |
375 restaurant->properties.push_back( | |
376 createEntityProperty("address", std::move(address))); | |
377 | |
378 expected->entities.push_back(std::move(restaurant)); | |
379 EXPECT_EQ(expected, extracted); | |
380 } | |
381 | |
382 TEST_F(CopylessPasteExtractorTest, repeated) { | |
383 setHtmlInnerHTML( | |
384 "<body>" | |
385 "<script type=\"application/ld+json\">" | |
386 "\n" | |
387 "\n" | |
388 "{\"@type\": \"Restaurant\"," | |
389 "\"name\": [ \"First name\", \"Second name\" ]" | |
390 "}\n" | |
391 "\n" | |
392 "</script>" | |
393 "</body>"); | |
394 setURL("http://www.test.com/"); | |
395 setTitle("My neat website about cool stuff"); | |
396 | |
397 WebPagePtr extracted = WebPage::New(); | |
398 ASSERT_TRUE(extract(extracted)); | |
399 | |
400 WebPagePtr expected = | |
401 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
402 | |
403 EntityPtr restaurant = Entity::New(); | |
404 restaurant->type = "Restaurant"; | |
405 | |
406 PropertyPtr name = Property::New(); | |
407 name->name = "name"; | |
408 name->values = Values::New(); | |
409 Vector<String> nameValues; | |
410 nameValues.push_back("First name"); | |
411 nameValues.push_back("Second name"); | |
412 name->values->set_string_values(nameValues); | |
413 | |
414 restaurant->properties.push_back(std::move(name)); | |
415 | |
416 expected->entities.push_back(std::move(restaurant)); | |
417 | |
418 EXPECT_EQ(expected, extracted); | |
419 } | |
420 | |
421 TEST_F(CopylessPasteExtractorTest, repeatedObject) { | |
422 setHtmlInnerHTML( | |
423 "<body>" | |
424 "<script type=\"application/ld+json\">" | |
425 "\n" | |
426 "\n" | |
427 "{\"@type\": \"Restaurant\"," | |
428 "\"name\": \"Ye ol greasy diner\"," | |
429 "\"address\": [" | |
430 "\n" | |
431 " {" | |
432 " \"streetAddress\": \"123 Big Oak Road\"," | |
433 " \"addressLocality\": \"San Francisco\"" | |
434 " },\n" | |
435 " {" | |
436 " \"streetAddress\": \"123 Big Oak Road\"," | |
437 " \"addressLocality\": \"San Francisco\"" | |
438 " }\n" | |
439 "]\n" | |
440 "}\n" | |
441 "\n" | |
442 "</script>" | |
443 "</body>"); | |
444 setURL("http://www.test.com/"); | |
445 setTitle("My neat website about cool stuff"); | |
446 | |
447 WebPagePtr extracted = WebPage::New(); | |
448 ASSERT_TRUE(extract(extracted)); | |
449 | |
450 WebPagePtr expected = | |
451 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
452 | |
453 EntityPtr restaurant = Entity::New(); | |
454 restaurant->type = "Restaurant"; | |
455 restaurant->properties.push_back( | |
456 createStringProperty("name", "Ye ol greasy diner")); | |
457 | |
458 PropertyPtr addressProperty = Property::New(); | |
459 addressProperty->name = "address"; | |
460 addressProperty->values = Values::New(); | |
461 addressProperty->values->set_entity_values(Vector<EntityPtr>()); | |
462 for (int i = 0; i < 2; ++i) { | |
463 EntityPtr address = Entity::New(); | |
464 address->type = "Thing"; | |
465 address->properties.push_back( | |
466 createStringProperty("streetAddress", "123 Big Oak Road")); | |
467 address->properties.push_back( | |
468 createStringProperty("addressLocality", "San Francisco")); | |
469 addressProperty->values->get_entity_values().push_back(std::move(address)); | |
470 } | |
471 restaurant->properties.push_back(std::move(addressProperty)); | |
472 | |
473 expected->entities.push_back(std::move(restaurant)); | |
474 EXPECT_EQ(expected, extracted); | |
475 } | |
476 | |
477 TEST_F(CopylessPasteExtractorTest, truncateLongString) { | |
478 String maxLengthString; | |
479 for (int i = 0; i < 200; ++i) { | |
480 maxLengthString.append("a"); | |
481 } | |
482 String tooLongString(maxLengthString); | |
483 tooLongString.append("a"); | |
484 setHtmlInnerHTML( | |
485 "<body>" | |
486 "<script type=\"application/ld+json\">" | |
487 "\n" | |
488 "\n" | |
489 "{\"@type\": \"Restaurant\"," | |
490 "\"name\": \"" + | |
491 tooLongString + | |
492 "\"" | |
493 "}\n" | |
494 "\n" | |
495 "</script>" | |
496 "</body>"); | |
497 setURL("http://www.test.com/"); | |
498 setTitle("My neat website about cool stuff"); | |
499 | |
500 WebPagePtr extracted = WebPage::New(); | |
501 ASSERT_TRUE(extract(extracted)); | |
502 | |
503 WebPagePtr expected = | |
504 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
505 | |
506 EntityPtr restaurant = Entity::New(); | |
507 restaurant->type = "Restaurant"; | |
508 restaurant->properties.push_back( | |
509 createStringProperty("name", maxLengthString)); | |
510 | |
511 expected->entities.push_back(std::move(restaurant)); | |
512 EXPECT_EQ(expected, extracted); | |
513 } | |
514 | |
515 TEST_F(CopylessPasteExtractorTest, enforceTypeExists) { | |
516 setHtmlInnerHTML( | |
517 "<body>" | |
518 "<script type=\"application/ld+json\">" | |
519 "\n" | |
520 "\n" | |
521 "{\"name\": \"Special characters for ya >_<;\"" | |
522 "}\n" | |
523 "\n" | |
524 "</script>" | |
525 "</body>"); | |
526 setURL("http://www.test.com/"); | |
527 setTitle("My neat website about cool stuff"); | |
528 | |
529 WebPagePtr extracted = WebPage::New(); | |
530 ASSERT_FALSE(extract(extracted)); | |
531 ASSERT_TRUE(extracted.is_null()); | |
532 } | |
533 | |
534 TEST_F(CopylessPasteExtractorTest, enforceTypeWhitelist) { | |
535 setHtmlInnerHTML( | |
536 "<body>" | |
537 "<script type=\"application/ld+json\">" | |
538 "\n" | |
539 "\n" | |
540 "{\"@type\": \"UnsupportedType\"," | |
541 "\"name\": \"Special characters for ya >_<;\"" | |
542 "}\n" | |
543 "\n" | |
544 "</script>" | |
545 "</body>"); | |
546 setURL("http://www.test.com/"); | |
547 setTitle("My neat website about cool stuff"); | |
548 | |
549 WebPagePtr extracted = WebPage::New(); | |
550 ASSERT_FALSE(extract(extracted)); | |
551 ASSERT_TRUE(extracted.is_null()); | |
552 } | |
553 | |
554 TEST_F(CopylessPasteExtractorTest, truncateTooManyValuesInField) { | |
555 String largeRepeatedField = "["; | |
556 for (int i = 0; i < 101; ++i) { | |
557 largeRepeatedField.append("\"a\""); | |
558 if (i != 100) { | |
559 largeRepeatedField.append(", "); | |
560 } | |
561 } | |
562 largeRepeatedField.append("]"); | |
563 setHtmlInnerHTML( | |
564 "<body>" | |
565 "<script type=\"application/ld+json\">" | |
566 "\n" | |
567 "\n" | |
568 "{\"@type\": \"Restaurant\"," | |
569 "\"name\": " + | |
570 largeRepeatedField + | |
571 "}\n" | |
572 "\n" | |
573 "</script>" | |
574 "</body>"); | |
575 setURL("http://www.test.com/"); | |
576 setTitle("My neat website about cool stuff"); | |
577 | |
578 WebPagePtr extracted = WebPage::New(); | |
579 ASSERT_TRUE(extract(extracted)); | |
580 | |
581 WebPagePtr expected = | |
582 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
583 | |
584 EntityPtr restaurant = Entity::New(); | |
585 restaurant->type = "Restaurant"; | |
586 | |
587 PropertyPtr name = Property::New(); | |
588 name->name = "name"; | |
589 name->values = Values::New(); | |
590 Vector<String> nameValues; | |
591 for (int i = 0; i < 100; ++i) { | |
592 nameValues.push_back("a"); | |
593 } | |
594 name->values->set_string_values(nameValues); | |
595 | |
596 restaurant->properties.push_back(std::move(name)); | |
597 | |
598 expected->entities.push_back(std::move(restaurant)); | |
599 | |
600 EXPECT_EQ(expected, extracted); | |
601 } | |
602 | |
603 TEST_F(CopylessPasteExtractorTest, truncateTooManyFields) { | |
604 String tooManyFields; | |
605 for (int i = 0; i < 20; ++i) { | |
606 tooManyFields.append(String::format("\"%d\": \"a\"", i)); | |
607 if (i != 19) { | |
608 tooManyFields.append(",\n"); | |
609 } | |
610 } | |
611 setHtmlInnerHTML( | |
612 "<body>" | |
613 "<script type=\"application/ld+json\">" | |
614 "\n" | |
615 "\n" | |
616 "{\"@type\": \"Restaurant\"," + | |
617 tooManyFields + | |
618 "}\n" | |
619 "\n" | |
620 "</script>" | |
621 "</body>"); | |
622 setURL("http://www.test.com/"); | |
623 setTitle("My neat website about cool stuff"); | |
624 | |
625 WebPagePtr extracted = WebPage::New(); | |
626 ASSERT_TRUE(extract(extracted)); | |
627 | |
628 WebPagePtr expected = | |
629 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
630 | |
631 EntityPtr restaurant = Entity::New(); | |
632 restaurant->type = "Restaurant"; | |
633 | |
634 for (int i = 0; i < 19; ++i) { | |
635 restaurant->properties.push_back( | |
636 createStringProperty(String::number(i), "a")); | |
637 } | |
638 | |
639 expected->entities.push_back(std::move(restaurant)); | |
640 EXPECT_EQ(expected, extracted); | |
641 } | |
642 | |
643 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithEmptyArray) { | |
644 setHtmlInnerHTML( | |
645 "<body>" | |
646 "<script type=\"application/ld+json\">" | |
647 "\n" | |
648 "\n" | |
649 "{\"@type\": \"Restaurant\"," | |
650 "\"name\": []" | |
651 "}\n" | |
652 "\n" | |
653 "</script>" | |
654 "</body>"); | |
655 setURL("http://www.test.com/"); | |
656 setTitle("My neat website about cool stuff"); | |
657 | |
658 WebPagePtr extracted = WebPage::New(); | |
659 ASSERT_TRUE(extract(extracted)); | |
660 | |
661 WebPagePtr expected = | |
662 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
663 | |
664 EntityPtr restaurant = Entity::New(); | |
665 restaurant->type = "Restaurant"; | |
666 | |
667 expected->entities.push_back(std::move(restaurant)); | |
668 | |
669 EXPECT_EQ(expected, extracted); | |
670 } | |
671 | |
672 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithMixedTypes) { | |
673 setHtmlInnerHTML( | |
674 "<body>" | |
675 "<script type=\"application/ld+json\">" | |
676 "\n" | |
677 "\n" | |
678 "{\"@type\": \"Restaurant\"," | |
679 "\"name\": [ \"Name\", 1 ]" | |
680 "}\n" | |
681 "\n" | |
682 "</script>" | |
683 "</body>"); | |
684 setURL("http://www.test.com/"); | |
685 setTitle("My neat website about cool stuff"); | |
686 | |
687 WebPagePtr extracted = WebPage::New(); | |
688 ASSERT_TRUE(extract(extracted)); | |
689 | |
690 WebPagePtr expected = | |
691 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
692 | |
693 EntityPtr restaurant = Entity::New(); | |
694 restaurant->type = "Restaurant"; | |
695 | |
696 expected->entities.push_back(std::move(restaurant)); | |
697 | |
698 EXPECT_EQ(expected, extracted); | |
699 } | |
700 | |
701 TEST_F(CopylessPasteExtractorTest, ignorePropertyWithNestedArray) { | |
702 setHtmlInnerHTML( | |
703 "<body>" | |
704 "<script type=\"application/ld+json\">" | |
705 "\n" | |
706 "\n" | |
707 "{\"@type\": \"Restaurant\"," | |
708 "\"name\": [ [ \"Name\" ] ]" | |
709 "}\n" | |
710 "\n" | |
711 "</script>" | |
712 "</body>"); | |
713 setURL("http://www.test.com/"); | |
714 setTitle("My neat website about cool stuff"); | |
715 | |
716 WebPagePtr extracted = WebPage::New(); | |
717 ASSERT_TRUE(extract(extracted)); | |
718 | |
719 WebPagePtr expected = | |
720 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
721 | |
722 EntityPtr restaurant = Entity::New(); | |
723 restaurant->type = "Restaurant"; | |
724 | |
725 expected->entities.push_back(std::move(restaurant)); | |
726 | |
727 EXPECT_EQ(expected, extracted); | |
728 } | |
729 | |
730 TEST_F(CopylessPasteExtractorTest, enforceMaxNestingDepth) { | |
731 setHtmlInnerHTML( | |
732 "<body>" | |
733 "<script type=\"application/ld+json\">" | |
734 "\n" | |
735 "\n" | |
736 "{\"@type\": \"Restaurant\"," | |
737 "\"name\": \"Ye ol greasy diner\"," | |
738 "\"1\": {" | |
739 " \"2\": {" | |
740 " \"3\": {" | |
741 " \"4\": {" | |
742 " \"5\": 6" | |
743 " }\n" | |
744 " }\n" | |
745 " }\n" | |
746 "}\n" | |
747 "}\n" | |
748 "\n" | |
749 "</script>" | |
750 "</body>"); | |
751 setURL("http://www.test.com/"); | |
752 setTitle("My neat website about cool stuff"); | |
753 | |
754 WebPagePtr extracted = WebPage::New(); | |
755 ASSERT_TRUE(extract(extracted)); | |
756 | |
757 WebPagePtr expected = | |
758 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
759 | |
760 EntityPtr restaurant = Entity::New(); | |
761 restaurant->type = "Restaurant"; | |
762 restaurant->properties.push_back( | |
763 createStringProperty("name", "Ye ol greasy diner")); | |
764 | |
765 EntityPtr entity1 = Entity::New(); | |
766 entity1->type = "Thing"; | |
767 | |
768 EntityPtr entity2 = Entity::New(); | |
769 entity2->type = "Thing"; | |
770 | |
771 EntityPtr entity3 = Entity::New(); | |
772 entity3->type = "Thing"; | |
773 | |
774 entity2->properties.push_back(createEntityProperty("3", std::move(entity3))); | |
775 | |
776 entity1->properties.push_back(createEntityProperty("2", std::move(entity2))); | |
777 | |
778 restaurant->properties.push_back( | |
779 createEntityProperty("1", std::move(entity1))); | |
780 | |
781 expected->entities.push_back(std::move(restaurant)); | |
782 EXPECT_EQ(expected, extracted); | |
783 } | |
784 | |
785 TEST_F(CopylessPasteExtractorTest, maxNestingDepthWithTerminalProperty) { | |
786 setHtmlInnerHTML( | |
787 "<body>" | |
788 "<script type=\"application/ld+json\">" | |
789 "\n" | |
790 "\n" | |
791 "{\"@type\": \"Restaurant\"," | |
792 "\"name\": \"Ye ol greasy diner\"," | |
793 "\"1\": {" | |
794 " \"2\": {" | |
795 " \"3\": {" | |
796 " \"4\": 5" | |
797 " }\n" | |
798 " }\n" | |
799 "}\n" | |
800 "}\n" | |
801 "\n" | |
802 "</script>" | |
803 "</body>"); | |
804 setURL("http://www.test.com/"); | |
805 setTitle("My neat website about cool stuff"); | |
806 | |
807 WebPagePtr extracted = WebPage::New(); | |
808 ASSERT_TRUE(extract(extracted)); | |
809 | |
810 WebPagePtr expected = | |
811 createWebPage("http://www.test.com/", "My neat website about cool stuff"); | |
812 | |
813 EntityPtr restaurant = Entity::New(); | |
814 restaurant->type = "Restaurant"; | |
815 restaurant->properties.push_back( | |
816 createStringProperty("name", "Ye ol greasy diner")); | |
817 | |
818 EntityPtr entity1 = Entity::New(); | |
819 entity1->type = "Thing"; | |
820 | |
821 EntityPtr entity2 = Entity::New(); | |
822 entity2->type = "Thing"; | |
823 | |
824 EntityPtr entity3 = Entity::New(); | |
825 entity3->type = "Thing"; | |
826 | |
827 entity3->properties.push_back(createLongProperty("4", 5)); | |
828 | |
829 entity2->properties.push_back(createEntityProperty("3", std::move(entity3))); | |
830 | |
831 entity1->properties.push_back(createEntityProperty("2", std::move(entity2))); | |
832 | |
833 restaurant->properties.push_back( | |
834 createEntityProperty("1", std::move(entity1))); | |
835 | |
836 expected->entities.push_back(std::move(restaurant)); | |
104 EXPECT_EQ(expected, extracted); | 837 EXPECT_EQ(expected, extracted); |
105 } | 838 } |
106 | 839 |
107 } // namespace | 840 } // namespace |
108 | |
109 } // namespace blink | 841 } // namespace blink |
OLD | NEW |