Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(578)

Side by Side Diff: test/com/dom_distiller/client/SchemaOrgParserTest.java

Issue 240073007: recognize and parse Schema.org Markup (Closed) Base URL: https://code.google.com/p/dom-distiller/@master
Patch Set: addressed all comments Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 package com.dom_distiller.client;
6
7 import com.google.gwt.dom.client.Document;
8 import com.google.gwt.dom.client.Element;
9 import com.google.gwt.dom.client.ImageElement;
10 import com.google.gwt.dom.client.MetaElement;
11 import com.google.gwt.dom.client.NodeList;
12
13 import com.google.gwt.junit.client.GWTTestCase;
14
15 public class SchemaOrgParserTest extends GWTTestCase {
16 @Override
17 public String getModuleName() {
18 return "com.dom_distiller.DomDistillerJUnit";
19 }
20
21 public void testImageWithEmbeddedPublisher() {
22 Element rootDiv = TestUtil.createDiv(0);
23 setItemScopeAndType(rootDiv, "ImageObject");
24 mBody.appendChild(rootDiv);
25
26 String expectedTitle = "Testcase for IMAGE";
27 Element h = TestUtil.createHeading(1, expectedTitle);
28 setItemProp(h, "headline");
29 rootDiv.appendChild(h);
30
31 String expectedDescription = "Testing IMAGE with embedded publisher";
32 h = TestUtil.createHeading(2, expectedDescription);
33 setItemProp(h, "description");
cjhopman 2014/04/18 01:17:01 Note: the following comment applies to all of thes
kuan 2014/04/18 23:34:38 Done. i changed to using html string and setInner
34 rootDiv.appendChild(h);
35
36 // This should extract the "href" attribute of the <a> tag.
37 String expectedUrl = "http://test_image_with_embedded_item.html";
38 Element link = TestUtil.createAnchor(expectedUrl, "test results");
39 setItemProp(link, "contentUrl");
40 rootDiv.appendChild(link);
41
42 Element div = TestUtil.createDiv(1);
43 setItemProp(div, "publisher");
44 setItemScopeAndType(div, "Organization");
45 div.appendChild(TestUtil.createText("Publisher: "));
46 String expectedPublisher = "Whatever Image Incorporated";
47 Element span = TestUtil.createSpan(expectedPublisher);
48 setItemProp(span, "name");
49 div.appendChild(span);
50 rootDiv.appendChild(div);
51
52 div = TestUtil.createDiv(2);
53 String expectedCopyrightYear = "1999-2022";
54 span = TestUtil.createSpan(expectedCopyrightYear);
55 setItemProp(span, "copyrightYear");
56 div.appendChild(span);
57
58 String expectedCopyrightHolder = "Whoever Image Copyrighted";
59 span = TestUtil.createSpan(expectedCopyrightHolder);
60 setItemProp(span, "copyrightHolder");
61 div.appendChild(span);
62 rootDiv.appendChild(div);
63
64 String expectedFormat = "jpeg";
65 span = TestUtil.createSpan(expectedFormat);
66 setItemProp(span, "encodingFormat");
67 rootDiv.appendChild(span);
68
69 String expectedCaption = "A test for IMAGE with embedded publisher";
70 span = TestUtil.createSpan(expectedCaption);
71 setItemProp(span, "caption");
72 rootDiv.appendChild(span);
73
74 // This should extract the "content" attribute of the <meta> tag.
75 Element meta = TestUtil.createMetaName("no_name", "true");
76 setItemProp(meta, "representativeOfPage");
77 rootDiv.appendChild(meta);
78
79 meta = TestUtil.createMetaName("no_name", "600");
80 setItemProp(meta, "width");
81 rootDiv.appendChild(meta);
82
83 meta = TestUtil.createMetaName("no_name", "400");
84 setItemProp(meta, "height");
85 rootDiv.appendChild(meta);
86
87 SchemaOrgParser parser = new SchemaOrgParser(mRoot);
88 assertEquals("IMAGE", parser.getType());
89 assertEquals(expectedTitle, parser.getTitle());
90 assertEquals(expectedDescription, parser.getDescription());
91 assertEquals("", parser.getUrl());
92 assertEquals(expectedPublisher, parser.getPublisher());
93 assertEquals(null, parser.getArticle());
94 assertEquals("", parser.getAuthor());
95 assertEquals(
96 "Copyright " + expectedCopyrightYear + " " + expectedCopyrightHolder ,
97 parser.getCopyright());
98 MarkupParser.Image[] images = parser.getImages();
99 assertEquals(1, images.length);
100 MarkupParser.Image image = images[0];
101 assertEquals(expectedUrl, image.image);
102 assertEquals(expectedUrl, image.url);
103 assertEquals(null, image.secureUrl);
104 assertEquals(expectedFormat, image.type);
105 assertEquals(expectedCaption, image.caption);
106 assertEquals(600, image.width);
107 assertEquals(400, image.height);
108 }
109
110 public void test2Images() {
111 Element rootDiv = TestUtil.createDiv(0);
112 setItemScopeAndType(rootDiv, "ImageObject");
113 mBody.appendChild(rootDiv);
114
115 String expectedTitle1 = "Testcase for 1st IMAGE";
116 Element h = TestUtil.createHeading(1, expectedTitle1);
117 setItemProp(h, "headline");
118 rootDiv.appendChild(h);
119
120 String expectedDescription1 = "Testing 1st IMAGE";
121 h = TestUtil.createHeading(2, expectedDescription1);
122 setItemProp(h, "description");
123 rootDiv.appendChild(h);
124
125 // This should extract the "href" attribute of the <a> tag.
126 String expectedUrl1 = "http://test_1st image.html";
127 Element link = TestUtil.createAnchor(expectedUrl1, "1st test results");
128 setItemProp(link, "contentUrl");
129 rootDiv.appendChild(link);
130
131 String expectedPublisher1 = "Whatever 1st Image Incorporated";
132 Element div = TestUtil.createDiv(1);
133 setItemProp(div, "publisher");
134 div.setInnerHTML(expectedPublisher1);
135 rootDiv.appendChild(div);
136
137 div = TestUtil.createDiv(2);
138 String expectedCopyrightYear1 = "1000-1999";
139 Element span = TestUtil.createSpan(expectedCopyrightYear1);
140 setItemProp(span, "copyrightYear");
141 div.appendChild(span);
142
143 String expectedCopyrightHolder1 = "Whoever 1st Image Copyrighted";
144 span = TestUtil.createSpan(expectedCopyrightHolder1);
145 setItemProp(span, "copyrightHolder");
146 div.appendChild(span);
147 rootDiv.appendChild(div);
148
149 String expectedFormat1 = "jpeg";
150 span = TestUtil.createSpan(expectedFormat1);
151 setItemProp(span, "encodingFormat");
152 rootDiv.appendChild(span);
153
154 String expectedCaption1 = "A test for 1st IMAGE";
155 span = TestUtil.createSpan(expectedCaption1);
156 setItemProp(span, "caption");
157 rootDiv.appendChild(span);
158
159 // This should extract the "content" attribute of the <meta> tag.
160 Element meta = TestUtil.createMetaName("no_name", "false");
161 setItemProp(meta, "representativeOfPage");
162 rootDiv.appendChild(meta);
163
164 meta = TestUtil.createMetaName("no_name", "400");
165 setItemProp(meta, "width");
166 rootDiv.appendChild(meta);
167
168 meta = TestUtil.createMetaName("no_name", "300");
169 setItemProp(meta, "height");
170 rootDiv.appendChild(meta);
171
172 rootDiv = TestUtil.createDiv(10);
173 setItemScopeAndType(rootDiv, "ImageObject");
174 mBody.appendChild(rootDiv);
175
176 String expectedTitle2 = "Testcase for 2nd IMAGE";
177 h = TestUtil.createHeading(2, expectedTitle2);
178 setItemProp(h, "headline");
179 rootDiv.appendChild(h);
180
181 String expectedDescription2 = "Testing 2nd IMAGE";
182 h = TestUtil.createHeading(2, expectedDescription2);
183 setItemProp(h, "description");
184 rootDiv.appendChild(h);
185
186 // This should extract the "href" attribute of the <a> tag.
187 String expectedUrl2 = "http://test_2nd mage.html";
188 link = TestUtil.createAnchor(expectedUrl2, "2nd test results");
189 setItemProp(link, "contentUrl");
190 rootDiv.appendChild(link);
191
192 String expectedPublisher2 = "Whatever 2nd Image Incorporated";
193 div = TestUtil.createDiv(11);
194 setItemProp(div, "publisher");
195 div.setInnerHTML(expectedPublisher2);
196 rootDiv.appendChild(div);
197
198 div = TestUtil.createDiv(12);
199 String expectedCopyrightYear2 = "2000-2999";
200 span = TestUtil.createSpan(expectedCopyrightYear2);
201 setItemProp(span, "copyrightYear");
202 div.appendChild(span);
203
204 String expectedCopyrightHolder2 = "Whoever 2nd Image Copyrighted";
205 span = TestUtil.createSpan(expectedCopyrightHolder2);
206 setItemProp(span, "copyrightHolder");
207 div.appendChild(span);
208 rootDiv.appendChild(div);
209
210 String expectedFormat2 = "gif";
211 span = TestUtil.createSpan(expectedFormat2);
212 setItemProp(span, "encodingFormat");
213 rootDiv.appendChild(span);
214
215 String expectedCaption2 = "A test for 2nd IMAGE";
216 span = TestUtil.createSpan(expectedCaption2);
217 setItemProp(span, "caption");
218 rootDiv.appendChild(span);
219
220 // This should extract the "content" attribute of the <meta> tag.
221 meta = TestUtil.createMetaName("no_name", "true");
222 setItemProp(meta, "representativeOfPage");
223 rootDiv.appendChild(meta);
224
225 meta = TestUtil.createMetaName("no_name", "1000");
226 setItemProp(meta, "width");
227 rootDiv.appendChild(meta);
228
229 meta = TestUtil.createMetaName("no_name", "600");
230 setItemProp(meta, "height");
231 rootDiv.appendChild(meta);
232
233 SchemaOrgParser parser = new SchemaOrgParser(mRoot);
234 // The basic properties of Thing should be from the first image that was
235 // inserted.
236 assertEquals("IMAGE", parser.getType());
237 assertEquals(expectedTitle1, parser.getTitle());
238 assertEquals(expectedDescription1, parser.getDescription());
239 assertEquals("", parser.getUrl());
240 assertEquals(expectedPublisher1, parser.getPublisher());
241 assertEquals(null, parser.getArticle());
242 assertEquals("", parser.getAuthor());
243 assertEquals("Copyright " + expectedCopyrightYear1 + " " + expectedCopyr ightHolder1,
244 parser.getCopyright());
245
246 MarkupParser.Image[] images = parser.getImages();
247 assertEquals(2, images.length);
248 // The 2nd image that was inserted is representative of page, so the
249 // images should be swapped in |images|.
250 MarkupParser.Image image = images[0];
251 assertEquals(expectedUrl2, image.image);
252 assertEquals(expectedUrl2, image.url);
253 assertEquals(null, image.secureUrl);
254 assertEquals(expectedFormat2, image.type);
255 assertEquals(expectedCaption2, image.caption);
256 assertEquals(1000, image.width);
257 assertEquals(600, image.height);
258 image = images[1];
259 assertEquals(expectedUrl1, image.image);
260 assertEquals(expectedUrl1, image.url);
261 assertEquals(null, image.secureUrl);
262 assertEquals(expectedFormat1, image.type);
263 assertEquals(expectedCaption1, image.caption);
264 assertEquals(400, image.width);
265 assertEquals(300, image.height);
266 }
267
268 public void testArticleWithEmbeddedAuthorAndPublisher() {
269 Element rootDiv = TestUtil.createDiv(0);
270 setItemScopeAndType(rootDiv, "Article");
271 mBody.appendChild(rootDiv);
272
273 String expectedTitle = "Testcase for ARTICLE";
274 Element h = TestUtil.createHeading(1, expectedTitle);
275 setItemProp(h, "headline");
276 rootDiv.appendChild(h);
277
278 String expectedDescription = "Testing ARTICLE with embedded author and p ublisher";
279 h = TestUtil.createHeading(2, expectedDescription);
280 setItemProp(h, "description");
281 rootDiv.appendChild(h);
282
283 // This should extract the "href" attribute of the <a> tag.
284 String expectedUrl = "http://test_article_with_embedded_items.html";
285 Element link = TestUtil.createAnchor(expectedUrl, "test results");
286 setItemProp(link, "url");
287 rootDiv.appendChild(link);
288
289 // This should extract the "src" attribute of the <image> tag.
290 String expectedImage = "http://test_article_with_embedded_items.jpeg";
291 ImageElement image = TestUtil.createImage();
292 image.setSrc(expectedImage);
293 setItemProp(image, "image");
294 rootDiv.appendChild(image);
295
296 Element div = TestUtil.createDiv(1);
297 setItemProp(div, "author");
298 setItemScopeAndType(div, "Person");
299 div.appendChild(TestUtil.createText("Author: "));
300 String expectedAuthor = "Whoever authored";
301 Element span = TestUtil.createSpan(expectedAuthor);
302 setItemProp(span, "name");
303 div.appendChild(span);
304 rootDiv.appendChild(div);
305
306 div = TestUtil.createDiv(2);
307 setItemProp(div, "publisher");
308 setItemScopeAndType(div, "Organization");
309 div.appendChild(TestUtil.createText("Publisher: "));
310 String expectedPublisher = "Whatever Article Incorporated";
311 span = TestUtil.createSpan(expectedPublisher);
312 setItemProp(span, "name");
313 div.appendChild(span);
314 rootDiv.appendChild(div);
315
316 String expectedDatePublished = "April 15, 2014";
317 span = TestUtil.createSpan(expectedDatePublished);
318 setItemProp(span, "datePublished");
319 rootDiv.appendChild(span);
320
321 // This should extract the "datetime" attribute of the <time> tag.
322 String expectedTimeModified = "2014-04-16T23:59";
323 Element time = Document.get().createElement("time");
324 time.setInnerHTML("April 16, 2014 11:59pm");
325 time.setAttribute("datetime", expectedTimeModified);
326 setItemProp(time, "dateModified");
327 rootDiv.appendChild(time);
328
329 String expectedCopyrightYear = "2000-2014";
330 span = TestUtil.createSpan(expectedCopyrightYear);
331 setItemProp(span, "copyrightYear");
332 rootDiv.appendChild(span);
333
334 String expectedCopyrightHolder = "Whoever Article Copyrighted";
335 span = TestUtil.createSpan(expectedCopyrightHolder);
336 setItemProp(span, "copyrightHolder");
337 rootDiv.appendChild(span);
338
339 String expectedSection = "Romance thriller";
340 span = TestUtil.createSpan(expectedSection);
341 setItemProp(span, "articleSection");
342 rootDiv.appendChild(span);
343
344 SchemaOrgParser parser = new SchemaOrgParser(mRoot);
345 assertEquals("ARTICLE", parser.getType());
346 assertEquals(expectedTitle, parser.getTitle());
347 assertEquals(expectedDescription, parser.getDescription());
348 assertEquals(expectedUrl, parser.getUrl());
349 assertEquals(expectedAuthor, parser.getAuthor());
350 assertEquals(expectedPublisher, parser.getPublisher());
351 assertEquals(
352 "Copyright " + expectedCopyrightYear + " " + expectedCopyrightHolder ,
353 parser.getCopyright());
354 MarkupParser.Image[] images = parser.getImages();
355 assertEquals(1, images.length);
356 assertEquals(expectedImage, images[0].image);
357 assertEquals(expectedImage, images[0].url);
358 MarkupParser.Article article = parser.getArticle();
359 assertEquals(expectedDatePublished, article.publishedTime);
360 assertEquals(expectedTimeModified, article.modifiedTime);
361 assertEquals(null, article.expirationTime);
362 assertEquals(expectedSection, article.section);
363 assertEquals(1, article.authors.length);
364 assertEquals(expectedAuthor, article.authors[0]);
365 }
366
367 public void testItemscopeInHTMLTag() {
368 setItemScopeAndType(mRoot, "Article");
369
370 String expectedTitle = "Testcase for ItemScope in HTML tag";
371 Element h = TestUtil.createHeading(1, expectedTitle);
372 setItemProp(h, "headline");
373 mBody.appendChild(h);
374
375 SchemaOrgParser parser = new SchemaOrgParser(mRoot);
376 assertEquals("ARTICLE", parser.getType());
377 assertEquals(expectedTitle, parser.getTitle());
378 assertTrue(parser.getArticle() != null);
379
380 // Remove "itemscope" and "itemtype" attributes in <html> tag, so that
381 // other testcases won't be affected.
382 mRoot.removeAttribute("ITEMSCOPE");
383 mRoot.removeAttribute("ITEMTYPE");
384 }
385
386 @Override
387 protected void gwtSetUp() throws Exception {
388 // Get root element.
389 mRoot = Document.get().getDocumentElement();
390
391 // Get <body> element.
392 NodeList<Element> bodies = mRoot.getElementsByTagName("BODY");
393 if (bodies.getLength() != 1)
394 throw new Exception("There shouldn't be more than 1 <body> tag");
395 mBody = bodies.getItem(0);
396
397 // Remove all meta tags, otherwise a testcase may run with the meta tags
398 // set up in a previous testcase, resulting in unexpected results.
399 NodeList<Element> allMeta = mRoot.getElementsByTagName("META");
400 for (int i = allMeta.getLength() - 1; i >= 0; i--) {
401 allMeta.getItem(i).removeFromParent();
402 }
403
404 // Remove all div tags, otherwise a testcase may run with the div tags
405 // set up in a previous testcase, resulting in unexpected results.
406 NodeList<Element> allDiv = mRoot.getElementsByTagName("DIV");
407 for (int i = allDiv.getLength() - 1; i >= 0; i--) {
408 allDiv.getItem(i).removeFromParent();
409 }
410 }
411
412 private void setItemScopeAndType(Element e, String type) {
413 e.setAttribute("ITEMSCOPE", "");
414 e.setAttribute("ITEMTYPE", "http://schema.org/" + type);
415 }
416
417 private void setItemProp(Element e, String name) {
418 e.setAttribute("itemprop", name);
419 }
420
421 private Element mRoot;
422 private Element mBody;
423 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698