Chromium Code Reviews| Index: test/com/dom_distiller/client/SchemaOrgParserTest.java |
| diff --git a/test/com/dom_distiller/client/SchemaOrgParserTest.java b/test/com/dom_distiller/client/SchemaOrgParserTest.java |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..f3f5470b78bdb466100aa2a75d221ed8541a8a80 |
| --- /dev/null |
| +++ b/test/com/dom_distiller/client/SchemaOrgParserTest.java |
| @@ -0,0 +1,423 @@ |
| +// Copyright 2014 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +package com.dom_distiller.client; |
| + |
| +import com.google.gwt.dom.client.Document; |
| +import com.google.gwt.dom.client.Element; |
| +import com.google.gwt.dom.client.ImageElement; |
| +import com.google.gwt.dom.client.MetaElement; |
| +import com.google.gwt.dom.client.NodeList; |
| + |
| +import com.google.gwt.junit.client.GWTTestCase; |
| + |
| +public class SchemaOrgParserTest extends GWTTestCase { |
| + // Names the GWT module under which this test case is run. |
| + @Override |
| + public String getModuleName() { |
| + final String moduleName = "com.dom_distiller.DomDistillerJUnit"; |
| + return moduleName; |
| + } |
| + |
| + public void testImageWithEmbeddedPublisher() { |
| + Element rootDiv = TestUtil.createDiv(0); |
| + setItemScopeAndType(rootDiv, "ImageObject"); |
| + mBody.appendChild(rootDiv); |
| + |
| + String expectedTitle = "Testcase for IMAGE"; |
| + Element h = TestUtil.createHeading(1, expectedTitle); |
| + setItemProp(h, "headline"); |
| + rootDiv.appendChild(h); |
| + |
| + String expectedDescription = "Testing IMAGE with embedded publisher"; |
| + h = TestUtil.createHeading(2, expectedDescription); |
| + setItemProp(h, "description"); |
|
cjhopman
2014/04/18 01:17:01
Note: the following comment applies to all of thes
kuan
2014/04/18 23:34:38
Done. i changed to using html string and setInner
|
| + rootDiv.appendChild(h); |
| + |
| + // This should extract the "href" attribute of the <a> tag. |
| + String expectedUrl = "http://test_image_with_embedded_item.html"; |
| + Element link = TestUtil.createAnchor(expectedUrl, "test results"); |
| + setItemProp(link, "contentUrl"); |
| + rootDiv.appendChild(link); |
| + |
| + Element div = TestUtil.createDiv(1); |
| + setItemProp(div, "publisher"); |
| + setItemScopeAndType(div, "Organization"); |
| + div.appendChild(TestUtil.createText("Publisher: ")); |
| + String expectedPublisher = "Whatever Image Incorporated"; |
| + Element span = TestUtil.createSpan(expectedPublisher); |
| + setItemProp(span, "name"); |
| + div.appendChild(span); |
| + rootDiv.appendChild(div); |
| + |
| + div = TestUtil.createDiv(2); |
| + String expectedCopyrightYear = "1999-2022"; |
| + span = TestUtil.createSpan(expectedCopyrightYear); |
| + setItemProp(span, "copyrightYear"); |
| + div.appendChild(span); |
| + |
| + String expectedCopyrightHolder = "Whoever Image Copyrighted"; |
| + span = TestUtil.createSpan(expectedCopyrightHolder); |
| + setItemProp(span, "copyrightHolder"); |
| + div.appendChild(span); |
| + rootDiv.appendChild(div); |
| + |
| + String expectedFormat = "jpeg"; |
| + span = TestUtil.createSpan(expectedFormat); |
| + setItemProp(span, "encodingFormat"); |
| + rootDiv.appendChild(span); |
| + |
| + String expectedCaption = "A test for IMAGE with embedded publisher"; |
| + span = TestUtil.createSpan(expectedCaption); |
| + setItemProp(span, "caption"); |
| + rootDiv.appendChild(span); |
| + |
| + // This should extract the "content" attribute of the <meta> tag. |
| + Element meta = TestUtil.createMetaName("no_name", "true"); |
| + setItemProp(meta, "representativeOfPage"); |
| + rootDiv.appendChild(meta); |
| + |
| + meta = TestUtil.createMetaName("no_name", "600"); |
| + setItemProp(meta, "width"); |
| + rootDiv.appendChild(meta); |
| + |
| + meta = TestUtil.createMetaName("no_name", "400"); |
| + setItemProp(meta, "height"); |
| + rootDiv.appendChild(meta); |
| + |
| + SchemaOrgParser parser = new SchemaOrgParser(mRoot); |
| + assertEquals("IMAGE", parser.getType()); |
| + assertEquals(expectedTitle, parser.getTitle()); |
| + assertEquals(expectedDescription, parser.getDescription()); |
| + assertEquals("", parser.getUrl()); |
| + assertEquals(expectedPublisher, parser.getPublisher()); |
| + assertEquals(null, parser.getArticle()); |
| + assertEquals("", parser.getAuthor()); |
| + assertEquals( |
| + "Copyright " + expectedCopyrightYear + " " + expectedCopyrightHolder, |
| + parser.getCopyright()); |
| + MarkupParser.Image[] images = parser.getImages(); |
| + assertEquals(1, images.length); |
| + MarkupParser.Image image = images[0]; |
| + assertEquals(expectedUrl, image.image); |
| + assertEquals(expectedUrl, image.url); |
| + assertEquals(null, image.secureUrl); |
| + assertEquals(expectedFormat, image.type); |
| + assertEquals(expectedCaption, image.caption); |
| + assertEquals(600, image.width); |
| + assertEquals(400, image.height); |
| + } |
| + |
| + // Builds two sibling ImageObject items; only the second one is marked as |
| + // representative of the page. Checks that the general properties come from |
| + // the first item while the representative image is listed first. |
| + public void test2Images() { |
| + // First image item: representativeOfPage is "false". |
| + Element firstItem = TestUtil.createDiv(0); |
| + setItemScopeAndType(firstItem, "ImageObject"); |
| + mBody.appendChild(firstItem); |
| + |
| + String firstTitle = "Testcase for 1st IMAGE"; |
| + Element firstHeadline = TestUtil.createHeading(1, firstTitle); |
| + setItemProp(firstHeadline, "headline"); |
| + firstItem.appendChild(firstHeadline); |
| + |
| + String firstDescription = "Testing 1st IMAGE"; |
| + Element firstDescriptionElement = TestUtil.createHeading(2, firstDescription); |
| + setItemProp(firstDescriptionElement, "description"); |
| + firstItem.appendChild(firstDescriptionElement); |
| + |
| + // For an <a> tag the property value is expected to come from "href". |
| + String firstUrl = "http://test_1st image.html"; |
| + Element firstLink = TestUtil.createAnchor(firstUrl, "1st test results"); |
| + setItemProp(firstLink, "contentUrl"); |
| + firstItem.appendChild(firstLink); |
| + |
| + // Publisher given as plain text content rather than as a nested item. |
| + String firstPublisher = "Whatever 1st Image Incorporated"; |
| + Element firstPublisherDiv = TestUtil.createDiv(1); |
| + setItemProp(firstPublisherDiv, "publisher"); |
| + firstPublisherDiv.setInnerHTML(firstPublisher); |
| + firstItem.appendChild(firstPublisherDiv); |
| + |
| + String firstCopyrightYear = "1000-1999"; |
| + String firstCopyrightHolder = "Whoever 1st Image Copyrighted"; |
| + Element firstCopyrightBlock = TestUtil.createDiv(2); |
| + Element firstYearSpan = TestUtil.createSpan(firstCopyrightYear); |
| + setItemProp(firstYearSpan, "copyrightYear"); |
| + firstCopyrightBlock.appendChild(firstYearSpan); |
| + Element firstHolderSpan = TestUtil.createSpan(firstCopyrightHolder); |
| + setItemProp(firstHolderSpan, "copyrightHolder"); |
| + firstCopyrightBlock.appendChild(firstHolderSpan); |
| + firstItem.appendChild(firstCopyrightBlock); |
| + |
| + String firstFormat = "jpeg"; |
| + Element firstFormatSpan = TestUtil.createSpan(firstFormat); |
| + setItemProp(firstFormatSpan, "encodingFormat"); |
| + firstItem.appendChild(firstFormatSpan); |
| + |
| + String firstCaption = "A test for 1st IMAGE"; |
| + Element firstCaptionSpan = TestUtil.createSpan(firstCaption); |
| + setItemProp(firstCaptionSpan, "caption"); |
| + firstItem.appendChild(firstCaptionSpan); |
| + |
| + // For <meta> tags the property value is expected to come from "content". |
| + Element firstRepresentativeMeta = TestUtil.createMetaName("no_name", "false"); |
| + setItemProp(firstRepresentativeMeta, "representativeOfPage"); |
| + firstItem.appendChild(firstRepresentativeMeta); |
| + |
| + Element firstWidthMeta = TestUtil.createMetaName("no_name", "400"); |
| + setItemProp(firstWidthMeta, "width"); |
| + firstItem.appendChild(firstWidthMeta); |
| + |
| + Element firstHeightMeta = TestUtil.createMetaName("no_name", "300"); |
| + setItemProp(firstHeightMeta, "height"); |
| + firstItem.appendChild(firstHeightMeta); |
| + |
| + // Second image item: representativeOfPage is "true". |
| + Element secondItem = TestUtil.createDiv(10); |
| + setItemScopeAndType(secondItem, "ImageObject"); |
| + mBody.appendChild(secondItem); |
| + |
| + String secondTitle = "Testcase for 2nd IMAGE"; |
| + Element secondHeadline = TestUtil.createHeading(2, secondTitle); |
| + setItemProp(secondHeadline, "headline"); |
| + secondItem.appendChild(secondHeadline); |
| + |
| + String secondDescription = "Testing 2nd IMAGE"; |
| + Element secondDescriptionElement = TestUtil.createHeading(2, secondDescription); |
| + setItemProp(secondDescriptionElement, "description"); |
| + secondItem.appendChild(secondDescriptionElement); |
| + |
| + // For an <a> tag the property value is expected to come from "href". |
| + String secondUrl = "http://test_2nd mage.html"; |
| + Element secondLink = TestUtil.createAnchor(secondUrl, "2nd test results"); |
| + setItemProp(secondLink, "contentUrl"); |
| + secondItem.appendChild(secondLink); |
| + |
| + String secondPublisher = "Whatever 2nd Image Incorporated"; |
| + Element secondPublisherDiv = TestUtil.createDiv(11); |
| + setItemProp(secondPublisherDiv, "publisher"); |
| + secondPublisherDiv.setInnerHTML(secondPublisher); |
| + secondItem.appendChild(secondPublisherDiv); |
| + |
| + String secondCopyrightYear = "2000-2999"; |
| + String secondCopyrightHolder = "Whoever 2nd Image Copyrighted"; |
| + Element secondCopyrightBlock = TestUtil.createDiv(12); |
| + Element secondYearSpan = TestUtil.createSpan(secondCopyrightYear); |
| + setItemProp(secondYearSpan, "copyrightYear"); |
| + secondCopyrightBlock.appendChild(secondYearSpan); |
| + Element secondHolderSpan = TestUtil.createSpan(secondCopyrightHolder); |
| + setItemProp(secondHolderSpan, "copyrightHolder"); |
| + secondCopyrightBlock.appendChild(secondHolderSpan); |
| + secondItem.appendChild(secondCopyrightBlock); |
| + |
| + String secondFormat = "gif"; |
| + Element secondFormatSpan = TestUtil.createSpan(secondFormat); |
| + setItemProp(secondFormatSpan, "encodingFormat"); |
| + secondItem.appendChild(secondFormatSpan); |
| + |
| + String secondCaption = "A test for 2nd IMAGE"; |
| + Element secondCaptionSpan = TestUtil.createSpan(secondCaption); |
| + setItemProp(secondCaptionSpan, "caption"); |
| + secondItem.appendChild(secondCaptionSpan); |
| + |
| + // For <meta> tags the property value is expected to come from "content". |
| + Element secondRepresentativeMeta = TestUtil.createMetaName("no_name", "true"); |
| + setItemProp(secondRepresentativeMeta, "representativeOfPage"); |
| + secondItem.appendChild(secondRepresentativeMeta); |
| + |
| + Element secondWidthMeta = TestUtil.createMetaName("no_name", "1000"); |
| + setItemProp(secondWidthMeta, "width"); |
| + secondItem.appendChild(secondWidthMeta); |
| + |
| + Element secondHeightMeta = TestUtil.createMetaName("no_name", "600"); |
| + setItemProp(secondHeightMeta, "height"); |
| + secondItem.appendChild(secondHeightMeta); |
| + |
| + String expectedCopyright = |
| + "Copyright " + firstCopyrightYear + " " + firstCopyrightHolder; |
| + |
| + SchemaOrgParser parser = new SchemaOrgParser(mRoot); |
| + // General (Thing-level) properties are expected from the item that was |
| + // inserted first. |
| + assertEquals("IMAGE", parser.getType()); |
| + assertEquals(firstTitle, parser.getTitle()); |
| + assertEquals(firstDescription, parser.getDescription()); |
| + assertEquals("", parser.getUrl()); |
| + assertEquals(firstPublisher, parser.getPublisher()); |
| + assertNull(parser.getArticle()); |
| + assertEquals("", parser.getAuthor()); |
| + assertEquals(expectedCopyright, parser.getCopyright()); |
| + |
| + MarkupParser.Image[] images = parser.getImages(); |
| + assertEquals(2, images.length); |
| + |
| + // The item inserted second is representative of the page, so it is |
| + // expected at index 0, ahead of the item inserted first. |
| + MarkupParser.Image representative = images[0]; |
| + assertEquals(secondUrl, representative.image); |
| + assertEquals(secondUrl, representative.url); |
| + assertNull(representative.secureUrl); |
| + assertEquals(secondFormat, representative.type); |
| + assertEquals(secondCaption, representative.caption); |
| + assertEquals(1000, representative.width); |
| + assertEquals(600, representative.height); |
| + |
| + MarkupParser.Image other = images[1]; |
| + assertEquals(firstUrl, other.image); |
| + assertEquals(firstUrl, other.url); |
| + assertNull(other.secureUrl); |
| + assertEquals(firstFormat, other.type); |
| + assertEquals(firstCaption, other.caption); |
| + assertEquals(400, other.width); |
| + assertEquals(300, other.height); |
| + } |
| + |
| + // Builds an Article item whose author (Person) and publisher (Organization) |
| + // are nested items, then checks every value that SchemaOrgParser reports. |
| + public void testArticleWithEmbeddedAuthorAndPublisher() { |
| + Element articleItem = TestUtil.createDiv(0); |
| + setItemScopeAndType(articleItem, "Article"); |
| + mBody.appendChild(articleItem); |
| + |
| + String title = "Testcase for ARTICLE"; |
| + Element headlineElement = TestUtil.createHeading(1, title); |
| + setItemProp(headlineElement, "headline"); |
| + articleItem.appendChild(headlineElement); |
| + |
| + String description = "Testing ARTICLE with embedded author and publisher"; |
| + Element descriptionElement = TestUtil.createHeading(2, description); |
| + setItemProp(descriptionElement, "description"); |
| + articleItem.appendChild(descriptionElement); |
| + |
| + // For an <a> tag the property value is expected to come from "href". |
| + String articleUrl = "http://test_article_with_embedded_items.html"; |
| + Element urlLink = TestUtil.createAnchor(articleUrl, "test results"); |
| + setItemProp(urlLink, "url"); |
| + articleItem.appendChild(urlLink); |
| + |
| + // For an <img> tag the property value is expected to come from "src". |
| + String imageUrl = "http://test_article_with_embedded_items.jpeg"; |
| + ImageElement imageElement = TestUtil.createImage(); |
| + imageElement.setSrc(imageUrl); |
| + setItemProp(imageElement, "image"); |
| + articleItem.appendChild(imageElement); |
| + |
| + // Author as a nested Person item. |
| + String authorName = "Whoever authored"; |
| + Element authorItem = TestUtil.createDiv(1); |
| + setItemProp(authorItem, "author"); |
| + setItemScopeAndType(authorItem, "Person"); |
| + authorItem.appendChild(TestUtil.createText("Author: ")); |
| + Element authorNameSpan = TestUtil.createSpan(authorName); |
| + setItemProp(authorNameSpan, "name"); |
| + authorItem.appendChild(authorNameSpan); |
| + articleItem.appendChild(authorItem); |
| + |
| + // Publisher as a nested Organization item. |
| + String publisherName = "Whatever Article Incorporated"; |
| + Element publisherItem = TestUtil.createDiv(2); |
| + setItemProp(publisherItem, "publisher"); |
| + setItemScopeAndType(publisherItem, "Organization"); |
| + publisherItem.appendChild(TestUtil.createText("Publisher: ")); |
| + Element publisherNameSpan = TestUtil.createSpan(publisherName); |
| + setItemProp(publisherNameSpan, "name"); |
| + publisherItem.appendChild(publisherNameSpan); |
| + articleItem.appendChild(publisherItem); |
| + |
| + String datePublished = "April 15, 2014"; |
| + Element publishedSpan = TestUtil.createSpan(datePublished); |
| + setItemProp(publishedSpan, "datePublished"); |
| + articleItem.appendChild(publishedSpan); |
| + |
| + // For a <time> tag the property value is expected to come from the |
| + // "datetime" attribute rather than from the displayed text. |
| + String timeModified = "2014-04-16T23:59"; |
| + Element modifiedTime = Document.get().createElement("time"); |
| + modifiedTime.setInnerHTML("April 16, 2014 11:59pm"); |
| + modifiedTime.setAttribute("datetime", timeModified); |
| + setItemProp(modifiedTime, "dateModified"); |
| + articleItem.appendChild(modifiedTime); |
| + |
| + String copyrightYear = "2000-2014"; |
| + Element yearSpan = TestUtil.createSpan(copyrightYear); |
| + setItemProp(yearSpan, "copyrightYear"); |
| + articleItem.appendChild(yearSpan); |
| + |
| + String copyrightHolder = "Whoever Article Copyrighted"; |
| + Element holderSpan = TestUtil.createSpan(copyrightHolder); |
| + setItemProp(holderSpan, "copyrightHolder"); |
| + articleItem.appendChild(holderSpan); |
| + |
| + String section = "Romance thriller"; |
| + Element sectionSpan = TestUtil.createSpan(section); |
| + setItemProp(sectionSpan, "articleSection"); |
| + articleItem.appendChild(sectionSpan); |
| + |
| + String expectedCopyright = "Copyright " + copyrightYear + " " + copyrightHolder; |
| + |
| + SchemaOrgParser parser = new SchemaOrgParser(mRoot); |
| + assertEquals("ARTICLE", parser.getType()); |
| + assertEquals(title, parser.getTitle()); |
| + assertEquals(description, parser.getDescription()); |
| + assertEquals(articleUrl, parser.getUrl()); |
| + assertEquals(authorName, parser.getAuthor()); |
| + assertEquals(publisherName, parser.getPublisher()); |
| + assertEquals(expectedCopyright, parser.getCopyright()); |
| + |
| + MarkupParser.Image[] images = parser.getImages(); |
| + assertEquals(1, images.length); |
| + MarkupParser.Image parsedImage = images[0]; |
| + assertEquals(imageUrl, parsedImage.image); |
| + assertEquals(imageUrl, parsedImage.url); |
| + |
| + MarkupParser.Article article = parser.getArticle(); |
| + assertEquals(datePublished, article.publishedTime); |
| + assertEquals(timeModified, article.modifiedTime); |
| + assertNull(article.expirationTime); |
| + assertEquals(section, article.section); |
| + assertEquals(1, article.authors.length); |
| + assertEquals(authorName, article.authors[0]); |
| + } |
| + |
| + // Checks that an item scope declared on the root <html> element itself is |
| + // recognized: the headline placed directly under <body> must be picked up |
| + // as the title of an ARTICLE. |
| + public void testItemscopeInHTMLTag() { |
| + String expectedTitle = "Testcase for ItemScope in HTML tag"; |
| + Element h = TestUtil.createHeading(1, expectedTitle); |
| + setItemProp(h, "headline"); |
| + |
| + setItemScopeAndType(mRoot, "Article"); |
| + try { |
| + mBody.appendChild(h); |
| + |
| + SchemaOrgParser parser = new SchemaOrgParser(mRoot); |
| + assertEquals("ARTICLE", parser.getType()); |
| + assertEquals(expectedTitle, parser.getTitle()); |
| + assertNotNull(parser.getArticle()); |
| + } finally { |
| + // Undo every change made to the shared document even when an assertion |
| + // above fails, so that other testcases won't be affected: remove the |
| + // "itemscope" and "itemtype" attributes from the <html> tag, and detach |
| + // the heading, which gwtSetUp() does not clean up (it only removes |
| + // <meta> and <div> elements). |
| + mRoot.removeAttribute("ITEMSCOPE"); |
| + mRoot.removeAttribute("ITEMTYPE"); |
| + h.removeFromParent(); |
| + } |
| + } |
| + |
| + @Override |
| + protected void gwtSetUp() throws Exception { |
| + // Get root element. |
| + mRoot = Document.get().getDocumentElement(); |
| + |
| + // Get <body> element. |
| + NodeList<Element> bodies = mRoot.getElementsByTagName("BODY"); |
| + if (bodies.getLength() != 1) |
| + throw new Exception("There shouldn't be more than 1 <body> tag"); |
| + mBody = bodies.getItem(0); |
| + |
| + // Remove all meta tags, otherwise a testcase may run with the meta tags |
| + // set up in a previous testcase, resulting in unexpected results. |
| + NodeList<Element> allMeta = mRoot.getElementsByTagName("META"); |
| + for (int i = allMeta.getLength() - 1; i >= 0; i--) { |
| + allMeta.getItem(i).removeFromParent(); |
| + } |
| + |
| + // Remove all div tags, otherwise a testcase may run with the div tags |
| + // set up in a previous testcase, resulting in unexpected results. |
| + NodeList<Element> allDiv = mRoot.getElementsByTagName("DIV"); |
| + for (int i = allDiv.getLength() - 1; i >= 0; i--) { |
| + allDiv.getItem(i).removeFromParent(); |
| + } |
| + } |
| + |
| + private void setItemScopeAndType(Element e, String type) { |
| + e.setAttribute("ITEMSCOPE", ""); |
| + e.setAttribute("ITEMTYPE", "http://schema.org/" + type); |
| + } |
| + |
| + private void setItemProp(Element e, String name) { |
| + e.setAttribute("itemprop", name); |
| + } |
| + |
| + private Element mRoot; /* Document element of the test page; assigned in gwtSetUp(). */ |
| + private Element mBody; /* The single BODY element found under mRoot; assigned in gwtSetUp(). */ |
| +} |