| Index: test/com/dom_distiller/client/SchemaOrgParserTest.java
|
| diff --git a/test/com/dom_distiller/client/SchemaOrgParserTest.java b/test/com/dom_distiller/client/SchemaOrgParserTest.java
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..f3f5470b78bdb466100aa2a75d221ed8541a8a80
|
| --- /dev/null
|
| +++ b/test/com/dom_distiller/client/SchemaOrgParserTest.java
|
| @@ -0,0 +1,423 @@
|
| +// Copyright 2014 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +package com.dom_distiller.client;
|
| +
|
| +import com.google.gwt.dom.client.Document;
|
| +import com.google.gwt.dom.client.Element;
|
| +import com.google.gwt.dom.client.ImageElement;
|
| +import com.google.gwt.dom.client.MetaElement;
|
| +import com.google.gwt.dom.client.NodeList;
|
| +
|
| +import com.google.gwt.junit.client.GWTTestCase;
|
| +
|
| +public class SchemaOrgParserTest extends GWTTestCase {
|
| + /** Returns the name of the GWT module used when running this test case. */
|
| + @Override
|
| + public String getModuleName() {
|
| +     final String moduleName = "com.dom_distiller.DomDistillerJUnit";
|
| +     return moduleName;
|
| + }
|
| +
|
| + /**
|
| +  * Builds one schema.org ImageObject item whose publisher is a nested Organization
|
| +  * item, then checks every value SchemaOrgParser reports for the document.
|
| +  */
|
| + public void testImageWithEmbeddedPublisher() {
|
| +     // Expected values; each one is planted into the DOM below.
|
| +     final String title = "Testcase for IMAGE";
|
| +     final String description = "Testing IMAGE with embedded publisher";
|
| +     final String url = "http://test_image_with_embedded_item.html";
|
| +     final String publisher = "Whatever Image Incorporated";
|
| +     final String copyrightYear = "1999-2022";
|
| +     final String copyrightHolder = "Whoever Image Copyrighted";
|
| +     final String format = "jpeg";
|
| +     final String caption = "A test for IMAGE with embedded publisher";
|
| +
|
| +     // Top-level item: a div marked as an ImageObject, attached to <body>.
|
| +     Element imageItem = TestUtil.createDiv(0);
|
| +     setItemScopeAndType(imageItem, "ImageObject");
|
| +     mBody.appendChild(imageItem);
|
| +
|
| +     Element headlineNode = TestUtil.createHeading(1, title);
|
| +     setItemProp(headlineNode, "headline");
|
| +     imageItem.appendChild(headlineNode);
|
| +
|
| +     Element descriptionNode = TestUtil.createHeading(2, description);
|
| +     setItemProp(descriptionNode, "description");
|
| +     imageItem.appendChild(descriptionNode);
|
| +
|
| +     // For an <a> tag the value should be taken from its "href" attribute.
|
| +     Element contentLink = TestUtil.createAnchor(url, "test results");
|
| +     setItemProp(contentLink, "contentUrl");
|
| +     imageItem.appendChild(contentLink);
|
| +
|
| +     // The publisher is itself an item (Organization) nested in the image item.
|
| +     Element publisherItem = TestUtil.createDiv(1);
|
| +     setItemProp(publisherItem, "publisher");
|
| +     setItemScopeAndType(publisherItem, "Organization");
|
| +     publisherItem.appendChild(TestUtil.createText("Publisher: "));
|
| +     Element publisherName = TestUtil.createSpan(publisher);
|
| +     setItemProp(publisherName, "name");
|
| +     publisherItem.appendChild(publisherName);
|
| +     imageItem.appendChild(publisherItem);
|
| +
|
| +     // Copyright year and holder live inside a plain wrapper div (no itemscope).
|
| +     Element copyrightWrapper = TestUtil.createDiv(2);
|
| +     Element yearNode = TestUtil.createSpan(copyrightYear);
|
| +     setItemProp(yearNode, "copyrightYear");
|
| +     copyrightWrapper.appendChild(yearNode);
|
| +     Element holderNode = TestUtil.createSpan(copyrightHolder);
|
| +     setItemProp(holderNode, "copyrightHolder");
|
| +     copyrightWrapper.appendChild(holderNode);
|
| +     imageItem.appendChild(copyrightWrapper);
|
| +
|
| +     Element formatNode = TestUtil.createSpan(format);
|
| +     setItemProp(formatNode, "encodingFormat");
|
| +     imageItem.appendChild(formatNode);
|
| +
|
| +     Element captionNode = TestUtil.createSpan(caption);
|
| +     setItemProp(captionNode, "caption");
|
| +     imageItem.appendChild(captionNode);
|
| +
|
| +     // For a <meta> tag the value should be taken from its "content" attribute.
|
| +     Element representativeMeta = TestUtil.createMetaName("no_name", "true");
|
| +     setItemProp(representativeMeta, "representativeOfPage");
|
| +     imageItem.appendChild(representativeMeta);
|
| +
|
| +     Element widthMeta = TestUtil.createMetaName("no_name", "600");
|
| +     setItemProp(widthMeta, "width");
|
| +     imageItem.appendChild(widthMeta);
|
| +
|
| +     Element heightMeta = TestUtil.createMetaName("no_name", "400");
|
| +     setItemProp(heightMeta, "height");
|
| +     imageItem.appendChild(heightMeta);
|
| +
|
| +     SchemaOrgParser parser = new SchemaOrgParser(mRoot);
|
| +     assertEquals("IMAGE", parser.getType());
|
| +     assertEquals(title, parser.getTitle());
|
| +     assertEquals(description, parser.getDescription());
|
| +     assertEquals("", parser.getUrl());
|
| +     assertEquals(publisher, parser.getPublisher());
|
| +     assertNull(parser.getArticle());
|
| +     assertEquals("", parser.getAuthor());
|
| +     final String expectedCopyright = "Copyright " + copyrightYear + " " + copyrightHolder;
|
| +     assertEquals(expectedCopyright, parser.getCopyright());
|
| +
|
| +     MarkupParser.Image[] parsedImages = parser.getImages();
|
| +     assertEquals(1, parsedImages.length);
|
| +     MarkupParser.Image parsed = parsedImages[0];
|
| +     assertEquals(url, parsed.image);
|
| +     assertEquals(url, parsed.url);
|
| +     assertNull(parsed.secureUrl);
|
| +     assertEquals(format, parsed.type);
|
| +     assertEquals(caption, parsed.caption);
|
| +     assertEquals(600, parsed.width);
|
| +     assertEquals(400, parsed.height);
|
| + }
|
| +
|
| + /**
|
| +  * Builds two sibling ImageObject items where only the second is flagged
|
| +  * representativeOfPage. Page-level values are expected to come from the first item,
|
| +  * while the image list is expected to put the representative (second) image first.
|
| +  */
|
| + public void test2Images() {
|
| +     // ---- 1st image item ----
|
| +     Element firstItem = TestUtil.createDiv(0);
|
| +     setItemScopeAndType(firstItem, "ImageObject");
|
| +     mBody.appendChild(firstItem);
|
| +
|
| +     final String title1 = "Testcase for 1st IMAGE";
|
| +     Element headlineNode1 = TestUtil.createHeading(1, title1);
|
| +     setItemProp(headlineNode1, "headline");
|
| +     firstItem.appendChild(headlineNode1);
|
| +
|
| +     final String description1 = "Testing 1st IMAGE";
|
| +     Element descriptionNode1 = TestUtil.createHeading(2, description1);
|
| +     setItemProp(descriptionNode1, "description");
|
| +     firstItem.appendChild(descriptionNode1);
|
| +
|
| +     // For an <a> tag the value should be taken from its "href" attribute.
|
| +     final String url1 = "http://test_1st image.html";
|
| +     Element contentLink1 = TestUtil.createAnchor(url1, "1st test results");
|
| +     setItemProp(contentLink1, "contentUrl");
|
| +     firstItem.appendChild(contentLink1);
|
| +
|
| +     // Here the publisher is plain text inside the div, not a nested item.
|
| +     final String publisher1 = "Whatever 1st Image Incorporated";
|
| +     Element publisherNode1 = TestUtil.createDiv(1);
|
| +     setItemProp(publisherNode1, "publisher");
|
| +     publisherNode1.setInnerHTML(publisher1);
|
| +     firstItem.appendChild(publisherNode1);
|
| +
|
| +     Element copyrightWrapper1 = TestUtil.createDiv(2);
|
| +     final String copyrightYear1 = "1000-1999";
|
| +     Element yearNode1 = TestUtil.createSpan(copyrightYear1);
|
| +     setItemProp(yearNode1, "copyrightYear");
|
| +     copyrightWrapper1.appendChild(yearNode1);
|
| +
|
| +     final String copyrightHolder1 = "Whoever 1st Image Copyrighted";
|
| +     Element holderNode1 = TestUtil.createSpan(copyrightHolder1);
|
| +     setItemProp(holderNode1, "copyrightHolder");
|
| +     copyrightWrapper1.appendChild(holderNode1);
|
| +     firstItem.appendChild(copyrightWrapper1);
|
| +
|
| +     final String format1 = "jpeg";
|
| +     Element formatNode1 = TestUtil.createSpan(format1);
|
| +     setItemProp(formatNode1, "encodingFormat");
|
| +     firstItem.appendChild(formatNode1);
|
| +
|
| +     final String caption1 = "A test for 1st IMAGE";
|
| +     Element captionNode1 = TestUtil.createSpan(caption1);
|
| +     setItemProp(captionNode1, "caption");
|
| +     firstItem.appendChild(captionNode1);
|
| +
|
| +     // For a <meta> tag the value should be taken from its "content" attribute.
|
| +     Element representativeMeta1 = TestUtil.createMetaName("no_name", "false");
|
| +     setItemProp(representativeMeta1, "representativeOfPage");
|
| +     firstItem.appendChild(representativeMeta1);
|
| +
|
| +     Element widthMeta1 = TestUtil.createMetaName("no_name", "400");
|
| +     setItemProp(widthMeta1, "width");
|
| +     firstItem.appendChild(widthMeta1);
|
| +
|
| +     Element heightMeta1 = TestUtil.createMetaName("no_name", "300");
|
| +     setItemProp(heightMeta1, "height");
|
| +     firstItem.appendChild(heightMeta1);
|
| +
|
| +     // ---- 2nd image item, appended to <body> after the first ----
|
| +     Element secondItem = TestUtil.createDiv(10);
|
| +     setItemScopeAndType(secondItem, "ImageObject");
|
| +     mBody.appendChild(secondItem);
|
| +
|
| +     final String title2 = "Testcase for 2nd IMAGE";
|
| +     Element headlineNode2 = TestUtil.createHeading(2, title2);
|
| +     setItemProp(headlineNode2, "headline");
|
| +     secondItem.appendChild(headlineNode2);
|
| +
|
| +     final String description2 = "Testing 2nd IMAGE";
|
| +     Element descriptionNode2 = TestUtil.createHeading(2, description2);
|
| +     setItemProp(descriptionNode2, "description");
|
| +     secondItem.appendChild(descriptionNode2);
|
| +
|
| +     // For an <a> tag the value should be taken from its "href" attribute.
|
| +     final String url2 = "http://test_2nd mage.html";
|
| +     Element contentLink2 = TestUtil.createAnchor(url2, "2nd test results");
|
| +     setItemProp(contentLink2, "contentUrl");
|
| +     secondItem.appendChild(contentLink2);
|
| +
|
| +     final String publisher2 = "Whatever 2nd Image Incorporated";
|
| +     Element publisherNode2 = TestUtil.createDiv(11);
|
| +     setItemProp(publisherNode2, "publisher");
|
| +     publisherNode2.setInnerHTML(publisher2);
|
| +     secondItem.appendChild(publisherNode2);
|
| +
|
| +     Element copyrightWrapper2 = TestUtil.createDiv(12);
|
| +     final String copyrightYear2 = "2000-2999";
|
| +     Element yearNode2 = TestUtil.createSpan(copyrightYear2);
|
| +     setItemProp(yearNode2, "copyrightYear");
|
| +     copyrightWrapper2.appendChild(yearNode2);
|
| +
|
| +     final String copyrightHolder2 = "Whoever 2nd Image Copyrighted";
|
| +     Element holderNode2 = TestUtil.createSpan(copyrightHolder2);
|
| +     setItemProp(holderNode2, "copyrightHolder");
|
| +     copyrightWrapper2.appendChild(holderNode2);
|
| +     secondItem.appendChild(copyrightWrapper2);
|
| +
|
| +     final String format2 = "gif";
|
| +     Element formatNode2 = TestUtil.createSpan(format2);
|
| +     setItemProp(formatNode2, "encodingFormat");
|
| +     secondItem.appendChild(formatNode2);
|
| +
|
| +     final String caption2 = "A test for 2nd IMAGE";
|
| +     Element captionNode2 = TestUtil.createSpan(caption2);
|
| +     setItemProp(captionNode2, "caption");
|
| +     secondItem.appendChild(captionNode2);
|
| +
|
| +     // For a <meta> tag the value should be taken from its "content" attribute.
|
| +     Element representativeMeta2 = TestUtil.createMetaName("no_name", "true");
|
| +     setItemProp(representativeMeta2, "representativeOfPage");
|
| +     secondItem.appendChild(representativeMeta2);
|
| +
|
| +     Element widthMeta2 = TestUtil.createMetaName("no_name", "1000");
|
| +     setItemProp(widthMeta2, "width");
|
| +     secondItem.appendChild(widthMeta2);
|
| +
|
| +     Element heightMeta2 = TestUtil.createMetaName("no_name", "600");
|
| +     setItemProp(heightMeta2, "height");
|
| +     secondItem.appendChild(heightMeta2);
|
| +
|
| +     SchemaOrgParser parser = new SchemaOrgParser(mRoot);
|
| +     // The basic properties of Thing should come from the first image that was
|
| +     // inserted.
|
| +     assertEquals("IMAGE", parser.getType());
|
| +     assertEquals(title1, parser.getTitle());
|
| +     assertEquals(description1, parser.getDescription());
|
| +     assertEquals("", parser.getUrl());
|
| +     assertEquals(publisher1, parser.getPublisher());
|
| +     assertNull(parser.getArticle());
|
| +     assertEquals("", parser.getAuthor());
|
| +     final String expectedCopyright = "Copyright " + copyrightYear1 + " " + copyrightHolder1;
|
| +     assertEquals(expectedCopyright, parser.getCopyright());
|
| +
|
| +     MarkupParser.Image[] parsedImages = parser.getImages();
|
| +     assertEquals(2, parsedImages.length);
|
| +
|
| +     // The 2nd inserted image is representative of the page, so it should have been
|
| +     // moved ahead of the 1st in the returned array.
|
| +     MarkupParser.Image representative = parsedImages[0];
|
| +     assertEquals(url2, representative.image);
|
| +     assertEquals(url2, representative.url);
|
| +     assertNull(representative.secureUrl);
|
| +     assertEquals(format2, representative.type);
|
| +     assertEquals(caption2, representative.caption);
|
| +     assertEquals(1000, representative.width);
|
| +     assertEquals(600, representative.height);
|
| +
|
| +     MarkupParser.Image other = parsedImages[1];
|
| +     assertEquals(url1, other.image);
|
| +     assertEquals(url1, other.url);
|
| +     assertNull(other.secureUrl);
|
| +     assertEquals(format1, other.type);
|
| +     assertEquals(caption1, other.caption);
|
| +     assertEquals(400, other.width);
|
| +     assertEquals(300, other.height);
|
| + }
|
| +
|
| + /**
|
| +  * Builds one schema.org Article item whose author (Person) and publisher
|
| +  * (Organization) are nested items, then checks both the generic values and the
|
| +  * Article-specific values reported by SchemaOrgParser.
|
| +  */
|
| + public void testArticleWithEmbeddedAuthorAndPublisher() {
|
| +     Element rootDiv = TestUtil.createDiv(0);
|
| +     setItemScopeAndType(rootDiv, "Article");
|
| +     mBody.appendChild(rootDiv);
|
| +
|
| +     String expectedTitle = "Testcase for ARTICLE";
|
| +     Element h = TestUtil.createHeading(1, expectedTitle);
|
| +     setItemProp(h, "headline");
|
| +     rootDiv.appendChild(h);
|
| +
|
| +     String expectedDescription = "Testing ARTICLE with embedded author and publisher";
|
| +     h = TestUtil.createHeading(2, expectedDescription);
|
| +     setItemProp(h, "description");
|
| +     rootDiv.appendChild(h);
|
| +
|
| +     // This should extract the "href" attribute of the <a> tag.
|
| +     String expectedUrl = "http://test_article_with_embedded_items.html";
|
| +     Element link = TestUtil.createAnchor(expectedUrl, "test results");
|
| +     setItemProp(link, "url");
|
| +     rootDiv.appendChild(link);
|
| +
|
| +     // This should extract the "src" attribute of the <img> tag.
|
| +     String expectedImage = "http://test_article_with_embedded_items.jpeg";
|
| +     ImageElement image = TestUtil.createImage();
|
| +     image.setSrc(expectedImage);
|
| +     setItemProp(image, "image");
|
| +     rootDiv.appendChild(image);
|
| +
|
| +     // Author: a nested Person item whose "name" property carries the value.
|
| +     Element div = TestUtil.createDiv(1);
|
| +     setItemProp(div, "author");
|
| +     setItemScopeAndType(div, "Person");
|
| +     div.appendChild(TestUtil.createText("Author: "));
|
| +     String expectedAuthor = "Whoever authored";
|
| +     Element span = TestUtil.createSpan(expectedAuthor);
|
| +     setItemProp(span, "name");
|
| +     div.appendChild(span);
|
| +     rootDiv.appendChild(div);
|
| +
|
| +     // Publisher: a nested Organization item, same shape as the author above.
|
| +     div = TestUtil.createDiv(2);
|
| +     setItemProp(div, "publisher");
|
| +     setItemScopeAndType(div, "Organization");
|
| +     div.appendChild(TestUtil.createText("Publisher: "));
|
| +     String expectedPublisher = "Whatever Article Incorporated";
|
| +     span = TestUtil.createSpan(expectedPublisher);
|
| +     setItemProp(span, "name");
|
| +     div.appendChild(span);
|
| +     rootDiv.appendChild(div);
|
| +
|
| +     String expectedDatePublished = "April 15, 2014";
|
| +     span = TestUtil.createSpan(expectedDatePublished);
|
| +     setItemProp(span, "datePublished");
|
| +     rootDiv.appendChild(span);
|
| +
|
| +     // This should extract the "datetime" attribute of the <time> tag, not its text.
|
| +     String expectedTimeModified = "2014-04-16T23:59";
|
| +     Element time = Document.get().createElement("time");
|
| +     time.setInnerHTML("April 16, 2014 11:59pm");
|
| +     time.setAttribute("datetime", expectedTimeModified);
|
| +     setItemProp(time, "dateModified");
|
| +     rootDiv.appendChild(time);
|
| +
|
| +     String expectedCopyrightYear = "2000-2014";
|
| +     span = TestUtil.createSpan(expectedCopyrightYear);
|
| +     setItemProp(span, "copyrightYear");
|
| +     rootDiv.appendChild(span);
|
| +
|
| +     String expectedCopyrightHolder = "Whoever Article Copyrighted";
|
| +     span = TestUtil.createSpan(expectedCopyrightHolder);
|
| +     setItemProp(span, "copyrightHolder");
|
| +     rootDiv.appendChild(span);
|
| +
|
| +     String expectedSection = "Romance thriller";
|
| +     span = TestUtil.createSpan(expectedSection);
|
| +     setItemProp(span, "articleSection");
|
| +     rootDiv.appendChild(span);
|
| +
|
| +     SchemaOrgParser parser = new SchemaOrgParser(mRoot);
|
| +     assertEquals("ARTICLE", parser.getType());
|
| +     assertEquals(expectedTitle, parser.getTitle());
|
| +     assertEquals(expectedDescription, parser.getDescription());
|
| +     assertEquals(expectedUrl, parser.getUrl());
|
| +     assertEquals(expectedAuthor, parser.getAuthor());
|
| +     assertEquals(expectedPublisher, parser.getPublisher());
|
| +     assertEquals(
|
| +             "Copyright " + expectedCopyrightYear + " " + expectedCopyrightHolder,
|
| +             parser.getCopyright());
|
| +     MarkupParser.Image[] images = parser.getImages();
|
| +     assertEquals(1, images.length);
|
| +     assertEquals(expectedImage, images[0].image);
|
| +     assertEquals(expectedImage, images[0].url);
|
| +
|
| +     MarkupParser.Article article = parser.getArticle();
|
| +     // Fail with a clear assertion, rather than a NullPointerException on the next
|
| +     // line, if the parser produced no article at all.
|
| +     assertNotNull(article);
|
| +     assertEquals(expectedDatePublished, article.publishedTime);
|
| +     assertEquals(expectedTimeModified, article.modifiedTime);
|
| +     assertNull(article.expirationTime);
|
| +     assertEquals(expectedSection, article.section);
|
| +     assertEquals(1, article.authors.length);
|
| +     assertEquals(expectedAuthor, article.authors[0]);
|
| + }
|
| +
|
| + /**
|
| +  * Marks the {@code <html>} element itself as an Article item and checks that a
|
| +  * headline placed directly under {@code <body>} is picked up by SchemaOrgParser.
|
| +  *
|
| +  * <p>The document is shared between test methods, so everything this test adds is
|
| +  * undone in a {@code finally} block, even when an assertion fails.
|
| +  */
|
| + public void testItemscopeInHTMLTag() {
|
| +     String expectedTitle = "Testcase for ItemScope in HTML tag";
|
| +     Element h = TestUtil.createHeading(1, expectedTitle);
|
| +     setItemProp(h, "headline");
|
| +
|
| +     try {
|
| +         setItemScopeAndType(mRoot, "Article");
|
| +         mBody.appendChild(h);
|
| +
|
| +         SchemaOrgParser parser = new SchemaOrgParser(mRoot);
|
| +         assertEquals("ARTICLE", parser.getType());
|
| +         assertEquals(expectedTitle, parser.getTitle());
|
| +         assertNotNull(parser.getArticle());
|
| +     } finally {
|
| +         // Remove "itemscope" and "itemtype" attributes in <html> tag, so that
|
| +         // other testcases won't be affected.
|
| +         mRoot.removeAttribute("ITEMSCOPE");
|
| +         mRoot.removeAttribute("ITEMTYPE");
|
| +         // The heading hangs directly off <body>; gwtSetUp() only strips <div> and
|
| +         // <meta> elements, so it has to be detached here.
|
| +         h.removeFromParent();
|
| +     }
|
| + }
|
| +
|
| + /**
|
| +  * Prepares the shared host document for a test: caches the root and body elements
|
| +  * in {@code mRoot} and {@code mBody}, and removes markup left by earlier tests.
|
| +  *
|
| +  * @throws IllegalStateException if the document does not contain exactly one
|
| +  *         {@code <body>} element
|
| +  */
|
| + @Override
|
| + protected void gwtSetUp() throws Exception {
|
| +     // Get root element.
|
| +     mRoot = Document.get().getDocumentElement();
|
| +
|
| +     // Get <body> element; both zero and several bodies are rejected.
|
| +     NodeList<Element> bodies = mRoot.getElementsByTagName("BODY");
|
| +     if (bodies.getLength() != 1) {
|
| +         throw new IllegalStateException(
|
| +                 "Expected exactly 1 <body> tag, found " + bodies.getLength());
|
| +     }
|
| +     mBody = bodies.getItem(0);
|
| +
|
| +     // Clear item markup a previous test may have left on <html>, e.g. if it failed
|
| +     // before its own cleanup ran.
|
| +     mRoot.removeAttribute("ITEMSCOPE");
|
| +     mRoot.removeAttribute("ITEMTYPE");
|
| +
|
| +     // Remove all meta and div tags, otherwise a testcase may run with the tags
|
| +     // set up in a previous testcase, resulting in unexpected results.
|
| +     removeAllByTagName("META");
|
| +     removeAllByTagName("DIV");
|
| + }
|
| +
|
| + /** Detaches every element under {@code mRoot} that has the given tag name. */
|
| + private void removeAllByTagName(String tagName) {
|
| +     NodeList<Element> matches = mRoot.getElementsByTagName(tagName);
|
| +     // Walk backwards so that, if the list is live, a removal never shifts the
|
| +     // indices that are still to be visited.
|
| +     for (int i = matches.getLength() - 1; i >= 0; i--) {
|
| +         matches.getItem(i).removeFromParent();
|
| +     }
|
| + }
|
| +
|
| + private void setItemScopeAndType(Element e, String type) {
|
| + e.setAttribute("ITEMSCOPE", "");
|
| + e.setAttribute("ITEMTYPE", "http://schema.org/" + type);
|
| + }
|
| +
|
| + private void setItemProp(Element e, String name) {
|
| + e.setAttribute("itemprop", name);
|
| + }
|
| +
|
| + // Document element of the host page; assigned in gwtSetUp().
|
| + private Element mRoot;
|
| + // The page's single <body> element; assigned in gwtSetUp().
|
| + private Element mBody;
|
| +}
|
|
|