Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3)

Unified Diff: test/com/dom_distiller/client/SchemaOrgParserTest.java

Issue 240073007: recognize and parse Schema.org Markup (Closed) Base URL: https://code.google.com/p/dom-distiller/@master
Patch Set: addressed all comments Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: test/com/dom_distiller/client/SchemaOrgParserTest.java
diff --git a/test/com/dom_distiller/client/SchemaOrgParserTest.java b/test/com/dom_distiller/client/SchemaOrgParserTest.java
new file mode 100644
index 0000000000000000000000000000000000000000..f3f5470b78bdb466100aa2a75d221ed8541a8a80
--- /dev/null
+++ b/test/com/dom_distiller/client/SchemaOrgParserTest.java
@@ -0,0 +1,423 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package com.dom_distiller.client;
+
+import com.google.gwt.dom.client.Document;
+import com.google.gwt.dom.client.Element;
+import com.google.gwt.dom.client.ImageElement;
+import com.google.gwt.dom.client.MetaElement;
+import com.google.gwt.dom.client.NodeList;
+
+import com.google.gwt.junit.client.GWTTestCase;
+
+public class SchemaOrgParserTest extends GWTTestCase {
+ /**
+  * Names the GWT module that this test case runs under.
+  *
+  * @return the fully qualified name of the test module.
+  */
+ @Override
+ public String getModuleName() {
+     final String moduleName = "com.dom_distiller.DomDistillerJUnit";
+     return moduleName;
+ }
+
+ /**
+  * Builds one schema.org ImageObject item that embeds an Organization item
+  * as its publisher, then checks every value SchemaOrgParser reports for it.
+  */
+ public void testImageWithEmbeddedPublisher() {
+     Element rootDiv = TestUtil.createDiv(0);
+     setItemScopeAndType(rootDiv, "ImageObject");
+     mBody.appendChild(rootDiv);
+
+     String expectedTitle = "Testcase for IMAGE";
+     Element h = TestUtil.createHeading(1, expectedTitle);
+     setItemProp(h, "headline");
+     rootDiv.appendChild(h);
+
+     String expectedDescription = "Testing IMAGE with embedded publisher";
+     h = TestUtil.createHeading(2, expectedDescription);
+     setItemProp(h, "description");
cjhopman 2014/04/18 01:17:01 Note: the following comment applies to all of thes
kuan 2014/04/18 23:34:38 Done. i changed to using html string and setInner
+     rootDiv.appendChild(h);
+
+     // This should extract the "href" attribute of the <a> tag.
+     String expectedUrl = "http://test_image_with_embedded_item.html";
+     Element link = TestUtil.createAnchor(expectedUrl, "test results");
+     setItemProp(link, "contentUrl");
+     rootDiv.appendChild(link);
+
+     // The publisher is itself an item (Organization); the expected value is
+     // held by its nested "name" property, not by the div's own text.
+     Element div = TestUtil.createDiv(1);
+     setItemProp(div, "publisher");
+     setItemScopeAndType(div, "Organization");
+     div.appendChild(TestUtil.createText("Publisher: "));
+     String expectedPublisher = "Whatever Image Incorporated";
+     Element span = TestUtil.createSpan(expectedPublisher);
+     setItemProp(span, "name");
+     div.appendChild(span);
+     rootDiv.appendChild(div);
+
+     // Copyright year and holder live in a wrapper div that carries no
+     // itemprop of its own.
+     div = TestUtil.createDiv(2);
+     String expectedCopyrightYear = "1999-2022";
+     span = TestUtil.createSpan(expectedCopyrightYear);
+     setItemProp(span, "copyrightYear");
+     div.appendChild(span);
+
+     String expectedCopyrightHolder = "Whoever Image Copyrighted";
+     span = TestUtil.createSpan(expectedCopyrightHolder);
+     setItemProp(span, "copyrightHolder");
+     div.appendChild(span);
+     rootDiv.appendChild(div);
+
+     String expectedFormat = "jpeg";
+     span = TestUtil.createSpan(expectedFormat);
+     setItemProp(span, "encodingFormat");
+     rootDiv.appendChild(span);
+
+     String expectedCaption = "A test for IMAGE with embedded publisher";
+     span = TestUtil.createSpan(expectedCaption);
+     setItemProp(span, "caption");
+     rootDiv.appendChild(span);
+
+     // This should extract the "content" attribute of the <meta> tag.
+     Element meta = TestUtil.createMetaName("no_name", "true");
+     setItemProp(meta, "representativeOfPage");
+     rootDiv.appendChild(meta);
+
+     meta = TestUtil.createMetaName("no_name", "600");
+     setItemProp(meta, "width");
+     rootDiv.appendChild(meta);
+
+     meta = TestUtil.createMetaName("no_name", "400");
+     setItemProp(meta, "height");
+     rootDiv.appendChild(meta);
+
+     SchemaOrgParser parser = new SchemaOrgParser(mRoot);
+     assertEquals("IMAGE", parser.getType());
+     assertEquals(expectedTitle, parser.getTitle());
+     assertEquals(expectedDescription, parser.getDescription());
+     // Only "contentUrl" was set on the item, never "url".
+     assertEquals("", parser.getUrl());
+     assertEquals(expectedPublisher, parser.getPublisher());
+     assertNull(parser.getArticle());
+     assertEquals("", parser.getAuthor());
+     assertEquals(
+             "Copyright " + expectedCopyrightYear + " " + expectedCopyrightHolder,
+             parser.getCopyright());
+     MarkupParser.Image[] images = parser.getImages();
+     assertEquals(1, images.length);
+     MarkupParser.Image image = images[0];
+     assertEquals(expectedUrl, image.image);
+     assertEquals(expectedUrl, image.url);
+     assertNull(image.secureUrl);
+     assertEquals(expectedFormat, image.type);
+     assertEquals(expectedCaption, image.caption);
+     assertEquals(600, image.width);
+     assertEquals(400, image.height);
+ }
+
+ /**
+  * Builds two sibling schema.org ImageObject items under {@code <body>} and
+  * checks that the page-level properties come from the first item while the
+  * image flagged as representative of the page is listed first.
+  */
+ public void test2Images() {
+     // ---- 1st image: representativeOfPage is "false". ----
+     Element rootDiv = TestUtil.createDiv(0);
+     setItemScopeAndType(rootDiv, "ImageObject");
+     mBody.appendChild(rootDiv);
+
+     String expectedTitle1 = "Testcase for 1st IMAGE";
+     Element h = TestUtil.createHeading(1, expectedTitle1);
+     setItemProp(h, "headline");
+     rootDiv.appendChild(h);
+
+     String expectedDescription1 = "Testing 1st IMAGE";
+     h = TestUtil.createHeading(2, expectedDescription1);
+     setItemProp(h, "description");
+     rootDiv.appendChild(h);
+
+     // This should extract the "href" attribute of the <a> tag.
+     String expectedUrl1 = "http://test_1st image.html";
+     Element link = TestUtil.createAnchor(expectedUrl1, "1st test results");
+     setItemProp(link, "contentUrl");
+     rootDiv.appendChild(link);
+
+     // Here the publisher is plain text inside the div, not a nested item.
+     String expectedPublisher1 = "Whatever 1st Image Incorporated";
+     Element div = TestUtil.createDiv(1);
+     setItemProp(div, "publisher");
+     div.setInnerHTML(expectedPublisher1);
+     rootDiv.appendChild(div);
+
+     div = TestUtil.createDiv(2);
+     String expectedCopyrightYear1 = "1000-1999";
+     Element span = TestUtil.createSpan(expectedCopyrightYear1);
+     setItemProp(span, "copyrightYear");
+     div.appendChild(span);
+
+     String expectedCopyrightHolder1 = "Whoever 1st Image Copyrighted";
+     span = TestUtil.createSpan(expectedCopyrightHolder1);
+     setItemProp(span, "copyrightHolder");
+     div.appendChild(span);
+     rootDiv.appendChild(div);
+
+     String expectedFormat1 = "jpeg";
+     span = TestUtil.createSpan(expectedFormat1);
+     setItemProp(span, "encodingFormat");
+     rootDiv.appendChild(span);
+
+     String expectedCaption1 = "A test for 1st IMAGE";
+     span = TestUtil.createSpan(expectedCaption1);
+     setItemProp(span, "caption");
+     rootDiv.appendChild(span);
+
+     // This should extract the "content" attribute of the <meta> tag.
+     Element meta = TestUtil.createMetaName("no_name", "false");
+     setItemProp(meta, "representativeOfPage");
+     rootDiv.appendChild(meta);
+
+     meta = TestUtil.createMetaName("no_name", "400");
+     setItemProp(meta, "width");
+     rootDiv.appendChild(meta);
+
+     meta = TestUtil.createMetaName("no_name", "300");
+     setItemProp(meta, "height");
+     rootDiv.appendChild(meta);
+
+     // ---- 2nd image: representativeOfPage is "true". ----
+     rootDiv = TestUtil.createDiv(10);
+     setItemScopeAndType(rootDiv, "ImageObject");
+     mBody.appendChild(rootDiv);
+
+     String expectedTitle2 = "Testcase for 2nd IMAGE";
+     // NOTE(review): the 1st image's headline was created with level 1;
+     // level 2 here looks unintentional -- confirm.
+     h = TestUtil.createHeading(2, expectedTitle2);
+     setItemProp(h, "headline");
+     rootDiv.appendChild(h);
+
+     String expectedDescription2 = "Testing 2nd IMAGE";
+     h = TestUtil.createHeading(2, expectedDescription2);
+     setItemProp(h, "description");
+     rootDiv.appendChild(h);
+
+     // This should extract the "href" attribute of the <a> tag.
+     // NOTE(review): "mage" is presumably a typo for "image"; harmless
+     // because the same string is used for the assertion -- confirm.
+     String expectedUrl2 = "http://test_2nd mage.html";
+     link = TestUtil.createAnchor(expectedUrl2, "2nd test results");
+     setItemProp(link, "contentUrl");
+     rootDiv.appendChild(link);
+
+     String expectedPublisher2 = "Whatever 2nd Image Incorporated";
+     div = TestUtil.createDiv(11);
+     setItemProp(div, "publisher");
+     div.setInnerHTML(expectedPublisher2);
+     rootDiv.appendChild(div);
+
+     div = TestUtil.createDiv(12);
+     String expectedCopyrightYear2 = "2000-2999";
+     span = TestUtil.createSpan(expectedCopyrightYear2);
+     setItemProp(span, "copyrightYear");
+     div.appendChild(span);
+
+     String expectedCopyrightHolder2 = "Whoever 2nd Image Copyrighted";
+     span = TestUtil.createSpan(expectedCopyrightHolder2);
+     setItemProp(span, "copyrightHolder");
+     div.appendChild(span);
+     rootDiv.appendChild(div);
+
+     String expectedFormat2 = "gif";
+     span = TestUtil.createSpan(expectedFormat2);
+     setItemProp(span, "encodingFormat");
+     rootDiv.appendChild(span);
+
+     String expectedCaption2 = "A test for 2nd IMAGE";
+     span = TestUtil.createSpan(expectedCaption2);
+     setItemProp(span, "caption");
+     rootDiv.appendChild(span);
+
+     // This should extract the "content" attribute of the <meta> tag.
+     meta = TestUtil.createMetaName("no_name", "true");
+     setItemProp(meta, "representativeOfPage");
+     rootDiv.appendChild(meta);
+
+     meta = TestUtil.createMetaName("no_name", "1000");
+     setItemProp(meta, "width");
+     rootDiv.appendChild(meta);
+
+     meta = TestUtil.createMetaName("no_name", "600");
+     setItemProp(meta, "height");
+     rootDiv.appendChild(meta);
+
+     SchemaOrgParser parser = new SchemaOrgParser(mRoot);
+     // The basic properties of Thing should be from the first image that was
+     // inserted.
+     assertEquals("IMAGE", parser.getType());
+     assertEquals(expectedTitle1, parser.getTitle());
+     assertEquals(expectedDescription1, parser.getDescription());
+     // Only "contentUrl" was set on the items, never "url".
+     assertEquals("", parser.getUrl());
+     assertEquals(expectedPublisher1, parser.getPublisher());
+     assertNull(parser.getArticle());
+     assertEquals("", parser.getAuthor());
+     assertEquals("Copyright " + expectedCopyrightYear1 + " " + expectedCopyrightHolder1,
+             parser.getCopyright());
+
+     MarkupParser.Image[] images = parser.getImages();
+     assertEquals(2, images.length);
+     // The 2nd image that was inserted is representative of page, so the
+     // images should be swapped in |images|.
+     MarkupParser.Image image = images[0];
+     assertEquals(expectedUrl2, image.image);
+     assertEquals(expectedUrl2, image.url);
+     assertNull(image.secureUrl);
+     assertEquals(expectedFormat2, image.type);
+     assertEquals(expectedCaption2, image.caption);
+     assertEquals(1000, image.width);
+     assertEquals(600, image.height);
+     image = images[1];
+     assertEquals(expectedUrl1, image.image);
+     assertEquals(expectedUrl1, image.url);
+     assertNull(image.secureUrl);
+     assertEquals(expectedFormat1, image.type);
+     assertEquals(expectedCaption1, image.caption);
+     assertEquals(400, image.width);
+     assertEquals(300, image.height);
+ }
+
+ /**
+  * Builds one schema.org Article item that embeds a Person author and an
+  * Organization publisher, then checks the page-level, image and article
+  * values SchemaOrgParser reports for it.
+  */
+ public void testArticleWithEmbeddedAuthorAndPublisher() {
+     Element rootDiv = TestUtil.createDiv(0);
+     setItemScopeAndType(rootDiv, "Article");
+     mBody.appendChild(rootDiv);
+
+     String expectedTitle = "Testcase for ARTICLE";
+     Element h = TestUtil.createHeading(1, expectedTitle);
+     setItemProp(h, "headline");
+     rootDiv.appendChild(h);
+
+     String expectedDescription = "Testing ARTICLE with embedded author and publisher";
+     h = TestUtil.createHeading(2, expectedDescription);
+     setItemProp(h, "description");
+     rootDiv.appendChild(h);
+
+     // This should extract the "href" attribute of the <a> tag.
+     String expectedUrl = "http://test_article_with_embedded_items.html";
+     Element link = TestUtil.createAnchor(expectedUrl, "test results");
+     setItemProp(link, "url");
+     rootDiv.appendChild(link);
+
+     // This should extract the "src" attribute of the <image> tag.
+     String expectedImage = "http://test_article_with_embedded_items.jpeg";
+     ImageElement image = TestUtil.createImage();
+     image.setSrc(expectedImage);
+     setItemProp(image, "image");
+     rootDiv.appendChild(image);
+
+     // The author is a nested Person item; its "name" holds the value.
+     Element div = TestUtil.createDiv(1);
+     setItemProp(div, "author");
+     setItemScopeAndType(div, "Person");
+     div.appendChild(TestUtil.createText("Author: "));
+     String expectedAuthor = "Whoever authored";
+     Element span = TestUtil.createSpan(expectedAuthor);
+     setItemProp(span, "name");
+     div.appendChild(span);
+     rootDiv.appendChild(div);
+
+     // The publisher is a nested Organization item; its "name" holds the
+     // value.
+     div = TestUtil.createDiv(2);
+     setItemProp(div, "publisher");
+     setItemScopeAndType(div, "Organization");
+     div.appendChild(TestUtil.createText("Publisher: "));
+     String expectedPublisher = "Whatever Article Incorporated";
+     span = TestUtil.createSpan(expectedPublisher);
+     setItemProp(span, "name");
+     div.appendChild(span);
+     rootDiv.appendChild(div);
+
+     String expectedDatePublished = "April 15, 2014";
+     span = TestUtil.createSpan(expectedDatePublished);
+     setItemProp(span, "datePublished");
+     rootDiv.appendChild(span);
+
+     // This should extract the "datetime" attribute of the <time> tag.
+     String expectedTimeModified = "2014-04-16T23:59";
+     Element time = Document.get().createElement("time");
+     time.setInnerHTML("April 16, 2014 11:59pm");
+     time.setAttribute("datetime", expectedTimeModified);
+     setItemProp(time, "dateModified");
+     rootDiv.appendChild(time);
+
+     String expectedCopyrightYear = "2000-2014";
+     span = TestUtil.createSpan(expectedCopyrightYear);
+     setItemProp(span, "copyrightYear");
+     rootDiv.appendChild(span);
+
+     String expectedCopyrightHolder = "Whoever Article Copyrighted";
+     span = TestUtil.createSpan(expectedCopyrightHolder);
+     setItemProp(span, "copyrightHolder");
+     rootDiv.appendChild(span);
+
+     String expectedSection = "Romance thriller";
+     span = TestUtil.createSpan(expectedSection);
+     setItemProp(span, "articleSection");
+     rootDiv.appendChild(span);
+
+     SchemaOrgParser parser = new SchemaOrgParser(mRoot);
+     assertEquals("ARTICLE", parser.getType());
+     assertEquals(expectedTitle, parser.getTitle());
+     assertEquals(expectedDescription, parser.getDescription());
+     assertEquals(expectedUrl, parser.getUrl());
+     assertEquals(expectedAuthor, parser.getAuthor());
+     assertEquals(expectedPublisher, parser.getPublisher());
+     assertEquals(
+             "Copyright " + expectedCopyrightYear + " " + expectedCopyrightHolder,
+             parser.getCopyright());
+     MarkupParser.Image[] images = parser.getImages();
+     assertEquals(1, images.length);
+     assertEquals(expectedImage, images[0].image);
+     assertEquals(expectedImage, images[0].url);
+     MarkupParser.Article article = parser.getArticle();
+     // Fail with a clear assertion instead of a NullPointerException on the
+     // field accesses below if no article was parsed.
+     assertNotNull(article);
+     assertEquals(expectedDatePublished, article.publishedTime);
+     assertEquals(expectedTimeModified, article.modifiedTime);
+     assertNull(article.expirationTime);
+     assertEquals(expectedSection, article.section);
+     assertEquals(1, article.authors.length);
+     assertEquals(expectedAuthor, article.authors[0]);
+ }
+
+ /**
+  * Puts the Article itemscope/itemtype on the root element itself and a
+  * headline directly under {@code <body>}, then checks that SchemaOrgParser
+  * still reports an article with that title.
+  */
+ public void testItemscopeInHTMLTag() {
+     String expectedTitle = "Testcase for ItemScope in HTML tag";
+     Element h = TestUtil.createHeading(1, expectedTitle);
+     setItemProp(h, "headline");
+
+     setItemScopeAndType(mRoot, "Article");
+     try {
+         mBody.appendChild(h);
+
+         SchemaOrgParser parser = new SchemaOrgParser(mRoot);
+         assertEquals("ARTICLE", parser.getType());
+         assertEquals(expectedTitle, parser.getTitle());
+         assertNotNull(parser.getArticle());
+     } finally {
+         // Remove "itemscope" and "itemtype" attributes in <html> tag, so that
+         // other testcases won't be affected. Doing this in a finally block
+         // keeps a failed assertion above from leaking state into later tests.
+         mRoot.removeAttribute("ITEMSCOPE");
+         mRoot.removeAttribute("ITEMTYPE");
+         // gwtSetUp() only clears <meta> and <div> elements, so the heading
+         // appended directly to <body> has to be removed here.
+         h.removeFromParent();
+     }
+ }
+
+ /**
+  * Prepares the shared document before each test: caches the root and
+  * {@code <body>} elements, then removes every {@code <meta>} and
+  * {@code <div>} element so that markup set up by a previous testcase cannot
+  * influence the next one.
+  *
+  * @throws IllegalStateException if the document does not contain exactly
+  *     one {@code <body>} element.
+  */
+ @Override
+ protected void gwtSetUp() throws Exception {
+     // Get root element.
+     mRoot = Document.get().getDocumentElement();
+
+     // Get <body> element.
+     NodeList<Element> bodies = mRoot.getElementsByTagName("BODY");
+     int bodyCount = bodies.getLength();
+     if (bodyCount != 1) {
+         // Report the real count: the check fails for zero as well as for
+         // several <body> tags.
+         throw new IllegalStateException(
+                 "Expected exactly 1 <body> tag, found " + bodyCount);
+     }
+     mBody = bodies.getItem(0);
+
+     // Remove all meta and div tags, otherwise a testcase may run with the
+     // tags set up in a previous testcase, resulting in unexpected results.
+     removeAllElementsWithTag("META");
+     removeAllElementsWithTag("DIV");
+ }
+
+ /** Detaches every element under {@code mRoot} that has the given tag name. */
+ private void removeAllElementsWithTag(String tagName) {
+     NodeList<Element> matches = mRoot.getElementsByTagName(tagName);
+     // Walk backwards so that removing an element never shifts the index of
+     // one that is still to be visited, in case the list is live.
+     for (int i = matches.getLength() - 1; i >= 0; i--) {
+         matches.getItem(i).removeFromParent();
+     }
+ }
+
+ private void setItemScopeAndType(Element e, String type) {
+ e.setAttribute("ITEMSCOPE", "");
+ e.setAttribute("ITEMTYPE", "http://schema.org/" + type);
+ }
+
+ private void setItemProp(Element e, String name) {
+ e.setAttribute("itemprop", name);
+ }
+
+ private Element mRoot;  // The document element; assigned in gwtSetUp().
+ private Element mBody;  // The single <body> element found under mRoot; assigned in gwtSetUp().
+}

Powered by Google App Engine
This is Rietveld 408576698