Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(38)

Unified Diff: src/com/dom_distiller/client/SchemaOrgParserAccessor.java

Issue 240073007: recognize and parse Schema.org Markup (Closed) Base URL: https://code.google.com/p/dom-distiller/@master
Patch Set: addressed missed-out comments Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/com/dom_distiller/client/SchemaOrgParserAccessor.java
diff --git a/src/com/dom_distiller/client/SchemaOrgParserAccessor.java b/src/com/dom_distiller/client/SchemaOrgParserAccessor.java
new file mode 100644
index 0000000000000000000000000000000000000000..ed6ddfd96bbab7c20a2dd298a79af7c384ba8120
--- /dev/null
+++ b/src/com/dom_distiller/client/SchemaOrgParserAccessor.java
@@ -0,0 +1,185 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+package com.dom_distiller.client;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.google.gwt.dom.client.Element;
+
+/**
+ * Adapter that exposes the properties parsed by a SchemaOrgParser through the
+ * MarkupParser.Parser interface.
+ */
+public class SchemaOrgParserAccessor implements MarkupParser.Parser {
+    private final SchemaOrgParser parser;
+
+    /**
+     * Creates the accessor together with the SchemaOrgParser it delegates to.
+     *
+     * @param root the element passed to the SchemaOrgParser constructor.
+     */
+    public SchemaOrgParserAccessor(Element root) {
+        parser = new SchemaOrgParser(root);
+    }
+
+    /**
+     * Returns the "headline" property found by the parser, or, when that is empty, the "name"
+     * property of the first article (if there is one).
+     */
+    @Override
+    public String getTitle() {
+        String headline = parser.findStringProperty(SchemaOrgParser.HEADLINE_PROP);
+        if (!headline.isEmpty()) return headline;
+
+        // No headline: fall back to the name of the first article, if any.
+        SchemaOrgParser.ThingItem article = parser.findFirstArticle();
+        return article == null ? headline : article.getStringProperty(SchemaOrgParser.NAME_PROP);
+    }
+
+    /**
+     * Returns the string form of SchemaOrgParser.Type.ARTICLE if the parser found an article,
+     * and an empty string otherwise.
+     */
+    @Override
+    public String getType() {
+        // TODO(kuan): consolidate/standardize types returned from all 3 parsers in MarkupParser.
+        if (parser.findFirstArticle() == null) return "";
+        return SchemaOrgParser.Type.ARTICLE.toString();
+    }
+
+    /** Returns the URL_PROP property of the first article, or "" if there is no article. */
+    @Override
+    public String getUrl() {
+        SchemaOrgParser.ThingItem article = parser.findFirstArticle();
+        if (article == null) return "";
+        return article.getStringProperty(SchemaOrgParser.URL_PROP);
+    }
+
+    /**
+     * Collects the images of the item scopes found by the parser.
+     *
+     * The first image flagged as representative of the page is placed first. The image of the
+     * first article that has one is placed first if no representative image exists, and last
+     * otherwise. All remaining images keep the order of their item scopes.
+     *
+     * @return the ordered images, or null if there are no item scopes or none of them has an
+     *         image.
+     */
+    @Override
+    public MarkupParser.Image[] getImages() {
+        List<SchemaOrgParser.ThingItem> scopes = parser.getItemScopes();
+        if (scopes.isEmpty()) return null;
+
+        List<MarkupParser.Image> result = new ArrayList<MarkupParser.Image>();
+        MarkupParser.Image articleImage = null;
+        boolean foundRepresentative = false;
+
+        for (SchemaOrgParser.ThingItem scope : scopes) {
+            MarkupParser.Image candidate = scope.getImage();
+            if (candidate == null) continue;
+
+            // The first article that carries an image is held back; its position in the list
+            // can only be determined once every other item has been seen.
+            if (articleImage == null && scope.getType() == SchemaOrgParser.Type.ARTICLE) {
+                articleImage = candidate;
+                continue;
+            }
+
+            // Any other image is appended, except the first one that is representative of the
+            // page, which becomes the dominant, i.e. first, entry.
+            if (foundRepresentative || !scope.isImageRepresentativeOfPage()) {
+                result.add(candidate);
+            } else {
+                foundRepresentative = true;
+                result.add(0, candidate);
+            }
+        }
+
+        // The held-back article image leads the list unless a representative image already
+        // does; in that case it goes to the end.
+        if (articleImage != null) {
+            int position = foundRepresentative ? result.size() : 0;
+            result.add(position, articleImage);
+        }
+
+        if (result.isEmpty()) return null;
+        return result.toArray(new MarkupParser.Image[result.size()]);
+    }
+
+    /**
+     * Returns the DESCRIPTION_PROP property of the first article, or "" if there is no article.
+     */
+    @Override
+    public String getDescription() {
+        SchemaOrgParser.ThingItem article = parser.findFirstArticle();
+        if (article == null) return "";
+        return article.getStringProperty(SchemaOrgParser.DESCRIPTION_PROP);
+    }
+
+    /**
+     * Returns the "publisher" property of the first article, or its "copyrightHolder" property
+     * when the former is empty. Returns "" if there is no article.
+     */
+    @Override
+    public String getPublisher() {
+        SchemaOrgParser.ThingItem article = parser.findFirstArticle();
+        if (article == null) return "";
+
+        String publisher = article.getStringProperty(SchemaOrgParser.PUBLISHER_PROP);
+        if (!publisher.isEmpty()) return publisher;
+        // No "publisher" property: use the article's "copyrightHolder" property instead.
+        return article.getStringProperty(SchemaOrgParser.COPYRIGHT_HOLDER_PROP);
+    }
+
+    /**
+     * Combines the copyright year and copyright holder of the first article with
+     * SchemaOrgParser.concat and, if the result is non-empty, prefixes it with "Copyright ".
+     * Returns "" if there is no article.
+     */
+    @Override
+    public String getCopyright() {
+        SchemaOrgParser.ThingItem article = parser.findFirstArticle();
+        if (article == null) return "";
+
+        String year = article.getStringProperty(SchemaOrgParser.COPYRIGHT_YEAR_PROP);
+        String holder = article.getStringProperty(SchemaOrgParser.COPYRIGHT_HOLDER_PROP);
+        String yearAndHolder = SchemaOrgParser.concat(year, holder);
+        if (yearAndHolder.isEmpty()) return yearAndHolder;
+        return "Copyright " + yearAndHolder;
+    }
+
+    /**
+     * Returns the "author" property of the first article, falling back first to its "creator"
+     * property and finally to the author the parser obtained from the "rel=author" tag.
+     */
+    @Override
+    public String getAuthor() {
+        SchemaOrgParser.ThingItem article = parser.findFirstArticle();
+        if (article != null) {
+            String name = article.getStringProperty(SchemaOrgParser.AUTHOR_PROP);
+            if (name.isEmpty()) name = article.getStringProperty(SchemaOrgParser.CREATOR_PROP);
+            if (!name.isEmpty()) return name;
+        }
+        // No article, or neither property is set: use the "rel=author" tag.
+        return parser.getAuthorFromRel();
+    }
+
+    /**
+     * Returns the result of getArticle() on the first article, or null if there is no article.
+     */
+    @Override
+    public MarkupParser.Article getArticle() {
+        SchemaOrgParser.ThingItem article = parser.findFirstArticle();
+        if (article == null) return null;
+        return article.getArticle();
+    }
+
+    /** Always returns false. */
+    @Override
+    public boolean optOut() {
+        return false;
+    }
+}

Powered by Google App Engine
This is Rietveld 408576698