Chromium Code Reviews| Index: src/com/dom_distiller/client/SchemaOrgParserAccessor.java |
| diff --git a/src/com/dom_distiller/client/SchemaOrgParserAccessor.java b/src/com/dom_distiller/client/SchemaOrgParserAccessor.java |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..27cfefc55bfc981a91d344776c9311ea36f3cf7e |
| --- /dev/null |
| +++ b/src/com/dom_distiller/client/SchemaOrgParserAccessor.java |
| @@ -0,0 +1,178 @@ |
| +// Copyright 2014 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +package com.dom_distiller.client; |
| + |
| +import java.util.ArrayList; |
| +import java.util.List; |
| + |
| +import com.google.gwt.dom.client.Element; |
| + |
| +/** |
| + * This class instantiates SchemaOrgParser and implements MarkupParser.Parser interface to provide |
| + * access to properties that SchemaOrgParser has parsed. |
| + */ |
| +public class SchemaOrgParserAccessor implements MarkupParser.Parser { |
| + private final SchemaOrgParser parser; |
| + |
| + /** |
| + * Constructs a SchemaOrgParser for |root|; the properties it has parsed are then exposed |
| + * through the MarkupParser.Parser methods of this class. |
| + */ |
| + public SchemaOrgParserAccessor(Element root) { |
| + parser = new SchemaOrgParser(root); |
| + } |
| + |
| + @Override |
| + public String getTitle() { |
| + String title = ""; |
| + List<SchemaOrgParser.ThingItem> itemScopes = parser.getItemScopes(); |
| + |
| + // Get the "headline" property of the first article that has it. |
| + for (int i = 0; i < itemScopes.size() && title.isEmpty(); i++) { |
| + SchemaOrgParser.ThingItem item = itemScopes.get(i); |
| + if (item.getType() == SchemaOrgParser.Type.ARTICLE) { |
|
cjhopman
2014/04/29 17:04:19
Iterating through the articles seems pretty common
kuan
2014/04/29 23:26:43
Done.
|
| + title = item.getStringProperty(SchemaOrgParser.HEADLINE_PROP); |
| + } |
| + } |
| + |
| + // If there's no "headline" property, use "name" property. |
| + for (int i = 0; i < itemScopes.size() && title.isEmpty(); i++) { |
| + SchemaOrgParser.ThingItem item = itemScopes.get(i); |
| + if (item.getType() == SchemaOrgParser.Type.ARTICLE) { |
| + title = item.getStringProperty(SchemaOrgParser.NAME_PROP); |
| + } |
| + } |
| + |
| + return title; |
| + } |
| + |
| + @Override |
| + public String getType() { |
| + // TODO(kuan): consolidate/standardize types returned from all 3 parsers in MarkupParser. |
| + // Returns the string form of Type.ARTICLE if there's an article, empty string otherwise. |
| + return parser.findFirstArticle() != null ? SchemaOrgParser.Type.ARTICLE.toString() : ""; |
| + } |
| + |
| + @Override |
| + public String getUrl() { |
| + SchemaOrgParser.ArticleItem item = parser.findFirstArticle(); |
| + return item != null ? item.getStringProperty(SchemaOrgParser.URL_PROP) : ""; |
| + } |
| + |
| + @Override |
| + public MarkupParser.Image[] getImages() { |
| + List<SchemaOrgParser.ThingItem> itemScopes = parser.getItemScopes(); |
| + if (itemScopes.isEmpty()) return null; |
| + |
| + List<MarkupParser.Image> images = new ArrayList<MarkupParser.Image>(); |
| + boolean hasRepresentativeImage = false; |
| + MarkupParser.Image imageOfArticle = null; |
| + SchemaOrgParser.ImageItem associatedImageOfArticle = null; |
| + |
| + for (int i = 0; i < itemScopes.size(); i++) { |
| + SchemaOrgParser.ThingItem item = itemScopes.get(i); |
| + MarkupParser.Image image = null; |
| + if (item.getType() == SchemaOrgParser.Type.ARTICLE) { |
| + SchemaOrgParser.ArticleItem articleItem = (SchemaOrgParser.ArticleItem) item; |
| + // If article has an associated image or the "image" property, remember them for |
| + // now; they'll be added to the list later when the position in the list can be |
| + // determined. |
| + if (associatedImageOfArticle == null) { |
| + associatedImageOfArticle = articleItem.getRepresentativeImageItem(); |
| + if (associatedImageOfArticle != null) continue; |
| + } |
| + image = articleItem.getImage(); |
| + if (image == null) continue; |
| + if (imageOfArticle == null) { |
| + imageOfArticle = image; |
| + } else { |
| + images.add(image); |
| + } |
| + } else if (item.getType() == SchemaOrgParser.Type.IMAGE) { |
| + SchemaOrgParser.ImageItem imageItem = (SchemaOrgParser.ImageItem) item; |
| + image = imageItem.getImage(); |
| + // Insert |image| at beginning of list if it's the first image that's |
| + // representative of page or it's the associated image remembered from an article |
| + // seen earlier in the list. |
| + if (!hasRepresentativeImage && (imageItem == associatedImageOfArticle || |
| + imageItem.isRepresentativeOfPage())) { |
| + hasRepresentativeImage = true; |
| + images.add(0, image); |
| + } else { |
| + images.add(image); |
| + } |
| + } |
| + } |
| + |
| + // Prepend |imageOfArticle| to list if there's no image representative of page; append it |
| + // otherwise. |
| + if (imageOfArticle != null) { |
| + if (!hasRepresentativeImage) images.add(0, imageOfArticle); |
| + else images.add(imageOfArticle); |
| + } |
| + |
| + if (images.isEmpty()) return null; |
| + |
| + return images.toArray(new MarkupParser.Image[images.size()]); |
| + } |
| + |
| + @Override |
| + public String getDescription() { |
| + SchemaOrgParser.ArticleItem item = parser.findFirstArticle(); |
| + return item != null ? item.getStringProperty(SchemaOrgParser.DESCRIPTION_PROP) : ""; |
| + } |
| + |
| + @Override |
| + public String getPublisher() { |
| + // Returns either the "publisher" or "copyrightHolder" property of the first article. |
| + String publisher = ""; |
| + SchemaOrgParser.ArticleItem article = parser.findFirstArticle(); |
| + if (article != null) { |
| + publisher = article.getPersonOrOrganizationName(SchemaOrgParser.PUBLISHER_PROP); |
| + if (publisher.isEmpty()) { |
| + publisher = article.getPersonOrOrganizationName( |
| + SchemaOrgParser.COPYRIGHT_HOLDER_PROP); |
| + } |
| + } |
| + return publisher; |
| + } |
| + |
| + @Override |
| + public String getCopyright() { |
| + // Returns "Copyright " followed by the copyright year and copyright holder of the first |
| + // article, delimited by a whitespace. Returns an empty string if there's no article or |
| + // if the concatenated result is empty. |
| + SchemaOrgParser.ArticleItem item = parser.findFirstArticle(); |
| + if (item == null) return ""; |
| + String copyright = SchemaOrgParser.concat( |
| + item.getStringProperty(SchemaOrgParser.COPYRIGHT_YEAR_PROP), |
| + item.getPersonOrOrganizationName(SchemaOrgParser.COPYRIGHT_HOLDER_PROP)); |
| + return copyright.isEmpty() ? copyright : "Copyright " + copyright; |
| + } |
| + |
| + @Override |
| + public String getAuthor() { |
| + String author = ""; |
| + SchemaOrgParser.ArticleItem item = parser.findFirstArticle(); |
| + if (item != null) { |
| + author = item.getPersonOrOrganizationName(SchemaOrgParser.AUTHOR_PROP); |
| + // If there's no "author" property, use "creator" property. |
| + if (author.isEmpty()) { |
| + author = item.getPersonOrOrganizationName(SchemaOrgParser.CREATOR_PROP); |
| + } |
| + } |
| + // Otherwise, use "rel=author" tag. |
| + return author.isEmpty() ? parser.getAuthorFromRel() : author; |
| + } |
| + |
| + @Override |
| + public MarkupParser.Article getArticle() { |
| + SchemaOrgParser.ArticleItem item = parser.findFirstArticle(); |
| + return item != null ? item.getArticle() : null; |
| + } |
| + |
| + @Override |
| + public boolean optOut() { |
| + return false; |
| + } |
| +} |