src/com/dom_distiller/client/SchemaOrgParserAccessor.java - Issue 240073007: recognize and parse Schema.org Markup

Side by Side Diff: src/com/dom_distiller/client/SchemaOrgParserAccessor.java

Issue 240073007: recognize and parse Schema.org Markup (Closed) Base URL: https://code.google.com/p/dom-distiller/@master

Patch Set: addressed missed-out comments Created 6 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« src/com/dom_distiller/client/SchemaOrgParser.java ('K') | « src/com/dom_distiller/client/SchemaOrgParser.java ('k') | test/com/dom_distiller/client/SchemaOrgParserAccessorTest.java » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 // Copyright 2014 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 package com.dom_distiller.client;

	6

	7 import java.util.ArrayList;

	8 import java.util.List;

	9

	10 import com.google.gwt.dom.client.Element;

	11

	12 /**

	13 * This class instantiates SchemaOrgParser and implements MarkupParser.Parser in terface to provide

	14 * access to properties that SchemaOrgParser has parsed.

	15 */

	16 public class SchemaOrgParserAccessor implements MarkupParser.Parser {

	17 private final SchemaOrgParser parser;

	18

	19 /**

	20 * The object that instantiates SchemaOrgParser and implements its MarkupPar ser.Parser

	21 * interface.

	22 */

	23 public SchemaOrgParserAccessor(Element root) {

	24 parser = new SchemaOrgParser(root);

	25 }

	26

	27 @Override

	28 public String getTitle() {

	29 String title = parser.findStringProperty(SchemaOrgParser.HEADLINE_PROP);

	30 // If there's no "headline" property, use "name" property of first artic le.

	31 if (title.isEmpty()) {

	32 SchemaOrgParser.ThingItem item = parser.findFirstArticle();

	33 if (item != null) title = item.getStringProperty(SchemaOrgParser.NAM E_PROP);

	34 }

	35 return title;

	36 }

	37

	38 @Override

	39 public String getType() {

	40 // TODO(kuan): consolidate/standardize types returned from all 3 parsers in MarkupParser.

	41 // Returns ARTICLe if there's an article.

	42 return parser.findFirstArticle() != null ? SchemaOrgParser.Type.ARTICLE. toString() : "";

	43 }

	44

	45 @Override

	46 public String getUrl() {

	47 SchemaOrgParser.ThingItem item = parser.findFirstArticle();

	48 return item != null ? item.getStringProperty(SchemaOrgParser.URL_PROP) : "";

	49 }

	50

	51 @Override

	52 public MarkupParser.Image[] getImages() {

	53 List<SchemaOrgParser.ThingItem> itemScopes = parser.getItemScopes();

	54 if (itemScopes.isEmpty()) return null;

	55

	56 List<MarkupParser.Image> images = new ArrayList<MarkupParser.Image>();

	57 boolean hasRepresentativeImage = false;

	58 MarkupParser.Image imageOfArticle = null;

	59

	60 for (int i = 0; i < itemScopes.size(); i++) {

	61 SchemaOrgParser.ThingItem item = itemScopes.get(i);

	62 MarkupParser.Image image = item.getImage();

	63 if (image == null) continue;

	64 // If \|image\| is from an article with the "image" property, remember it for now;

	65 // it'll be added to to the list later when its position in the list can be determined.

	66 if (imageOfArticle == null && item.getType() == SchemaOrgParser.Type .ARTICLE) {

	67 imageOfArticle = image;

	68 continue;

	69 }

	70 // Otherwise, \|image\| is from an ImageObject, insert it at beginning of list if it's

	71 // the first image that's representative of page.

	72 if (!hasRepresentativeImage && item.isImageRepresentativeOfPage()) {

	73 hasRepresentativeImage = true;

	74 // Image should be the dominant, i.e. first, one.

	75 images.add(0, image);

	76 } else {

	77 images.add(image);

	78 }

	79 }

	80

	81 // Prepend \|imageOfArticle\| to list if there's no image representative o f page; append it

	82 // otherwise.

	83 if (imageOfArticle != null) {

	84 if (!hasRepresentativeImage) images.add(0, imageOfArticle);

	85 else images.add(imageOfArticle);

	86 }

	87

	88 if (images.isEmpty()) return null;

	89

	90 return images.toArray(new MarkupParser.Image[images.size()]);

	91 }

	92

	93 @Override

	94 public String getDescription() {

	95 SchemaOrgParser.ThingItem item = parser.findFirstArticle();

	96 return item != null ? item.getStringProperty(SchemaOrgParser.DESCRIPTION _PROP) : "";

	97 }

	98

	99 @Override

	100 public String getPublisher() {

	101 SchemaOrgParser.ThingItem item = parser.findFirstArticle();

	102 if (item == null) return "";

	103 String publisher = item.getStringProperty(SchemaOrgParser.PUBLISHER_PROP );

	104 // If there's no "publisher" property, use "copyrightHolder" property of first article.

	105 if (publisher.isEmpty()) {

	106 publisher = item.getStringProperty(SchemaOrgParser.COPYRIGHT_HOLDER_ PROP);

	107 }

	108 return publisher;

	109 }

	110

	111 @Override

	112 public String getCopyright() {

	113 // Returns a concatenated string of copyright year and copyright holder of the first article

	114 // that has these properties, delimited by a whitespace.

	115 SchemaOrgParser.ThingItem item = parser.findFirstArticle();

	116 if (item == null) return "";

	117 String copyright = SchemaOrgParser.concat(

	118 item.getStringProperty(SchemaOrgParser.COPYRIGHT_YEAR_PROP),

	119 item.getStringProperty(SchemaOrgParser.COPYRIGHT_HOLDER_PROP));

	120 return copyright.isEmpty() ? copyright : "Copyright " + copyright;

	121 }

	122

	123 @Override

	124 public String getAuthor() {

	125 String author = "";

	126 SchemaOrgParser.ThingItem item = parser.findFirstArticle();

	127 if (item != null) {

	128 author = item.getStringProperty(SchemaOrgParser.AUTHOR_PROP);

	129 // If there's no "author" property, use "creator" property.

	130 if (author.isEmpty()) author = item.getStringProperty(SchemaOrgPars er.CREATOR_PROP);

	131 }

	132 // Otherwise, use "rel=author" tag.

	133 return author.isEmpty() ? parser.getAuthorFromRel() : author;

	134 }

	135

	136 @Override

	137 public MarkupParser.Article getArticle() {

	138 SchemaOrgParser.ThingItem item = parser.findFirstArticle();

	139 return item != null ? item.getArticle() : null;

	140 }

	141

	142 @Override

	143 public boolean optOut() {

	144 return false;

	145 }

	146 }

OLD	NEW