Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: src/com/dom_distiller/client/SchemaOrgParserAccessor.java

Issue 240073007: recognize and parse Schema.org Markup (Closed) Base URL: https://code.google.com/p/dom-distiller/@master
Patch Set: rm 1 more unused prop in image Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 package com.dom_distiller.client;
6
7 import java.util.ArrayList;
8 import java.util.List;
9
10 import com.google.gwt.dom.client.Element;
11
12 /**
13 * This class instantiates SchemaOrgParser and implements MarkupParser.Parser in terface to provide
14 * access to properties that SchemaOrgParser has parsed.
15 */
16 public class SchemaOrgParserAccessor implements MarkupParser.Parser {
17 private final SchemaOrgParser parser;
18
19 /**
20 * The object that instantiates SchemaOrgParser and implements its MarkupPar ser.Parser
21 * interface.
22 */
23 public SchemaOrgParserAccessor(Element root) {
24 parser = new SchemaOrgParser(root);
25 }
26
27 @Override
28 public String getTitle() {
29 String title = "";
30 List<SchemaOrgParser.ThingItem> itemScopes = parser.getItemScopes();
31
32 // Get the "headline" property of the first article that has it.
33 for (int i = 0; i < itemScopes.size() && title.isEmpty(); i++) {
34 SchemaOrgParser.ThingItem item = itemScopes.get(i);
35 if (item.getType() == SchemaOrgParser.Type.ARTICLE) {
cjhopman 2014/04/29 17:04:19 Iterating through the articles seems pretty common
kuan 2014/04/29 23:26:43 Done.
36 title = item.getStringProperty(SchemaOrgParser.HEADLINE_PROP);
37 }
38 }
39
40 // If there's no "headline" property, use "name" property.
41 for (int i = 0; i < itemScopes.size() && title.isEmpty(); i++) {
42 SchemaOrgParser.ThingItem item = itemScopes.get(i);
43 if (item.getType() == SchemaOrgParser.Type.ARTICLE) {
44 title = item.getStringProperty(SchemaOrgParser.NAME_PROP);
45 }
46 }
47
48 return title;
49 }
50
51 @Override
52 public String getType() {
53 // TODO(kuan): consolidate/standardize types returned from all 3 parsers in MarkupParser.
54 // Returns ARTICLe if there's an article.
55 return parser.findFirstArticle() != null ? SchemaOrgParser.Type.ARTICLE. toString() : "";
56 }
57
58 @Override
59 public String getUrl() {
60 SchemaOrgParser.ArticleItem item = parser.findFirstArticle();
61 return item != null ? item.getStringProperty(SchemaOrgParser.URL_PROP) : "";
62 }
63
64 @Override
65 public MarkupParser.Image[] getImages() {
66 List<SchemaOrgParser.ThingItem> itemScopes = parser.getItemScopes();
67 if (itemScopes.isEmpty()) return null;
68
69 List<MarkupParser.Image> images = new ArrayList<MarkupParser.Image>();
70 boolean hasRepresentativeImage = false;
71 MarkupParser.Image imageOfArticle = null;
72 SchemaOrgParser.ImageItem associatedImageOfArticle = null;
73
74 for (int i = 0; i < itemScopes.size(); i++) {
75 SchemaOrgParser.ThingItem item = itemScopes.get(i);
76 MarkupParser.Image image = null;
77 if (item.getType() == SchemaOrgParser.Type.ARTICLE) {
78 SchemaOrgParser.ArticleItem articleItem = (SchemaOrgParser.Artic leItem) item;
79 // If article has an associated image or the "image" property, r emember them for
80 // now; they'll be added to the list later when the position in the list can be
81 // determined.
82 if (associatedImageOfArticle == null) {
83 associatedImageOfArticle = articleItem.getRepresentativeImag eItem();
84 if (associatedImageOfArticle != null) continue;
85 }
86 image = articleItem.getImage();
87 if (image == null) continue;
88 if (imageOfArticle == null) {
89 imageOfArticle = image;
90 } else {
91 images.add(image);
92 }
93 } else if (item.getType() == SchemaOrgParser.Type.IMAGE) {
94 SchemaOrgParser.ImageItem imageItem = (SchemaOrgParser.ImageItem ) item;
95 image = imageItem.getImage();
96 // Insert |image| at beginning of list if it's the first image t hat's
97 // representative of page or it's the associated image of the fi rst article.
98 if (!hasRepresentativeImage && (imageItem == associatedImageOfAr ticle ||
99 imageItem.isRepresentativeOfPage ())) {
100 hasRepresentativeImage = true;
101 images.add(0, image);
102 } else {
103 images.add(image);
104 }
105 }
106 }
107
108 // Prepend |imageOfArticle| to list if there's no image representative o f page; append it
109 // otherwise.
110 if (imageOfArticle != null) {
111 if (!hasRepresentativeImage) images.add(0, imageOfArticle);
112 else images.add(imageOfArticle);
113 }
114
115 if (images.isEmpty()) return null;
116
117 return images.toArray(new MarkupParser.Image[images.size()]);
118 }
119
120 @Override
121 public String getDescription() {
122 SchemaOrgParser.ArticleItem item = parser.findFirstArticle();
123 return item != null ? item.getStringProperty(SchemaOrgParser.DESCRIPTION _PROP) : "";
124 }
125
126 @Override
127 public String getPublisher() {
128 // Returns either the "publisher" or "copyrightHolder" property of the f irst article.
129 String publisher = "";
130 SchemaOrgParser.ArticleItem article = parser.findFirstArticle();
131 if (article != null) {
132 publisher = article.getPersonOrOrganizationName(SchemaOrgParser.PUBL ISHER_PROP);
133 if (publisher.isEmpty()) {
134 publisher = article.getPersonOrOrganizationName(
135 SchemaOrgParser.COPYRIGHT_HOLDER_PROP);
136 }
137 }
138 return publisher;
139 }
140
141 @Override
142 public String getCopyright() {
143 // Returns a concatenated string of copyright year and copyright holder of the first article
144 // that has these properties, delimited by a whitespace.
145 SchemaOrgParser.ArticleItem item = parser.findFirstArticle();
146 if (item == null) return "";
147 String copyright = SchemaOrgParser.concat(
148 item.getStringProperty(SchemaOrgParser.COPYRIGHT_YEAR_PROP),
149 item.getPersonOrOrganizationName(SchemaOrgParser.COPYRIGHT_HOLDE R_PROP));
150 return copyright.isEmpty() ? copyright : "Copyright " + copyright;
151 }
152
153 @Override
154 public String getAuthor() {
155 String author = "";
156 SchemaOrgParser.ArticleItem item = parser.findFirstArticle();
157 if (item != null) {
158 author = item.getPersonOrOrganizationName(SchemaOrgParser.AUTHOR_PR OP);
159 // If there's no "author" property, use "creator" property.
160 if (author.isEmpty()) {
161 author = item.getPersonOrOrganizationName(SchemaOrgParser.CREAT OR_PROP);
162 }
163 }
164 // Otherwise, use "rel=author" tag.
165 return author.isEmpty() ? parser.getAuthorFromRel() : author;
166 }
167
168 @Override
169 public MarkupParser.Article getArticle() {
170 SchemaOrgParser.ArticleItem item = parser.findFirstArticle();
171 return item != null ? item.getArticle() : null;
172 }
173
174 @Override
175 public boolean optOut() {
176 return false;
177 }
178 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698