Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(317)

Side by Side Diff: src/com/dom_distiller/client/SchemaOrgParserAccessor.java

Issue 240073007: recognize and parse Schema.org Markup (Closed) Base URL: https://code.google.com/p/dom-distiller/@master
Patch Set: addressed comments Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 package com.dom_distiller.client;
6
7 import java.util.ArrayList;
8 import java.util.List;
9
10 import com.google.gwt.dom.client.Element;
11
12 /**
13 * This class instantiates SchemaOrgParser and implements MarkupParser.Parser in terface to provide
14 * access to properties that SchemaOrgParser has parsed.
15 */
16 public class SchemaOrgParserAccessor implements MarkupParser.Parser {
17 private final SchemaOrgParser parser;
18
19 /**
20 * The object that instantiates SchemaOrgParser and implements its MarkupPar ser.Parser
21 * interface.
22 */
23 public SchemaOrgParserAccessor(Element root) {
24 parser = new SchemaOrgParser(root);
25 }
26
27 @Override
28 public String getTitle() {
29 String title = "";
30 List<SchemaOrgParser.ArticleItem> articles = parser.getArticleItems();
31
32 // Get the "headline" property of the first article that has it.
33 for (int i = 0; i < articles.size() && title.isEmpty(); i++) {
34 title = articles.get(i).getStringProperty(SchemaOrgParser.HEADLINE_P ROP);
35 }
36
37 // If there's no "headline" property, use "name" property.
38 for (int i = 0; i < articles.size() && title.isEmpty(); i++) {
39 title = articles.get(i).getStringProperty(SchemaOrgParser.NAME_PROP) ;
40 }
41
42 return title;
43 }
44
45 @Override
46 public String getType() {
47 // TODO(kuan): consolidate/standardize types returned from all 3 parsers in MarkupParser.
48 // Returns ARTICLe if there's an article.
49 return parser.getArticleItems().isEmpty() ? "" : SchemaOrgParser.Type.AR TICLE.toString();
50 }
51
52 @Override
53 public String getUrl() {
54 List<SchemaOrgParser.ArticleItem> articles = parser.getArticleItems();
55 return articles.isEmpty() ? "" :
56 articles.get(0).getStringProperty(SchemaOrgParser.URL_PROP);
57 }
58
59 @Override
60 public MarkupParser.Image[] getImages() {
61 List<MarkupParser.Image> images = new ArrayList<MarkupParser.Image>();
62
63 // First, get images from ArticleItem's.
64 List<SchemaOrgParser.ArticleItem> articleItems = parser.getArticleItems( );
65 MarkupParser.Image imageOfArticle = null;
66 SchemaOrgParser.ImageItem associatedImageOfArticle = null;
67
68 for (int i = 0; i < articleItems.size(); i++) {
69 SchemaOrgParser.ArticleItem articleItem = articleItems.get(i);
70 // If article has an associated image or the "image" property, remem ber them for now;
71 // they'll be added to the list later when the position in the list can be determined.
72 if (associatedImageOfArticle == null) {
73 associatedImageOfArticle = articleItem.getRepresentativeImageIte m();
74 if (associatedImageOfArticle != null) continue;
75 }
76 MarkupParser.Image image = articleItem.getImage();
77 if (image == null) continue;
78 if (imageOfArticle == null) {
79 imageOfArticle = image;
80 } else {
81 images.add(image);
82 }
83 }
84
85 // Then, get images from ImageItem's.
86 List<SchemaOrgParser.ImageItem> imageItems = parser.getImageItems();
87 boolean hasRepresentativeImage = false;
88
89 for (int i = 0; i < imageItems.size(); i++) {
90 SchemaOrgParser.ImageItem imageItem = imageItems.get(i);
91 MarkupParser.Image image = imageItem.getImage();
92 // Insert |image| at beginning of list if it's the first image that' s representative of
93 // page or it's the associated image of the first article.
94 if (!hasRepresentativeImage && (imageItem == associatedImageOfArticl e ||
95 imageItem.isRepresentativeOfPage())) {
96 hasRepresentativeImage = true;
97 images.add(0, image);
98 } else {
99 images.add(image);
100 }
101 }
102
103 // Prepend |imageOfArticle| to list if there's no image representative o f page; append it
104 // otherwise.
105 if (imageOfArticle != null) {
106 if (!hasRepresentativeImage) images.add(0, imageOfArticle);
107 else images.add(imageOfArticle);
108 }
109
110 if (images.isEmpty()) return null;
111
112 return images.toArray(new MarkupParser.Image[images.size()]);
113 }
114
115 @Override
116 public String getDescription() {
117 List<SchemaOrgParser.ArticleItem> articles = parser.getArticleItems();
118 return articles.isEmpty() ? "" :
119 articles.get(0).getStringProperty(SchemaOrgParser.DESCRIPTION_PR OP);
120 }
121
122 @Override
123 public String getPublisher() {
124 // Returns either the "publisher" or "copyrightHolder" property of the f irst article.
125 String publisher = "";
126 List<SchemaOrgParser.ArticleItem> articles = parser.getArticleItems();
127 if (!articles.isEmpty()) {
128 SchemaOrgParser.ArticleItem article = articles.get(0);
129 publisher = article.getPersonOrOrganizationName(SchemaOrgParser.PUBL ISHER_PROP);
130 if (publisher.isEmpty()) {
131 publisher = article.getPersonOrOrganizationName(
132 SchemaOrgParser.COPYRIGHT_HOLDER_PROP);
133 }
134 }
135 return publisher;
136 }
137
138 @Override
139 public String getCopyright() {
140 List<SchemaOrgParser.ArticleItem> articles = parser.getArticleItems();
141 return articles.isEmpty() ? "" : articles.get(0).getCopyright();
142 }
143
144 @Override
145 public String getAuthor() {
146 String author = "";
147 List<SchemaOrgParser.ArticleItem> articles = parser.getArticleItems();
148 if (!articles.isEmpty()) {
149 SchemaOrgParser.ArticleItem article = articles.get(0);
150 author = article.getPersonOrOrganizationName(SchemaOrgParser.AUTHOR _PROP);
151 // If there's no "author" property, use "creator" property.
152 if (author.isEmpty()) {
153 author = article.getPersonOrOrganizationName(SchemaOrgParser.CR EATOR_PROP);
154 }
155 }
156 // Otherwise, use "rel=author" tag.
157 return author.isEmpty() ? parser.getAuthorFromRel() : author;
158 }
159
160 @Override
161 public MarkupParser.Article getArticle() {
162 List<SchemaOrgParser.ArticleItem> articles = parser.getArticleItems();
163 return articles.isEmpty() ? null : articles.get(0).getArticle();
164 }
165
166 @Override
167 public boolean optOut() {
168 return false;
169 }
170 }
OLDNEW
« no previous file with comments | « src/com/dom_distiller/client/SchemaOrgParser.java ('k') | test/com/dom_distiller/client/SchemaOrgParserAccessorTest.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698