Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(954)

Side by Side Diff: src/com/dom_distiller/client/SchemaOrgParserAccessor.java

Issue 240073007: recognize and parse Schema.org Markup (Closed) Base URL: https://code.google.com/p/dom-distiller/@master
Patch Set: addressed missed-out comments Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 package com.dom_distiller.client;
6
7 import java.util.ArrayList;
8 import java.util.List;
9
10 import com.google.gwt.dom.client.Element;
11
12 /**
13 * This class instantiates SchemaOrgParser and implements MarkupParser.Parser in terface to provide
14 * access to properties that SchemaOrgParser has parsed.
15 */
16 public class SchemaOrgParserAccessor implements MarkupParser.Parser {
17 private final SchemaOrgParser parser;
18
19 /**
20 * The object that instantiates SchemaOrgParser and implements its MarkupPar ser.Parser
21 * interface.
22 */
23 public SchemaOrgParserAccessor(Element root) {
24 parser = new SchemaOrgParser(root);
25 }
26
27 @Override
28 public String getTitle() {
29 String title = parser.findStringProperty(SchemaOrgParser.HEADLINE_PROP);
30 // If there's no "headline" property, use "name" property of first artic le.
31 if (title.isEmpty()) {
32 SchemaOrgParser.ThingItem item = parser.findFirstArticle();
33 if (item != null) title = item.getStringProperty(SchemaOrgParser.NAM E_PROP);
34 }
35 return title;
36 }
37
38 @Override
39 public String getType() {
40 // TODO(kuan): consolidate/standardize types returned from all 3 parsers in MarkupParser.
41 // Returns ARTICLe if there's an article.
42 return parser.findFirstArticle() != null ? SchemaOrgParser.Type.ARTICLE. toString() : "";
43 }
44
45 @Override
46 public String getUrl() {
47 SchemaOrgParser.ThingItem item = parser.findFirstArticle();
48 return item != null ? item.getStringProperty(SchemaOrgParser.URL_PROP) : "";
49 }
50
51 @Override
52 public MarkupParser.Image[] getImages() {
53 List<SchemaOrgParser.ThingItem> itemScopes = parser.getItemScopes();
54 if (itemScopes.isEmpty()) return null;
55
56 List<MarkupParser.Image> images = new ArrayList<MarkupParser.Image>();
57 boolean hasRepresentativeImage = false;
58 MarkupParser.Image imageOfArticle = null;
59
60 for (int i = 0; i < itemScopes.size(); i++) {
61 SchemaOrgParser.ThingItem item = itemScopes.get(i);
62 MarkupParser.Image image = item.getImage();
63 if (image == null) continue;
64 // If |image| is from an article with the "image" property, remember it for now;
65 // it'll be added to to the list later when its position in the list can be determined.
66 if (imageOfArticle == null && item.getType() == SchemaOrgParser.Type .ARTICLE) {
67 imageOfArticle = image;
68 continue;
69 }
70 // Otherwise, |image| is from an ImageObject, insert it at beginning of list if it's
71 // the first image that's representative of page.
72 if (!hasRepresentativeImage && item.isImageRepresentativeOfPage()) {
73 hasRepresentativeImage = true;
74 // Image should be the dominant, i.e. first, one.
75 images.add(0, image);
76 } else {
77 images.add(image);
78 }
79 }
80
81 // Prepend |imageOfArticle| to list if there's no image representative o f page; append it
82 // otherwise.
83 if (imageOfArticle != null) {
84 if (!hasRepresentativeImage) images.add(0, imageOfArticle);
85 else images.add(imageOfArticle);
86 }
87
88 if (images.isEmpty()) return null;
89
90 return images.toArray(new MarkupParser.Image[images.size()]);
91 }
92
93 @Override
94 public String getDescription() {
95 SchemaOrgParser.ThingItem item = parser.findFirstArticle();
96 return item != null ? item.getStringProperty(SchemaOrgParser.DESCRIPTION _PROP) : "";
97 }
98
99 @Override
100 public String getPublisher() {
101 SchemaOrgParser.ThingItem item = parser.findFirstArticle();
102 if (item == null) return "";
103 String publisher = item.getStringProperty(SchemaOrgParser.PUBLISHER_PROP );
104 // If there's no "publisher" property, use "copyrightHolder" property of first article.
105 if (publisher.isEmpty()) {
106 publisher = item.getStringProperty(SchemaOrgParser.COPYRIGHT_HOLDER_ PROP);
107 }
108 return publisher;
109 }
110
111 @Override
112 public String getCopyright() {
113 // Returns a concatenated string of copyright year and copyright holder of the first article
114 // that has these properties, delimited by a whitespace.
115 SchemaOrgParser.ThingItem item = parser.findFirstArticle();
116 if (item == null) return "";
117 String copyright = SchemaOrgParser.concat(
118 item.getStringProperty(SchemaOrgParser.COPYRIGHT_YEAR_PROP),
119 item.getStringProperty(SchemaOrgParser.COPYRIGHT_HOLDER_PROP));
120 return copyright.isEmpty() ? copyright : "Copyright " + copyright;
121 }
122
123 @Override
124 public String getAuthor() {
125 String author = "";
126 SchemaOrgParser.ThingItem item = parser.findFirstArticle();
127 if (item != null) {
128 author = item.getStringProperty(SchemaOrgParser.AUTHOR_PROP);
129 // If there's no "author" property, use "creator" property.
130 if (author.isEmpty()) author = item.getStringProperty(SchemaOrgPars er.CREATOR_PROP);
131 }
132 // Otherwise, use "rel=author" tag.
133 return author.isEmpty() ? parser.getAuthorFromRel() : author;
134 }
135
136 @Override
137 public MarkupParser.Article getArticle() {
138 SchemaOrgParser.ThingItem item = parser.findFirstArticle();
139 return item != null ? item.getArticle() : null;
140 }
141
142 @Override
143 public boolean optOut() {
144 return false;
145 }
146 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698