Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(514)

Unified Diff: java/org/chromium/distiller/SchemaOrgParser.java

Issue 1705123002: Add support for Schema.org/Recipe Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: wychen's comments addressed Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: java/org/chromium/distiller/SchemaOrgParser.java
diff --git a/java/org/chromium/distiller/SchemaOrgParser.java b/java/org/chromium/distiller/SchemaOrgParser.java
index 89c09f5f187e2d5345242a86a6b7ef4ca77cb389..df5899abff50ba18c21ce8fac7f52451c210b6ad 100644
--- a/java/org/chromium/distiller/SchemaOrgParser.java
+++ b/java/org/chromium/distiller/SchemaOrgParser.java
@@ -37,6 +37,8 @@ public class SchemaOrgParser {
static final String DESCRIPTION_PROP = "description";
static final String IMAGE_PROP = "image";
static final String HEADLINE_PROP = "headline";
+ static final String MAIN_ENTITY_OF_PAGE = "mainEntityOfPage";
+ static final String MAIN_ENTITY = "mainEntity";
static final String PUBLISHER_PROP = "publisher";
static final String COPYRIGHT_HOLDER_PROP = "copyrightHolder";
static final String COPYRIGHT_YEAR_PROP = "copyrightYear";
@@ -57,35 +59,51 @@ public class SchemaOrgParser {
static final String GIVEN_NAME_PROP = "givenName";
static final String LEGAL_NAME_PROP = "legalName";
static final String AUTHOR_REL = "author";
+ static final String COOK_TIME = "cookTime";
+ static final String COOKING_METHOD = "cookingMethod";
+ static final String NUTRITION = "nutrition";
+ static final String PREP_TIME = "prepTime";
+ static final String RECIPE_CATEGORY = "recipeCategory";
+ static final String RECIPE_CUISINE = "recipeCuisine";
+ static final String RECIPE_INGREDIENT = "recipeIngredient";
+ static final String INGREDIENTS = "ingredients";
+ static final String RECIPE_INSTRUCTIONS = "recipeInstructions";
+ static final String RECIPE_YIELD = "recipeYield";
+ static final String TOTAL_TIME = "totalTime";
enum Type { // All these types are extended from Thing, directly or indirectly.
IMAGE,
ARTICLE,
PERSON,
ORGANIZATION,
+ RECIPE,
UNSUPPORTED,
}
- static class ThingItem {
+ static abstract class ThingItem {
private final Element mElement;
private final Type mType;
- private final Map<String, String> mStringProperties;
+ private final Map<String, List<String>> mStringProperties;
private final Map<String, ThingItem> mItemProperties;
ThingItem(Type type, Element element) {
mElement = element;
mType = type;
- mStringProperties = new HashMap<String, String>();
+ mStringProperties = new HashMap<String, List<String>>();
mItemProperties = new HashMap<String, ThingItem>();
- addStringPropertyName(NAME_PROP);
- addStringPropertyName(URL_PROP);
- addStringPropertyName(DESCRIPTION_PROP);
- addStringPropertyName(IMAGE_PROP);
+ addStringPropertyName(NAME_PROP);
+ addStringPropertyName(URL_PROP);
+ addStringPropertyName(DESCRIPTION_PROP);
+ addStringPropertyName(IMAGE_PROP);
+ addStringPropertyName(MAIN_ENTITY_OF_PAGE);
+ addStringPropertyName(MAIN_ENTITY);
}
final void addStringPropertyName(String name) {
- mStringProperties.put(name, "");
+ if (mStringProperties.get(name) == null) {
+ mStringProperties.put(name, new ArrayList<String>());
+ }
}
final void addItemPropertyName(String name) {
@@ -93,7 +111,11 @@ public class SchemaOrgParser {
}
final String getStringProperty(String name) {
- return !mStringProperties.containsKey(name) ? "" : mStringProperties.get(name);
+ return !mStringProperties.containsKey(name) ? "" : DomUtil.join(mStringProperties.get(name).toArray(), ", ");
+ }
+
+ final List<String> getProperty(String name) {
+ return mStringProperties.get(name);
}
final ThingItem getItemProperty(String name) {
@@ -104,11 +126,14 @@ public class SchemaOrgParser {
final boolean isSupported() { return mType != Type.UNSUPPORTED; }
- // Store |value| for property with |name|, unless the property already has a non-empty
- // value, in which case |value| will be ignored. This means we only keep the first value.
+ // Store |value| for property with |name|. Values are added
+ // into a list indexed by |name|. Using a list of values is
+ // necessary in order to support repeated properties such as
+ // 'recipeIngredient', which frequently appears more than once
+ // in a Recipe.
final void putStringValue(String name, String value) {
- if (mStringProperties.containsKey(name) && mStringProperties.get(name).isEmpty()) {
- mStringProperties.put(name, value);
+ if (mStringProperties.containsKey(name)) {
+ mStringProperties.get(name).add(value);
}
}
@@ -118,6 +143,13 @@ public class SchemaOrgParser {
if (mItemProperties.containsKey(name)) mItemProperties.put(name, value);
}
+ /**
+ * Derived items should provide their own implementations.
+ *
+ * @return HTML output.
+ */
+ public abstract String generateOutput();
+
final Element getElement() {
return mElement;
}
@@ -142,6 +174,7 @@ public class SchemaOrgParser {
sTypeUrls.put("http://schema.org/EducationalOrganization", Type.ORGANIZATION);
sTypeUrls.put("http://schema.org/GovernmentOrganization", Type.ORGANIZATION);
sTypeUrls.put("http://schema.org/NGO", Type.ORGANIZATION);
+ sTypeUrls.put("http://schema.org/Recipe", Type.RECIPE);
}
private final TimingInfo mTimingInfo;
@@ -174,6 +207,43 @@ public class SchemaOrgParser {
return images;
}
+ /**
+ * Get the main entity of a page if any.
+ *
+ * The main entity is the first item that sets the mainEntityOfPage
+ * property or its inverse property, mainEntity.
+ *
+ * @return ThingItem which is the main entity of this page.
+ */
+ final ThingItem getMainEntity() {
+ for (ThingItem mItemScope : mItemScopes) {
+ if(!mItemScope.getStringProperty(MAIN_ENTITY).isEmpty() ||
+ !mItemScope.getStringProperty(MAIN_ENTITY_OF_PAGE).isEmpty()) {
+ return mItemScope;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Look for all top-level entities on the page. Because the graph
+ * hierarchy is represented by a set of children in every node,
+ * the top-level entities (roots) are those nodes that do not
+ * appear as children of any other node.
+ *
+ * @return the list of items that are considered top-level entities.
+ */
+ final List<ThingItem> getTopLevelEntities() {
+ List<ThingItem> candidates = new ArrayList<>(mItemScopes);
+ for (ThingItem mItemScope : mItemScopes) {
+ for (Map.Entry<String, ThingItem> entry :
+ mItemScope.mItemProperties.entrySet()) {
+ candidates.remove(entry.getValue());
+ }
+ }
+ return candidates;
+ }
+
final String getAuthorFromRel() { return mAuthorFromRel; }
private void parse(Element root) {
@@ -278,6 +348,9 @@ public class SchemaOrgParser {
case ORGANIZATION:
newItem = new OrganizationItem(e);
break;
+ case RECIPE:
+ newItem = new RecipeItem(e);
+ break;
case UNSUPPORTED:
newItem = new UnsupportedItem(e);
break;
@@ -313,6 +386,11 @@ public class SchemaOrgParser {
image.height = JavaScript.parseInt(getStringProperty(HEIGHT_PROP), 10);
return image;
}
+
+ @Override
+ public String generateOutput() {
+ return "";
+ }
}
static class ArticleItem extends ThingItem {
@@ -389,9 +467,14 @@ public class SchemaOrgParser {
image.url = imageUrl;
return image;
}
+
+ @Override
+ public String generateOutput() {
+ return "";
+ }
}
- private static class PersonItem extends ThingItem {
+ static class PersonItem extends ThingItem {
PersonItem(Element element) {
super(Type.PERSON, element);
@@ -406,9 +489,98 @@ public class SchemaOrgParser {
return !name.isEmpty() ? name :
concat(getStringProperty(GIVEN_NAME_PROP), getStringProperty(FAMILY_NAME_PROP));
}
+
+ Person getPerson() {
+ Person person = new Person();
+ person.name = getName();
+ return person;
+ }
+
+ class Person {
+ String name;
+ }
+
+ @Override
+ public String generateOutput() {
+ return MarkupGenerator.generateMarkup(getPerson());
+ }
}
- private static class OrganizationItem extends ThingItem {
+ static class RecipeItem extends ThingItem {
+
+ RecipeItem(Element element) {
+ super(Type.RECIPE, element);
+
+ addStringPropertyName(AUTHOR_PROP);
+ addStringPropertyName(CREATOR_PROP);
+ addStringPropertyName(COOK_TIME);
+ addStringPropertyName(COOKING_METHOD);
+ addStringPropertyName(NUTRITION);
+ addStringPropertyName(PREP_TIME);
+ addStringPropertyName(RECIPE_CATEGORY);
+ addStringPropertyName(RECIPE_CUISINE);
+ addStringPropertyName(RECIPE_INGREDIENT);
+ addStringPropertyName(INGREDIENTS);
+ addStringPropertyName(RECIPE_INSTRUCTIONS);
+ addStringPropertyName(RECIPE_YIELD);
+ addStringPropertyName(TOTAL_TIME);
+
+ addItemPropertyName(AUTHOR_PROP);
+ addItemPropertyName(CREATOR_PROP);
+ }
+
+ final Recipe getRecipe() {
+ Recipe recipe = new Recipe();
+ recipe.title = getStringProperty(NAME_PROP);
+ recipe.imageSrc = getStringProperty(IMAGE_PROP);
+ recipe.cookTime = DomUtil.formatDuration(
+ getStringProperty(COOK_TIME));
+ recipe.author = retrieveProperty(AUTHOR_PROP);
+ recipe.creator = retrieveProperty(CREATOR_PROP);
+ recipe.description = getStringProperty(DESCRIPTION_PROP);
+ recipe.recipeYield = getStringProperty(RECIPE_YIELD);
+ recipe.prepTime = DomUtil.formatDuration(
+ getStringProperty(PREP_TIME));
+ recipe.recipeIngredient = getProperty(RECIPE_INGREDIENT).isEmpty() ?
+ getProperty(INGREDIENTS) : getProperty(RECIPE_INGREDIENT);
+ recipe.recipeInstructions = getProperty(RECIPE_INSTRUCTIONS);
+ recipe.totalTime = DomUtil.formatDuration(
+ getStringProperty(TOTAL_TIME));
+ return recipe;
+ }
+
+ String retrieveProperty(String property) {
+ String value = getStringProperty(property);
+ if(value.isEmpty()) {
+ ThingItem itemProperty = getItemProperty(property);
+ if (itemProperty != null) {
+ value = itemProperty.generateOutput();
+ }
+ }
+ return value;
+ }
+
+ @Override
+ public String generateOutput() {
+ return MarkupGenerator.generateMarkup(getRecipe());
+ }
+
+ class Recipe {
+ String title;
+ String imageSrc;
+ String author;
+ String creator;
+ String description;
+ String recipeYield;
+ String prepTime;
+ String cookTime;
+ String totalTime;
+ List<String> recipeIngredient;
+ List<String> recipeInstructions;
+ }
+ }
+
+ static class OrganizationItem extends ThingItem {
OrganizationItem(Element element) {
super(Type.ORGANIZATION, element);
@@ -420,12 +592,22 @@ public class SchemaOrgParser {
String name = getStringProperty(NAME_PROP);
return !name.isEmpty() ? name : getStringProperty(LEGAL_NAME_PROP);
}
+
+ @Override
+ public String generateOutput() {
+ return "";
+ }
}
private static class UnsupportedItem extends ThingItem {
UnsupportedItem(Element element) {
super(Type.UNSUPPORTED, element);
}
+
+ @Override
+ public String generateOutput() {
+ return "";
+ }
}
private static boolean isItemScope(Element e) {

Powered by Google App Engine
This is Rietveld 408576698