Index: java/org/chromium/distiller/SchemaOrgParser.java |
diff --git a/java/org/chromium/distiller/SchemaOrgParser.java b/java/org/chromium/distiller/SchemaOrgParser.java |
index 89c09f5f187e2d5345242a86a6b7ef4ca77cb389..df5899abff50ba18c21ce8fac7f52451c210b6ad 100644 |
--- a/java/org/chromium/distiller/SchemaOrgParser.java |
+++ b/java/org/chromium/distiller/SchemaOrgParser.java |
@@ -37,6 +37,8 @@ public class SchemaOrgParser { |
static final String DESCRIPTION_PROP = "description"; |
static final String IMAGE_PROP = "image"; |
static final String HEADLINE_PROP = "headline"; |
+ static final String MAIN_ENTITY_OF_PAGE = "mainEntityOfPage"; |
+ static final String MAIN_ENTITY = "mainEntity"; |
static final String PUBLISHER_PROP = "publisher"; |
static final String COPYRIGHT_HOLDER_PROP = "copyrightHolder"; |
static final String COPYRIGHT_YEAR_PROP = "copyrightYear"; |
@@ -57,35 +59,51 @@ public class SchemaOrgParser { |
static final String GIVEN_NAME_PROP = "givenName"; |
static final String LEGAL_NAME_PROP = "legalName"; |
static final String AUTHOR_REL = "author"; |
+ static final String COOK_TIME = "cookTime"; |
+ static final String COOKING_METHOD = "cookingMethod"; |
+ static final String NUTRITION = "nutrition"; |
+ static final String PREP_TIME = "prepTime"; |
+ static final String RECIPE_CATEGORY = "recipeCategory"; |
+ static final String RECIPE_CUISINE = "recipeCuisine"; |
+ static final String RECIPE_INGREDIENT = "recipeIngredient"; |
+ static final String INGREDIENTS = "ingredients"; |
+ static final String RECIPE_INSTRUCTIONS = "recipeInstructions"; |
+ static final String RECIPE_YIELD = "recipeYield"; |
+ static final String TOTAL_TIME = "totalTime"; |
enum Type { // All these types are extended from Thing, directly or indirectly. |
IMAGE, |
ARTICLE, |
PERSON, |
ORGANIZATION, |
+ RECIPE, |
UNSUPPORTED, |
} |
- static class ThingItem { |
+ static abstract class ThingItem { |
private final Element mElement; |
private final Type mType; |
- private final Map<String, String> mStringProperties; |
+ private final Map<String, List<String>> mStringProperties; |
private final Map<String, ThingItem> mItemProperties; |
ThingItem(Type type, Element element) { |
mElement = element; |
mType = type; |
- mStringProperties = new HashMap<String, String>(); |
+ mStringProperties = new HashMap<String, List<String>>(); |
mItemProperties = new HashMap<String, ThingItem>(); |
- addStringPropertyName(NAME_PROP); |
- addStringPropertyName(URL_PROP); |
- addStringPropertyName(DESCRIPTION_PROP); |
- addStringPropertyName(IMAGE_PROP); |
+ // String properties registered for every item, regardless of its type. |
+ addStringPropertyName(NAME_PROP); |
+ addStringPropertyName(URL_PROP); |
+ addStringPropertyName(DESCRIPTION_PROP); |
+ addStringPropertyName(IMAGE_PROP); |
+ // Registered so that getMainEntity() can query them on any item. |
+ addStringPropertyName(MAIN_ENTITY_OF_PAGE); |
+ addStringPropertyName(MAIN_ENTITY); |
} |
final void addStringPropertyName(String name) { |
- mStringProperties.put(name, ""); |
+ // Register |name| with an empty value list. A name that is already |
+ // registered keeps its existing list and any values stored in it. |
+ if (mStringProperties.containsKey(name)) return; |
+ mStringProperties.put(name, new ArrayList<String>()); |
} |
final void addItemPropertyName(String name) { |
@@ -93,7 +111,11 @@ public class SchemaOrgParser { |
} |
final String getStringProperty(String name) { |
- return !mStringProperties.containsKey(name) ? "" : mStringProperties.get(name); |
+ // Unregistered names yield "". Otherwise all stored values are handed to |
+ // DomUtil.join together with ", " and the joined string is returned. |
+ List<String> values = mStringProperties.get(name); |
+ if (values == null) return ""; |
+ return DomUtil.join(values.toArray(), ", "); |
+ } |
+ |
+ // Returns the list of values stored for |name|. For a registered name this is |
+ // the live backing list, not a copy. Unregistered names yield an empty list |
+ // rather than null, mirroring getStringProperty()'s "" fallback, so callers |
+ // can use the result without a null check. |
+ final List<String> getProperty(String name) { |
+ List<String> values = mStringProperties.get(name); |
+ return values != null ? values : new ArrayList<String>(); |
} |
final ThingItem getItemProperty(String name) { |
@@ -104,11 +126,14 @@ public class SchemaOrgParser { |
final boolean isSupported() { return mType != Type.UNSUPPORTED; } |
- // Store |value| for property with |name|, unless the property already has a non-empty |
- // value, in which case |value| will be ignored. This means we only keep the first value. |
+ // Append |value| to the list of values stored for property |name|. A list is |
+ // needed because some properties (e.g. 'recipeIngredient') frequently occur |
+ // more than once on the same item. Values for unregistered names are dropped. |
+ // Null or empty values are dropped as well: they carry no information and |
+ // would otherwise make getStringProperty() return stray ", " separators |
+ // (and thus a non-empty string) for a property that has no real value. |
final void putStringValue(String name, String value) { |
- if (mStringProperties.containsKey(name) && mStringProperties.get(name).isEmpty()) { |
- mStringProperties.put(name, value); |
+ if (value == null || value.isEmpty()) return; |
+ List<String> values = mStringProperties.get(name); |
+ if (values != null) { |
+ values.add(value); |
} |
} |
@@ -118,6 +143,13 @@ public class SchemaOrgParser { |
if (mItemProperties.containsKey(name)) mItemProperties.put(name, value); |
} |
+ /** |
+ * Generates the output for this item. ThingItem supplies no default behavior, |
+ * so every concrete item type must provide its own implementation. |
+ * |
+ * @return HTML output. |
+ */ |
+ public abstract String generateOutput(); |
+ |
final Element getElement() { |
return mElement; |
} |
@@ -142,6 +174,7 @@ public class SchemaOrgParser { |
sTypeUrls.put("http://schema.org/EducationalOrganization", Type.ORGANIZATION); |
sTypeUrls.put("http://schema.org/GovernmentOrganization", Type.ORGANIZATION); |
sTypeUrls.put("http://schema.org/NGO", Type.ORGANIZATION); |
+ sTypeUrls.put("http://schema.org/Recipe", Type.RECIPE); |
} |
private final TimingInfo mTimingInfo; |
@@ -174,6 +207,43 @@ public class SchemaOrgParser { |
return images; |
} |
+ /** |
+ * Finds the item that marks the main entity of the page. |
+ * |
+ * Items are scanned in mItemScopes order; the first one carrying a non-empty |
+ * 'mainEntity' or 'mainEntityOfPage' string property is returned. |
+ * |
+ * @return that item, or null when no item carries either property. |
+ */ |
+ final ThingItem getMainEntity() { |
+ for (ThingItem item : mItemScopes) { |
+ boolean lacksMainEntity = item.getStringProperty(MAIN_ENTITY).isEmpty(); |
+ if (lacksMainEntity && item.getStringProperty(MAIN_ENTITY_OF_PAGE).isEmpty()) { |
+ continue; |
+ } |
+ return item; |
+ } |
+ return null; |
+ } |
+ |
+ /** |
+ * Collects the top level entities (roots) of the page. |
+ * |
+ * Each item records its children in its item properties, so a root is an |
+ * item that is not stored as an item property of any item in mItemScopes. |
+ * The result starts as a copy of mItemScopes and every such child is then |
+ * removed from it. |
+ * |
+ * @return a new {@code List<ThingItem>} holding the top level entities. |
+ */ |
+ final List<ThingItem> getTopLevelEntities() { |
+ List<ThingItem> roots = new ArrayList<>(mItemScopes); |
+ for (ThingItem parent : mItemScopes) { |
+ for (ThingItem child : parent.mItemProperties.values()) { |
+ roots.remove(child); |
+ } |
+ } |
+ return roots; |
+ } |
+ |
final String getAuthorFromRel() { return mAuthorFromRel; } |
private void parse(Element root) { |
@@ -278,6 +348,9 @@ public class SchemaOrgParser { |
case ORGANIZATION: |
newItem = new OrganizationItem(e); |
break; |
+ case RECIPE: |
+ newItem = new RecipeItem(e); |
+ break; |
case UNSUPPORTED: |
newItem = new UnsupportedItem(e); |
break; |
@@ -313,6 +386,11 @@ public class SchemaOrgParser { |
image.height = JavaScript.parseInt(getStringProperty(HEIGHT_PROP), 10); |
return image; |
} |
+ |
+ // Always returns the empty string: no output is produced for this item. |
+ @Override |
+ public String generateOutput() { |
+ return ""; |
+ } |
} |
static class ArticleItem extends ThingItem { |
@@ -389,9 +467,14 @@ public class SchemaOrgParser { |
image.url = imageUrl; |
return image; |
} |
+ |
+ // Always returns the empty string: no output is produced for this item. |
+ @Override |
+ public String generateOutput() { |
+ return ""; |
+ } |
} |
- private static class PersonItem extends ThingItem { |
+ static class PersonItem extends ThingItem { |
PersonItem(Element element) { |
super(Type.PERSON, element); |
@@ -406,9 +489,98 @@ public class SchemaOrgParser { |
return !name.isEmpty() ? name : |
concat(getStringProperty(GIVEN_NAME_PROP), getStringProperty(FAMILY_NAME_PROP)); |
} |
+ |
+ // Wraps the result of getName() in a newly created Person. |
+ Person getPerson() { |
+ Person result = new Person(); |
+ result.name = getName(); |
+ return result; |
+ } |
+ |
+ // Plain holder for a person's name; getPerson() fills it in and it is then |
+ // passed to MarkupGenerator. Declared static because it never uses the |
+ // enclosing item, so instances need not carry a hidden reference to it. |
+ static class Person { |
+ String name; |
+ } |
+ |
+ // Returns whatever MarkupGenerator.generateMarkup produces for getPerson(). |
+ @Override |
+ public String generateOutput() { |
+ return MarkupGenerator.generateMarkup(getPerson()); |
+ } |
} |
- private static class OrganizationItem extends ThingItem { |
+ /** |
+ * Item created for Type.RECIPE. On top of the properties registered by |
+ * ThingItem it collects the recipe-specific string properties listed in the |
+ * constructor. 'author' and 'creator' are registered both as string |
+ * properties and as item properties, so either form can be stored. |
+ */ |
+ static class RecipeItem extends ThingItem { |
+ |
+ RecipeItem(Element element) { |
+ super(Type.RECIPE, element); |
+ |
+ addStringPropertyName(AUTHOR_PROP); |
+ addStringPropertyName(CREATOR_PROP); |
+ addStringPropertyName(COOK_TIME); |
+ addStringPropertyName(COOKING_METHOD); |
+ addStringPropertyName(NUTRITION); |
+ addStringPropertyName(PREP_TIME); |
+ addStringPropertyName(RECIPE_CATEGORY); |
+ addStringPropertyName(RECIPE_CUISINE); |
+ addStringPropertyName(RECIPE_INGREDIENT); |
+ addStringPropertyName(INGREDIENTS); |
+ addStringPropertyName(RECIPE_INSTRUCTIONS); |
+ addStringPropertyName(RECIPE_YIELD); |
+ addStringPropertyName(TOTAL_TIME); |
+ |
+ addItemPropertyName(AUTHOR_PROP); |
+ addItemPropertyName(CREATOR_PROP); |
+ } |
+ |
+ /** |
+ * Builds a Recipe from the properties collected so far. The three time |
+ * fields are passed through DomUtil.formatDuration. Ingredients come from |
+ * 'recipeIngredient', falling back to 'ingredients' when the former has no |
+ * values. The ingredient and instruction lists are the lists returned by |
+ * getProperty(), not copies. |
+ * |
+ * @return a newly populated Recipe. |
+ */ |
+ final Recipe getRecipe() { |
+ Recipe recipe = new Recipe(); |
+ recipe.title = getStringProperty(NAME_PROP); |
+ recipe.imageSrc = getStringProperty(IMAGE_PROP); |
+ recipe.cookTime = DomUtil.formatDuration( |
+ getStringProperty(COOK_TIME)); |
+ recipe.author = retrieveProperty(AUTHOR_PROP); |
+ recipe.creator = retrieveProperty(CREATOR_PROP); |
+ recipe.description = getStringProperty(DESCRIPTION_PROP); |
+ recipe.recipeYield = getStringProperty(RECIPE_YIELD); |
+ recipe.prepTime = DomUtil.formatDuration( |
+ getStringProperty(PREP_TIME)); |
+ // Look the primary ingredient list up once and reuse it. |
+ List<String> ingredients = getProperty(RECIPE_INGREDIENT); |
+ recipe.recipeIngredient = ingredients.isEmpty() ? |
+ getProperty(INGREDIENTS) : ingredients; |
+ recipe.recipeInstructions = getProperty(RECIPE_INSTRUCTIONS); |
+ recipe.totalTime = DomUtil.formatDuration( |
+ getStringProperty(TOTAL_TIME)); |
+ return recipe; |
+ } |
+ |
+ /** |
+ * Returns the string value of |property|. When that value is empty, the |
+ * item stored under the same name is consulted instead and its |
+ * generateOutput() result is returned; if there is no such item the |
+ * result stays empty. |
+ */ |
+ String retrieveProperty(String property) { |
+ String value = getStringProperty(property); |
+ if (value.isEmpty()) { |
+ ThingItem itemProperty = getItemProperty(property); |
+ if (itemProperty != null) { |
+ value = itemProperty.generateOutput(); |
+ } |
+ } |
+ return value; |
+ } |
+ |
+ @Override |
+ public String generateOutput() { |
+ return MarkupGenerator.generateMarkup(getRecipe()); |
+ } |
+ |
+ // Plain holder for the recipe data passed to MarkupGenerator. Declared |
+ // static because it never uses the enclosing RecipeItem, so instances need |
+ // not carry a hidden reference to it. |
+ static class Recipe { |
+ String title; |
+ String imageSrc; |
+ String author; |
+ String creator; |
+ String description; |
+ String recipeYield; |
+ String prepTime; |
+ String cookTime; |
+ String totalTime; |
+ List<String> recipeIngredient; |
+ List<String> recipeInstructions; |
+ } |
+ } |
+ |
+ static class OrganizationItem extends ThingItem { |
OrganizationItem(Element element) { |
super(Type.ORGANIZATION, element); |
@@ -420,12 +592,22 @@ public class SchemaOrgParser { |
String name = getStringProperty(NAME_PROP); |
return !name.isEmpty() ? name : getStringProperty(LEGAL_NAME_PROP); |
} |
+ |
+ // Always returns the empty string: no output is produced for this item. |
+ @Override |
+ public String generateOutput() { |
+ return ""; |
+ } |
} |
private static class UnsupportedItem extends ThingItem { |
UnsupportedItem(Element element) { |
super(Type.UNSUPPORTED, element); |
} |
+ |
+ // Always returns the empty string: no output is produced for this item. |
+ @Override |
+ public String generateOutput() { |
+ return ""; |
+ } |
} |
private static boolean isItemScope(Element e) { |