Chromium Code Reviews| Index: java/org/chromium/distiller/SchemaOrgParser.java |
| diff --git a/java/org/chromium/distiller/SchemaOrgParser.java b/java/org/chromium/distiller/SchemaOrgParser.java |
| index 0386c1cb61e776c60c025ce92441e08738f519aa..5eeb3f78ea9e3f99b0bae449beb149cfa7db87d4 100644 |
| --- a/java/org/chromium/distiller/SchemaOrgParser.java |
| +++ b/java/org/chromium/distiller/SchemaOrgParser.java |
| @@ -38,6 +38,8 @@ public class SchemaOrgParser { |
| static final String DESCRIPTION_PROP = "description"; |
| static final String IMAGE_PROP = "image"; |
| static final String HEADLINE_PROP = "headline"; |
| + static final String MAIN_ENTITY_OF_PAGE = "mainEntityOfPage"; |
| + static final String MAIN_ENTITY = "mainEntity"; |
| static final String PUBLISHER_PROP = "publisher"; |
| static final String COPYRIGHT_HOLDER_PROP = "copyrightHolder"; |
| static final String COPYRIGHT_YEAR_PROP = "copyrightYear"; |
| @@ -58,33 +60,49 @@ public class SchemaOrgParser { |
| static final String GIVEN_NAME_PROP = "givenName"; |
| static final String LEGAL_NAME_PROP = "legalName"; |
| static final String AUTHOR_REL = "author"; |
| + static final String COOK_TIME = "cookTime"; |
| + static final String COOKING_METHOD = "cookingMethod"; |
| + static final String NUTRITION = "nutrition"; |
| + static final String PREP_TIME = "prepTime"; |
| + static final String RECIPE_CATEGORY = "recipeCategory"; |
| + static final String RECIPE_CUISINE = "recipeCuisine"; |
| + static final String RECIPE_INGREDIENT = "recipeIngredient"; |
| + static final String INGREDIENTS = "ingredients"; |
| + static final String RECIPE_INSTRUCTIONS = "recipeInstructions"; |
| + static final String RECIPE_YIELD = "recipeYield"; |
| + static final String TOTAL_TIME = "totalTime"; |
| enum Type { // All these types are extended from Thing, directly or indirectly. |
| IMAGE, |
| ARTICLE, |
| PERSON, |
| ORGANIZATION, |
| + RECIPE, |
| UNSUPPORTED, |
| } |
| - static class ThingItem { |
| + static abstract class ThingItem { |
| private final Type mType; |
| - private final Map<String, String> mStringProperties; |
| + private final Map<String, List<String>> mStringProperties; |
| private final Map<String, ThingItem> mItemProperties; |
| ThingItem(Type type) { |
| mType = type; |
| - mStringProperties = new HashMap<String, String>(); |
| + mStringProperties = new HashMap<String, List<String>>(); |
| mItemProperties = new HashMap<String, ThingItem>(); |
| addStringPropertyName(NAME_PROP); |
| addStringPropertyName(URL_PROP); |
| addStringPropertyName(DESCRIPTION_PROP); |
| addStringPropertyName(IMAGE_PROP); |
| + addStringPropertyName(MAIN_ENTITY_OF_PAGE); |
| + addStringPropertyName(MAIN_ENTITY); |
| } |
| final void addStringPropertyName(String name) { |
| - mStringProperties.put(name, ""); |
| + if (mStringProperties.get(name) == null) { |
| + mStringProperties.put(name, new ArrayList<String>()); |
| + } |
| } |
| final void addItemPropertyName(String name) { |
| @@ -92,7 +110,11 @@ public class SchemaOrgParser { |
| } |
| final String getStringProperty(String name) { |
| - return !mStringProperties.containsKey(name) ? "" : mStringProperties.get(name); |
| + return !mStringProperties.containsKey(name) ? "" : DomUtil.join(mStringProperties.get(name).toArray(), ", "); |
|
wychen
2016/03/14 22:58:42
Possible l10n issues here. Not all languages use ", " as a list separator.
|
| + } |
| + |
| + final List<String> getProperty(String name) { |
| + return mStringProperties.get(name); |
| } |
| final ThingItem getItemProperty(String name) { |
| @@ -103,11 +125,14 @@ public class SchemaOrgParser { |
| final boolean isSupported() { return mType != Type.UNSUPPORTED; } |
| - // Store |value| for property with |name|, unless the property already has a non-empty |
| - // value, in which case |value| will be ignored. This means we only keep the first value. |
| + // Store |value| for property with |name|. Values are added |
| + // into a list indexed by |name|. Using a list of values is |
| + // necessary in order to support duplicated properties like |
| + // 'recipeIngredient', which frequently appears more than once |
| + // in a Recipe. |
| final void putStringValue(String name, String value) { |
| - if (mStringProperties.containsKey(name) && mStringProperties.get(name).isEmpty()) { |
| - mStringProperties.put(name, value); |
| + if (mStringProperties.containsKey(name)) { |
| + mStringProperties.get(name).add(value); |
| } |
| } |
| @@ -116,6 +141,13 @@ public class SchemaOrgParser { |
| final void putItemValue(String name, ThingItem value) { |
| if (mItemProperties.containsKey(name)) mItemProperties.put(name, value); |
| } |
| + |
| + /** |
| + * Derived items should provide their own implementations. |
| + * |
| + * @return HTML output. |
| + */ |
| + public abstract String generateOutput(); |
| } |
| private final List<ThingItem> mItemScopes = new ArrayList<ThingItem>(); |
| @@ -137,6 +169,7 @@ public class SchemaOrgParser { |
| sTypeUrls.put("http://schema.org/EducationalOrganization", Type.ORGANIZATION); |
| sTypeUrls.put("http://schema.org/GovernmentOrganization", Type.ORGANIZATION); |
| sTypeUrls.put("http://schema.org/NGO", Type.ORGANIZATION); |
| + sTypeUrls.put("http://schema.org/Recipe", Type.RECIPE); |
| } |
| private final TimingInfo mTimingInfo; |
| @@ -174,6 +207,43 @@ public class SchemaOrgParser { |
| return images; |
| } |
| + /** |
| + * Get the main entity of a page if any. |
| + * |
| + * The main entity of a page is identified by the mainEntityOfPage |
| + * property or by its inverse property, mainEntity. |
| + * |
| + * @return ThingItem which is the main entity of this page. |
| + */ |
| + final ThingItem getMainEntity() { |
| + for (ThingItem mItemScope : mItemScopes) { |
| + if(!mItemScope.getStringProperty(MAIN_ENTITY).isEmpty() || |
| + !mItemScope.getStringProperty(MAIN_ENTITY_OF_PAGE).isEmpty()) { |
| + return mItemScope; |
| + } |
| + } |
| + return null; |
| + } |
| + |
| + /** |
| + * Look for all top level entities on the page. As we are |
| + * representing the graph hierarchy by having a set of |
| + * children in every node, the top level entities (roots) are those |
| + * nodes that do not appear as children in any other node. |
| + * |
| + * @return List<ThingItem> which are considered top level entities. |
| + */ |
| + final List<ThingItem> getTopLevelEntities() { |
| + List<ThingItem> candidates = new ArrayList<>(mItemScopes); |
| + for (ThingItem mItemScope : mItemScopes) { |
| + for (Map.Entry<String, ThingItem> entry : |
| + mItemScope.mItemProperties.entrySet()) { |
| + candidates.remove(entry.getValue()); |
| + } |
| + } |
| + return candidates; |
| + } |
| + |
| final String getAuthorFromRel() { return mAuthorFromRel; } |
| private void parse(Element root) { |
| @@ -278,6 +348,9 @@ public class SchemaOrgParser { |
| case ORGANIZATION: |
| newItem = new OrganizationItem(); |
| break; |
| + case RECIPE: |
| + newItem = new RecipeItem(); |
| + break; |
| case UNSUPPORTED: |
| newItem = new UnsupportedItem(); |
| break; |
| @@ -313,6 +386,11 @@ public class SchemaOrgParser { |
| image.height = JavaScript.parseInt(getStringProperty(HEIGHT_PROP), 10); |
| return image; |
| } |
| + |
| + @Override |
| + public String generateOutput() { |
| + return ""; |
| + } |
| } |
| static class ArticleItem extends ThingItem { |
| @@ -389,9 +467,14 @@ public class SchemaOrgParser { |
| image.url = imageUrl; |
| return image; |
| } |
| + |
| + @Override |
| + public String generateOutput() { |
| + return ""; |
| + } |
| } |
| - private static class PersonItem extends ThingItem { |
| + static class PersonItem extends ThingItem { |
| PersonItem() { |
| super(Type.PERSON); |
| @@ -406,6 +489,21 @@ public class SchemaOrgParser { |
| return !name.isEmpty() ? name : |
| concat(getStringProperty(GIVEN_NAME_PROP), getStringProperty(FAMILY_NAME_PROP)); |
| } |
| + |
| + Person getPerson() { |
| + Person person = new Person(); |
| + person.name = getName(); |
| + return person; |
| + } |
| + |
| + class Person { |
| + String name; |
| + } |
| + |
| + @Override |
| + public String generateOutput() { |
| + return MarkupGenerator.generateMarkup(getPerson()); |
| + } |
| } |
| private static class OrganizationItem extends ThingItem { |
| @@ -420,12 +518,96 @@ public class SchemaOrgParser { |
| String name = getStringProperty(NAME_PROP); |
| return !name.isEmpty() ? name : getStringProperty(LEGAL_NAME_PROP); |
| } |
| + |
| + @Override |
| + public String generateOutput() { |
| + return ""; |
| + } |
| + } |
| + |
| + static class RecipeItem extends ThingItem { |
| + |
| + RecipeItem() { |
| + super(Type.RECIPE); |
| + |
| + addStringPropertyName(AUTHOR_PROP); |
| + addStringPropertyName(CREATOR_PROP); |
| + addStringPropertyName(COOK_TIME); |
| + addStringPropertyName(COOKING_METHOD); |
| + addStringPropertyName(NUTRITION); |
| + addStringPropertyName(PREP_TIME); |
| + addStringPropertyName(RECIPE_CATEGORY); |
| + addStringPropertyName(RECIPE_CUISINE); |
| + addStringPropertyName(RECIPE_INGREDIENT); |
| + addStringPropertyName(INGREDIENTS); |
| + addStringPropertyName(RECIPE_INSTRUCTIONS); |
| + addStringPropertyName(RECIPE_YIELD); |
| + addStringPropertyName(TOTAL_TIME); |
| + |
| + addItemPropertyName(AUTHOR_PROP); |
| + addItemPropertyName(CREATOR_PROP); |
| + } |
| + |
| + final Recipe getRecipe() { |
| + Recipe recipe = new Recipe(); |
| + recipe.title = getStringProperty(NAME_PROP); |
| + recipe.imageSrc = getStringProperty(IMAGE_PROP); |
| + recipe.cookTime = DomUtil.formatDuration( |
| + getStringProperty(COOK_TIME)); |
| + recipe.author = retrieveProperty(AUTHOR_PROP); |
| + recipe.creator = retrieveProperty(CREATOR_PROP); |
| + recipe.description = getStringProperty(DESCRIPTION_PROP); |
| + recipe.recipeYield = getStringProperty(RECIPE_YIELD); |
| + recipe.prepTime = DomUtil.formatDuration( |
| + getStringProperty(PREP_TIME)); |
| + recipe.recipeIngredient = getProperty(RECIPE_INGREDIENT).isEmpty() ? |
| + getProperty(INGREDIENTS) : getProperty(RECIPE_INGREDIENT); |
| + recipe.recipeInstructions = getProperty(RECIPE_INSTRUCTIONS); |
| + recipe.totalTime = DomUtil.formatDuration( |
| + getStringProperty(TOTAL_TIME)); |
| + return recipe; |
| + } |
| + |
| + String retrieveProperty(String property) { |
| + String value = getStringProperty(property); |
| + if(value.isEmpty()) { |
| + ThingItem itemProperty = getItemProperty(property); |
| + if (itemProperty != null) { |
| + value = itemProperty.generateOutput(); |
| + } |
| + } |
| + return value; |
| + } |
| + |
| + @Override |
| + public String generateOutput() { |
| + return MarkupGenerator.generateMarkup(getRecipe()); |
| + } |
| + |
| + class Recipe { |
| + String title; |
| + String imageSrc; |
| + String author; |
| + String creator; |
| + String description; |
| + String recipeYield; |
| + String prepTime; |
| + String cookTime; |
| + String totalTime; |
| + List<String> recipeIngredient; |
| + List<String> recipeInstructions; |
| + } |
| } |
| private static class UnsupportedItem extends ThingItem { |
| UnsupportedItem() { |
| super(Type.UNSUPPORTED); |
| } |
| + |
| + @Override |
| + public String generateOutput() { |
| + return ""; |
| + } |
| } |
| private static boolean isItemScope(Element e) { |