| Index: java/org/chromium/distiller/SchemaOrgParser.java
|
| diff --git a/java/org/chromium/distiller/SchemaOrgParser.java b/java/org/chromium/distiller/SchemaOrgParser.java
|
| index 89c09f5f187e2d5345242a86a6b7ef4ca77cb389..df5899abff50ba18c21ce8fac7f52451c210b6ad 100644
|
| --- a/java/org/chromium/distiller/SchemaOrgParser.java
|
| +++ b/java/org/chromium/distiller/SchemaOrgParser.java
|
| @@ -37,6 +37,8 @@ public class SchemaOrgParser {
|
| static final String DESCRIPTION_PROP = "description";
|
| static final String IMAGE_PROP = "image";
|
| static final String HEADLINE_PROP = "headline";
|
| + static final String MAIN_ENTITY_OF_PAGE = "mainEntityOfPage";
|
| + static final String MAIN_ENTITY = "mainEntity";
|
| static final String PUBLISHER_PROP = "publisher";
|
| static final String COPYRIGHT_HOLDER_PROP = "copyrightHolder";
|
| static final String COPYRIGHT_YEAR_PROP = "copyrightYear";
|
| @@ -57,35 +59,51 @@ public class SchemaOrgParser {
|
| static final String GIVEN_NAME_PROP = "givenName";
|
| static final String LEGAL_NAME_PROP = "legalName";
|
| static final String AUTHOR_REL = "author";
|
| + static final String COOK_TIME = "cookTime";
|
| + static final String COOKING_METHOD = "cookingMethod";
|
| + static final String NUTRITION = "nutrition";
|
| + static final String PREP_TIME = "prepTime";
|
| + static final String RECIPE_CATEGORY = "recipeCategory";
|
| + static final String RECIPE_CUISINE = "recipeCuisine";
|
| + static final String RECIPE_INGREDIENT = "recipeIngredient";
|
| + static final String INGREDIENTS = "ingredients";
|
| + static final String RECIPE_INSTRUCTIONS = "recipeInstructions";
|
| + static final String RECIPE_YIELD = "recipeYield";
|
| + static final String TOTAL_TIME = "totalTime";
|
|
|
| enum Type { // All these types are extended from Thing, directly or indirectly.
|
| IMAGE,
|
| ARTICLE,
|
| PERSON,
|
| ORGANIZATION,
|
| + RECIPE,
|
| UNSUPPORTED,
|
| }
|
|
|
| - static class ThingItem {
|
| + static abstract class ThingItem {
|
| private final Element mElement;
|
| private final Type mType;
|
| - private final Map<String, String> mStringProperties;
|
| + private final Map<String, List<String>> mStringProperties;
|
| private final Map<String, ThingItem> mItemProperties;
|
|
|
| ThingItem(Type type, Element element) {
|
| mElement = element;
|
| mType = type;
|
| - mStringProperties = new HashMap<String, String>();
|
| + mStringProperties = new HashMap<String, List<String>>();
|
| mItemProperties = new HashMap<String, ThingItem>();
|
|
|
| - addStringPropertyName(NAME_PROP);
|
| - addStringPropertyName(URL_PROP);
|
| - addStringPropertyName(DESCRIPTION_PROP);
|
| - addStringPropertyName(IMAGE_PROP);
|
| + addStringPropertyName(NAME_PROP);
|
| + addStringPropertyName(URL_PROP);
|
| + addStringPropertyName(DESCRIPTION_PROP);
|
| + addStringPropertyName(IMAGE_PROP);
|
| + addStringPropertyName(MAIN_ENTITY_OF_PAGE);
|
| + addStringPropertyName(MAIN_ENTITY);
|
| }
|
|
|
| final void addStringPropertyName(String name) {
|
| - mStringProperties.put(name, "");
|
| + if (mStringProperties.get(name) == null) {
|
| + mStringProperties.put(name, new ArrayList<String>());
|
| + }
|
| }
|
|
|
| final void addItemPropertyName(String name) {
|
| @@ -93,7 +111,11 @@ public class SchemaOrgParser {
|
| }
|
|
|
| final String getStringProperty(String name) {
|
| - return !mStringProperties.containsKey(name) ? "" : mStringProperties.get(name);
|
| + return !mStringProperties.containsKey(name) ? "" : DomUtil.join(mStringProperties.get(name).toArray(), ", ");
|
| + }
|
| +
|
| + final List<String> getProperty(String name) {
|
| + return mStringProperties.get(name);
|
| }
|
|
|
| final ThingItem getItemProperty(String name) {
|
| @@ -104,11 +126,14 @@ public class SchemaOrgParser {
|
|
|
| final boolean isSupported() { return mType != Type.UNSUPPORTED; }
|
|
|
| - // Store |value| for property with |name|, unless the property already has a non-empty
|
| - // value, in which case |value| will be ignored. This means we only keep the first value.
|
| + // Store |value| for property with |name|. Values are added
|
| + // into a list indexed by |name|. Using a list of values is
|
| + // necessary in order to support duplicated properties like
|
| + // 'recipeIngredient', which frequently appears more than once
|
| + // in a Recipe.
|
| final void putStringValue(String name, String value) {
|
| - if (mStringProperties.containsKey(name) && mStringProperties.get(name).isEmpty()) {
|
| - mStringProperties.put(name, value);
|
| + if (mStringProperties.containsKey(name)) {
|
| + mStringProperties.get(name).add(value);
|
| }
|
| }
|
|
|
| @@ -118,6 +143,13 @@ public class SchemaOrgParser {
|
| if (mItemProperties.containsKey(name)) mItemProperties.put(name, value);
|
| }
|
|
|
| + /**
|
| + * Generates the output for this item. Derived items provide their own implementations.
|
| + *
|
| + * @return HTML output.
|
| + */
|
| + public abstract String generateOutput();
|
| +
|
| final Element getElement() {
|
| return mElement;
|
| }
|
| @@ -142,6 +174,7 @@ public class SchemaOrgParser {
|
| sTypeUrls.put("http://schema.org/EducationalOrganization", Type.ORGANIZATION);
|
| sTypeUrls.put("http://schema.org/GovernmentOrganization", Type.ORGANIZATION);
|
| sTypeUrls.put("http://schema.org/NGO", Type.ORGANIZATION);
|
| + sTypeUrls.put("http://schema.org/Recipe", Type.RECIPE);
|
| }
|
|
|
| private final TimingInfo mTimingInfo;
|
| @@ -174,6 +207,43 @@ public class SchemaOrgParser {
|
| return images;
|
| }
|
|
|
| + /**
|
| + * Get the main entity of a page if any.
|
| + *
|
| + * The main entity of a page is identified by the mainEntityOfPage property
|
| + * or by its inverse property mainEntity.
|
| + *
|
| + * @return the first item with a non-empty mainEntity or mainEntityOfPage value, or null if none.
|
| + */
|
| + final ThingItem getMainEntity() {
|
| + for (ThingItem mItemScope : mItemScopes) {
|
| + if(!mItemScope.getStringProperty(MAIN_ENTITY).isEmpty() ||
|
| + !mItemScope.getStringProperty(MAIN_ENTITY_OF_PAGE).isEmpty()) {
|
| + return mItemScope;
|
| + }
|
| + }
|
| + return null;
|
| + }
|
| +
|
| + /**
|
| + * Look for all top level entities on the page. As we are
|
| + * representing the graph hierarchy by having a set of
|
| + * children in every node, the top level entities (roots) are those
|
| + * nodes that do not appear as children in any other node.
|
| + *
|
| + * @return a list of the items that are not the value of any other item's item property.
|
| + */
|
| + final List<ThingItem> getTopLevelEntities() {
|
| + List<ThingItem> candidates = new ArrayList<>(mItemScopes);
|
| + for (ThingItem mItemScope : mItemScopes) {
|
| + for (Map.Entry<String, ThingItem> entry :
|
| + mItemScope.mItemProperties.entrySet()) {
|
| + candidates.remove(entry.getValue());
|
| + }
|
| + }
|
| + return candidates;
|
| + }
|
| +
|
| final String getAuthorFromRel() { return mAuthorFromRel; }
|
|
|
| private void parse(Element root) {
|
| @@ -278,6 +348,9 @@ public class SchemaOrgParser {
|
| case ORGANIZATION:
|
| newItem = new OrganizationItem(e);
|
| break;
|
| + case RECIPE:
|
| + newItem = new RecipeItem(e);
|
| + break;
|
| case UNSUPPORTED:
|
| newItem = new UnsupportedItem(e);
|
| break;
|
| @@ -313,6 +386,11 @@ public class SchemaOrgParser {
|
| image.height = JavaScript.parseInt(getStringProperty(HEIGHT_PROP), 10);
|
| return image;
|
| }
|
| +
|
| + @Override
|
| + public String generateOutput() {
|
| + return "";
|
| + }
|
| }
|
|
|
| static class ArticleItem extends ThingItem {
|
| @@ -389,9 +467,14 @@ public class SchemaOrgParser {
|
| image.url = imageUrl;
|
| return image;
|
| }
|
| +
|
| + @Override
|
| + public String generateOutput() {
|
| + return "";
|
| + }
|
| }
|
|
|
| - private static class PersonItem extends ThingItem {
|
| + static class PersonItem extends ThingItem {
|
| PersonItem(Element element) {
|
| super(Type.PERSON, element);
|
|
|
| @@ -406,9 +489,98 @@ public class SchemaOrgParser {
|
| return !name.isEmpty() ? name :
|
| concat(getStringProperty(GIVEN_NAME_PROP), getStringProperty(FAMILY_NAME_PROP));
|
| }
|
| +
|
| + Person getPerson() {
|
| + Person person = new Person();
|
| + person.name = getName();
|
| + return person;
|
| + }
|
| +
|
| + class Person {
|
| + String name;
|
| + }
|
| +
|
| + @Override
|
| + public String generateOutput() {
|
| + return MarkupGenerator.generateMarkup(getPerson());
|
| + }
|
| }
|
|
|
| - private static class OrganizationItem extends ThingItem {
|
| + static class RecipeItem extends ThingItem {
|
| +
|
| + RecipeItem(Element element) {
|
| + super(Type.RECIPE, element);
|
| +
|
| + addStringPropertyName(AUTHOR_PROP);
|
| + addStringPropertyName(CREATOR_PROP);
|
| + addStringPropertyName(COOK_TIME);
|
| + addStringPropertyName(COOKING_METHOD);
|
| + addStringPropertyName(NUTRITION);
|
| + addStringPropertyName(PREP_TIME);
|
| + addStringPropertyName(RECIPE_CATEGORY);
|
| + addStringPropertyName(RECIPE_CUISINE);
|
| + addStringPropertyName(RECIPE_INGREDIENT);
|
| + addStringPropertyName(INGREDIENTS);
|
| + addStringPropertyName(RECIPE_INSTRUCTIONS);
|
| + addStringPropertyName(RECIPE_YIELD);
|
| + addStringPropertyName(TOTAL_TIME);
|
| +
|
| + addItemPropertyName(AUTHOR_PROP);
|
| + addItemPropertyName(CREATOR_PROP);
|
| + }
|
| +
|
| + final Recipe getRecipe() {
|
| + Recipe recipe = new Recipe();
|
| + recipe.title = getStringProperty(NAME_PROP);
|
| + recipe.imageSrc = getStringProperty(IMAGE_PROP);
|
| + recipe.cookTime = DomUtil.formatDuration(
|
| + getStringProperty(COOK_TIME));
|
| + recipe.author = retrieveProperty(AUTHOR_PROP);
|
| + recipe.creator = retrieveProperty(CREATOR_PROP);
|
| + recipe.description = getStringProperty(DESCRIPTION_PROP);
|
| + recipe.recipeYield = getStringProperty(RECIPE_YIELD);
|
| + recipe.prepTime = DomUtil.formatDuration(
|
| + getStringProperty(PREP_TIME));
|
| + recipe.recipeIngredient = getProperty(RECIPE_INGREDIENT).isEmpty() ?
|
| + getProperty(INGREDIENTS) : getProperty(RECIPE_INGREDIENT);
|
| + recipe.recipeInstructions = getProperty(RECIPE_INSTRUCTIONS);
|
| + recipe.totalTime = DomUtil.formatDuration(
|
| + getStringProperty(TOTAL_TIME));
|
| + return recipe;
|
| + }
|
| +
|
| + String retrieveProperty(String property) {
|
| + String value = getStringProperty(property);
|
| + if(value.isEmpty()) {
|
| + ThingItem itemProperty = getItemProperty(property);
|
| + if (itemProperty != null) {
|
| + value = itemProperty.generateOutput();
|
| + }
|
| + }
|
| + return value;
|
| + }
|
| +
|
| + @Override
|
| + public String generateOutput() {
|
| + return MarkupGenerator.generateMarkup(getRecipe());
|
| + }
|
| +
|
| + class Recipe {
|
| + String title;
|
| + String imageSrc;
|
| + String author;
|
| + String creator;
|
| + String description;
|
| + String recipeYield;
|
| + String prepTime;
|
| + String cookTime;
|
| + String totalTime;
|
| + List<String> recipeIngredient;
|
| + List<String> recipeInstructions;
|
| + }
|
| + }
|
| +
|
| + static class OrganizationItem extends ThingItem {
|
| OrganizationItem(Element element) {
|
| super(Type.ORGANIZATION, element);
|
|
|
| @@ -420,12 +592,22 @@ public class SchemaOrgParser {
|
| String name = getStringProperty(NAME_PROP);
|
| return !name.isEmpty() ? name : getStringProperty(LEGAL_NAME_PROP);
|
| }
|
| +
|
| + @Override
|
| + public String generateOutput() {
|
| + return "";
|
| + }
|
| }
|
|
|
| private static class UnsupportedItem extends ThingItem {
|
| UnsupportedItem(Element element) {
|
| super(Type.UNSUPPORTED, element);
|
| }
|
| +
|
| + @Override
|
| + public String generateOutput() {
|
| + return "";
|
| + }
|
| }
|
|
|
| private static boolean isItemScope(Element e) {
|
|
|