Index: java/org/chromium/distiller/SchemaOrgParser.java |
diff --git a/java/org/chromium/distiller/SchemaOrgParser.java b/java/org/chromium/distiller/SchemaOrgParser.java |
index 0386c1cb61e776c60c025ce92441e08738f519aa..5eeb3f78ea9e3f99b0bae449beb149cfa7db87d4 100644 |
--- a/java/org/chromium/distiller/SchemaOrgParser.java |
+++ b/java/org/chromium/distiller/SchemaOrgParser.java |
@@ -38,6 +38,8 @@ public class SchemaOrgParser { |
static final String DESCRIPTION_PROP = "description"; |
static final String IMAGE_PROP = "image"; |
static final String HEADLINE_PROP = "headline"; |
+ static final String MAIN_ENTITY_OF_PAGE = "mainEntityOfPage"; |
+ static final String MAIN_ENTITY = "mainEntity"; |
static final String PUBLISHER_PROP = "publisher"; |
static final String COPYRIGHT_HOLDER_PROP = "copyrightHolder"; |
static final String COPYRIGHT_YEAR_PROP = "copyrightYear"; |
@@ -58,33 +60,49 @@ public class SchemaOrgParser { |
static final String GIVEN_NAME_PROP = "givenName"; |
static final String LEGAL_NAME_PROP = "legalName"; |
static final String AUTHOR_REL = "author"; |
+ static final String COOK_TIME = "cookTime"; |
+ static final String COOKING_METHOD = "cookingMethod"; |
+ static final String NUTRITION = "nutrition"; |
+ static final String PREP_TIME = "prepTime"; |
+ static final String RECIPE_CATEGORY = "recipeCategory"; |
+ static final String RECIPE_CUISINE = "recipeCuisine"; |
+ static final String RECIPE_INGREDIENT = "recipeIngredient"; |
+ static final String INGREDIENTS = "ingredients"; |
+ static final String RECIPE_INSTRUCTIONS = "recipeInstructions"; |
+ static final String RECIPE_YIELD = "recipeYield"; |
+ static final String TOTAL_TIME = "totalTime"; |
enum Type { // All these types are extended from Thing, directly or indirectly. |
IMAGE, |
ARTICLE, |
PERSON, |
ORGANIZATION, |
+ RECIPE, |
UNSUPPORTED, |
} |
- static class ThingItem { |
+ static abstract class ThingItem { |
private final Type mType; |
- private final Map<String, String> mStringProperties; |
+ private final Map<String, List<String>> mStringProperties; |
private final Map<String, ThingItem> mItemProperties; |
ThingItem(Type type) { |
mType = type; |
- mStringProperties = new HashMap<String, String>(); |
+ mStringProperties = new HashMap<String, List<String>>(); |
mItemProperties = new HashMap<String, ThingItem>(); |
addStringPropertyName(NAME_PROP); |
addStringPropertyName(URL_PROP); |
addStringPropertyName(DESCRIPTION_PROP); |
addStringPropertyName(IMAGE_PROP); |
+ addStringPropertyName(MAIN_ENTITY_OF_PAGE); |
+ addStringPropertyName(MAIN_ENTITY); |
} |
final void addStringPropertyName(String name) { |
- mStringProperties.put(name, ""); |
+ if (mStringProperties.get(name) == null) { |
+ mStringProperties.put(name, new ArrayList<String>()); |
+ } |
} |
final void addItemPropertyName(String name) { |
@@ -92,7 +110,11 @@ public class SchemaOrgParser { |
} |
final String getStringProperty(String name) { |
- return !mStringProperties.containsKey(name) ? "" : mStringProperties.get(name); |
+ return !mStringProperties.containsKey(name) ? "" : DomUtil.join(mStringProperties.get(name).toArray(), ", "); |
wychen
2016/03/14 22:58:42
Possible l10n issues here. Not all languages use ", " as a list separator.
|
+ } |
+ |
+ final List<String> getProperty(String name) { |
+ return mStringProperties.get(name); |
} |
final ThingItem getItemProperty(String name) { |
@@ -103,11 +125,14 @@ public class SchemaOrgParser { |
final boolean isSupported() { return mType != Type.UNSUPPORTED; } |
- // Store |value| for property with |name|, unless the property already has a non-empty |
- // value, in which case |value| will be ignored. This means we only keep the first value. |
+ // Store |value| for property with |name|. Values are added |
+ // into a list indexed by |name|. Using a list of values is |
+ // necessary in order to support duplicated properties like |
+ // 'recipeIngredient' which appears frequently more than once |
+ // in a Recipe. |
final void putStringValue(String name, String value) { |
- if (mStringProperties.containsKey(name) && mStringProperties.get(name).isEmpty()) { |
- mStringProperties.put(name, value); |
+ if (mStringProperties.containsKey(name)) { |
+ mStringProperties.get(name).add(value); |
} |
} |
@@ -116,6 +141,13 @@ public class SchemaOrgParser { |
final void putItemValue(String name, ThingItem value) { |
if (mItemProperties.containsKey(name)) mItemProperties.put(name, value); |
} |
+ |
+ /** |
+ * Derived items should provide their own implementations. |
+ * |
+ * @return HTML output. |
+ */ |
+ public abstract String generateOutput(); |
} |
private final List<ThingItem> mItemScopes = new ArrayList<ThingItem>(); |
@@ -137,6 +169,7 @@ public class SchemaOrgParser { |
sTypeUrls.put("http://schema.org/EducationalOrganization", Type.ORGANIZATION); |
sTypeUrls.put("http://schema.org/GovernmentOrganization", Type.ORGANIZATION); |
sTypeUrls.put("http://schema.org/NGO", Type.ORGANIZATION); |
+ sTypeUrls.put("http://schema.org/Recipe", Type.RECIPE); |
} |
private final TimingInfo mTimingInfo; |
@@ -174,6 +207,43 @@ public class SchemaOrgParser { |
return images; |
} |
+ /** |
+ * Get the main entity of a page if any. |
+ * |
+ * Main entity of a page is retrieved by the mainEntityOfPage |
+ * or by its inverse property mainEntity. |
+ * |
+ * @return ThingItem which is the main entity of this page. |
+ */ |
+ final ThingItem getMainEntity() { |
+ for (ThingItem mItemScope : mItemScopes) { |
+ if(!mItemScope.getStringProperty(MAIN_ENTITY).isEmpty() || |
+ !mItemScope.getStringProperty(MAIN_ENTITY_OF_PAGE).isEmpty()) { |
+ return mItemScope; |
+ } |
+ } |
+ return null; |
+ } |
+ |
+ /** |
+ * Look for all top level entities on the page. As we are |
+ * representing the graph hierarchy by having a set of |
+ * children in every node, the top level entities (roots) are those |
+ * nodes that do not appear as children in any other node. |
+ * |
+ * @return List<ThingItem> which are considered top level entities. |
+ */ |
+ final List<ThingItem> getTopLevelEntities() { |
+ List<ThingItem> candidates = new ArrayList<>(mItemScopes); |
+ for (ThingItem mItemScope : mItemScopes) { |
+ for (Map.Entry<String, ThingItem> entry : |
+ mItemScope.mItemProperties.entrySet()) { |
+ candidates.remove(entry.getValue()); |
+ } |
+ } |
+ return candidates; |
+ } |
+ |
final String getAuthorFromRel() { return mAuthorFromRel; } |
private void parse(Element root) { |
@@ -278,6 +348,9 @@ public class SchemaOrgParser { |
case ORGANIZATION: |
newItem = new OrganizationItem(); |
break; |
+ case RECIPE: |
+ newItem = new RecipeItem(); |
+ break; |
case UNSUPPORTED: |
newItem = new UnsupportedItem(); |
break; |
@@ -313,6 +386,11 @@ public class SchemaOrgParser { |
image.height = JavaScript.parseInt(getStringProperty(HEIGHT_PROP), 10); |
return image; |
} |
+ |
+ @Override |
+ public String generateOutput() { |
+ return ""; |
+ } |
} |
static class ArticleItem extends ThingItem { |
@@ -389,9 +467,14 @@ public class SchemaOrgParser { |
image.url = imageUrl; |
return image; |
} |
+ |
+ @Override |
+ public String generateOutput() { |
+ return ""; |
+ } |
} |
- private static class PersonItem extends ThingItem { |
+ static class PersonItem extends ThingItem { |
PersonItem() { |
super(Type.PERSON); |
@@ -406,6 +489,21 @@ public class SchemaOrgParser { |
return !name.isEmpty() ? name : |
concat(getStringProperty(GIVEN_NAME_PROP), getStringProperty(FAMILY_NAME_PROP)); |
} |
+ |
+ Person getPerson() { |
+ Person person = new Person(); |
+ person.name = getName(); |
+ return person; |
+ } |
+ |
/**
 * Plain data holder filled in by getPerson() and handed to MarkupGenerator.
 *
 * NOTE(review): nothing in this class reads the enclosing instance; consider
 * declaring it static so it does not keep a hidden reference to its owner.
 */
class Person {
    // Display name of the person.
    String name;
}
+ |
+ @Override |
+ public String generateOutput() { |
+ return MarkupGenerator.generateMarkup(getPerson()); |
+ } |
} |
private static class OrganizationItem extends ThingItem { |
@@ -420,12 +518,96 @@ public class SchemaOrgParser { |
String name = getStringProperty(NAME_PROP); |
return !name.isEmpty() ? name : getStringProperty(LEGAL_NAME_PROP); |
} |
+ |
+ @Override |
+ public String generateOutput() { |
+ return ""; |
+ } |
+ } |
+ |
+ static class RecipeItem extends ThingItem { |
+ |
+ RecipeItem() { |
+ super(Type.RECIPE); |
+ |
+ addStringPropertyName(AUTHOR_PROP); |
+ addStringPropertyName(CREATOR_PROP); |
+ addStringPropertyName(COOK_TIME); |
+ addStringPropertyName(COOKING_METHOD); |
+ addStringPropertyName(NUTRITION); |
+ addStringPropertyName(PREP_TIME); |
+ addStringPropertyName(RECIPE_CATEGORY); |
+ addStringPropertyName(RECIPE_CUISINE); |
+ addStringPropertyName(RECIPE_INGREDIENT); |
+ addStringPropertyName(INGREDIENTS); |
+ addStringPropertyName(RECIPE_INSTRUCTIONS); |
+ addStringPropertyName(RECIPE_YIELD); |
+ addStringPropertyName(TOTAL_TIME); |
+ |
+ addItemPropertyName(AUTHOR_PROP); |
+ addItemPropertyName(CREATOR_PROP); |
+ } |
+ |
+ final Recipe getRecipe() { |
+ Recipe recipe = new Recipe(); |
+ recipe.title = getStringProperty(NAME_PROP); |
+ recipe.imageSrc = getStringProperty(IMAGE_PROP); |
+ recipe.cookTime = DomUtil.formatDuration( |
+ getStringProperty(COOK_TIME)); |
+ recipe.author = retrieveProperty(AUTHOR_PROP); |
+ recipe.creator = retrieveProperty(CREATOR_PROP); |
+ recipe.description = getStringProperty(DESCRIPTION_PROP); |
+ recipe.recipeYield = getStringProperty(RECIPE_YIELD); |
+ recipe.prepTime = DomUtil.formatDuration( |
+ getStringProperty(PREP_TIME)); |
+ recipe.recipeIngredient = getProperty(RECIPE_INGREDIENT).isEmpty() ? |
+ getProperty(INGREDIENTS) : getProperty(RECIPE_INGREDIENT); |
+ recipe.recipeInstructions = getProperty(RECIPE_INSTRUCTIONS); |
+ recipe.totalTime = DomUtil.formatDuration( |
+ getStringProperty(TOTAL_TIME)); |
+ return recipe; |
+ } |
+ |
+ String retrieveProperty(String property) { |
+ String value = getStringProperty(property); |
+ if(value.isEmpty()) { |
+ ThingItem itemProperty = getItemProperty(property); |
+ if (itemProperty != null) { |
+ value = itemProperty.generateOutput(); |
+ } |
+ } |
+ return value; |
+ } |
+ |
+ @Override |
+ public String generateOutput() { |
+ return MarkupGenerator.generateMarkup(getRecipe()); |
+ } |
+ |
+ class Recipe { |
+ String title; |
+ String imageSrc; |
+ String author; |
+ String creator; |
+ String description; |
+ String recipeYield; |
+ String prepTime; |
+ String cookTime; |
+ String totalTime; |
+ List<String> recipeIngredient; |
+ List<String> recipeInstructions; |
+ } |
} |
private static class UnsupportedItem extends ThingItem { |
UnsupportedItem() { |
super(Type.UNSUPPORTED); |
} |
+ |
+ @Override |
+ public String generateOutput() { |
+ return ""; |
+ } |
} |
private static boolean isItemScope(Element e) { |