Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(477)

Unified Diff: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java

Issue 2020403002: Add support for figure element (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: merged from master Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | java/org/chromium/distiller/webdocument/WebFigure.java » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
diff --git a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
index 5b4eb0037ded139f59cadf45ba8df2cb4622e4f6..bfe0481d8fa847caee126691ee2f65aec5a0206c 100644
--- a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
+++ b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
@@ -7,6 +7,8 @@ package org.chromium.distiller.extractors.embeds;
import com.google.gwt.dom.client.Element;
import com.google.gwt.dom.client.ImageElement;
import org.chromium.distiller.LogUtil;
+import com.google.gwt.dom.client.NodeList;
wychen 2016/06/07 16:27:20 Imports should be sorted.
marcelorcorrea 2016/06/07 17:02:25 Done.
+import org.chromium.distiller.webdocument.WebFigure;
import org.chromium.distiller.webdocument.WebImage;
import java.util.HashSet;
@@ -18,12 +20,18 @@ import java.util.Set;
*/
public class ImageExtractor implements EmbedExtractor {
private static final Set<String> relevantTags = new HashSet<>();
+ private String imgSrc;
+ private int width;
+ private int height;
+
static {
// TODO(mdjones): Add "DIV" to this list for css images and possibly captions.
relevantTags.add("IMG");
+ relevantTags.add("FIGURE");
}
+
private static final String[] LAZY_IMAGE_ATTRIBUTES =
- {"data-src", "data-original", "datasrc", "data-url"};
+ {"data-src", "data-original", "datasrc", "data-url"};
@Override
public Set<String> getRelevantTagNames() {
@@ -35,17 +43,13 @@ public class ImageExtractor implements EmbedExtractor {
if (!relevantTags.contains(e.getTagName())) {
return null;
}
- String imgSrc = "";
- // Getting OffSetWidth/Height as default values, even they are
- // affected by padding, border, etc.
- int width = e.getOffsetWidth();
- int height = e.getOffsetHeight();
+
if ("IMG".equals(e.getTagName())) {
// This will get the absolute URL of the image and
// the displayed image dimension.
ImageElement imageElement = ImageElement.as(e);
// Try to get lazily-loaded images before falling back to get the src attribute.
- for(String attr: LAZY_IMAGE_ATTRIBUTES) {
+ for (String attr : LAZY_IMAGE_ATTRIBUTES) {
imgSrc = imageElement.getAttribute(attr);
if (!imgSrc.isEmpty())
break;
@@ -61,12 +65,39 @@ public class ImageExtractor implements EmbedExtractor {
// to get the real dimensions.
width = imageElement.getWidth();
height = imageElement.getHeight();
+ extractImageAttributes(ImageElement.as(e));
+ }
+ if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) {
+ LogUtil.logToConsole("Extracted WebImage: " + imgSrc);
+ }
+ return new WebImage(e, width, height, imgSrc);
+
+ } else if ("FIGURE".equals(e.getTagName())) {
+ Element img = getFirstElementByTagName(e, "IMG");
+ if (img != null) {
+ String caption = "";
+ extractImageAttributes(ImageElement.as(img));
+ Element cap = getFirstElementByTagName(e, "FIGCAPTION");
+ if (cap != null) {
+ caption = cap.getInnerText();
+ }
+ return new WebFigure(img, width, height, imgSrc, caption);
}
}
+ return null;
+ }
+
+ private void extractImageAttributes(ImageElement img) {
wychen 2016/06/07 16:27:20 Let's support lazily-loaded images in <figure> as…
marcelorcorrea 2016/06/07 17:02:25 Done.
+ imgSrc = img.getSrc();
+ width = img.getWidth();
+ height = img.getHeight();
+ }
- if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) {
- LogUtil.logToConsole("Extracted WebImage: " + imgSrc);
+ private Element getFirstElementByTagName(Element e, String tagName) {
+ NodeList<Element> elements = e.getElementsByTagName(tagName);
+ if (elements.getLength() > 0) {
+ return elements.getItem(0);
}
- return new WebImage(e, width, height, imgSrc);
+ return null;
}
}
« no previous file with comments | « no previous file | java/org/chromium/distiller/webdocument/WebFigure.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698