Index: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
diff --git a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
index 5b4eb0037ded139f59cadf45ba8df2cb4622e4f6..bfe0481d8fa847caee126691ee2f65aec5a0206c 100644 |
--- a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
+++ b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
@@ -7,6 +7,8 @@ package org.chromium.distiller.extractors.embeds; |
import com.google.gwt.dom.client.Element; |
import com.google.gwt.dom.client.ImageElement; |
import org.chromium.distiller.LogUtil; |
+import com.google.gwt.dom.client.NodeList; |
wychen
2016/06/07 16:27:20
Imports should be sorted.
marcelorcorrea
2016/06/07 17:02:25
Done.
|
+import org.chromium.distiller.webdocument.WebFigure; |
import org.chromium.distiller.webdocument.WebImage; |
import java.util.HashSet; |
@@ -18,12 +20,18 @@ import java.util.Set; |
*/ |
public class ImageExtractor implements EmbedExtractor { |
private static final Set<String> relevantTags = new HashSet<>(); |
+ private String imgSrc; |
+ private int width; |
+ private int height; |
+ |
static { |
// TODO(mdjones): Add "DIV" to this list for css images and possibly captions. |
relevantTags.add("IMG"); |
+ relevantTags.add("FIGURE"); |
} |
+ |
private static final String[] LAZY_IMAGE_ATTRIBUTES = |
- {"data-src", "data-original", "datasrc", "data-url"}; |
+ {"data-src", "data-original", "datasrc", "data-url"}; |
@Override |
public Set<String> getRelevantTagNames() { |
@@ -35,17 +43,13 @@ public class ImageExtractor implements EmbedExtractor { |
if (!relevantTags.contains(e.getTagName())) { |
return null; |
} |
- String imgSrc = ""; |
- // Getting OffSetWidth/Height as default values, even they are |
- // affected by padding, border, etc. |
- int width = e.getOffsetWidth(); |
- int height = e.getOffsetHeight(); |
+ |
if ("IMG".equals(e.getTagName())) { |
// This will get the absolute URL of the image and |
// the displayed image dimension. |
ImageElement imageElement = ImageElement.as(e); |
// Try to get lazily-loaded images before falling back to get the src attribute. |
- for(String attr: LAZY_IMAGE_ATTRIBUTES) { |
+ for (String attr : LAZY_IMAGE_ATTRIBUTES) { |
imgSrc = imageElement.getAttribute(attr); |
if (!imgSrc.isEmpty()) |
break; |
@@ -61,12 +65,39 @@ public class ImageExtractor implements EmbedExtractor { |
// to get the real dimensions. |
width = imageElement.getWidth(); |
height = imageElement.getHeight(); |
+ extractImageAttributes(ImageElement.as(e)); |
+ } |
+ if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) { |
+ LogUtil.logToConsole("Extracted WebImage: " + imgSrc); |
+ } |
+ return new WebImage(e, width, height, imgSrc); |
+ |
+ } else if ("FIGURE".equals(e.getTagName())) { |
+ Element img = getFirstElementByTagName(e, "IMG"); |
+ if (img != null) { |
+ String caption = ""; |
+ extractImageAttributes(ImageElement.as(img)); |
+ Element cap = getFirstElementByTagName(e, "FIGCAPTION"); |
+ if (cap != null) { |
+ caption = cap.getInnerText(); |
+ } |
+ return new WebFigure(img, width, height, imgSrc, caption); |
} |
} |
+ return null; |
+ } |
+ |
+ private void extractImageAttributes(ImageElement img) { |
wychen
2016/06/07 16:27:20
Let's support lazily-loaded images in <figure> as well.
marcelorcorrea
2016/06/07 17:02:25
Done.
|
+ imgSrc = img.getSrc(); |
+ width = img.getWidth(); |
+ height = img.getHeight(); |
+ } |
- if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) { |
- LogUtil.logToConsole("Extracted WebImage: " + imgSrc); |
+ private Element getFirstElementByTagName(Element e, String tagName) { |
+ NodeList<Element> elements = e.getElementsByTagName(tagName); |
+ if (elements.getLength() > 0) { |
+ return elements.getItem(0); |
} |
- return new WebImage(e, width, height, imgSrc); |
+ return null; |
} |
} |