Index: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
diff --git a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
index c9b527a117c8c5d99d483b94de5ff2bd6d486f9c..a0a8d12163a4e88ba8e11a1acf6f163b603a9053 100644 |
--- a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
+++ b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
@@ -6,6 +6,8 @@ package org.chromium.distiller.extractors.embeds; |
import com.google.gwt.dom.client.Element; |
import com.google.gwt.dom.client.ImageElement; |
+import com.google.gwt.dom.client.NodeList; |
+import org.chromium.distiller.webdocument.WebFigure; |
import org.chromium.distiller.webdocument.WebImage; |
import java.util.HashSet; |
@@ -17,9 +19,15 @@ import java.util.Set; |
*/ |
public class ImageExtractor implements EmbedExtractor { |
private static final Set<String> relevantTags = new HashSet<>(); |
+ private String src; |
+ private String caption; |
wychen
2016/05/31 22:14:46
This doesn't need to be in class scope.
marcelorcorrea
2016/06/01 18:09:54
Done.
|
+ private int width; |
+ private int height; |
+ |
static { |
// TODO(mdjones): Add "DIV" to this list for css images and possibly captions. |
relevantTags.add("IMG"); |
+ relevantTags.add("FIGURE"); |
} |
@Override |
@@ -32,22 +40,41 @@ public class ImageExtractor implements EmbedExtractor { |
if (!relevantTags.contains(e.getTagName())) { |
return null; |
} |
- String imgSrc = ""; |
// Getting OffSetWidth/Height as default values, even they are |
// affected by padding, border, etc. |
- int width = e.getOffsetWidth(); |
- int height = e.getOffsetHeight(); |
+ width = e.getOffsetWidth(); |
+ height = e.getOffsetHeight(); |
+ src = ""; |
+ caption = ""; |
+ |
if ("IMG".equals(e.getTagName())) { |
- // This will get the absolute URL of the image and |
- // the displayed image dimension. |
- ImageElement imageElement = ImageElement.as(e); |
- imgSrc = imageElement.getSrc(); |
- // As an ImageElement is manipulated here, it is possible |
- // to get the real dimensions. |
- width = imageElement.getWidth(); |
- height = imageElement.getHeight(); |
+ extractImageAttributes(ImageElement.as(e)); |
+ return new WebImage(e, width, height, src); |
+ } else if ("FIGURE".equals(e.getTagName())) { |
+ Element img = getFirstElementByTagName(e, "IMG"); |
+ if (img != null) { |
+ extractImageAttributes(ImageElement.as(img)); |
+ Element cap = getFirstElementByTagName(e, "FIGCAPTION"); |
+ if (cap != null) { |
+ caption = cap.getInnerText(); |
+ } |
+ return new WebFigure(img, width, height, src, caption); |
+ } |
} |
+ return null; |
+ } |
- return new WebImage(e, width, height, imgSrc); |
+ private void extractImageAttributes(ImageElement img) { |
+ this.width = img.getWidth(); // image's own dimensions replace the offset-based defaults |
+ this.height = img.getHeight(); |
+ this.src = img.getSrc(); // absolute URL of the image |
+ } |
+ |
+ private Element getFirstElementByTagName(Element e, String tagName) { |
+ NodeList<Element> matches = e.getElementsByTagName(tagName); |
+ if (matches.getLength() == 0) { // no descendant with this tag name |
+ return null; |
+ } |
+ return matches.getItem(0); // first match in the returned list |
 } |
} |