Chromium Code Reviews| Index: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
| diff --git a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
| index 5b4eb0037ded139f59cadf45ba8df2cb4622e4f6..bfe0481d8fa847caee126691ee2f65aec5a0206c 100644 |
| --- a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
| +++ b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
| @@ -7,6 +7,8 @@ package org.chromium.distiller.extractors.embeds; |
| import com.google.gwt.dom.client.Element; |
| import com.google.gwt.dom.client.ImageElement; |
| import org.chromium.distiller.LogUtil; |
| +import com.google.gwt.dom.client.NodeList; |
|
wychen
2016/06/07 16:27:20
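Imports should be sorted: com.google.gwt.dom.client.NodeList belongs with the other com.google imports, before org.chromium.distiller.LogUtil.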
marcelorcorrea
2016/06/07 17:02:25
Done.
|
| +import org.chromium.distiller.webdocument.WebFigure; |
| import org.chromium.distiller.webdocument.WebImage; |
| import java.util.HashSet; |
| @@ -18,12 +20,18 @@ import java.util.Set; |
| */ |
| public class ImageExtractor implements EmbedExtractor { |
| private static final Set<String> relevantTags = new HashSet<>(); |
| + private String imgSrc; |
| + private int width; |
| + private int height; |
| + |
| static { |
| // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. |
| relevantTags.add("IMG"); |
| + relevantTags.add("FIGURE"); |
| } |
| + |
| private static final String[] LAZY_IMAGE_ATTRIBUTES = |
| - {"data-src", "data-original", "datasrc", "data-url"}; |
| + {"data-src", "data-original", "datasrc", "data-url"}; |
| @Override |
| public Set<String> getRelevantTagNames() { |
| @@ -35,17 +43,13 @@ public class ImageExtractor implements EmbedExtractor { |
| if (!relevantTags.contains(e.getTagName())) { |
| return null; |
| } |
| - String imgSrc = ""; |
| - // Getting OffSetWidth/Height as default values, even they are |
| - // affected by padding, border, etc. |
| - int width = e.getOffsetWidth(); |
| - int height = e.getOffsetHeight(); |
| + |
| if ("IMG".equals(e.getTagName())) { |
| // This will get the absolute URL of the image and |
| // the displayed image dimension. |
| ImageElement imageElement = ImageElement.as(e); |
| // Try to get lazily-loaded images before falling back to get the src attribute. |
| - for(String attr: LAZY_IMAGE_ATTRIBUTES) { |
| + for (String attr : LAZY_IMAGE_ATTRIBUTES) { |
| imgSrc = imageElement.getAttribute(attr); |
| if (!imgSrc.isEmpty()) |
| break; |
| @@ -61,12 +65,39 @@ public class ImageExtractor implements EmbedExtractor { |
| // to get the real dimensions. |
| width = imageElement.getWidth(); |
| height = imageElement.getHeight(); |
| + extractImageAttributes(ImageElement.as(e)); |
| + } |
| + if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) { |
| + LogUtil.logToConsole("Extracted WebImage: " + imgSrc); |
| + } |
| + return new WebImage(e, width, height, imgSrc); |
| + |
| + } else if ("FIGURE".equals(e.getTagName())) { |
| + Element img = getFirstElementByTagName(e, "IMG"); |
| + if (img != null) { |
| + String caption = ""; |
| + extractImageAttributes(ImageElement.as(img)); |
| + Element cap = getFirstElementByTagName(e, "FIGCAPTION"); |
| + if (cap != null) { |
| + caption = cap.getInnerText(); |
| + } |
| + return new WebFigure(img, width, height, imgSrc, caption); |
| } |
| } |
| + return null; |
| + } |
| + |
| + private void extractImageAttributes(ImageElement img) { |
|
wychen
2016/06/07 16:27:20
Let's support lazily-loaded images in <figure> as well.
marcelorcorrea
2016/06/07 17:02:25
Done.
|
| + imgSrc = img.getSrc(); |
| + width = img.getWidth(); |
| + height = img.getHeight(); |
| + } |
| - if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) { |
| - LogUtil.logToConsole("Extracted WebImage: " + imgSrc); |
| + private Element getFirstElementByTagName(Element e, String tagName) { |
| + NodeList<Element> elements = e.getElementsByTagName(tagName); |
| + if (elements.getLength() > 0) { |
| + return elements.getItem(0); |
| } |
| - return new WebImage(e, width, height, imgSrc); |
| + return null; |
| } |
| } |