Index: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
diff --git a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
index c9b527a117c8c5d99d483b94de5ff2bd6d486f9c..5b4eb0037ded139f59cadf45ba8df2cb4622e4f6 100644 |
--- a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
+++ b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
@@ -6,6 +6,7 @@ package org.chromium.distiller.extractors.embeds; |
import com.google.gwt.dom.client.Element; |
import com.google.gwt.dom.client.ImageElement; |
+import org.chromium.distiller.LogUtil; |
import org.chromium.distiller.webdocument.WebImage; |
import java.util.HashSet; |
@@ -21,6 +22,8 @@ public class ImageExtractor implements EmbedExtractor { |
// TODO(mdjones): Add "DIV" to this list for css images and possibly captions. |
relevantTags.add("IMG"); |
} |
+ private static final String[] LAZY_IMAGE_ATTRIBUTES = |
+ {"data-src", "data-original", "datasrc", "data-url"}; |
@Override |
public Set<String> getRelevantTagNames() { |
@@ -41,13 +44,29 @@ public class ImageExtractor implements EmbedExtractor { |
// This will get the absolute URL of the image and |
// the displayed image dimension. |
ImageElement imageElement = ImageElement.as(e); |
- imgSrc = imageElement.getSrc(); |
- // As an ImageElement is manipulated here, it is possible |
- // to get the real dimensions. |
- width = imageElement.getWidth(); |
- height = imageElement.getHeight(); |
+ // Try the lazy-loading attributes first, then fall back to the src attribute. |
+ for(String attr: LAZY_IMAGE_ATTRIBUTES) { |
+ imgSrc = imageElement.getAttribute(attr); |
+ if (!imgSrc.isEmpty()) |
+ break; |
+ } |
+ if (!imgSrc.isEmpty()) { |
+ // We cannot trust the dimensions if the image is not loaded yet. |
+ // In some cases there are 1x1 placeholder images. |
+ width = 0; |
+ height = 0; |
+ } else { |
+ imgSrc = imageElement.getSrc(); |
+ // As an ImageElement is manipulated here, it is possible |
+ // to get the real dimensions. |
+ width = imageElement.getWidth(); |
+ height = imageElement.getHeight(); |
+ } |
} |
+ if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) { |
+ LogUtil.logToConsole("Extracted WebImage: " + imgSrc); |
+ } |
return new WebImage(e, width, height, imgSrc); |
} |
} |