Chromium Code Reviews| Index: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
| diff --git a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
| index c9b527a117c8c5d99d483b94de5ff2bd6d486f9c..14ee360458c0387f8915c1057666a2ce1dfe3555 100644 |
| --- a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
| +++ b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java |
| @@ -6,6 +6,7 @@ package org.chromium.distiller.extractors.embeds; |
| import com.google.gwt.dom.client.Element; |
| import com.google.gwt.dom.client.ImageElement; |
| +import org.chromium.distiller.LogUtil; |
| import org.chromium.distiller.webdocument.WebImage; |
| import java.util.HashSet; |
| @@ -21,6 +22,7 @@ public class ImageExtractor implements EmbedExtractor { |
| // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. |
| relevantTags.add("IMG"); |
| } |
| + private static final String[] LAZY_IMAGE_ATTRIBUTES = {"data-src", "data-original", "datasrc", "data-url"}; |
| @Override |
| public Set<String> getRelevantTagNames() { |
| @@ -41,13 +43,29 @@ public class ImageExtractor implements EmbedExtractor { |
| // This will get the absolute URL of the image and |
| // the displayed image dimension. |
| ImageElement imageElement = ImageElement.as(e); |
| - imgSrc = imageElement.getSrc(); |
| - // As an ImageElement is manipulated here, it is possible |
| - // to get the real dimensions. |
| - width = imageElement.getWidth(); |
| - height = imageElement.getHeight(); |
| + // Try to get lazily-loaded images. |
|
mdjones
2016/06/03 23:43:38
Try to get lazily-loaded images before falling back to the src attribute.
wychen
2016/06/04 00:24:40
Done.
|
| + for(String attr: LAZY_IMAGE_ATTRIBUTES) { |
| + imgSrc = imageElement.getAttribute(attr); |
| + if (!imgSrc.isEmpty()) |
| + break; |
| + } |
| + if (!imgSrc.isEmpty()) { |
| + // We cannot trust the dimension if the image is not loaded yet. |
| + // In some cases there are 1x1 placeholder images. |
| + width = 0; |
| + height = 0; |
| + } else { |
| + imgSrc = imageElement.getSrc(); |
| + // As an ImageElement is manipulated here, it is possible |
| + // to get the real dimensions. |
| + width = imageElement.getWidth(); |
| + height = imageElement.getHeight(); |
| + } |
| } |
| + if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) { |
| + LogUtil.logToConsole("Extracted WebImage: " + imgSrc); |
| + } |
| return new WebImage(e, width, height, imgSrc); |
| } |
| } |