| Index: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
|
| diff --git a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
|
| index c9b527a117c8c5d99d483b94de5ff2bd6d486f9c..5b4eb0037ded139f59cadf45ba8df2cb4622e4f6 100644
|
| --- a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
|
| +++ b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
|
| @@ -6,6 +6,7 @@ package org.chromium.distiller.extractors.embeds;
|
|
|
| import com.google.gwt.dom.client.Element;
|
| import com.google.gwt.dom.client.ImageElement;
|
| +import org.chromium.distiller.LogUtil;
|
| import org.chromium.distiller.webdocument.WebImage;
|
|
|
| import java.util.HashSet;
|
| @@ -21,6 +22,8 @@ public class ImageExtractor implements EmbedExtractor {
|
| // TODO(mdjones): Add "DIV" to this list for css images and possibly captions.
|
| relevantTags.add("IMG");
|
| }
|
| + private static final String[] LAZY_IMAGE_ATTRIBUTES =
|
| + {"data-src", "data-original", "datasrc", "data-url"};
|
|
|
| @Override
|
| public Set<String> getRelevantTagNames() {
|
| @@ -41,13 +44,29 @@ public class ImageExtractor implements EmbedExtractor {
|
| // This will get the absolute URL of the image and
|
| // the displayed image dimension.
|
| ImageElement imageElement = ImageElement.as(e);
|
| - imgSrc = imageElement.getSrc();
|
| - // As an ImageElement is manipulated here, it is possible
|
| - // to get the real dimensions.
|
| - width = imageElement.getWidth();
|
| - height = imageElement.getHeight();
|
| +            // Try the lazy-loading attributes first, then fall back to the src attribute.
|
| + for(String attr: LAZY_IMAGE_ATTRIBUTES) {
|
| + imgSrc = imageElement.getAttribute(attr);
|
| + if (!imgSrc.isEmpty())
|
| + break;
|
| + }
|
| + if (!imgSrc.isEmpty()) {
|
| +                // We cannot trust the dimensions if the image is not loaded yet.
|
| + // In some cases there are 1x1 placeholder images.
|
| + width = 0;
|
| + height = 0;
|
| + } else {
|
| + imgSrc = imageElement.getSrc();
|
| + // As an ImageElement is manipulated here, it is possible
|
| + // to get the real dimensions.
|
| + width = imageElement.getWidth();
|
| + height = imageElement.getHeight();
|
| + }
|
| }
|
|
|
| + if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) {
|
| + LogUtil.logToConsole("Extracted WebImage: " + imgSrc);
|
| + }
|
| return new WebImage(e, width, height, imgSrc);
|
| }
|
| }
|
|
|