Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(431)

Unified Diff: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java

Issue 2000093005: Support extraction of lazily-loaded images (Closed) Base URL: git@github.com:chromium/dom-distiller.git@master
Patch Set: fix comments Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | java/org/chromium/distiller/webdocument/WebImage.java » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
diff --git a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
index c9b527a117c8c5d99d483b94de5ff2bd6d486f9c..5b4eb0037ded139f59cadf45ba8df2cb4622e4f6 100644
--- a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
+++ b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
@@ -6,6 +6,7 @@ package org.chromium.distiller.extractors.embeds;
import com.google.gwt.dom.client.Element;
import com.google.gwt.dom.client.ImageElement;
+import org.chromium.distiller.LogUtil;
import org.chromium.distiller.webdocument.WebImage;
import java.util.HashSet;
@@ -21,6 +22,8 @@ public class ImageExtractor implements EmbedExtractor {
// TODO(mdjones): Add "DIV" to this list for css images and possibly captions.
relevantTags.add("IMG");
}
+ private static final String[] LAZY_IMAGE_ATTRIBUTES =
+ {"data-src", "data-original", "datasrc", "data-url"};
@Override
public Set<String> getRelevantTagNames() {
@@ -41,13 +44,29 @@ public class ImageExtractor implements EmbedExtractor {
// This will get the absolute URL of the image and
// the displayed image dimension.
ImageElement imageElement = ImageElement.as(e);
- imgSrc = imageElement.getSrc();
- // As an ImageElement is manipulated here, it is possible
- // to get the real dimensions.
- width = imageElement.getWidth();
- height = imageElement.getHeight();
+ // Try the lazy-loading attributes first, then fall back to the src attribute.
+ for(String attr: LAZY_IMAGE_ATTRIBUTES) {
+ imgSrc = imageElement.getAttribute(attr);
+ if (!imgSrc.isEmpty())
+ break;
+ }
+ if (!imgSrc.isEmpty()) {
+ // We cannot trust the dimensions if the image is not loaded yet.
+ // In some cases the element shows a 1x1 placeholder image.
+ width = 0;
+ height = 0;
+ } else {
+ imgSrc = imageElement.getSrc();
+ // As an ImageElement is manipulated here, it is possible
+ // to get the real dimensions.
+ width = imageElement.getWidth();
+ height = imageElement.getHeight();
+ }
}
+ if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) {
+ LogUtil.logToConsole("Extracted WebImage: " + imgSrc);
+ }
return new WebImage(e, width, height, imgSrc);
}
}
« no previous file with comments | « no previous file | java/org/chromium/distiller/webdocument/WebImage.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698