Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(675)

Side by Side Diff: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java

Issue 2000093005: Support extraction of lazily-loaded images (Closed) Base URL: git@github.com:chromium/dom-distiller.git@master
Patch Set: fix comments Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | java/org/chromium/distiller/webdocument/WebImage.java » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller.extractors.embeds; 5 package org.chromium.distiller.extractors.embeds;
6 6
7 import com.google.gwt.dom.client.Element; 7 import com.google.gwt.dom.client.Element;
8 import com.google.gwt.dom.client.ImageElement; 8 import com.google.gwt.dom.client.ImageElement;
9 import org.chromium.distiller.LogUtil;
9 import org.chromium.distiller.webdocument.WebImage; 10 import org.chromium.distiller.webdocument.WebImage;
10 11
11 import java.util.HashSet; 12 import java.util.HashSet;
12 import java.util.Set; 13 import java.util.Set;
13 14
14 /** 15 /**
15 * This class treats images as another type of embed and provides heuristics for lead image 16 * This class treats images as another type of embed and provides heuristics for lead image
16 * candidacy. 17 * candidacy.
17 */ 18 */
18 public class ImageExtractor implements EmbedExtractor { 19 public class ImageExtractor implements EmbedExtractor {
19 private static final Set<String> relevantTags = new HashSet<>(); 20 private static final Set<String> relevantTags = new HashSet<>();
20 static { 21 static {
21 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. 22 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions.
22 relevantTags.add("IMG"); 23 relevantTags.add("IMG");
23 } 24 }
25 private static final String[] LAZY_IMAGE_ATTRIBUTES =
26 {"data-src", "data-original", "datasrc", "data-url"};
24 27
25 @Override 28 @Override
26 public Set<String> getRelevantTagNames() { 29 public Set<String> getRelevantTagNames() {
27 return relevantTags; 30 return relevantTags;
28 } 31 }
29 32
30 @Override 33 @Override
31 public WebImage extract(Element e) { 34 public WebImage extract(Element e) {
32 if (!relevantTags.contains(e.getTagName())) { 35 if (!relevantTags.contains(e.getTagName())) {
33 return null; 36 return null;
34 } 37 }
35 String imgSrc = ""; 38 String imgSrc = "";
36 // Getting OffSetWidth/Height as default values, even they are 39 // Getting OffSetWidth/Height as default values, even they are
37 // affected by padding, border, etc. 40 // affected by padding, border, etc.
38 int width = e.getOffsetWidth(); 41 int width = e.getOffsetWidth();
39 int height = e.getOffsetHeight(); 42 int height = e.getOffsetHeight();
40 if ("IMG".equals(e.getTagName())) { 43 if ("IMG".equals(e.getTagName())) {
41 // This will get the absolute URL of the image and 44 // This will get the absolute URL of the image and
42 // the displayed image dimension. 45 // the displayed image dimension.
43 ImageElement imageElement = ImageElement.as(e); 46 ImageElement imageElement = ImageElement.as(e);
44 imgSrc = imageElement.getSrc(); 47 // Try to get lazily-loaded images before falling back to get the src attribute.
45 // As an ImageElement is manipulated here, it is possible 48 for(String attr: LAZY_IMAGE_ATTRIBUTES) {
46 // to get the real dimensions. 49 imgSrc = imageElement.getAttribute(attr);
47 width = imageElement.getWidth(); 50 if (!imgSrc.isEmpty())
48 height = imageElement.getHeight(); 51 break;
52 }
53 if (!imgSrc.isEmpty()) {
54 // We cannot trust the dimension if the image is not loaded yet.
55 // In some cases there are 1x1 placeholder images.
56 width = 0;
57 height = 0;
58 } else {
59 imgSrc = imageElement.getSrc();
60 // As an ImageElement is manipulated here, it is possible
61 // to get the real dimensions.
62 width = imageElement.getWidth();
63 height = imageElement.getHeight();
64 }
49 } 65 }
50 66
67 if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) {
68 LogUtil.logToConsole("Extracted WebImage: " + imgSrc);
69 }
51 return new WebImage(e, width, height, imgSrc); 70 return new WebImage(e, width, height, imgSrc);
52 } 71 }
53 } 72 }
OLDNEW
« no previous file with comments | « no previous file | java/org/chromium/distiller/webdocument/WebImage.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698