Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(129)

Side by Side Diff: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java

Issue 2000093005: Support extraction of lazily-loaded images (Closed) Base URL: git@github.com:chromium/dom-distiller.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | java/org/chromium/distiller/webdocument/WebImage.java » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller.extractors.embeds; 5 package org.chromium.distiller.extractors.embeds;
6 6
7 import com.google.gwt.dom.client.Element; 7 import com.google.gwt.dom.client.Element;
8 import com.google.gwt.dom.client.ImageElement; 8 import com.google.gwt.dom.client.ImageElement;
9 import org.chromium.distiller.LogUtil;
9 import org.chromium.distiller.webdocument.WebImage; 10 import org.chromium.distiller.webdocument.WebImage;
10 11
11 import java.util.HashSet; 12 import java.util.HashSet;
12 import java.util.Set; 13 import java.util.Set;
13 14
14 /** 15 /**
15 * This class treats images as another type of embed and provides heuristics for lead image 16 * This class treats images as another type of embed and provides heuristics for lead image
16 * candidacy. 17 * candidacy.
17 */ 18 */
18 public class ImageExtractor implements EmbedExtractor { 19 public class ImageExtractor implements EmbedExtractor {
19 private static final Set<String> relevantTags = new HashSet<>(); 20 private static final Set<String> relevantTags = new HashSet<>();
20 static { 21 static {
21 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. 22 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions.
22 relevantTags.add("IMG"); 23 relevantTags.add("IMG");
23 } 24 }
25 private static final String[] LAZY_IMAGE_ATTRIBUTES = {"data-src", "data-original", "datasrc", "data-url"};
24 26
25 @Override 27 @Override
26 public Set<String> getRelevantTagNames() { 28 public Set<String> getRelevantTagNames() {
27 return relevantTags; 29 return relevantTags;
28 } 30 }
29 31
30 @Override 32 @Override
31 public WebImage extract(Element e) { 33 public WebImage extract(Element e) {
32 if (!relevantTags.contains(e.getTagName())) { 34 if (!relevantTags.contains(e.getTagName())) {
33 return null; 35 return null;
34 } 36 }
35 String imgSrc = ""; 37 String imgSrc = "";
36 // Getting OffSetWidth/Height as default values, even they are 38 // Getting OffSetWidth/Height as default values, even they are
37 // affected by padding, border, etc. 39 // affected by padding, border, etc.
38 int width = e.getOffsetWidth(); 40 int width = e.getOffsetWidth();
39 int height = e.getOffsetHeight(); 41 int height = e.getOffsetHeight();
40 if ("IMG".equals(e.getTagName())) { 42 if ("IMG".equals(e.getTagName())) {
41 // This will get the absolute URL of the image and 43 // This will get the absolute URL of the image and
42 // the displayed image dimension. 44 // the displayed image dimension.
43 ImageElement imageElement = ImageElement.as(e); 45 ImageElement imageElement = ImageElement.as(e);
44 imgSrc = imageElement.getSrc(); 46 // Try to get lazily-loaded images.
mdjones 2016/06/03 23:43:38 Try to get lazily-loaded images before falling bac
wychen 2016/06/04 00:24:40 Done.
45 // As an ImageElement is manipulated here, it is possible 47 for(String attr: LAZY_IMAGE_ATTRIBUTES) {
46 // to get the real dimensions. 48 imgSrc = imageElement.getAttribute(attr);
47 width = imageElement.getWidth(); 49 if (!imgSrc.isEmpty())
48 height = imageElement.getHeight(); 50 break;
51 }
52 if (!imgSrc.isEmpty()) {
53 // We cannot trust the dimension if the image is not loaded yet.
54 // In some cases there are 1x1 placeholder images.
55 width = 0;
56 height = 0;
57 } else {
58 imgSrc = imageElement.getSrc();
59 // As an ImageElement is manipulated here, it is possible
60 // to get the real dimensions.
61 width = imageElement.getWidth();
62 height = imageElement.getHeight();
63 }
49 } 64 }
50 65
66 if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) {
67 LogUtil.logToConsole("Extracted WebImage: " + imgSrc);
68 }
51 return new WebImage(e, width, height, imgSrc); 69 return new WebImage(e, width, height, imgSrc);
52 } 70 }
53 } 71 }
OLD | NEW
« no previous file with comments | « no previous file | java/org/chromium/distiller/webdocument/WebImage.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698