OLD | NEW |
---|---|
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller.extractors.embeds; | 5 package org.chromium.distiller.extractors.embeds; |
6 | 6 |
7 import com.google.gwt.dom.client.Element; | 7 import com.google.gwt.dom.client.Element; |
8 import com.google.gwt.dom.client.ImageElement; | 8 import com.google.gwt.dom.client.ImageElement; |
9 import org.chromium.distiller.LogUtil; | |
9 import org.chromium.distiller.webdocument.WebImage; | 10 import org.chromium.distiller.webdocument.WebImage; |
10 | 11 |
11 import java.util.HashSet; | 12 import java.util.HashSet; |
12 import java.util.Set; | 13 import java.util.Set; |
13 | 14 |
14 /** | 15 /** |
15 * This class treats images as another type of embed and provides heuristics for lead image | 16 * This class treats images as another type of embed and provides heuristics for lead image |
16 * candidacy. | 17 * candidacy. |
17 */ | 18 */ |
18 public class ImageExtractor implements EmbedExtractor { | 19 public class ImageExtractor implements EmbedExtractor { |
19 private static final Set<String> relevantTags = new HashSet<>(); | 20 private static final Set<String> relevantTags = new HashSet<>(); |
20 static { | 21 static { |
21 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. | 22 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. |
22 relevantTags.add("IMG"); | 23 relevantTags.add("IMG"); |
23 } | 24 } |
25 private static final String[] LAZY_IMAGE_ATTRIBUTES = {"data-src", "data-original", "datasrc", "data-url"}; | |
24 | 26 |
25 @Override | 27 @Override |
26 public Set<String> getRelevantTagNames() { | 28 public Set<String> getRelevantTagNames() { |
27 return relevantTags; | 29 return relevantTags; |
28 } | 30 } |
29 | 31 |
30 @Override | 32 @Override |
31 public WebImage extract(Element e) { | 33 public WebImage extract(Element e) { |
32 if (!relevantTags.contains(e.getTagName())) { | 34 if (!relevantTags.contains(e.getTagName())) { |
33 return null; | 35 return null; |
34 } | 36 } |
35 String imgSrc = ""; | 37 String imgSrc = ""; |
36 // Getting OffSetWidth/Height as default values, even they are | 38 // Getting OffSetWidth/Height as default values, even they are |
37 // affected by padding, border, etc. | 39 // affected by padding, border, etc. |
38 int width = e.getOffsetWidth(); | 40 int width = e.getOffsetWidth(); |
39 int height = e.getOffsetHeight(); | 41 int height = e.getOffsetHeight(); |
40 if ("IMG".equals(e.getTagName())) { | 42 if ("IMG".equals(e.getTagName())) { |
41 // This will get the absolute URL of the image and | 43 // This will get the absolute URL of the image and |
42 // the displayed image dimension. | 44 // the displayed image dimension. |
43 ImageElement imageElement = ImageElement.as(e); | 45 ImageElement imageElement = ImageElement.as(e); |
44 imgSrc = imageElement.getSrc(); | 46 // Try to get lazily-loaded images. |
mdjones
2016/06/03 23:43:38
Try to get lazily-loaded images before falling bac
wychen
2016/06/04 00:24:40
Done.
| |
45 // As an ImageElement is manipulated here, it is possible | 47 for(String attr: LAZY_IMAGE_ATTRIBUTES) { |
46 // to get the real dimensions. | 48 imgSrc = imageElement.getAttribute(attr); |
47 width = imageElement.getWidth(); | 49 if (!imgSrc.isEmpty()) |
48 height = imageElement.getHeight(); | 50 break; |
51 } | |
52 if (!imgSrc.isEmpty()) { | |
53 // We cannot trust the dimension if the image is not loaded yet. | |
54 // In some cases there are 1x1 placeholder images. | |
55 width = 0; | |
56 height = 0; | |
57 } else { | |
58 imgSrc = imageElement.getSrc(); | |
59 // As an ImageElement is manipulated here, it is possible | |
60 // to get the real dimensions. | |
61 width = imageElement.getWidth(); | |
62 height = imageElement.getHeight(); | |
63 } | |
49 } | 64 } |
50 | 65 |
66 if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) { | |
67 LogUtil.logToConsole("Extracted WebImage: " + imgSrc); | |
68 } | |
51 return new WebImage(e, width, height, imgSrc); | 69 return new WebImage(e, width, height, imgSrc); |
52 } | 70 } |
53 } | 71 } |
OLD | NEW |