OLD | NEW |
---|---|
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller.extractors.embeds; | 5 package org.chromium.distiller.extractors.embeds; |
6 | 6 |
7 import com.google.gwt.dom.client.Element; | 7 import com.google.gwt.dom.client.Element; |
8 import com.google.gwt.dom.client.ImageElement; | 8 import com.google.gwt.dom.client.ImageElement; |
9 import org.chromium.distiller.LogUtil; | 9 import org.chromium.distiller.LogUtil; |
10 import com.google.gwt.dom.client.NodeList; | |
wychen
2016/06/07 16:27:20
import should be sorted.
marcelorcorrea
2016/06/07 17:02:25
Done.
| |
11 import org.chromium.distiller.webdocument.WebFigure; | |
10 import org.chromium.distiller.webdocument.WebImage; | 12 import org.chromium.distiller.webdocument.WebImage; |
11 | 13 |
12 import java.util.HashSet; | 14 import java.util.HashSet; |
13 import java.util.Set; | 15 import java.util.Set; |
14 | 16 |
15 /** | 17 /** |
16 * This class treats images as another type of embed and provides heuristics for lead image | 18 * This class treats images as another type of embed and provides heuristics for lead image |
17 * candidacy. | 19 * candidacy. |
18 */ | 20 */ |
19 public class ImageExtractor implements EmbedExtractor { | 21 public class ImageExtractor implements EmbedExtractor { |
20 private static final Set<String> relevantTags = new HashSet<>(); | 22 private static final Set<String> relevantTags = new HashSet<>(); |
23 private String imgSrc; | |
24 private int width; | |
25 private int height; | |
26 | |
21 static { | 27 static { |
22 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. | 28 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. |
23 relevantTags.add("IMG"); | 29 relevantTags.add("IMG"); |
30 relevantTags.add("FIGURE"); | |
24 } | 31 } |
32 | |
25 private static final String[] LAZY_IMAGE_ATTRIBUTES = | 33 private static final String[] LAZY_IMAGE_ATTRIBUTES = |
26 {"data-src", "data-original", "datasrc", "data-url"}; | 34 {"data-src", "data-original", "datasrc", "data-url"}; |
27 | 35 |
28 @Override | 36 @Override |
29 public Set<String> getRelevantTagNames() { | 37 public Set<String> getRelevantTagNames() { |
30 return relevantTags; | 38 return relevantTags; |
31 } | 39 } |
32 | 40 |
33 @Override | 41 @Override |
34 public WebImage extract(Element e) { | 42 public WebImage extract(Element e) { |
35 if (!relevantTags.contains(e.getTagName())) { | 43 if (!relevantTags.contains(e.getTagName())) { |
36 return null; | 44 return null; |
37 } | 45 } |
38 String imgSrc = ""; | 46 |
39 // Getting OffSetWidth/Height as default values, even they are | |
40 // affected by padding, border, etc. | |
41 int width = e.getOffsetWidth(); | |
42 int height = e.getOffsetHeight(); | |
43 if ("IMG".equals(e.getTagName())) { | 47 if ("IMG".equals(e.getTagName())) { |
44 // This will get the absolute URL of the image and | 48 // This will get the absolute URL of the image and |
45 // the displayed image dimension. | 49 // the displayed image dimension. |
46 ImageElement imageElement = ImageElement.as(e); | 50 ImageElement imageElement = ImageElement.as(e); |
47 // Try to get lazily-loaded images before falling back to get the src attribute. | 51 // Try to get lazily-loaded images before falling back to get the src attribute. |
48 for(String attr: LAZY_IMAGE_ATTRIBUTES) { | 52 for (String attr : LAZY_IMAGE_ATTRIBUTES) { |
49 imgSrc = imageElement.getAttribute(attr); | 53 imgSrc = imageElement.getAttribute(attr); |
50 if (!imgSrc.isEmpty()) | 54 if (!imgSrc.isEmpty()) |
51 break; | 55 break; |
52 } | 56 } |
53 if (!imgSrc.isEmpty()) { | 57 if (!imgSrc.isEmpty()) { |
54 // We cannot trust the dimension if the image is not loaded yet. | 58 // We cannot trust the dimension if the image is not loaded yet. |
55 // In some cases there are 1x1 placeholder images. | 59 // In some cases there are 1x1 placeholder images. |
56 width = 0; | 60 width = 0; |
57 height = 0; | 61 height = 0; |
58 } else { | 62 } else { |
59 imgSrc = imageElement.getSrc(); | 63 imgSrc = imageElement.getSrc(); |
60 // As an ImageElement is manipulated here, it is possible | 64 // As an ImageElement is manipulated here, it is possible |
61 // to get the real dimensions. | 65 // to get the real dimensions. |
62 width = imageElement.getWidth(); | 66 width = imageElement.getWidth(); |
63 height = imageElement.getHeight(); | 67 height = imageElement.getHeight(); |
68 extractImageAttributes(ImageElement.as(e)); | |
69 } | |
70 if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) { | |
71 LogUtil.logToConsole("Extracted WebImage: " + imgSrc); | |
72 } | |
73 return new WebImage(e, width, height, imgSrc); | |
74 | |
75 } else if ("FIGURE".equals(e.getTagName())) { | |
76 Element img = getFirstElementByTagName(e, "IMG"); | |
77 if (img != null) { | |
78 String caption = ""; | |
79 extractImageAttributes(ImageElement.as(img)); | |
80 Element cap = getFirstElementByTagName(e, "FIGCAPTION"); | |
81 if (cap != null) { | |
82 caption = cap.getInnerText(); | |
83 } | |
84 return new WebFigure(img, width, height, imgSrc, caption); | |
64 } | 85 } |
65 } | 86 } |
87 return null; | |
88 } | |
66 | 89 |
67 if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) { | 90 private void extractImageAttributes(ImageElement img) { |
wychen
2016/06/07 16:27:20
Let's support lazily-loaded images in <figure> as well.
marcelorcorrea
2016/06/07 17:02:25
Done.
| |
68 LogUtil.logToConsole("Extracted WebImage: " + imgSrc); | 91 imgSrc = img.getSrc(); |
92 width = img.getWidth(); | |
93 height = img.getHeight(); | |
94 } | |
95 | |
96 private Element getFirstElementByTagName(Element e, String tagName) { | |
97 NodeList<Element> elements = e.getElementsByTagName(tagName); | |
98 if (elements.getLength() > 0) { | |
99 return elements.getItem(0); | |
69 } | 100 } |
70 return new WebImage(e, width, height, imgSrc); | 101 return null; |
71 } | 102 } |
72 } | 103 } |
OLD | NEW |