OLD | NEW |
---|---|
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller.extractors.embeds; | 5 package org.chromium.distiller.extractors.embeds; |
6 | 6 |
7 import com.google.gwt.dom.client.Element; | 7 import com.google.gwt.dom.client.Element; |
8 import com.google.gwt.dom.client.ImageElement; | 8 import com.google.gwt.dom.client.ImageElement; |
9 import com.google.gwt.dom.client.NodeList; | |
10 import org.chromium.distiller.webdocument.WebFigure; | |
wychen
2016/05/31 21:28:04
Forget to upload this one?
| |
9 import org.chromium.distiller.webdocument.WebImage; | 11 import org.chromium.distiller.webdocument.WebImage; |
10 | 12 |
11 import java.util.HashSet; | 13 import java.util.HashSet; |
12 import java.util.Set; | 14 import java.util.Set; |
13 | 15 |
14 /** | 16 /** |
15 * This class treats images as another type of embed and provides heuristics for lead image | 17 * This class treats images as another type of embed and provides heuristics for lead image |
16 * candidacy. | 18 * candidacy. |
17 */ | 19 */ |
18 public class ImageExtractor implements EmbedExtractor { | 20 public class ImageExtractor implements EmbedExtractor { |
19 private static final Set<String> relevantTags = new HashSet<>(); | 21 private static final Set<String> relevantTags = new HashSet<>(); |
22 private String src; | |
23 private String caption; | |
24 private int width; | |
25 private int height; | |
26 | |
20 static { | 27 static { |
21 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. | 28 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. |
22 relevantTags.add("IMG"); | 29 relevantTags.add("IMG"); |
30 relevantTags.add("FIGURE"); | |
23 } | 31 } |
24 | 32 |
25 @Override | 33 @Override |
26 public Set<String> getRelevantTagNames() { | 34 public Set<String> getRelevantTagNames() { |
27 return relevantTags; | 35 return relevantTags; |
28 } | 36 } |
29 | 37 |
30 @Override | 38 @Override |
31 public WebImage extract(Element e) { | 39 public WebImage extract(Element e) { |
32 if (!relevantTags.contains(e.getTagName())) { | 40 if (!relevantTags.contains(e.getTagName())) { |
33 return null; | 41 return null; |
34 } | 42 } |
35 String imgSrc = ""; | |
36 // Getting OffSetWidth/Height as default values, even they are | 43 // Getting OffSetWidth/Height as default values, even they are |
37 // affected by padding, border, etc. | 44 // affected by padding, border, etc. |
38 int width = e.getOffsetWidth(); | 45 width = e.getOffsetWidth(); |
39 int height = e.getOffsetHeight(); | 46 height = e.getOffsetHeight(); |
47 src = ""; | |
48 caption = ""; | |
49 | |
40 if ("IMG".equals(e.getTagName())) { | 50 if ("IMG".equals(e.getTagName())) { |
41 // This will get the absolute URL of the image and | 51 extractImageAttributes(ImageElement.as(e)); |
42 // the displayed image dimension. | 52 } else if ("FIGURE".equals(e.getTagName())) { |
43 ImageElement imageElement = ImageElement.as(e); | 53 Element img = getElementByTagName(e, "IMG"); |
44 imgSrc = imageElement.getSrc(); | 54 if (img != null) { |
wychen
2016/05/31 21:28:04
If a malformed <figure> contains no <img>s, an emp
marcelorcorrea
2016/05/31 21:46:44
Done.
| |
45 // As an ImageElement is manipulated here, it is possible | 55 extractImageAttributes(ImageElement.as(img)); |
46 // to get the real dimensions. | 56 Element cap = getElementByTagName(e, "FIGCAPTION"); |
47 width = imageElement.getWidth(); | 57 if (cap != null) { |
48 height = imageElement.getHeight(); | 58 caption = cap.getInnerText(); |
59 } | |
60 return new WebFigure(img, width, height, src, caption); | |
61 } | |
49 } | 62 } |
63 return new WebImage(e, width, height, src); | |
64 } | |
50 | 65 |
51 return new WebImage(e, width, height, imgSrc); | 66 private void extractImageAttributes(ImageElement img) { |
67 src = img.getSrc(); | |
68 width = img.getWidth(); | |
69 height = img.getHeight(); | |
70 } | |
71 | |
72 private Element getElementByTagName(Element e, String tagName) { | |
wychen
2016/05/31 21:28:04
getFirstElementByTagName?
marcelorcorrea
2016/05/31 21:46:44
Done.
| |
73 NodeList<Element> elements = e.getElementsByTagName(tagName); | |
74 if (elements.getLength() > 0) { | |
75 return elements.getItem(0); | |
76 } | |
77 return null; | |
52 } | 78 } |
53 } | 79 } |
OLD | NEW |