Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(63)

Side by Side Diff: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java

Issue 2020403002: Add support for figure element (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller.extractors.embeds; 5 package org.chromium.distiller.extractors.embeds;
6 6
7 import com.google.gwt.dom.client.Element; 7 import com.google.gwt.dom.client.Element;
8 import com.google.gwt.dom.client.ImageElement; 8 import com.google.gwt.dom.client.ImageElement;
9 import com.google.gwt.dom.client.NodeList;
10 import org.chromium.distiller.webdocument.WebFigure;
wychen 2016/05/31 21:28:04 Forget to upload this one?
9 import org.chromium.distiller.webdocument.WebImage; 11 import org.chromium.distiller.webdocument.WebImage;
10 12
11 import java.util.HashSet; 13 import java.util.HashSet;
12 import java.util.Set; 14 import java.util.Set;
13 15
14 /** 16 /**
15 * This class treats images as another type of embed and provides heuristics for lead image 17 * This class treats images as another type of embed and provides heuristics for lead image
16 * candidacy. 18 * candidacy.
17 */ 19 */
18 public class ImageExtractor implements EmbedExtractor { 20 public class ImageExtractor implements EmbedExtractor {
19 private static final Set<String> relevantTags = new HashSet<>(); 21 private static final Set<String> relevantTags = new HashSet<>();
22 private String src;
23 private String caption;
24 private int width;
25 private int height;
26
20 static { 27 static {
21 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. 28 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions.
22 relevantTags.add("IMG"); 29 relevantTags.add("IMG");
30 relevantTags.add("FIGURE");
23 } 31 }
24 32
25 @Override 33 @Override
26 public Set<String> getRelevantTagNames() { 34 public Set<String> getRelevantTagNames() {
27 return relevantTags; 35 return relevantTags;
28 } 36 }
29 37
30 @Override 38 @Override
31 public WebImage extract(Element e) { 39 public WebImage extract(Element e) {
32 if (!relevantTags.contains(e.getTagName())) { 40 if (!relevantTags.contains(e.getTagName())) {
33 return null; 41 return null;
34 } 42 }
35 String imgSrc = "";
36 // Getting OffSetWidth/Height as default values, even they are 43 // Getting OffSetWidth/Height as default values, even they are
37 // affected by padding, border, etc. 44 // affected by padding, border, etc.
38 int width = e.getOffsetWidth(); 45 width = e.getOffsetWidth();
39 int height = e.getOffsetHeight(); 46 height = e.getOffsetHeight();
47 src = "";
48 caption = "";
49
40 if ("IMG".equals(e.getTagName())) { 50 if ("IMG".equals(e.getTagName())) {
41 // This will get the absolute URL of the image and 51 extractImageAttributes(ImageElement.as(e));
42 // the displayed image dimension. 52 } else if ("FIGURE".equals(e.getTagName())) {
43 ImageElement imageElement = ImageElement.as(e); 53 Element img = getElementByTagName(e, "IMG");
44 imgSrc = imageElement.getSrc(); 54 if (img != null) {
wychen 2016/05/31 21:28:04 If a malformed <figure> contains no <img>s, an emp
marcelorcorrea 2016/05/31 21:46:44 Done.
45 // As an ImageElement is manipulated here, it is possible 55 extractImageAttributes(ImageElement.as(img));
46 // to get the real dimensions. 56 Element cap = getElementByTagName(e, "FIGCAPTION");
47 width = imageElement.getWidth(); 57 if (cap != null) {
48 height = imageElement.getHeight(); 58 caption = cap.getInnerText();
59 }
60 return new WebFigure(img, width, height, src, caption);
61 }
49 } 62 }
63 return new WebImage(e, width, height, src);
64 }
50 65
51 return new WebImage(e, width, height, imgSrc); 66 private void extractImageAttributes(ImageElement img) {
67 src = img.getSrc();
68 width = img.getWidth();
69 height = img.getHeight();
70 }
71
72 private Element getElementByTagName(Element e, String tagName) {
wychen 2016/05/31 21:28:04 getFirstElementByTagName?
marcelorcorrea 2016/05/31 21:46:44 Done.
73 NodeList<Element> elements = e.getElementsByTagName(tagName);
74 if (elements.getLength() > 0) {
75 return elements.getItem(0);
76 }
77 return null;
52 } 78 }
53 } 79 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698