Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller.extractors.embeds; | 5 package org.chromium.distiller.extractors.embeds; |
| 6 | 6 |
| 7 import com.google.gwt.dom.client.Document; | 7 import com.google.gwt.dom.client.Document; |
| 8 import com.google.gwt.dom.client.Element; | 8 import com.google.gwt.dom.client.Element; |
| 9 import com.google.gwt.dom.client.ImageElement; | 9 import com.google.gwt.dom.client.ImageElement; |
| 10 import com.google.gwt.dom.client.NodeList; | 10 import com.google.gwt.dom.client.NodeList; |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 22 */ | 22 */ |
| 23 public class ImageExtractor implements EmbedExtractor { | 23 public class ImageExtractor implements EmbedExtractor { |
| 24 private static final Set<String> relevantTags = new HashSet<>(); | 24 private static final Set<String> relevantTags = new HashSet<>(); |
| 25 private String imgSrc; | 25 private String imgSrc; |
| 26 private int width; | 26 private int width; |
| 27 private int height; | 27 private int height; |
| 28 | 28 |
| 29 static { | 29 static { |
| 30 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. | 30 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. |
| 31 relevantTags.add("IMG"); | 31 relevantTags.add("IMG"); |
| 32 relevantTags.add("PICTURE"); | |
| 32 relevantTags.add("FIGURE"); | 33 relevantTags.add("FIGURE"); |
| 33 } | 34 } |
| 34 | 35 |
| 35 private static final String[] LAZY_IMAGE_ATTRIBUTES = | 36 private static final String[] LAZY_IMAGE_ATTRIBUTES = |
| 36 {"data-src", "data-original", "datasrc", "data-url"}; | 37 {"data-src", "data-original", "datasrc", "data-url"}; |
| 37 | 38 |
| 38 @Override | 39 @Override |
| 39 public Set<String> getRelevantTagNames() { | 40 public Set<String> getRelevantTagNames() { |
| 40 return relevantTags; | 41 return relevantTags; |
| 41 } | 42 } |
| 42 | 43 |
| 43 @Override | 44 @Override |
| 44 public WebImage extract(Element e) { | 45 public WebImage extract(Element e) { |
| 45 if (!relevantTags.contains(e.getTagName())) { | 46 if (!relevantTags.contains(e.getTagName())) { |
| 46 return null; | 47 return null; |
| 47 } | 48 } |
| 48 imgSrc = ""; | 49 imgSrc = ""; |
| 49 | 50 |
| 50 if ("IMG".equals(e.getTagName())) { | 51 if ("IMG".equals(e.getTagName())) { |
| 51 extractImageAttributes(ImageElement.as(e)); | 52 extractImageAttributes(ImageElement.as(e)); |
| 52 return new WebImage(e, width, height, imgSrc); | 53 return new WebImage(e, width, height, imgSrc); |
| 54 } else if ("PICTURE".equals(e.getTagName())) { | |
|
mdjones
2017/01/17 17:41:46
How would you feel about having some sort of utili
wychen
2017/01/18 17:29:54
Done.
| |
| 55 return new WebImage(e, width, height, imgSrc); | |
|
mdjones
2017/01/17 17:41:47
nit: indented too far.
wychen
2017/01/18 17:29:54
Done.
| |
| 53 } else if ("FIGURE".equals(e.getTagName())) { | 56 } else if ("FIGURE".equals(e.getTagName())) { |
| 54 Element img = getFirstElementByTagName(e, "IMG"); | 57 Element img = getFirstElementByTagName(e, "PICTURE"); |
| 58 if (img == null) { | |
| 59 img = getFirstElementByTagName(e, "IMG"); | |
| 60 } | |
| 55 if (img != null) { | 61 if (img != null) { |
|
mdjones
2017/01/17 17:41:46
nit: early return instead of nesting?
wychen
2017/01/18 17:29:54
Done.
| |
| 56 extractImageAttributes(ImageElement.as(img)); | 62 if ("IMG".equals(img.getTagName())) { |
| 63 extractImageAttributes(ImageElement.as(img)); | |
| 64 } | |
| 57 Element figcaption; | 65 Element figcaption; |
| 58 Element cap = getFirstElementByTagName(e, "FIGCAPTION"); | 66 Element cap = getFirstElementByTagName(e, "FIGCAPTION"); |
| 59 if (cap != null) { | 67 if (cap != null) { |
| 60 // We look for links because some sites put non-caption | 68 // We look for links because some sites put non-caption |
| 61 // elements into <figcaption>. For example: image credit | 69 // elements into <figcaption>. For example: image credit |
| 62 // could contain a link. So we get the whole DOM structure within | 70 // could contain a link. So we get the whole DOM structure within |
| 63 // <figcaption> only when it contains links, otherwise we get the innerText. | 71 // <figcaption> only when it contains links, otherwise we get the innerText. |
| 64 figcaption = getFirstElementByTagName(cap, "A") != null ? | 72 figcaption = getFirstElementByTagName(cap, "A") != null ? |
| 65 cap : createFigcaptionElement(cap); | 73 cap : createFigcaptionElement(cap); |
| 66 } else { | 74 } else { |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 105 } | 113 } |
| 106 return null; | 114 return null; |
| 107 } | 115 } |
| 108 | 116 |
| 109 private Element createFigcaptionElement(Element element) { | 117 private Element createFigcaptionElement(Element element) { |
| 110 Element figcaption = Document.get().createElement("FIGCAPTION"); | 118 Element figcaption = Document.get().createElement("FIGCAPTION"); |
| 111 figcaption.setInnerText(DomUtil.getInnerText(element)); | 119 figcaption.setInnerText(DomUtil.getInnerText(element)); |
| 112 return figcaption; | 120 return figcaption; |
| 113 } | 121 } |
| 114 } | 122 } |
| OLD | NEW |