Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(508)

Unified Diff: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java

Issue 2638823002: Support <picture> in image extraction (Closed)
Patch Set: support lazy loading in <picture> (Created 3 years, 11 months ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « java/org/chromium/distiller/DomUtil.java ('k') | java/org/chromium/distiller/webdocument/WebImage.java » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
diff --git a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
index 0064b9579eb32a3cf70f0907238d9ca1e18cee35..37a8d329ecb56b951ec3b3183165150c508ddb7e 100644
--- a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
+++ b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
@@ -29,6 +29,7 @@ public class ImageExtractor implements EmbedExtractor {
static {
// TODO(mdjones): Add "DIV" to this list for css images and possibly captions.
relevantTags.add("IMG");
+ relevantTags.add("PICTURE");
relevantTags.add("FIGURE");
}
@@ -47,29 +48,34 @@ public class ImageExtractor implements EmbedExtractor {
}
imgSrc = "";
- if ("IMG".equals(e.getTagName())) {
- extractImageAttributes(ImageElement.as(e));
- return new WebImage(e, width, height, imgSrc);
- } else if ("FIGURE".equals(e.getTagName())) {
- Element img = getFirstElementByTagName(e, "IMG");
- if (img != null) {
- extractImageAttributes(ImageElement.as(img));
- Element figcaption;
- Element cap = getFirstElementByTagName(e, "FIGCAPTION");
- if (cap != null) {
- // We look for links because some sites put non-caption
- // elements into <figcaption>. For example: image credit
- // could contain a link. So we get the whole DOM structure within
- // <figcaption> only when it contains links, otherwise we get the innerText.
- figcaption = getFirstElementByTagName(cap, "A") != null ?
- cap : createFigcaptionElement(cap);
- } else {
- figcaption = createFigcaptionElement(e);
- }
- return new WebFigure(img, width, height, imgSrc, figcaption);
+ ImageElement ie = ImageElement.as(DomUtil.getFirstElementByTagNameInc(e, "IMG"));
+
+ if ("FIGURE".equals(e.getTagName())) {
+ Element img = DomUtil.getFirstElementByTagName(e, "PICTURE");
+ if (img == null) {
+ img = DomUtil.getFirstElementByTagName(e, "IMG");
+ }
+ if (img == null) {
+ return null;
}
+ extractImageAttributes(ie);
+ Element figcaption;
+ Element cap = DomUtil.getFirstElementByTagName(e, "FIGCAPTION");
+ if (cap != null) {
+ // We look for links because some sites put non-caption
+ // elements into <figcaption>. For example: image credit
+ // could contain a link. So we get the whole DOM structure within
+ // <figcaption> only when it contains links, otherwise we get the innerText.
+ figcaption = DomUtil.getFirstElementByTagName(cap, "A") != null ?
+ cap : createFigcaptionElement(cap);
+ } else {
+ figcaption = createFigcaptionElement(e);
+ }
+ return new WebFigure(img, width, height, imgSrc, figcaption);
}
- return null;
+
+ extractImageAttributes(ie);
+ return new WebImage(e, width, height, imgSrc);
}
private void extractImageAttributes(ImageElement imageElement) {
@@ -98,14 +104,6 @@ public class ImageExtractor implements EmbedExtractor {
}
}
- private Element getFirstElementByTagName(Element e, String tagName) {
- NodeList<Element> elements = e.getElementsByTagName(tagName);
- if (elements.getLength() > 0) {
- return elements.getItem(0);
- }
- return null;
- }
-
private Element createFigcaptionElement(Element element) {
Element figcaption = Document.get().createElement("FIGCAPTION");
figcaption.setInnerText(DomUtil.getInnerText(element));
« no previous file with comments | « java/org/chromium/distiller/DomUtil.java ('k') | java/org/chromium/distiller/webdocument/WebImage.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698