| OLD | NEW |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller.extractors.embeds; | 5 package org.chromium.distiller.extractors.embeds; |
| 6 | 6 |
| 7 import com.google.gwt.dom.client.Element; | 7 import com.google.gwt.dom.client.Element; |
| 8 import com.google.gwt.dom.client.ImageElement; | 8 import com.google.gwt.dom.client.ImageElement; |
| 9 import org.chromium.distiller.LogUtil; |
| 9 import org.chromium.distiller.webdocument.WebImage; | 10 import org.chromium.distiller.webdocument.WebImage; |
| 10 | 11 |
| 11 import java.util.HashSet; | 12 import java.util.HashSet; |
| 12 import java.util.Set; | 13 import java.util.Set; |
| 13 | 14 |
| 14 /** | 15 /** |
| 15 * This class treats images as another type of embed and provides heuristics for lead image | 16 * This class treats images as another type of embed and provides heuristics for lead image |
| 16 * candidacy. | 17 * candidacy. |
| 17 */ | 18 */ |
| 18 public class ImageExtractor implements EmbedExtractor { | 19 public class ImageExtractor implements EmbedExtractor { |
| 19 private static final Set<String> relevantTags = new HashSet<>(); | 20 private static final Set<String> relevantTags = new HashSet<>(); |
| 20 static { | 21 static { |
| 21 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. | 22 // TODO(mdjones): Add "DIV" to this list for css images and possibly captions. |
| 22 relevantTags.add("IMG"); | 23 relevantTags.add("IMG"); |
| 23 } | 24 } |
| 25 private static final String[] LAZY_IMAGE_ATTRIBUTES = |
| 26 {"data-src", "data-original", "datasrc", "data-url"}; |
| 24 | 27 |
| 25 @Override | 28 @Override |
| 26 public Set<String> getRelevantTagNames() { | 29 public Set<String> getRelevantTagNames() { |
| 27 return relevantTags; | 30 return relevantTags; |
| 28 } | 31 } |
| 29 | 32 |
| 30 @Override | 33 @Override |
| 31 public WebImage extract(Element e) { | 34 public WebImage extract(Element e) { |
| 32 if (!relevantTags.contains(e.getTagName())) { | 35 if (!relevantTags.contains(e.getTagName())) { |
| 33 return null; | 36 return null; |
| 34 } | 37 } |
| 35 String imgSrc = ""; | 38 String imgSrc = ""; |
| 36 // Getting OffSetWidth/Height as default values, even they are | 39 // Getting OffSetWidth/Height as default values, even they are |
| 37 // affected by padding, border, etc. | 40 // affected by padding, border, etc. |
| 38 int width = e.getOffsetWidth(); | 41 int width = e.getOffsetWidth(); |
| 39 int height = e.getOffsetHeight(); | 42 int height = e.getOffsetHeight(); |
| 40 if ("IMG".equals(e.getTagName())) { | 43 if ("IMG".equals(e.getTagName())) { |
| 41 // This will get the absolute URL of the image and | 44 // This will get the absolute URL of the image and |
| 42 // the displayed image dimension. | 45 // the displayed image dimension. |
| 43 ImageElement imageElement = ImageElement.as(e); | 46 ImageElement imageElement = ImageElement.as(e); |
| 44 imgSrc = imageElement.getSrc(); | 47 // Try to get lazily-loaded images before falling back to get the src attribute. |
| 45 // As an ImageElement is manipulated here, it is possible | 48 for(String attr: LAZY_IMAGE_ATTRIBUTES) { |
| 46 // to get the real dimensions. | 49 imgSrc = imageElement.getAttribute(attr); |
| 47 width = imageElement.getWidth(); | 50 if (!imgSrc.isEmpty()) |
| 48 height = imageElement.getHeight(); | 51 break; |
| 52 } |
| 53 if (!imgSrc.isEmpty()) { |
| 54 // We cannot trust the dimension if the image is not loaded yet. |
| 55 // In some cases there are 1x1 placeholder images. |
| 56 width = 0; |
| 57 height = 0; |
| 58 } else { |
| 59 imgSrc = imageElement.getSrc(); |
| 60 // As an ImageElement is manipulated here, it is possible |
| 61 // to get the real dimensions. |
| 62 width = imageElement.getWidth(); |
| 63 height = imageElement.getHeight(); |
| 64 } |
| 49 } | 65 } |
| 50 | 66 |
| 67 if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) { |
| 68 LogUtil.logToConsole("Extracted WebImage: " + imgSrc); |
| 69 } |
| 51 return new WebImage(e, width, height, imgSrc); | 70 return new WebImage(e, width, height, imgSrc); |
| 52 } | 71 } |
| 53 } | 72 } |
| OLD | NEW |