Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(333)

Unified Diff: java/org/chromium/distiller/ContentExtractor.java

Issue 2203563002: Extract image URLs in srcset as well (Closed) Base URL: git@github.com:chromium/dom-distiller.git@master
Patch Set: format Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | java/org/chromium/distiller/DocumentTitleGetter.java » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
Index: java/org/chromium/distiller/ContentExtractor.java
diff --git a/java/org/chromium/distiller/ContentExtractor.java b/java/org/chromium/distiller/ContentExtractor.java
index 7d16a774dd4360a4ce2015ed5296b466d386e969..e5858b6e875b972790e0c1d040772e07a2b6c01e 100644
--- a/java/org/chromium/distiller/ContentExtractor.java
+++ b/java/org/chromium/distiller/ContentExtractor.java
@@ -34,7 +34,7 @@ public class ContentExtractor {
private final TimingInfo mTimingInfo;
private final StatisticsInfo mStatisticsInfo;
private final MarkupParser parser;
- private final List<String> imageUrls;
+ private List<String> imageUrls;
private String textDirection;
private class WebDocumentInfo {
@@ -47,7 +47,6 @@ public class ContentExtractor {
candidateTitles = new LinkedList<String>();
mTimingInfo = TimingInfo.create();
mStatisticsInfo = StatisticsInfo.create();
- imageUrls = new ArrayList<String>();
double startTime = DomUtil.getTime();
parser = new MarkupParser(root, mTimingInfo);
@@ -96,16 +95,14 @@ public class ContentExtractor {
LeadImageFinder.process(documentInfo.document);
NestedElementRetainer.process(documentInfo.document);
- List<WebImage> images = documentInfo.document.getContentImages();
- for (WebImage wi : images) {
- imageUrls.add(wi.getSrc());
- }
mTimingInfo.setArticleProcessingTime(DomUtil.getTime() - now);
now = DomUtil.getTime();
String html = documentInfo.document.generateOutput(textOnly);
mTimingInfo.setFormattingTime(DomUtil.getTime() - now);
+ imageUrls = documentInfo.document.getImageUrls();
+
if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_TIMING_INFO)) {
for (int i = 0; i < mTimingInfo.getOtherTimesCount(); i++) {
TimingEntry entry = mTimingInfo.getOtherTimes(i);
« no previous file with comments | « no previous file | java/org/chromium/distiller/DocumentTitleGetter.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698