Index: java/org/chromium/distiller/ContentExtractor.java |
diff --git a/java/org/chromium/distiller/ContentExtractor.java b/java/org/chromium/distiller/ContentExtractor.java |
index b9f0b2da0eab6d305cb30548856946b32e971228..7d16dd87f500b846f87830dab7eed78d237e5eac 100644 |
--- a/java/org/chromium/distiller/ContentExtractor.java |
+++ b/java/org/chromium/distiller/ContentExtractor.java |
@@ -21,6 +21,7 @@ import com.google.gwt.dom.client.Document; |
import com.google.gwt.dom.client.Element; |
import com.google.gwt.dom.client.Node; |
import com.google.gwt.dom.client.NodeList; |
+import org.chromium.distiller.webdocument.filters.WebTagStructureKeeper; |
import java.util.ArrayList; |
import java.util.LinkedList; |
@@ -92,6 +93,7 @@ public class ContentExtractor { |
now = DomUtil.getTime(); |
processDocument(documentInfo.document); |
RelevantElements.process(documentInfo.document); |
+ WebTagStructureKeeper.process(documentInfo.document); |
mdjones
2015/08/03 23:29:45
This should probably be the last filter to run since [... remainder of review comment truncated]
|
LeadImageFinder.process(documentInfo.document); |
List<WebImage> images = documentInfo.document.getContentImages(); |