Index: java/org/chromium/distiller/webdocument/filters/RelevantElements.java |
diff --git a/java/org/chromium/distiller/webdocument/filters/RelevantElements.java b/java/org/chromium/distiller/webdocument/filters/RelevantElements.java |
index 02ec74670b5dd6b98dd3767d0e40540326e41c48..d1dd9ba301928fe52f6adbf19e7d99dbbe28405a 100644 |
--- a/java/org/chromium/distiller/webdocument/filters/RelevantElements.java |
+++ b/java/org/chromium/distiller/webdocument/filters/RelevantElements.java |
@@ -4,10 +4,14 @@ |
package org.chromium.distiller.webdocument.filters; |
+import org.chromium.distiller.webdocument.PlaceHolder; |
import org.chromium.distiller.webdocument.WebDocument; |
import org.chromium.distiller.webdocument.WebElement; |
import org.chromium.distiller.webdocument.WebText; |
+import java.util.List; |
+import java.util.Stack; |
+ |
public class RelevantElements { |
public static boolean process(WebDocument document) { |
boolean changes = false; |
@@ -25,6 +29,46 @@ public class RelevantElements { |
} |
} |
} |
+ handlePlaceHolderElements(document.getElements()); |
return changes; |
} |
+ |
+ /** |
+  * Walks the elements in order and marks every matched start/end PlaceHolder |
+  * pair as content when a WebText between the pair (at any nesting depth) |
+  * reports getIsContent() as true. |
+  * |
+  * An end PlaceHolder that has no pending start is skipped instead of being |
+  * popped, so an unbalanced sequence does not throw EmptyStackException. |
+  * |
+  * @param elements the elements to scan; PlaceHolder entries are updated in |
+  *     place through setIsContent(). |
+  */ |
+ public static void handlePlaceHolderElements( |
wychen
2015/08/01 01:00:20
It makes sense to move the logic to a new file.
T
|
+         List<WebElement> elements) { |
+     // Remembers an open start PlaceHolder together with the content state |
+     // of the enclosing scope at the moment the PlaceHolder was opened. |
+     class StackEntry { |
+         public StackEntry(WebElement start, boolean isContent) { |
+             this.start = start; |
+             this.isContent = isContent; |
+         } |
+ |
+         final WebElement start; |
+         final boolean isContent; |
+     } |
mdjones
2015/08/03 16:57:55
What if we use Set<PlaceHolder> and Stack<PlaceHol
dalmirdasilva
2015/08/03 17:13:18
Usually, we use stack when parsing such kinds of t
mdjones
2015/08/03 18:10:54
I don't, I think we should use both Set and Stack
|
+     // True once a content WebText has been seen in the current scope. |
+     boolean isContent = false; |
+     // Stack depth of the innermost open scope that must be marked as |
+     // content because a pair nested inside it was content; -1 when none. |
+     int stackMark = -1; |
+     Stack<StackEntry> holderStack = new Stack<>(); |
+ |
+     for (WebElement e : elements) { |
+         if (e instanceof WebText) { |
+             if (!isContent) { |
+                 isContent = e.getIsContent(); |
+             } |
+         } else if (e instanceof PlaceHolder) { |
+             PlaceHolder ph = (PlaceHolder) e; |
+             if (ph.isStart()) { |
+                 // Save the outer scope's state and start a fresh scope. |
+                 holderStack.push(new StackEntry(e, isContent)); |
+                 isContent = false; |
+             } else { |
+                 if (holderStack.isEmpty()) { |
+                     // Unmatched end marker: there is no start to pair it |
+                     // with, so leave it untouched and keep scanning. |
+                     continue; |
+                 } |
+                 StackEntry stackEntry = holderStack.pop(); |
dalmirdasilva
2015/08/03 15:43:50
This might raise EmptyStackException if the HTML i
wychen
2015/08/04 02:37:00
I think Chrome fixes that for you when you access
|
+                 // The pair is content if its own scope saw content text, or |
+                 // if a nested pair already flagged this depth via stackMark. |
+                 boolean content = isContent || stackMark >= holderStack.size(); |
+                 if (content) { |
+                     // Propagate to the enclosing pair (one level up). |
+                     stackMark = holderStack.size() - 1; |
+                 } |
+                 stackEntry.start.setIsContent(content); |
+                 e.setIsContent(content); |
+                 // Restore the enclosing scope's own content state. |
+                 isContent = stackEntry.isContent; |
+             } |
+         } |
+     } |
+ } |
} |