Chromium Code Reviews| Index: java/org/chromium/distiller/webdocument/filters/RelevantElements.java |
| diff --git a/java/org/chromium/distiller/webdocument/filters/RelevantElements.java b/java/org/chromium/distiller/webdocument/filters/RelevantElements.java |
| index 02ec74670b5dd6b98dd3767d0e40540326e41c48..d1dd9ba301928fe52f6adbf19e7d99dbbe28405a 100644 |
| --- a/java/org/chromium/distiller/webdocument/filters/RelevantElements.java |
| +++ b/java/org/chromium/distiller/webdocument/filters/RelevantElements.java |
| @@ -4,10 +4,14 @@ |
| package org.chromium.distiller.webdocument.filters; |
| +import org.chromium.distiller.webdocument.PlaceHolder; |
| import org.chromium.distiller.webdocument.WebDocument; |
| import org.chromium.distiller.webdocument.WebElement; |
| import org.chromium.distiller.webdocument.WebText; |
| +import java.util.List; |
| +import java.util.Stack; |
| + |
| public class RelevantElements { |
| public static boolean process(WebDocument document) { |
| boolean changes = false; |
| @@ -25,6 +29,46 @@ public class RelevantElements { |
| } |
| } |
| } |
| + handlePlaceHolderElements(document.getElements()); |
| return changes; |
| } |
| + |
// Marks PlaceHolder start/end pairs in `elements` as content or not via
// setIsContent(). A pair is marked as content when a WebText whose
// getIsContent() is true was seen since its start marker (outside any nested
// pair), or when a pair nested inside it was itself marked as content.
// NOTE(review): assumes start and end markers are balanced and properly
// nested in `elements` -- confirm against the code that emits PlaceHolder.
| + public static void handlePlaceHolderElements( |

wychen
2015/08/01 01:00:20
It makes sense to move the logic to a new file.
T
|
| + List<WebElement> elements) { |
// Saved state for one open placeholder: its start element and the value of
// the enclosing scope's isContent flag at the moment the start was seen.
| + class StackEntry { |
| + public StackEntry(WebElement start, boolean isContent) { |
| + this.start = start; |
| + this.isContent = isContent; |
| + } |
| + |
| + WebElement start; |
| + boolean isContent; |
| + } |

mdjones
2015/08/03 16:57:55
What if we use Set<PlaceHolder> and Stack<PlaceHol
dalmirdasilva
2015/08/03 17:13:18
Usually, we use stack when parsing such kinds of t
mdjones
2015/08/03 18:10:54
I don't, I think we should use both Set and Stack
|
// True once a WebText with getIsContent() == true has been seen in the
// current (innermost open) scope.
| + boolean isContent = false; |
// A closing placeholder is treated as content when the stack size after its
// pop is <= stackMark. Starts at -1, which never satisfies that test.
| + int stackMark = -1; |
| + Stack<StackEntry> holderStack = new Stack<>(); |
| + |
| + for (WebElement e : elements) { |
| + if (e instanceof WebText) { |
// The flag is sticky: once true it is not overwritten by later texts.
| + if (!isContent) { |
| + isContent = e.getIsContent(); |
| + } |
| + } else if (e instanceof PlaceHolder) { |
| + PlaceHolder ph = (PlaceHolder) e; |
| + if (ph.isStart()) { |
// Save the outer scope's flag and begin a fresh scope for this placeholder.
| + holderStack.push(new StackEntry(e, isContent)); |
| + isContent = false; |
| + } else { |
// NOTE(review): there is no emptiness check before this pop(), so an end
// marker without a matching start would throw java.util.EmptyStackException.
| + StackEntry stackEntry = holderStack.pop(); |

dalmirdasilva
2015/08/03 15:43:50
This might raise EmptyStackException if the HTML i
wychen
2015/08/04 02:37:00
I think Chrome fixes that for you when you access
|
// Content if this scope saw content text directly, or if a nested pair that
// closed earlier left a mark at (or above) this stack position.
| + boolean content = isContent || stackMark >= holderStack.size(); |
| + if (content) { |
// Move the mark to the parent's stack position so the parent is also marked
// as content when it closes.
| + stackMark = holderStack.size() - 1; |
| + } |
// Both the start and the end marker of the pair receive the same flag.
| + stackEntry.start.setIsContent(content); |
| + e.setIsContent(content); |
// Restore the flag of the enclosing scope as it was when this pair opened.
| + isContent = stackEntry.isContent; |
| + } |
| + } |
| + } |
| + } |
| } |