Chromium Code Reviews| Index: java/org/chromium/distiller/webdocument/filters/WebTagStructureKeeper.java |
| diff --git a/java/org/chromium/distiller/webdocument/filters/WebTagStructureKeeper.java b/java/org/chromium/distiller/webdocument/filters/WebTagStructureKeeper.java |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..3659765d34538a8699d1b4ea1d00aee66a6432ca |
| --- /dev/null |
| +++ b/java/org/chromium/distiller/webdocument/filters/WebTagStructureKeeper.java |
| @@ -0,0 +1,40 @@ |
| +package org.chromium.distiller.webdocument.filters; |
| + |
| +import org.chromium.distiller.webdocument.WebDocument; |
| +import org.chromium.distiller.webdocument.WebElement; |
| +import org.chromium.distiller.webdocument.WebTag; |
| +import org.chromium.distiller.webdocument.WebText; |
| + |
| +import java.util.Stack; |
| + |
| +public class WebTagStructureKeeper { |
|
mdjones
2015/08/03 23:29:45
How about NestedElement{Builder|Organizer|Retainer
|
| + public static void process(WebDocument document) { |
| + boolean isContent = false; |
| + int stackMark = -1; |
| + Stack<WebTag> stack = new Stack<>(); |
| + |
| + for (WebElement e : document.getElements()) { |
| + if (e instanceof WebText) { |
|
mdjones
2015/08/03 23:29:45
Though I'm not sure it is a common case, this does
|
| + if (!isContent) { |
| + isContent = e.getIsContent(); |
| + } |
| + } else if (e instanceof WebTag) { |
| + WebTag webTag = (WebTag) e; |
| + if (webTag.isStartTag()) { |
| + webTag.setIsContent(isContent); |
| + stack.push(webTag); |
| + isContent = false; |
| + } else { |
| + WebTag startWebTag = stack.pop(); |
| + boolean content = isContent || stackMark >= stack.size(); |
|
mdjones
2015/08/03 23:29:45
isContent |= stackMark >= stackSize();
Then just
|
| + if (content) { |
| + stackMark = stack.size() - 1; |
| + } |
| + startWebTag.setIsContent(content); |
| + webTag.setIsContent(content); |
| + isContent = startWebTag.getIsContent(); |
|
wychen
2015/08/04 02:37:01
Does this pass the test? Moving this line 2 lines
|
| + } |
| + } |
| + } |
| + } |
| +} |