Index: java/org/chromium/distiller/webdocument/filters/WebTagStructureKeeper.java |
diff --git a/java/org/chromium/distiller/webdocument/filters/WebTagStructureKeeper.java b/java/org/chromium/distiller/webdocument/filters/WebTagStructureKeeper.java |
new file mode 100644 |
index 0000000000000000000000000000000000000000..3659765d34538a8699d1b4ea1d00aee66a6432ca |
--- /dev/null |
+++ b/java/org/chromium/distiller/webdocument/filters/WebTagStructureKeeper.java |
@@ -0,0 +1,40 @@ |
+package org.chromium.distiller.webdocument.filters; |
+ |
+import org.chromium.distiller.webdocument.WebDocument; |
+import org.chromium.distiller.webdocument.WebElement; |
+import org.chromium.distiller.webdocument.WebTag; |
+import org.chromium.distiller.webdocument.WebText; |
+ |
+import java.util.Stack; |
+ |
+public class WebTagStructureKeeper { /* Holds one static pass that copies content flags from WebText elements onto the WebTag start/end pairs that enclose them. */ |
mdjones
2015/08/03 23:29:45
How about NestedElement{Builder|Organizer|Retainer
|
+ public static void process(WebDocument document) { /* Walks document.getElements() in order and calls setIsContent on every WebTag it meets; WebText elements are only read. */ |
+ boolean isContent = false; /* running flag: set by a content WebText, cleared at each start tag, reassigned at each end tag */ |
+ int stackMark = -1; /* set to (stack.size() - 1) after the pop whenever a closed pair is marked content; -1 means never set */ |
+ Stack<WebTag> stack = new Stack<>(); /* start tags that have not yet been matched by an end tag, innermost on top */ |
+ |
+ for (WebElement e : document.getElements()) { |
+ if (e instanceof WebText) { |
mdjones
2015/08/03 23:29:45
Though I'm not sure it is a common case, this does
|
+ if (!isContent) { /* latch: once the flag is true, a later non-content WebText cannot clear it */ |
+ isContent = e.getIsContent(); |
+ } |
+ } else if (e instanceof WebTag) { |
+ WebTag webTag = (WebTag) e; |
+ if (webTag.isStartTag()) { |
+ webTag.setIsContent(isContent); /* provisional value; replaced below if a matching end tag later pops this start tag */ |
+ stack.push(webTag); |
+ isContent = false; /* text inside the newly opened tag is tracked from scratch */ |
+ } else { |
+ WebTag startWebTag = stack.pop(); /* NOTE(review): java.util.Stack.pop() throws EmptyStackException when no start tag is open - confirm input is always balanced */ |
+ boolean content = isContent || stackMark >= stack.size(); /* content if the flag is set, or if stackMark is at or above the post-pop depth */ |
mdjones
2015/08/03 23:29:45
isContent |= stackMark >= stackSize();
Then just
|
+ if (content) { |
+ stackMark = stack.size() - 1; /* one below the current depth, so the next end tag one level up also passes the stackMark check */ |
+ } |
+ startWebTag.setIsContent(content); /* start and end tag of the pair receive the same value */ |
+ webTag.setIsContent(content); |
+ isContent = startWebTag.getIsContent(); /* presumably equals content, assuming getIsContent() returns what setIsContent stored - WebTag is not visible here */ |
wychen
2015/08/04 02:37:01
Does this pass the test? Moving this line 2 lines
|
+ } |
+ } |
+ } |
+ } |
+} |