OLD | NEW |
---|---|
1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller.webdocument.filters; | 5 package org.chromium.distiller.webdocument.filters; |
6 | 6 |
7 import org.chromium.distiller.webdocument.PlaceHolder; | |
7 import org.chromium.distiller.webdocument.WebDocument; | 8 import org.chromium.distiller.webdocument.WebDocument; |
8 import org.chromium.distiller.webdocument.WebElement; | 9 import org.chromium.distiller.webdocument.WebElement; |
9 import org.chromium.distiller.webdocument.WebText; | 10 import org.chromium.distiller.webdocument.WebText; |
10 | 11 |
12 import java.util.List; | |
13 import java.util.Stack; | |
14 | |
11 public class RelevantElements { | 15 public class RelevantElements { |
12 public static boolean process(WebDocument document) { | 16 public static boolean process(WebDocument document) { |
13 boolean changes = false; | 17 boolean changes = false; |
14 boolean inContent = false; | 18 boolean inContent = false; |
15 | 19 |
16 for (WebElement e : document.getElements()) { | 20 for (WebElement e : document.getElements()) { |
17 if (e.getIsContent()) { | 21 if (e.getIsContent()) { |
18 inContent = true; | 22 inContent = true; |
19 } else if (e instanceof WebText) { | 23 } else if (e instanceof WebText) { |
20 inContent = false; | 24 inContent = false; |
21 } else { | 25 } else { |
22 if (inContent) { | 26 if (inContent) { |
23 e.setIsContent(true); | 27 e.setIsContent(true); |
24 changes = true; | 28 changes = true; |
25 } | 29 } |
26 } | 30 } |
27 } | 31 } |
32 handlePlaceHolderElements(document.getElements()); | |
28 return changes; | 33 return changes; |
29 } | 34 } |
35 | |
36 public static void handlePlaceHolderElements( | |
wychen
2015/08/01 01:00:20
It makes sense to move the logic to a new file.
T
| |
37 List<WebElement> elements) { | |
38 class StackEntry { | |
39 public StackEntry(WebElement start, boolean isContent) { | |
40 this.start = start; | |
41 this.isContent = isContent; | |
42 } | |
43 | |
44 WebElement start; | |
45 boolean isContent; | |
46 } | |
mdjones
2015/08/03 16:57:55
What if we use Set<PlaceHolder> and Stack<PlaceHol
dalmirdasilva
2015/08/03 17:13:18
Usually, we use stack when parsing such kinds of t
mdjones
2015/08/03 18:10:54
I don't, I think we should use both Set and Stack
| |
47 boolean isContent = false; | |
48 int stackMark = -1; | |
49 Stack<StackEntry> holderStack = new Stack<>(); | |
50 | |
51 for (WebElement e : elements) { | |
52 if (e instanceof WebText) { | |
53 if (!isContent) { | |
54 isContent = e.getIsContent(); | |
55 } | |
56 } else if (e instanceof PlaceHolder) { | |
57 PlaceHolder ph = (PlaceHolder) e; | |
58 if (ph.isStart()) { | |
59 holderStack.push(new StackEntry(e, isContent)); | |
60 isContent = false; | |
61 } else { | |
62 StackEntry stackEntry = holderStack.pop(); | |
dalmirdasilva
2015/08/03 15:43:50
This might raise EmptyStackException if the HTML i
wychen
2015/08/04 02:37:00
I think Chrome fixes that for you when you access
| |
63 boolean content = isContent || stackMark >= holderStack.size (); | |
64 if (content) { | |
65 stackMark = holderStack.size() - 1; | |
66 } | |
67 stackEntry.start.setIsContent(content); | |
68 e.setIsContent(content); | |
69 isContent = stackEntry.isContent; | |
70 } | |
71 } | |
72 } | |
73 } | |
30 } | 74 } |
OLD | NEW |