Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller.webdocument.filters; | 5 package org.chromium.distiller.webdocument.filters; |
| 6 | 6 |
| 7 import org.chromium.distiller.webdocument.PlaceHolder; | |
| 7 import org.chromium.distiller.webdocument.WebDocument; | 8 import org.chromium.distiller.webdocument.WebDocument; |
| 8 import org.chromium.distiller.webdocument.WebElement; | 9 import org.chromium.distiller.webdocument.WebElement; |
| 9 import org.chromium.distiller.webdocument.WebText; | 10 import org.chromium.distiller.webdocument.WebText; |
| 10 | 11 |
| 12 import java.util.List; | |
| 13 import java.util.Stack; | |
| 14 | |
| 11 public class RelevantElements { | 15 public class RelevantElements { |
| 12 public static boolean process(WebDocument document) { | 16 public static boolean process(WebDocument document) { |
| 13 boolean changes = false; | 17 boolean changes = false; |
| 14 boolean inContent = false; | 18 boolean inContent = false; |
| 15 | 19 |
| 16 for (WebElement e : document.getElements()) { | 20 for (WebElement e : document.getElements()) { |
| 17 if (e.getIsContent()) { | 21 if (e.getIsContent()) { |
| 18 inContent = true; | 22 inContent = true; |
| 19 } else if (e instanceof WebText) { | 23 } else if (e instanceof WebText) { |
| 20 inContent = false; | 24 inContent = false; |
| 21 } else { | 25 } else { |
| 22 if (inContent) { | 26 if (inContent) { |
| 23 e.setIsContent(true); | 27 e.setIsContent(true); |
| 24 changes = true; | 28 changes = true; |
| 25 } | 29 } |
| 26 } | 30 } |
| 27 } | 31 } |
| 32 handlePlaceHolderElements(document.getElements()); | |
| 28 return changes; | 33 return changes; |
| 29 } | 34 } |
| 35 | |
| 36 public static void handlePlaceHolderElements( | |
|
wychen
2015/08/01 01:00:20
It makes sense to move the logic to a new file.
T…
| |
| 37 List<WebElement> elements) { | |
| 38 class StackEntry { | |
| 39 public StackEntry(WebElement start, boolean isContent) { | |
| 40 this.start = start; | |
| 41 this.isContent = isContent; | |
| 42 } | |
| 43 | |
| 44 WebElement start; | |
| 45 boolean isContent; | |
| 46 } | |
|
mdjones
2015/08/03 16:57:55
What if we use Set<PlaceHolder> and Stack<PlaceHol…
dalmirdasilva
2015/08/03 17:13:18
Usually, we use stack when parsing such kinds of t…
mdjones
2015/08/03 18:10:54
I don't, I think we should use both Set and Stack
| |
| 47 boolean isContent = false; | |
| 48 int stackMark = -1; | |
| 49 Stack<StackEntry> holderStack = new Stack<>(); | |
| 50 | |
| 51 for (WebElement e : elements) { | |
| 52 if (e instanceof WebText) { | |
| 53 if (!isContent) { | |
| 54 isContent = e.getIsContent(); | |
| 55 } | |
| 56 } else if (e instanceof PlaceHolder) { | |
| 57 PlaceHolder ph = (PlaceHolder) e; | |
| 58 if (ph.isStart()) { | |
| 59 holderStack.push(new StackEntry(e, isContent)); | |
| 60 isContent = false; | |
| 61 } else { | |
| 62 StackEntry stackEntry = holderStack.pop(); | |
|
dalmirdasilva
2015/08/03 15:43:50
This might raise EmptyStackException if the HTML i…
wychen
2015/08/04 02:37:00
I think Chrome fixes that for you when you access…
| |
| 63 boolean content = isContent || stackMark >= holderStack.size (); | |
| 64 if (content) { | |
| 65 stackMark = holderStack.size() - 1; | |
| 66 } | |
| 67 stackEntry.start.setIsContent(content); | |
| 68 e.setIsContent(content); | |
| 69 isContent = stackEntry.isContent; | |
| 70 } | |
| 71 } | |
| 72 } | |
| 73 } | |
| 30 } | 74 } |
| OLD | NEW |