Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(183)

Side by Side Diff: java/org/chromium/distiller/webdocument/filters/RelevantElements.java

Issue 1230583006: Fix for keeping lists structure (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: Small code refactor for more appropriate names. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller.webdocument.filters; 5 package org.chromium.distiller.webdocument.filters;
6 6
7 import org.chromium.distiller.webdocument.PlaceHolder;
7 import org.chromium.distiller.webdocument.WebDocument; 8 import org.chromium.distiller.webdocument.WebDocument;
8 import org.chromium.distiller.webdocument.WebElement; 9 import org.chromium.distiller.webdocument.WebElement;
9 import org.chromium.distiller.webdocument.WebText; 10 import org.chromium.distiller.webdocument.WebText;
10 11
12 import java.util.List;
13 import java.util.Stack;
14
11 public class RelevantElements { 15 public class RelevantElements {
12 public static boolean process(WebDocument document) { 16 public static boolean process(WebDocument document) {
13 boolean changes = false; 17 boolean changes = false;
14 boolean inContent = false; 18 boolean inContent = false;
15 19
16 for (WebElement e : document.getElements()) { 20 for (WebElement e : document.getElements()) {
17 if (e.getIsContent()) { 21 if (e.getIsContent()) {
18 inContent = true; 22 inContent = true;
19 } else if (e instanceof WebText) { 23 } else if (e instanceof WebText) {
20 inContent = false; 24 inContent = false;
21 } else { 25 } else {
22 if (inContent) { 26 if (inContent) {
23 e.setIsContent(true); 27 e.setIsContent(true);
24 changes = true; 28 changes = true;
25 } 29 }
26 } 30 }
27 } 31 }
32 handlePlaceHolderElements(document.getElements());
28 return changes; 33 return changes;
29 } 34 }
35
36 public static void handlePlaceHolderElements(
wychen 2015/08/01 01:00:20 It makes sense to move the logic to a new file. T
37 List<WebElement> elements) {
38 class StackEntry {
39 public StackEntry(WebElement start, boolean isContent) {
40 this.start = start;
41 this.isContent = isContent;
42 }
43
44 WebElement start;
45 boolean isContent;
46 }
mdjones 2015/08/03 16:57:55 What if we use Set<PlaceHolder> and Stack<PlaceHol
dalmirdasilva 2015/08/03 17:13:18 Usually, we use stack when parsing such kinds of t
mdjones 2015/08/03 18:10:54 I don't, I think we should use both Set and Stack
47 boolean isContent = false;
48 int stackMark = -1;
49 Stack<StackEntry> holderStack = new Stack<>();
50
51 for (WebElement e : elements) {
52 if (e instanceof WebText) {
53 if (!isContent) {
54 isContent = e.getIsContent();
55 }
56 } else if (e instanceof PlaceHolder) {
57 PlaceHolder ph = (PlaceHolder) e;
58 if (ph.isStart()) {
59 holderStack.push(new StackEntry(e, isContent));
60 isContent = false;
61 } else {
62 StackEntry stackEntry = holderStack.pop();
dalmirdasilva 2015/08/03 15:43:50 This might raise EmptyStackException if the HTML i
wychen 2015/08/04 02:37:00 I think Chrome fixes that for you when you access
63 boolean content = isContent || stackMark >= holderStack.size ();
64 if (content) {
65 stackMark = holderStack.size() - 1;
66 }
67 stackEntry.start.setIsContent(content);
68 e.setIsContent(content);
69 isContent = stackEntry.isContent;
70 }
71 }
72 }
73 }
30 } 74 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698