OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 /** | 5 /** |
6 * boilerpipe | 6 * boilerpipe |
7 * | 7 * |
8 * Copyright (c) 2009 Christian Kohlschütter | 8 * Copyright (c) 2009 Christian Kohlschütter |
9 * | 9 * |
10 * The author licenses this file to You under the Apache License, Version 2.0 | 10 * The author licenses this file to You under the Apache License, Version 2.0 |
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
53 public void startElement(Element element) { | 53 public void startElement(Element element) { |
54 ElementAction a = ElementAction.getForElement(element); | 54 ElementAction a = ElementAction.getForElement(element); |
55 actionStack.push(a); | 55 actionStack.push(a); |
56 | 56 |
57 if (a.changesTagLevel) { | 57 if (a.changesTagLevel) { |
58 tagLevel++; | 58 tagLevel++; |
59 } | 59 } |
60 | 60 |
61 if (a.isAnchor) { | 61 if (a.isAnchor) { |
62 enterAnchor(); | 62 enterAnchor(); |
| 63 } else if (a.isList) { |
| 64 enterList(); |
63 } | 65 } |
64 | 66 |
65 flush |= a.flush; | 67 flush |= a.flush; |
66 } | 68 } |
67 | 69 |
68 @Override | 70 @Override |
69 public void endElement() { | 71 public void endElement() { |
70 ElementAction a = actionStack.peek(); | 72 ElementAction a = actionStack.peek(); |
71 | 73 |
72 if (a.changesTagLevel) { | 74 if (a.changesTagLevel) { |
73 tagLevel--; | 75 tagLevel--; |
74 } | 76 } |
75 | 77 |
76 if (flush || a.flush) { | 78 if (flush || a.flush) { |
77 flushBlock(groupNumber); | 79 flushBlock(groupNumber); |
78 groupNumber++; | 80 groupNumber++; |
79 } | 81 } |
80 | 82 |
81 if (a.isAnchor) { | 83 if (a.isAnchor) { |
82 exitAnchor(); | 84 exitAnchor(); |
| 85 } else if (a.isList) { |
| 86 exitList(); |
83 } | 87 } |
84 | 88 |
85 // Must be done after flushBlock() because the labels for the block come
from the | 89 // Must be done after flushBlock() because the labels for the block come
from the |
86 // actionStack. | 90 // actionStack. |
87 actionStack.pop(); | 91 actionStack.pop(); |
88 } | 92 } |
89 | 93 |
90 @Override | 94 @Override |
91 public void textNode(Text textNode) { | 95 public void textNode(Text textNode) { |
92 if (flush) { | 96 if (flush) { |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
139 | 143 |
140 protected void addWebText(final WebText tb) { | 144 protected void addWebText(final WebText tb) { |
141 for (ElementAction a : actionStack) { | 145 for (ElementAction a : actionStack) { |
142 for (int i = 0; i < a.labels.length(); i++) { | 146 for (int i = 0; i < a.labels.length(); i++) { |
143 tb.addLabel(a.labels.get(i)); | 147 tb.addLabel(a.labels.get(i)); |
144 } | 148 } |
145 } | 149 } |
146 document.addText(tb); | 150 document.addText(tb); |
147 } | 151 } |
148 | 152 |
| 153 public void enterList() { |
| 154 |
| 155 } |
| 156 |
| 157 public void exitList() { |
| 158 |
| 159 } |
| 160 |
149 /** | 161 /** |
150 * Returns a {@link WebDocument} containing the extracted {@link WebText} | 162 * Returns a {@link WebDocument} containing the extracted {@link WebText} |
151 * s. NOTE: Only call this after parsing. | 163 * s. NOTE: Only call this after parsing. |
152 */ | 164 */ |
153 public WebDocument toWebDocument() { | 165 public WebDocument toWebDocument() { |
154 // Just to be sure. | 166 // Just to be sure. |
155 flushBlock(groupNumber); | 167 flushBlock(groupNumber); |
156 return document; | 168 return document; |
157 } | 169 } |
158 | 170 |
159 } | 171 } |
OLD | NEW |