| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 /** | 5 /** |
| 6 * boilerpipe | 6 * boilerpipe |
| 7 * | 7 * |
| 8 * Copyright (c) 2009 Christian Kohlschütter | 8 * Copyright (c) 2009 Christian Kohlschütter |
| 9 * | 9 * |
| 10 * The author licenses this file to You under the Apache License, Version 2.0 | 10 * The author licenses this file to You under the Apache License, Version 2.0 |
| (...skipping 42 matching lines...) |
| 53 public void startElement(Element element) { | 53 public void startElement(Element element) { |
| 54 ElementAction a = ElementAction.getForElement(element); | 54 ElementAction a = ElementAction.getForElement(element); |
| 55 actionStack.push(a); | 55 actionStack.push(a); |
| 56 | 56 |
| 57 if (a.changesTagLevel) { | 57 if (a.changesTagLevel) { |
| 58 tagLevel++; | 58 tagLevel++; |
| 59 } | 59 } |
| 60 | 60 |
| 61 if (a.isAnchor) { | 61 if (a.isAnchor) { |
| 62 enterAnchor(); | 62 enterAnchor(); |
| 63 } else if (a.isList) { |
| 64 enterList(); |
| 63 } | 65 } |
| 64 | 66 |
| 65 flush |= a.flush; | 67 flush |= a.flush; |
| 66 } | 68 } |
| 67 | 69 |
| 68 @Override | 70 @Override |
| 69 public void endElement() { | 71 public void endElement() { |
| 70 ElementAction a = actionStack.peek(); | 72 ElementAction a = actionStack.peek(); |
| 71 | 73 |
| 72 if (a.changesTagLevel) { | 74 if (a.changesTagLevel) { |
| 73 tagLevel--; | 75 tagLevel--; |
| 74 } | 76 } |
| 75 | 77 |
| 76 if (flush || a.flush) { | 78 if (flush || a.flush) { |
| 77 flushBlock(groupNumber); | 79 flushBlock(groupNumber); |
| 78 groupNumber++; | 80 groupNumber++; |
| 79 } | 81 } |
| 80 | 82 |
| 81 if (a.isAnchor) { | 83 if (a.isAnchor) { |
| 82 exitAnchor(); | 84 exitAnchor(); |
| 85 } else if (a.isList) { |
| 86 exitList(); |
| 83 } | 87 } |
| 84 | 88 |
| 85 // Must be done after flushBlock() because the labels for the block come from the | 89 // Must be done after flushBlock() because the labels for the block come from the |
| 86 // actionStack. | 90 // actionStack. |
| 87 actionStack.pop(); | 91 actionStack.pop(); |
| 88 } | 92 } |
| 89 | 93 |
| 90 @Override | 94 @Override |
| 91 public void textNode(Text textNode) { | 95 public void textNode(Text textNode) { |
| 92 if (flush) { | 96 if (flush) { |
| (...skipping 46 matching lines...) |
| 139 | 143 |
| 140 protected void addWebText(final WebText tb) { | 144 protected void addWebText(final WebText tb) { |
| 141 for (ElementAction a : actionStack) { | 145 for (ElementAction a : actionStack) { |
| 142 for (int i = 0; i < a.labels.length(); i++) { | 146 for (int i = 0; i < a.labels.length(); i++) { |
| 143 tb.addLabel(a.labels.get(i)); | 147 tb.addLabel(a.labels.get(i)); |
| 144 } | 148 } |
| 145 } | 149 } |
| 146 document.addText(tb); | 150 document.addText(tb); |
| 147 } | 151 } |
| 148 | 152 |
| 153 public void enterList() { |
| 154 |
| 155 } |
| 156 |
| 157 public void exitList() { |
| 158 |
| 159 } |
| 160 |
| 149 /** | 161 /** |
| 150 * Returns a {@link WebDocument} containing the extracted {@link WebText} | 162 * Returns a {@link WebDocument} containing the extracted {@link WebText} |
| 151 * s. NOTE: Only call this after parsing. | 163 * s. NOTE: Only call this after parsing. |
| 152 */ | 164 */ |
| 153 public WebDocument toWebDocument() { | 165 public WebDocument toWebDocument() { |
| 154 // Just to be sure. | 166 // Just to be sure. |
| 155 flushBlock(groupNumber); | 167 flushBlock(groupNumber); |
| 156 return document; | 168 return document; |
| 157 } | 169 } |
| 158 | 170 |
| 159 } | 171 } |
| OLD | NEW |