OLD | NEW |
---|---|
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller.webdocument; | 5 package org.chromium.distiller.webdocument; |
6 | 6 |
7 import org.chromium.distiller.DomUtil; | 7 import org.chromium.distiller.DomUtil; |
8 import org.chromium.distiller.DomWalker; | 8 import org.chromium.distiller.DomWalker; |
9 import org.chromium.distiller.JavaScript; | |
10 import org.chromium.distiller.LogUtil; | 9 import org.chromium.distiller.LogUtil; |
11 import org.chromium.distiller.TableClassifier; | 10 import org.chromium.distiller.TableClassifier; |
12 | 11 |
12 import com.google.gwt.dom.client.AnchorElement; | |
13 import com.google.gwt.dom.client.Element; | 13 import com.google.gwt.dom.client.Element; |
14 import com.google.gwt.dom.client.Node; | 14 import com.google.gwt.dom.client.Node; |
15 import com.google.gwt.dom.client.Style; | 15 import com.google.gwt.dom.client.Style; |
16 import com.google.gwt.dom.client.TableElement; | 16 import com.google.gwt.dom.client.TableElement; |
17 import com.google.gwt.dom.client.Text; | 17 import com.google.gwt.dom.client.Text; |
18 import org.chromium.distiller.extractors.embeds.EmbedExtractor; | 18 import org.chromium.distiller.extractors.embeds.EmbedExtractor; |
19 import org.chromium.distiller.extractors.embeds.ImageExtractor; | 19 import org.chromium.distiller.extractors.embeds.ImageExtractor; |
20 import org.chromium.distiller.extractors.embeds.TwitterExtractor; | 20 import org.chromium.distiller.extractors.embeds.TwitterExtractor; |
21 import org.chromium.distiller.extractors.embeds.VimeoExtractor; | 21 import org.chromium.distiller.extractors.embeds.VimeoExtractor; |
22 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; | 22 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
148 // See crbug.com/692553, crbug.com/696556, and crbug.com/674557 | 148 // See crbug.com/692553, crbug.com/696556, and crbug.com/674557 |
149 return false; | 149 return false; |
150 } | 150 } |
151 | 151 |
152 // Create a placeholder for the elements we want to preserve. | 152 // Create a placeholder for the elements we want to preserve. |
153 if (WebTag.canBeNested(e.getTagName())) { | 153 if (WebTag.canBeNested(e.getTagName())) { |
154 builder.tag(new WebTag(e.getTagName(), WebTag.TagType.START)); | 154 builder.tag(new WebTag(e.getTagName(), WebTag.TagType.START)); |
155 } | 155 } |
156 | 156 |
157 switch (e.getTagName()) { | 157 switch (e.getTagName()) { |
158 case "A": | |
159 String editPattern = "action=edit&section="; | |
mdjones
2017/03/14 15:59:50
Is "action=edit" used for more than just editing?
wychen
2017/03/14 21:48:54
For "redlinks" (links to entries yet to be created
| |
160 boolean isEdit = AnchorElement.as(e).getHref().indexOf(editPattern) != -1; | |
161 if (isEdit) { | |
162 // Skip "edit section" on mediawiki. | |
163 // See crbug.com/647667. | |
164 return false; | |
165 } | |
166 break; | |
167 case "SPAN": | |
168 if (className.equals("mw-editsection")) { | |
169 // Skip "[edit]" on mediawiki desktop version. | |
170 // See crbug.com/647667. | |
171 return false; | |
172 } | |
173 break; | |
158 case "BR": | 174 case "BR": |
159 builder.lineBreak(e); | 175 builder.lineBreak(e); |
160 return false; | 176 return false; |
161 // Skip data tables, keep track of them to be extracted by RelevantElementsFinder | 177 // Skip data tables, keep track of them to be extracted by RelevantElementsFinder |
162 // later. | 178 // later. |
163 case "TABLE": | 179 case "TABLE": |
164 TableClassifier.Type type = TableClassifier.table(TableElement.as(e)); | 180 TableClassifier.Type type = TableClassifier.table(TableElement.as(e)); |
165 logTableInfo(e, type); | 181 logTableInfo(e, type); |
166 if (type == TableClassifier.Type.DATA) { | 182 if (type == TableClassifier.Type.DATA) { |
167 builder.dataTable(e); | 183 builder.dataTable(e); |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
227 Element parent = e.getParentElement(); | 243 Element parent = e.getParentElement(); |
228 LogUtil.logToConsole("TABLE: " + type + | 244 LogUtil.logToConsole("TABLE: " + type + |
229 ", id=" + e.getId() + | 245 ", id=" + e.getId() + |
230 ", class=" + e.getAttribute("class") + | 246 ", class=" + e.getAttribute("class") + |
231 ", parent=[" + parent.getTagName() + | 247 ", parent=[" + parent.getTagName() + |
232 ", id=" + parent.getId() + | 248 ", id=" + parent.getId() + |
233 ", class=" + parent.getAttribute("class") + | 249 ", class=" + parent.getAttribute("class") + |
234 "]"); | 250 "]"); |
235 } | 251 } |
236 } | 252 } |
OLD | NEW |