OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 package org.chromium.distiller.webdocument; | 5 package org.chromium.distiller.webdocument; |
6 | 6 |
7 import org.chromium.distiller.DomUtil; | 7 import org.chromium.distiller.DomUtil; |
8 import org.chromium.distiller.DomWalker; | 8 import org.chromium.distiller.DomWalker; |
9 import org.chromium.distiller.JavaScript; | |
10 import org.chromium.distiller.LogUtil; | 9 import org.chromium.distiller.LogUtil; |
11 import org.chromium.distiller.TableClassifier; | 10 import org.chromium.distiller.TableClassifier; |
12 | 11 |
| 12 import com.google.gwt.dom.client.AnchorElement; |
13 import com.google.gwt.dom.client.Element; | 13 import com.google.gwt.dom.client.Element; |
14 import com.google.gwt.dom.client.Node; | 14 import com.google.gwt.dom.client.Node; |
15 import com.google.gwt.dom.client.Style; | 15 import com.google.gwt.dom.client.Style; |
16 import com.google.gwt.dom.client.TableElement; | 16 import com.google.gwt.dom.client.TableElement; |
17 import com.google.gwt.dom.client.Text; | 17 import com.google.gwt.dom.client.Text; |
18 import org.chromium.distiller.extractors.embeds.EmbedExtractor; | 18 import org.chromium.distiller.extractors.embeds.EmbedExtractor; |
19 import org.chromium.distiller.extractors.embeds.ImageExtractor; | 19 import org.chromium.distiller.extractors.embeds.ImageExtractor; |
20 import org.chromium.distiller.extractors.embeds.TwitterExtractor; | 20 import org.chromium.distiller.extractors.embeds.TwitterExtractor; |
21 import org.chromium.distiller.extractors.embeds.VimeoExtractor; | 21 import org.chromium.distiller.extractors.embeds.VimeoExtractor; |
22 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; | 22 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; |
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
148 // See crbug.com/692553, crbug.com/696556, and crbug.com/674557 | 148 // See crbug.com/692553, crbug.com/696556, and crbug.com/674557 |
149 return false; | 149 return false; |
150 } | 150 } |
151 | 151 |
152 // Create a placeholder for the elements we want to preserve. | 152 // Create a placeholder for the elements we want to preserve. |
153 if (WebTag.canBeNested(e.getTagName())) { | 153 if (WebTag.canBeNested(e.getTagName())) { |
154 builder.tag(new WebTag(e.getTagName(), WebTag.TagType.START)); | 154 builder.tag(new WebTag(e.getTagName(), WebTag.TagType.START)); |
155 } | 155 } |
156 | 156 |
157 switch (e.getTagName()) { | 157 switch (e.getTagName()) { |
| 158 case "A": |
| 159 // The "section" parameter is to differentiate with "redlinks". |
| 160 // Ref: https://en.wikipedia.org/wiki/Wikipedia:Red_link |
 | 161 String editPattern = "action=edit&section="; |
| 162 boolean isEdit = AnchorElement.as(e).getHref().indexOf(editPatte
rn) != -1; |
| 163 if (isEdit) { |
| 164 // Skip "edit section" on mediawiki. |
| 165 // See crbug.com/647667. |
| 166 return false; |
| 167 } |
| 168 break; |
| 169 case "SPAN": |
| 170 if (className.equals("mw-editsection")) { |
| 171 // Skip "[edit]" on mediawiki desktop version. |
| 172 // See crbug.com/647667. |
| 173 return false; |
| 174 } |
| 175 break; |
158 case "BR": | 176 case "BR": |
159 builder.lineBreak(e); | 177 builder.lineBreak(e); |
160 return false; | 178 return false; |
161 // Skip data tables, keep track of them to be extracted by RelevantE
lementsFinder | 179 // Skip data tables, keep track of them to be extracted by RelevantE
lementsFinder |
162 // later. | 180 // later. |
163 case "TABLE": | 181 case "TABLE": |
164 TableClassifier.Type type = TableClassifier.table(TableElement.a
s(e)); | 182 TableClassifier.Type type = TableClassifier.table(TableElement.a
s(e)); |
165 logTableInfo(e, type); | 183 logTableInfo(e, type); |
166 if (type == TableClassifier.Type.DATA) { | 184 if (type == TableClassifier.Type.DATA) { |
167 builder.dataTable(e); | 185 builder.dataTable(e); |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
227 Element parent = e.getParentElement(); | 245 Element parent = e.getParentElement(); |
228 LogUtil.logToConsole("TABLE: " + type + | 246 LogUtil.logToConsole("TABLE: " + type + |
229 ", id=" + e.getId() + | 247 ", id=" + e.getId() + |
230 ", class=" + e.getAttribute("class") + | 248 ", class=" + e.getAttribute("class") + |
231 ", parent=[" + parent.getTagName() + | 249 ", parent=[" + parent.getTagName() + |
232 ", id=" + parent.getId() + | 250 ", id=" + parent.getId() + |
233 ", class=" + parent.getAttribute("class") + | 251 ", class=" + parent.getAttribute("class") + |
234 "]"); | 252 "]"); |
235 } | 253 } |
236 } | 254 } |
OLD | NEW |