Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller.webdocument; | 5 package org.chromium.distiller.webdocument; |
| 6 | 6 |
| 7 import org.chromium.distiller.DomUtil; | 7 import org.chromium.distiller.DomUtil; |
| 8 import org.chromium.distiller.DomWalker; | 8 import org.chromium.distiller.DomWalker; |
| 9 import org.chromium.distiller.JavaScript; | |
| 10 import org.chromium.distiller.LogUtil; | 9 import org.chromium.distiller.LogUtil; |
| 11 import org.chromium.distiller.TableClassifier; | 10 import org.chromium.distiller.TableClassifier; |
| 12 | 11 |
| 12 import com.google.gwt.dom.client.AnchorElement; | |
| 13 import com.google.gwt.dom.client.Element; | 13 import com.google.gwt.dom.client.Element; |
| 14 import com.google.gwt.dom.client.Node; | 14 import com.google.gwt.dom.client.Node; |
| 15 import com.google.gwt.dom.client.Style; | 15 import com.google.gwt.dom.client.Style; |
| 16 import com.google.gwt.dom.client.TableElement; | 16 import com.google.gwt.dom.client.TableElement; |
| 17 import com.google.gwt.dom.client.Text; | 17 import com.google.gwt.dom.client.Text; |
| 18 import org.chromium.distiller.extractors.embeds.EmbedExtractor; | 18 import org.chromium.distiller.extractors.embeds.EmbedExtractor; |
| 19 import org.chromium.distiller.extractors.embeds.ImageExtractor; | 19 import org.chromium.distiller.extractors.embeds.ImageExtractor; |
| 20 import org.chromium.distiller.extractors.embeds.TwitterExtractor; | 20 import org.chromium.distiller.extractors.embeds.TwitterExtractor; |
| 21 import org.chromium.distiller.extractors.embeds.VimeoExtractor; | 21 import org.chromium.distiller.extractors.embeds.VimeoExtractor; |
| 22 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; | 22 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; |
| (...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 148 // See crbug.com/692553, crbug.com/696556, and crbug.com/674557 | 148 // See crbug.com/692553, crbug.com/696556, and crbug.com/674557 |
| 149 return false; | 149 return false; |
| 150 } | 150 } |
| 151 | 151 |
| 152 // Create a placeholder for the elements we want to preserve. | 152 // Create a placeholder for the elements we want to preserve. |
| 153 if (WebTag.canBeNested(e.getTagName())) { | 153 if (WebTag.canBeNested(e.getTagName())) { |
| 154 builder.tag(new WebTag(e.getTagName(), WebTag.TagType.START)); | 154 builder.tag(new WebTag(e.getTagName(), WebTag.TagType.START)); |
| 155 } | 155 } |
| 156 | 156 |
| 157 switch (e.getTagName()) { | 157 switch (e.getTagName()) { |
| 158 case "A": | |
| 159 String editPattern = "action=edit&section="; | |
|
mdjones
2017/03/14 15:59:50
Is "action=edit" used for more than just editing?
wychen
2017/03/14 21:48:54
For "redlinks" (links to entries yet to be created
| |
| 160 boolean isEdit = AnchorElement.as(e).getHref().indexOf(editPattern) != -1; | |
| 161 if (isEdit) { | |
| 162 // Skip "edit section" on mediawiki. | |
| 163 // See crbug.com/647667. | |
| 164 return false; | |
| 165 } | |
| 166 break; | |
| 167 case "SPAN": | |
| 168 if (className.equals("mw-editsection")) { | |
| 169 // Skip "[edit]" on mediawiki desktop version. | |
| 170 // See crbug.com/647667. | |
| 171 return false; | |
| 172 } | |
| 173 break; | |
| 158 case "BR": | 174 case "BR": |
| 159 builder.lineBreak(e); | 175 builder.lineBreak(e); |
| 160 return false; | 176 return false; |
| 161 // Skip data tables, keep track of them to be extracted by RelevantElementsFinder | 177 // Skip data tables, keep track of them to be extracted by RelevantElementsFinder |
| 162 // later. | 178 // later. |
| 163 case "TABLE": | 179 case "TABLE": |
| 164 TableClassifier.Type type = TableClassifier.table(TableElement.as(e)); | 180 TableClassifier.Type type = TableClassifier.table(TableElement.as(e)); |
| 165 logTableInfo(e, type); | 181 logTableInfo(e, type); |
| 166 if (type == TableClassifier.Type.DATA) { | 182 if (type == TableClassifier.Type.DATA) { |
| 167 builder.dataTable(e); | 183 builder.dataTable(e); |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 227 Element parent = e.getParentElement(); | 243 Element parent = e.getParentElement(); |
| 228 LogUtil.logToConsole("TABLE: " + type + | 244 LogUtil.logToConsole("TABLE: " + type + |
| 229 ", id=" + e.getId() + | 245 ", id=" + e.getId() + |
| 230 ", class=" + e.getAttribute("class") + | 246 ", class=" + e.getAttribute("class") + |
| 231 ", parent=[" + parent.getTagName() + | 247 ", parent=[" + parent.getTagName() + |
| 232 ", id=" + parent.getId() + | 248 ", id=" + parent.getId() + |
| 233 ", class=" + parent.getAttribute("class") + | 249 ", class=" + parent.getAttribute("class") + |
| 234 "]"); | 250 "]"); |
| 235 } | 251 } |
| 236 } | 252 } |
| OLD | NEW |