| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 package org.chromium.distiller.webdocument; | 5 package org.chromium.distiller.webdocument; |
| 6 | 6 |
| 7 import org.chromium.distiller.DomUtil; | 7 import org.chromium.distiller.DomUtil; |
| 8 import org.chromium.distiller.DomWalker; | 8 import org.chromium.distiller.DomWalker; |
| 9 import org.chromium.distiller.JavaScript; | |
| 10 import org.chromium.distiller.LogUtil; | 9 import org.chromium.distiller.LogUtil; |
| 11 import org.chromium.distiller.TableClassifier; | 10 import org.chromium.distiller.TableClassifier; |
| 12 | 11 |
| 12 import com.google.gwt.dom.client.AnchorElement; |
| 13 import com.google.gwt.dom.client.Element; | 13 import com.google.gwt.dom.client.Element; |
| 14 import com.google.gwt.dom.client.Node; | 14 import com.google.gwt.dom.client.Node; |
| 15 import com.google.gwt.dom.client.Style; | 15 import com.google.gwt.dom.client.Style; |
| 16 import com.google.gwt.dom.client.TableElement; | 16 import com.google.gwt.dom.client.TableElement; |
| 17 import com.google.gwt.dom.client.Text; | 17 import com.google.gwt.dom.client.Text; |
| 18 import org.chromium.distiller.extractors.embeds.EmbedExtractor; | 18 import org.chromium.distiller.extractors.embeds.EmbedExtractor; |
| 19 import org.chromium.distiller.extractors.embeds.ImageExtractor; | 19 import org.chromium.distiller.extractors.embeds.ImageExtractor; |
| 20 import org.chromium.distiller.extractors.embeds.TwitterExtractor; | 20 import org.chromium.distiller.extractors.embeds.TwitterExtractor; |
| 21 import org.chromium.distiller.extractors.embeds.VimeoExtractor; | 21 import org.chromium.distiller.extractors.embeds.VimeoExtractor; |
| 22 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; | 22 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; |
| (...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 148 // See crbug.com/692553, crbug.com/696556, and crbug.com/674557 | 148 // See crbug.com/692553, crbug.com/696556, and crbug.com/674557 |
| 149 return false; | 149 return false; |
| 150 } | 150 } |
| 151 | 151 |
| 152 // Create a placeholder for the elements we want to preserve. | 152 // Create a placeholder for the elements we want to preserve. |
| 153 if (WebTag.canBeNested(e.getTagName())) { | 153 if (WebTag.canBeNested(e.getTagName())) { |
| 154 builder.tag(new WebTag(e.getTagName(), WebTag.TagType.START)); | 154 builder.tag(new WebTag(e.getTagName(), WebTag.TagType.START)); |
| 155 } | 155 } |
| 156 | 156 |
| 157 switch (e.getTagName()) { | 157 switch (e.getTagName()) { |
| 158 case "A": |
| 159 // The "section" parameter differentiates these links from "redlinks". |
| 160 // Ref: https://en.wikipedia.org/wiki/Wikipedia:Red_link |
| 161 String editPattern = "action=edit&section="; |
| 162 boolean isEdit = AnchorElement.as(e).getHref().indexOf(editPatte
rn) != -1; |
| 163 if (isEdit) { |
| 164 // Skip "edit section" on mediawiki. |
| 165 // See crbug.com/647667. |
| 166 return false; |
| 167 } |
| 168 break; |
| 169 case "SPAN": |
| 170 if (className.equals("mw-editsection")) { |
| 171 // Skip "[edit]" on mediawiki desktop version. |
| 172 // See crbug.com/647667. |
| 173 return false; |
| 174 } |
| 175 break; |
| 158 case "BR": | 176 case "BR": |
| 159 builder.lineBreak(e); | 177 builder.lineBreak(e); |
| 160 return false; | 178 return false; |
| 161 // Skip data tables, keep track of them to be extracted by RelevantE
lementsFinder | 179 // Skip data tables, keep track of them to be extracted by RelevantE
lementsFinder |
| 162 // later. | 180 // later. |
| 163 case "TABLE": | 181 case "TABLE": |
| 164 TableClassifier.Type type = TableClassifier.table(TableElement.a
s(e)); | 182 TableClassifier.Type type = TableClassifier.table(TableElement.a
s(e)); |
| 165 logTableInfo(e, type); | 183 logTableInfo(e, type); |
| 166 if (type == TableClassifier.Type.DATA) { | 184 if (type == TableClassifier.Type.DATA) { |
| 167 builder.dataTable(e); | 185 builder.dataTable(e); |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 227 Element parent = e.getParentElement(); | 245 Element parent = e.getParentElement(); |
| 228 LogUtil.logToConsole("TABLE: " + type + | 246 LogUtil.logToConsole("TABLE: " + type + |
| 229 ", id=" + e.getId() + | 247 ", id=" + e.getId() + |
| 230 ", class=" + e.getAttribute("class") + | 248 ", class=" + e.getAttribute("class") + |
| 231 ", parent=[" + parent.getTagName() + | 249 ", parent=[" + parent.getTagName() + |
| 232 ", id=" + parent.getId() + | 250 ", id=" + parent.getId() + |
| 233 ", class=" + parent.getAttribute("class") + | 251 ", class=" + parent.getAttribute("class") + |
| 234 "]"); | 252 "]"); |
| 235 } | 253 } |
| 236 } | 254 } |
| OLD | NEW |