Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(20)

Side by Side Diff: java/org/chromium/distiller/webdocument/DomConverter.java

Issue 2729233002: Ignore the "Edit" links in wiki pages (Closed)
Patch Set: fix redlink and desktop Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | javatests/org/chromium/distiller/webdocument/DomConverterTest.java » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller.webdocument; 5 package org.chromium.distiller.webdocument;
6 6
7 import org.chromium.distiller.DomUtil; 7 import org.chromium.distiller.DomUtil;
8 import org.chromium.distiller.DomWalker; 8 import org.chromium.distiller.DomWalker;
9 import org.chromium.distiller.JavaScript;
10 import org.chromium.distiller.LogUtil; 9 import org.chromium.distiller.LogUtil;
11 import org.chromium.distiller.TableClassifier; 10 import org.chromium.distiller.TableClassifier;
12 11
12 import com.google.gwt.dom.client.AnchorElement;
13 import com.google.gwt.dom.client.Element; 13 import com.google.gwt.dom.client.Element;
14 import com.google.gwt.dom.client.Node; 14 import com.google.gwt.dom.client.Node;
15 import com.google.gwt.dom.client.Style; 15 import com.google.gwt.dom.client.Style;
16 import com.google.gwt.dom.client.TableElement; 16 import com.google.gwt.dom.client.TableElement;
17 import com.google.gwt.dom.client.Text; 17 import com.google.gwt.dom.client.Text;
18 import org.chromium.distiller.extractors.embeds.EmbedExtractor; 18 import org.chromium.distiller.extractors.embeds.EmbedExtractor;
19 import org.chromium.distiller.extractors.embeds.ImageExtractor; 19 import org.chromium.distiller.extractors.embeds.ImageExtractor;
20 import org.chromium.distiller.extractors.embeds.TwitterExtractor; 20 import org.chromium.distiller.extractors.embeds.TwitterExtractor;
21 import org.chromium.distiller.extractors.embeds.VimeoExtractor; 21 import org.chromium.distiller.extractors.embeds.VimeoExtractor;
22 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; 22 import org.chromium.distiller.extractors.embeds.YouTubeExtractor;
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
148 // See crbug.com/692553, crbug.com/696556, and crbug.com/674557 148 // See crbug.com/692553, crbug.com/696556, and crbug.com/674557
149 return false; 149 return false;
150 } 150 }
151 151
152 // Create a placeholder for the elements we want to preserve. 152 // Create a placeholder for the elements we want to preserve.
153 if (WebTag.canBeNested(e.getTagName())) { 153 if (WebTag.canBeNested(e.getTagName())) {
154 builder.tag(new WebTag(e.getTagName(), WebTag.TagType.START)); 154 builder.tag(new WebTag(e.getTagName(), WebTag.TagType.START));
155 } 155 }
156 156
157 switch (e.getTagName()) { 157 switch (e.getTagName()) {
158 case "A":
159 String editPattern = "action=edit&section=";
mdjones 2017/03/14 15:59:50 Is "action=edit" used for more than just editing?
wychen 2017/03/14 21:48:54 For "redlinks" (links to entries yet to be created
160 boolean isEdit = AnchorElement.as(e).getHref().indexOf(editPattern) != -1;
161 if (isEdit) {
162 // Skip "edit section" on mediawiki.
163 // See crbug.com/647667.
164 return false;
165 }
166 break;
167 case "SPAN":
168 if (className.equals("mw-editsection")) {
169 // Skip "[edit]" on mediawiki desktop version.
170 // See crbug.com/647667.
171 return false;
172 }
173 break;
158 case "BR": 174 case "BR":
159 builder.lineBreak(e); 175 builder.lineBreak(e);
160 return false; 176 return false;
161 // Skip data tables, keep track of them to be extracted by RelevantElementsFinder 177 // Skip data tables, keep track of them to be extracted by RelevantElementsFinder
162 // later. 178 // later.
163 case "TABLE": 179 case "TABLE":
164 TableClassifier.Type type = TableClassifier.table(TableElement.as(e)); 180 TableClassifier.Type type = TableClassifier.table(TableElement.as(e));
165 logTableInfo(e, type); 181 logTableInfo(e, type);
166 if (type == TableClassifier.Type.DATA) { 182 if (type == TableClassifier.Type.DATA) {
167 builder.dataTable(e); 183 builder.dataTable(e);
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
227 Element parent = e.getParentElement(); 243 Element parent = e.getParentElement();
228 LogUtil.logToConsole("TABLE: " + type + 244 LogUtil.logToConsole("TABLE: " + type +
229 ", id=" + e.getId() + 245 ", id=" + e.getId() +
230 ", class=" + e.getAttribute("class") + 246 ", class=" + e.getAttribute("class") +
231 ", parent=[" + parent.getTagName() + 247 ", parent=[" + parent.getTagName() +
232 ", id=" + parent.getId() + 248 ", id=" + parent.getId() +
233 ", class=" + parent.getAttribute("class") + 249 ", class=" + parent.getAttribute("class") +
234 "]"); 250 "]");
235 } 251 }
236 } 252 }
OLD NEW
« no previous file with comments | « no previous file | javatests/org/chromium/distiller/webdocument/DomConverterTest.java » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698