Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(214)

Side by Side Diff: java/org/chromium/distiller/webdocument/DomConverter.java

Issue 1230583006: Fix for keeping lists structure (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: Using a new approach to keep list structure. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller.webdocument; 5 package org.chromium.distiller.webdocument;
6 6
7 import org.chromium.distiller.DomUtil; 7 import org.chromium.distiller.DomUtil;
8 import org.chromium.distiller.DomWalker; 8 import org.chromium.distiller.DomWalker;
9 import org.chromium.distiller.LogUtil; 9 import org.chromium.distiller.LogUtil;
10 import org.chromium.distiller.TableClassifier; 10 import org.chromium.distiller.TableClassifier;
11 11
12 import com.google.gwt.dom.client.Element; 12 import com.google.gwt.dom.client.Element;
13 import com.google.gwt.dom.client.Node; 13 import com.google.gwt.dom.client.Node;
14 import com.google.gwt.dom.client.Style; 14 import com.google.gwt.dom.client.Style;
15 import com.google.gwt.dom.client.TableElement; 15 import com.google.gwt.dom.client.TableElement;
16 import com.google.gwt.dom.client.Text; 16 import com.google.gwt.dom.client.Text;
17 import org.chromium.distiller.extractors.embeds.EmbedExtractor; 17 import org.chromium.distiller.extractors.embeds.EmbedExtractor;
18 import org.chromium.distiller.extractors.embeds.ImageExtractor; 18 import org.chromium.distiller.extractors.embeds.ImageExtractor;
19 import org.chromium.distiller.extractors.embeds.TwitterExtractor; 19 import org.chromium.distiller.extractors.embeds.TwitterExtractor;
20 import org.chromium.distiller.extractors.embeds.VimeoExtractor; 20 import org.chromium.distiller.extractors.embeds.VimeoExtractor;
21 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; 21 import org.chromium.distiller.extractors.embeds.YouTubeExtractor;
22
23 import java.util.ArrayList; 22 import java.util.ArrayList;
24 import java.util.HashSet; 23 import java.util.HashSet;
25 import java.util.List; 24 import java.util.List;
26 import java.util.Set; 25 import java.util.Set;
27 26
28 /** 27 /**
29 * This DomWalker.Visitor creates a WebDocument from the walked DOM. It skips hidden and other 28 * This DomWalker.Visitor creates a WebDocument from the walked DOM. It skips hidden and other
30 * elements that should not be in the created document. Some of these skipped elements (hidden 29 * elements that should not be in the created document. Some of these skipped elements (hidden
31 * elements and data tables) are available for retrieval after processing. 30 * elements and data tables) are available for retrieval after processing.
32 */ 31 */
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
93 for (EmbedExtractor extractor : extractors) { 92 for (EmbedExtractor extractor : extractors) {
94 WebElement embed = extractor.extract(e); 93 WebElement embed = extractor.extract(e);
95 if (embed != null) { 94 if (embed != null) {
96 builder.embed(embed); 95 builder.embed(embed);
97 return false; 96 return false;
98 } 97 }
99 } 98 }
100 } 99 }
101 100
102 switch (e.getTagName()) { 101 switch (e.getTagName()) {
102 case "OL":
103 builder.embed(new OLStart());
mdjones 2015/07/29 17:12:16 I think using the "embed" method here is a bit amb
104 break;
105 case "UL":
106 builder.embed(new ULStart());
107 break;
108 case "LI":
109 builder.embed(new LIStart());
110 break;
103 case "BR": 111 case "BR":
104 builder.lineBreak(e); 112 builder.lineBreak(e);
105 return false; 113 return false;
106 // Skip data tables, keep track of them to be extracted by RelevantElementsFinder 114 // Skip data tables, keep track of them to be extracted by RelevantElementsFinder
107 // later. 115 // later.
108 case "TABLE": 116 case "TABLE":
109 TableClassifier.Type type = TableClassifier.table(TableElement.as(e)); 117 TableClassifier.Type type = TableClassifier.table(TableElement.as(e));
110 logTableInfo(e, type); 118 logTableInfo(e, type);
111 if (type == TableClassifier.Type.DATA) { 119 if (type == TableClassifier.Type.DATA) {
112 builder.dataTable(e); 120 builder.dataTable(e);
(...skipping 22 matching lines...) Expand all
135 case "LINK": 143 case "LINK":
136 case "NOSCRIPT": 144 case "NOSCRIPT":
137 return false; 145 return false;
138 } 146 }
139 builder.startElement(e); 147 builder.startElement(e);
140 return true; 148 return true;
141 } 149 }
142 150
143 @Override 151 @Override
144 public void exit(Node n) { 152 public void exit(Node n) {
153 if (n.getNodeType() == Node.ELEMENT_NODE) {
154 Element e = Element.as(n);
155 switch (e.getTagName()) {
156 case "OL":
157 builder.embed(new OLEnd());
158 break;
159 case "UL":
160 builder.embed(new ULEnd());
161 break;
162 case "LI":
163 builder.embed(new LIEnd());
164 break;
165 }
166 }
145 builder.endElement(); 167 builder.endElement();
146 } 168 }
147 169
148 private static void logVisibilityInfo(Element e, boolean visible) { 170 private static void logVisibilityInfo(Element e, boolean visible) {
149 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return; 171 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return;
150 Style style = DomUtil.getComputedStyle(e); 172 Style style = DomUtil.getComputedStyle(e);
151 LogUtil.logToConsole((visible ? "KEEP " : "SKIP ") + e.getTagName() + 173 LogUtil.logToConsole((visible ? "KEEP " : "SKIP ") + e.getTagName() +
152 ": id=" + e.getId() + 174 ": id=" + e.getId() +
153 ", dsp=" + style.getDisplay() + 175 ", dsp=" + style.getDisplay() +
154 ", vis=" + style.getVisibility() + 176 ", vis=" + style.getVisibility() +
155 ", opaq=" + style.getOpacity()); 177 ", opaq=" + style.getOpacity());
156 } 178 }
157 179
158 private static void logTableInfo(Element e, TableClassifier.Type type) { 180 private static void logTableInfo(Element e, TableClassifier.Type type) {
159 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return; 181 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return;
160 Element parent = e.getParentElement(); 182 Element parent = e.getParentElement();
161 LogUtil.logToConsole("TABLE: " + type + 183 LogUtil.logToConsole("TABLE: " + type +
162 ", id=" + e.getId() + 184 ", id=" + e.getId() +
163 ", class=" + e.getClassName() + 185 ", class=" + e.getClassName() +
164 ", parent=[" + parent.getTagName() + 186 ", parent=[" + parent.getTagName() +
165 ", id=" + parent.getId() + 187 ", id=" + parent.getId() +
166 ", class=" + parent.getClassName() + 188 ", class=" + parent.getClassName() +
167 "]"); 189 "]");
168 } 190 }
169 } 191 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698