Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(540)

Side by Side Diff: java/org/chromium/distiller/webdocument/DomConverter.java

Issue 1230583006: Fix for keeping lists structure (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: Small code refactor for more appropriate names. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 package org.chromium.distiller.webdocument; 5 package org.chromium.distiller.webdocument;
6 6
7 import org.chromium.distiller.DomUtil; 7 import org.chromium.distiller.DomUtil;
8 import org.chromium.distiller.DomWalker; 8 import org.chromium.distiller.DomWalker;
9 import org.chromium.distiller.LogUtil; 9 import org.chromium.distiller.LogUtil;
10 import org.chromium.distiller.TableClassifier; 10 import org.chromium.distiller.TableClassifier;
11 11
12 import com.google.gwt.dom.client.Element; 12 import com.google.gwt.dom.client.Element;
13 import com.google.gwt.dom.client.Node; 13 import com.google.gwt.dom.client.Node;
14 import com.google.gwt.dom.client.Style; 14 import com.google.gwt.dom.client.Style;
15 import com.google.gwt.dom.client.TableElement; 15 import com.google.gwt.dom.client.TableElement;
16 import com.google.gwt.dom.client.Text; 16 import com.google.gwt.dom.client.Text;
17 import org.chromium.distiller.extractors.embeds.EmbedExtractor; 17 import org.chromium.distiller.extractors.embeds.EmbedExtractor;
18 import org.chromium.distiller.extractors.embeds.ImageExtractor; 18 import org.chromium.distiller.extractors.embeds.ImageExtractor;
19 import org.chromium.distiller.extractors.embeds.TwitterExtractor; 19 import org.chromium.distiller.extractors.embeds.TwitterExtractor;
20 import org.chromium.distiller.extractors.embeds.VimeoExtractor; 20 import org.chromium.distiller.extractors.embeds.VimeoExtractor;
21 import org.chromium.distiller.extractors.embeds.YouTubeExtractor; 21 import org.chromium.distiller.extractors.embeds.YouTubeExtractor;
22
23 import java.util.ArrayList; 22 import java.util.ArrayList;
wychen 2015/08/01 01:00:20 nitpick: why deleting the empty line above?
24 import java.util.HashSet; 23 import java.util.HashSet;
25 import java.util.List; 24 import java.util.List;
26 import java.util.Set; 25 import java.util.Set;
27 26
28 /** 27 /**
29 * This DomWalker.Visitor creates a WebDocument from the walked DOM. It skips hidden and other 28 * This DomWalker.Visitor creates a WebDocument from the walked DOM. It skips hidden and other
30 * elements that should not be in the created document. Some of these skipped elements (hidden 29 * elements that should not be in the created document. Some of these skipped elements (hidden
31 * elements and data tables) are available for retrieval after processing. 30 * elements and data tables) are available for retrieval after processing.
32 */ 31 */
33 public class DomConverter implements DomWalker.Visitor { 32 public class DomConverter implements DomWalker.Visitor {
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
93 for (EmbedExtractor extractor : extractors) { 92 for (EmbedExtractor extractor : extractors) {
94 WebElement embed = extractor.extract(e); 93 WebElement embed = extractor.extract(e);
95 if (embed != null) { 94 if (embed != null) {
96 builder.embed(embed); 95 builder.embed(embed);
97 return false; 96 return false;
98 } 97 }
99 } 98 }
100 } 99 }
101 100
102 switch (e.getTagName()) { 101 switch (e.getTagName()) {
102 case "OL":
103 case "UL":
104 case "LI":
105 builder.list(new PlaceHolder(e.getTagName(), PlaceHolder.TagType.START));
106 break;
103 case "BR": 107 case "BR":
104 builder.lineBreak(e); 108 builder.lineBreak(e);
105 return false; 109 return false;
106 // Skip data tables, keep track of them to be extracted by RelevantElementsFinder 110 // Skip data tables, keep track of them to be extracted by RelevantElementsFinder
107 // later. 111 // later.
108 case "TABLE": 112 case "TABLE":
109 TableClassifier.Type type = TableClassifier.table(TableElement.as(e)); 113 TableClassifier.Type type = TableClassifier.table(TableElement.as(e));
110 logTableInfo(e, type); 114 logTableInfo(e, type);
111 if (type == TableClassifier.Type.DATA) { 115 if (type == TableClassifier.Type.DATA) {
112 builder.dataTable(e); 116 builder.dataTable(e);
(...skipping 22 matching lines...) Expand all
135 case "LINK": 139 case "LINK":
136 case "NOSCRIPT": 140 case "NOSCRIPT":
137 return false; 141 return false;
138 } 142 }
139 builder.startElement(e); 143 builder.startElement(e);
140 return true; 144 return true;
141 } 145 }
142 146
143 @Override 147 @Override
144 public void exit(Node n) { 148 public void exit(Node n) {
149 if (n.getNodeType() == Node.ELEMENT_NODE) {
150 Element e = Element.as(n);
151 switch (e.getTagName()) {
152 case "OL":
153 case "UL":
154 case "LI":
155 builder.list(new PlaceHolder(e.getTagName(), PlaceHolder.TagType.END));
156 break;
157 }
158 }
145 builder.endElement(); 159 builder.endElement();
146 } 160 }
147 161
148 private static void logVisibilityInfo(Element e, boolean visible) { 162 private static void logVisibilityInfo(Element e, boolean visible) {
149 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return; 163 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return;
150 Style style = DomUtil.getComputedStyle(e); 164 Style style = DomUtil.getComputedStyle(e);
151 LogUtil.logToConsole((visible ? "KEEP " : "SKIP ") + e.getTagName() + 165 LogUtil.logToConsole((visible ? "KEEP " : "SKIP ") + e.getTagName() +
152 ": id=" + e.getId() + 166 ": id=" + e.getId() +
153 ", dsp=" + style.getDisplay() + 167 ", dsp=" + style.getDisplay() +
154 ", vis=" + style.getVisibility() + 168 ", vis=" + style.getVisibility() +
155 ", opaq=" + style.getOpacity()); 169 ", opaq=" + style.getOpacity());
156 } 170 }
157 171
158 private static void logTableInfo(Element e, TableClassifier.Type type) { 172 private static void logTableInfo(Element e, TableClassifier.Type type) {
159 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return; 173 if (!LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) return;
160 Element parent = e.getParentElement(); 174 Element parent = e.getParentElement();
161 LogUtil.logToConsole("TABLE: " + type + 175 LogUtil.logToConsole("TABLE: " + type +
162 ", id=" + e.getId() + 176 ", id=" + e.getId() +
163 ", class=" + e.getClassName() + 177 ", class=" + e.getClassName() +
164 ", parent=[" + parent.getTagName() + 178 ", parent=[" + parent.getTagName() +
165 ", id=" + parent.getId() + 179 ", id=" + parent.getId() +
166 ", class=" + parent.getClassName() + 180 ", class=" + parent.getClassName() +
167 "]"); 181 "]");
168 } 182 }
169 } 183 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698